Code Coverage
 
Lines
Functions and Methods
Classes and Traits
Total
92.31% covered (success)
92.31%
132 / 143
91.67% covered (success)
91.67%
11 / 12
CRAP
0.00% covered (danger)
0.00%
0 / 1
Parser
92.31% covered (success)
92.31%
132 / 143
91.67% covered (success)
91.67%
11 / 12
63.75
0.00% covered (danger)
0.00%
0 / 1
 getObjectStreams
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getObjectMap
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 getFonts
100.00% covered (success)
100.00%
1 / 1
100.00% covered (success)
100.00%
1 / 1
1
 parseFile
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 parseData
100.00% covered (success)
100.00%
2 / 2
100.00% covered (success)
100.00%
1 / 1
1
 parse
100.00% covered (success)
100.00%
25 / 25
100.00% covered (success)
100.00%
1 / 1
15
 initFile
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
2
 initData
100.00% covered (success)
100.00%
5 / 5
100.00% covered (success)
100.00%
1 / 1
1
 mapObjects
100.00% covered (success)
100.00%
47 / 47
100.00% covered (success)
100.00%
1 / 1
9
 mapFonts
64.52% covered (warning)
64.52%
20 / 31
0.00% covered (danger)
0.00%
0 / 1
27.44
 filterPages
100.00% covered (success)
100.00%
12 / 12
100.00% covered (success)
100.00%
1 / 1
7
 getObjects
100.00% covered (success)
100.00%
8 / 8
100.00% covered (success)
100.00%
1 / 1
7
1<?php
2/**
3 * Pop PHP Framework (http://www.popphp.org/)
4 *
5 * @link       https://github.com/popphp/popphp-framework
6 * @author     Nick Sagona, III <dev@nolainteractive.com>
7 * @copyright  Copyright (c) 2009-2023 NOLA Interactive, LLC. (http://www.nolainteractive.com)
8 * @license    http://www.popphp.org/license     New BSD License
9 */
10
11/**
12 * @namespace
13 */
14namespace Pop\Pdf\Build;
15
16use Pop\Pdf\Document\AbstractDocument;
17
18/**
19 * Pdf parser class
20 *
21 * @category   Pop
22 * @package    Pop\Pdf
23 * @author     Nick Sagona, III <dev@nolainteractive.com>
24 * @copyright  Copyright (c) 2009-2023 NOLA Interactive, LLC. (http://www.nolainteractive.com)
25 * @license    http://www.popphp.org/license     New BSD License
26 * @version    4.2.0
27 */
class Parser extends AbstractParser
{

    /**
     * Parsed object data streams
     *
     * Each entry is the full text of one "obj ... endobj" match found by parse().
     * @var array
     */
    protected $objectStreams = [];

    /**
     * Object map
     *
     * Filled by mapObjects() with the keys 'root', 'parent', 'info', 'pages' and
     * 'streams', each present only when a matching object stream was found.
     * @var array
     */
    protected $objectMap = [];

    /**
     * Document fonts
     *
     * Each entry is an array with the keys 'name', 'index' and 'ref'.
     * @var array
     */
    protected $fonts = [];

    /**
     * Get the object streams
     *
     * @return array
     */
    public function getObjectStreams()
    {
        return $this->objectStreams;
    }

    /**
     * Get the object map
     *
     * @return array
     */
    public function getObjectMap()
    {
        return $this->objectMap;
    }

    /**
     * Get the document fonts
     *
     * @return array
     */
    public function getFonts()
    {
        return $this->fonts;
    }

    /**
     * Parse from file
     *
     * @param  string $file
     * @param  mixed  $pages  A single page number or an array of page numbers to keep; null keeps all
     * @throws Exception
     * @return AbstractDocument
     */
    public function parseFile($file, $pages = null)
    {
        $this->initFile($file);
        return $this->parse($pages);
    }

    /**
     * Parse from raw data stream
     *
     * @param  string $data
     * @param  mixed  $pages  A single page number or an array of page numbers to keep; null keeps all
     * @return AbstractDocument
     */
    public function parseData($data, $pages = null)
    {
        $this->initData($data);
        return $this->parse($pages);
    }

    /**
     * Parse the data stream
     *
     * Collects the object streams from the current data, maps them, optionally
     * filters the pages, and builds a new document from the result.
     *
     * @param  mixed  $pages  A single page number or an array of page numbers to keep; null keeps all
     * @return AbstractDocument
     */
    public function parse($pages = null)
    {
        // Only the matched text is needed, so offsets are not captured.
        $matches = [];
        preg_match_all('/\d*\s\d*\sobj(.*?)endobj/sm', $this->data, $matches);

        if (!empty($matches[0])) {
            foreach ($matches[0] as $objectStream) {
                // Linearization and metadata objects are not imported
                $isLinearized = (strpos($objectStream, '/Linearized') !== false);
                $isMetadata   = (strpos($objectStream, '/Type/Metadata') !== false);
                if (!$isLinearized && !$isMetadata) {
                    $this->objectStreams[] = $objectStream;
                }
            }
        }

        // Map the objects by parsing the object streams
        $this->mapObjects();

        if (isset($this->objectMap['pages'])) {
            // Map fonts, if any
            if (isset($this->objectMap['streams'])) {
                $this->mapFonts();
            }
            // If certain pages are to be imported, filter out the unwanted pages
            if (null !== $pages) {
                $this->filterPages($pages);
            }
        }

        $doc = new \Pop\Pdf\Document();

        if (isset($this->objectMap['root']['object'])) {
            $doc->setVersion($this->objectMap['root']['object']->getVersion());
        }
        if (isset($this->objectMap['info']['object'])) {
            $doc->setMetadata($this->objectMap['info']['object']->getMetadata());
        }

        $doc->importObjects($this->getObjects());
        $doc->importFonts($this->getFonts());

        if (isset($this->objectMap['pages'])) {
            foreach ($this->objectMap['pages'] as $i => $page) {
                $pg = new \Pop\Pdf\Document\Page($page['width'], $page['height'], $i);
                $pg->importPageObject($page['object']);
                $doc->addPage($pg);
            }
        }

        return $doc;
    }

    /**
     * Initialize the file and get the data
     *
     * Resets any previously parsed state so the parser can be reused.
     *
     * @param  string $file
     * @throws Exception If the file does not exist or cannot be read
     * @return Parser
     */
    protected function initFile($file)
    {
        if (!file_exists($file)) {
            throw new Exception('Error: That PDF file does not exist.');
        }

        $data = file_get_contents($file);

        // file_get_contents() returns false on failure (e.g. unreadable file);
        // fail here instead of parsing a boolean as PDF data.
        if ($data === false) {
            throw new Exception('Error: Unable to read that PDF file.');
        }

        $this->file = $file;
        $this->data = $data;

        $this->objectStreams = [];
        $this->objectMap     = [];
        $this->fonts         = [];

        return $this;
    }

    /**
     * Initialize data
     *
     * Resets any previously parsed state so the parser can be reused.
     *
     * @param  string $data
     * @return Parser
     */
    protected function initData($data)
    {
        $this->data = $data;

        $this->objectStreams = [];
        $this->objectMap     = [];
        $this->fonts         = [];

        return $this;
    }

    /**
     * Map the objects
     *
     * Parses each collected object stream into its PdfObject class, based on
     * the type reported by getStreamType(), and records it in the object map.
     * Streams of any other type are skipped.
     *
     * @return void
     */
    protected function mapObjects()
    {
        foreach ($this->objectStreams as $stream) {
            switch ($this->getStreamType($stream)) {
                case 'root':
                    $root = PdfObject\RootObject::parse($stream);
                    $root->setImported(true);
                    // Three characters starting at offset 5 of the data,
                    // e.g. "1.7" when the data begins with "%PDF-1.7"
                    $root->setVersion(substr($this->data, 5, 3));
                    $this->objectMap['root'] = [
                        'stream' => $stream,
                        'object' => $root,
                        'index'  => $root->getIndex(),
                        'parent' => $root->getParentIndex()
                    ];
                    break;
                case 'parent':
                    $parent = PdfObject\ParentObject::parse($stream);
                    $parent->setImported(true);
                    $this->objectMap['parent'] = [
                        'stream' => $stream,
                        'object' => $parent,
                        'index'  => $parent->getIndex(),
                        'count'  => $parent->getCount(),
                        'kids'   => $parent->getKids()
                    ];
                    break;
                case 'info':
                    $info = PdfObject\InfoObject::parse($stream);
                    $info->setImported(true);
                    $this->objectMap['info'] = [
                        'stream' => $stream,
                        'object' => $info,
                        'index'  => $info->getIndex(),
                    ];
                    break;
                case 'page':
                    if (!isset($this->objectMap['pages'])) {
                        $this->objectMap['pages'] = [];
                    }

                    $page = PdfObject\PageObject::parse($stream);
                    $page->setImported(true);

                    $this->objectMap['pages'][$page->getIndex()] = [
                        'stream'   => $stream,
                        'object'   => $page,
                        'index'    => $page->getIndex(),
                        'parent'   => $page->getParentIndex(),
                        'width'    => $page->getWidth(),
                        'height'   => $page->getHeight(),
                        'content'  => $page->getContent(),
                        'annots'   => $page->getAnnots(),
                        'fonts'    => $page->getFonts(),
                        'xObjects' => $page->getXObjects()
                    ];
                    break;
                case 'stream':
                    if (!isset($this->objectMap['streams'])) {
                        $this->objectMap['streams'] = [];
                    }
                    $streamObject = PdfObject\StreamObject::parse($stream);
                    $streamObject->setImported(true);
                    // Unlike the other entry types, 'stream' holds the parsed
                    // object rather than the raw string here. This is kept
                    // as-is so consumers of getObjectMap() see the same values.
                    $this->objectMap['streams'][$streamObject->getIndex()] = [
                        'stream' => $streamObject,
                        'object' => $streamObject,
                        'index'  => $streamObject->getIndex()
                    ];
                    break;
            }
        }
    }

    /**
     * Map the fonts, if any
     *
     * For every font referenced by a page, looks up the mapped stream with the
     * same key and, when it contains a /BaseFont entry, records the font name.
     *
     * @return void
     */
    protected function mapFonts()
    {
        foreach ($this->objectMap['pages'] as $page) {
            if (empty($page['fonts']) || !is_array($page['fonts'])) {
                continue;
            }

            foreach ($page['fonts'] as $i => $font) {
                // A page may reference a font object that was not mapped as a stream
                if (!isset($this->objectMap['streams'][$i]['stream'])) {
                    continue;
                }

                // The map entry holds a stream object; convert it once to text
                $fontStream  = (string)$this->objectMap['streams'][$i]['stream'];
                $baseFontPos = strpos($fontStream, '/BaseFont');
                if ($baseFontPos === false) {
                    continue;
                }

                // Skip past the 9 characters of "/BaseFont"
                $fontName = trim(substr($fontStream, $baseFontPos + 9));

                if (substr($fontName, 0, 1) == '/') {
                    $fontName = substr($fontName, 1);
                }

                // The name ends at the next '/' or '>', whichever comes first.
                // If neither is present the remaining text is kept.
                $fontName = substr($fontName, 0, strcspn($fontName, '/>'));

                $f = [
                    'name'  => trim($fontName),
                    'index' => $i,
                    'ref'   => $font
                ];

                if (!in_array($f, $this->fonts, true)) {
                    $this->fonts[] = $f;
                }
            }
        }

        // Collect the object numbers referenced by /FontFile entries
        $fontFileObjects = [];
        foreach ($this->objectStreams as $stream) {
            $fontFilePos = strpos($stream, '/FontFile');
            if ($fontFilePos !== false) {
                $fontFileObject = substr($stream, $fontFilePos);
                $fontFileObject = substr($fontFileObject, (strpos($fontFileObject, ' ') + 1));
                $fontFileObject = trim(substr($fontFileObject, 0, strpos($fontFileObject, '0 R')));
                $fontFileObjects[] = $fontFileObject;
            }
        }

        // NOTE(review): only object number 13 is processed and the constructed
        // TrueType parser is discarded. This looks like unfinished work; the
        // behavior is preserved here, confirm the intent before extending it.
        foreach ($fontFileObjects as $fontFileObject) {
            if (($fontFileObject == 13) && isset($this->objectMap['streams'][$fontFileObject])) {
                $fontFile = $this->objectMap['streams'][$fontFileObject];
                $contents = ($fontFile['object']->getEncoding() == 'FlateDecode') ?
                    gzuncompress(trim($fontFile['object']->getStream())) : $fontFile['object']->getStream();

                new \Pop\Pdf\Build\Font\TrueType(null, $contents);
            }
        }
    }

    /**
     * Filter pages
     *
     * Keeps only the requested pages. Page numbers are 1-based positions in the
     * parent object's kids list; numbers with no matching kid are ignored. If
     * no parent object was mapped, nothing is filtered.
     *
     * @param  mixed $pages  A single page number or an array of page numbers
     * @return void
     */
    protected function filterPages($pages)
    {
        // Without a mapped parent object there is no kids list to filter against
        if (!isset($this->objectMap['parent']['object'])) {
            return;
        }

        $pages = (!is_array($pages)) ? [$pages] : $pages;
        $kids  = $this->objectMap['parent']['object']->getKids();
        $keep  = [];

        foreach ($pages as $page) {
            if (isset($kids[$page - 1])) {
                $keep[] = $kids[$page - 1];
            }
        }

        $this->objectMap['parent']['object']->setKids($keep);
        $this->objectMap['parent']['count']  = count($keep);
        $this->objectMap['parent']['kids']   = $keep;

        // Drop the mapped pages whose kid reference was not kept
        foreach ($kids as $kid) {
            if (!in_array($kid, $keep, true) && isset($this->objectMap['pages'][$kid])) {
                unset($this->objectMap['pages'][$kid]);
            }
        }
    }

    /**
     * Get the objects for import
     *
     * Returns the root, parent, info and stream objects keyed by their index.
     * Page entries are not included; parse() imports them separately.
     *
     * @return array
     */
    protected function getObjects()
    {
        $objects = [];
        foreach ($this->objectMap as $type => $object) {
            if (($type == 'root') || ($type == 'parent') || ($type == 'info')) {
                $objects[$object['index']] = $object['object'];
            } else if ($type == 'streams') {
                foreach ($object as $obj) {
                    $objects[$obj['index']] = $obj['object'];
                }
            }
        }

        return $objects;
    }

}