Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
92.31% |
132 / 143 |
|
91.67% |
11 / 12 |
CRAP | |
0.00% |
0 / 1 |
Parser | |
92.31% |
132 / 143 |
|
91.67% |
11 / 12 |
63.75 | |
0.00% |
0 / 1 |
getObjectStreams | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getObjectMap | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
getFonts | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
parseFile | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
parseData | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
parse | |
100.00% |
25 / 25 |
|
100.00% |
1 / 1 |
15 | |||
initFile | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
2 | |||
initData | |
100.00% |
5 / 5 |
|
100.00% |
1 / 1 |
1 | |||
mapObjects | |
100.00% |
47 / 47 |
|
100.00% |
1 / 1 |
9 | |||
mapFonts | |
64.52% |
20 / 31 |
|
0.00% |
0 / 1 |
27.44 | |||
filterPages | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
7 | |||
getObjects | |
100.00% |
8 / 8 |
|
100.00% |
1 / 1 |
7 |
1 | <?php |
2 | /** |
3 | * Pop PHP Framework (http://www.popphp.org/) |
4 | * |
5 | * @link https://github.com/popphp/popphp-framework |
6 | * @author Nick Sagona, III <dev@nolainteractive.com> |
7 | * @copyright Copyright (c) 2009-2023 NOLA Interactive, LLC. (http://www.nolainteractive.com) |
8 | * @license http://www.popphp.org/license New BSD License |
9 | */ |
10 | |
11 | /** |
12 | * @namespace |
13 | */ |
14 | namespace Pop\Pdf\Build; |
15 | |
16 | use Pop\Pdf\Document\AbstractDocument; |
17 | |
18 | /** |
19 | * Pdf parser class |
20 | * |
21 | * @category Pop |
22 | * @package Pop\Pdf |
23 | * @author Nick Sagona, III <dev@nolainteractive.com> |
24 | * @copyright Copyright (c) 2009-2023 NOLA Interactive, LLC. (http://www.nolainteractive.com) |
25 | * @license http://www.popphp.org/license New BSD License |
26 | * @version 4.2.0 |
27 | */ |
class Parser extends AbstractParser
{

    /**
     * Raw text of each indirect object ("N G obj ... endobj") kept from the
     * source data. Filled by parse(), emptied by initFile()/initData().
     * @var array
     */
    protected $objectStreams = [];

    /**
     * Object map built by mapObjects(). May contain the keys 'root', 'parent'
     * and 'info' (one entry each) and 'pages' and 'streams' (entries keyed by
     * object index).
     * @var array
     */
    protected $objectMap = [];

    /**
     * Document fonts found by mapFonts(); each entry is an array with the
     * keys 'name', 'index' and 'ref'.
     * @var array
     */
    protected $fonts = [];

    /**
     * Get the object streams
     *
     * @return array
     */
    public function getObjectStreams()
    {
        return $this->objectStreams;
    }

    /**
     * Get the object map
     *
     * @return array
     */
    public function getObjectMap()
    {
        return $this->objectMap;
    }

    /**
     * Get the document fonts
     *
     * @return array
     */
    public function getFonts()
    {
        return $this->fonts;
    }

    /**
     * Parse from file
     *
     * Loads the file contents via initFile() and then runs parse().
     *
     * @param  string $file
     * @param  mixed  $pages Page number or array of page numbers to keep; null keeps all
     * @throws Exception if the file does not exist or cannot be read
     * @return AbstractDocument
     */
    public function parseFile($file, $pages = null)
    {
        $this->initFile($file);
        return $this->parse($pages);
    }

    /**
     * Parse from raw data stream
     *
     * Stores the data via initData() and then runs parse().
     *
     * @param  string $data
     * @param  mixed  $pages Page number or array of page numbers to keep; null keeps all
     * @return AbstractDocument
     */
    public function parseData($data, $pages = null)
    {
        $this->initData($data);
        return $this->parse($pages);
    }

    /**
     * Parse the data stream
     *
     * Splits the current data into object streams, maps them, optionally
     * filters the pages, and assembles a new document from the result.
     *
     * @param  mixed $pages Page number or array of page numbers to keep; null keeps all
     * @return AbstractDocument
     */
    public function parse($pages = null)
    {
        $matches = [];
        preg_match_all('/\d*\s\d*\sobj(.*?)endobj/sm', $this->data, $matches);

        if (!empty($matches[0])) {
            foreach ($matches[0] as $objectStream) {
                // Linearization and metadata objects are not carried over
                if ((strpos($objectStream, '/Linearized') === false) &&
                    (strpos($objectStream, '/Type/Metadata') === false)) {
                    $this->objectStreams[] = $objectStream;
                }
            }
        }

        // Map the objects by parsing the object streams
        $this->mapObjects();

        if (isset($this->objectMap['pages'])) {
            // Map fonts, if any
            if (isset($this->objectMap['streams'])) {
                $this->mapFonts();
            }
            // If certain pages are to be imported, filter out the unwanted pages
            if (null !== $pages) {
                $this->filterPages($pages);
            }
        }

        $doc = new \Pop\Pdf\Document();

        if (isset($this->objectMap['root']['object'])) {
            $doc->setVersion($this->objectMap['root']['object']->getVersion());
        }
        if (isset($this->objectMap['info']['object'])) {
            $doc->setMetadata($this->objectMap['info']['object']->getMetadata());
        }

        $doc->importObjects($this->getObjects());
        $doc->importFonts($this->getFonts());

        if (isset($this->objectMap['pages'])) {
            foreach ($this->objectMap['pages'] as $i => $page) {
                $pg = new \Pop\Pdf\Document\Page($page['width'], $page['height'], $i);
                $pg->importPageObject($page['object']);
                $doc->addPage($pg);
            }
        }

        return $doc;
    }

    /**
     * Initialize the file and get the data
     *
     * Reads the whole file into memory and clears any state left over from a
     * previous parse.
     *
     * @param  string $file
     * @throws Exception if the file does not exist or cannot be read
     * @return Parser
     */
    protected function initFile($file)
    {
        if (!file_exists($file)) {
            throw new Exception('Error: That PDF file does not exist.');
        }

        // file_exists() is also true for directories and unreadable files, in
        // which case file_get_contents() returns false rather than a string.
        $data = file_get_contents($file);
        if ($data === false) {
            throw new Exception('Error: Unable to read that PDF file.');
        }

        $this->file = $file;
        $this->data = $data;

        $this->objectStreams = [];
        $this->objectMap     = [];
        $this->fonts         = [];

        return $this;
    }

    /**
     * Initialize data
     *
     * Stores the raw data and clears any state left over from a previous parse.
     *
     * @param  string $data
     * @return Parser
     */
    protected function initData($data)
    {
        $this->data = $data;

        $this->objectStreams = [];
        $this->objectMap     = [];
        $this->fonts         = [];

        return $this;
    }

    /**
     * Map the objects
     *
     * Classifies each object stream with getStreamType(), parses it into the
     * matching PdfObject class, flags it as imported and records it in the
     * object map. Stream types other than the five handled below are ignored.
     *
     * @return void
     */
    protected function mapObjects()
    {
        foreach ($this->objectStreams as $stream) {
            switch ($this->getStreamType($stream)) {
                case 'root':
                    $root = PdfObject\RootObject::parse($stream);
                    $root->setImported(true);
                    // Bytes 5-7 of the data; for a header of the form "%PDF-x.y" this is "x.y"
                    $root->setVersion(substr($this->data, 5, 3));
                    $this->objectMap['root'] = [
                        'stream' => $stream,
                        'object' => $root,
                        'index'  => $root->getIndex(),
                        'parent' => $root->getParentIndex()
                    ];
                    break;
                case 'parent':
                    $parent = PdfObject\ParentObject::parse($stream);
                    $parent->setImported(true);
                    $this->objectMap['parent'] = [
                        'stream' => $stream,
                        'object' => $parent,
                        'index'  => $parent->getIndex(),
                        'count'  => $parent->getCount(),
                        'kids'   => $parent->getKids()
                    ];
                    break;
                case 'info':
                    $info = PdfObject\InfoObject::parse($stream);
                    $info->setImported(true);
                    $this->objectMap['info'] = [
                        'stream' => $stream,
                        'object' => $info,
                        'index'  => $info->getIndex(),
                    ];
                    break;
                case 'page':
                    if (!isset($this->objectMap['pages'])) {
                        $this->objectMap['pages'] = [];
                    }

                    $page = PdfObject\PageObject::parse($stream);
                    $page->setImported(true);

                    $this->objectMap['pages'][$page->getIndex()] = [
                        'stream'   => $stream,
                        'object'   => $page,
                        'index'    => $page->getIndex(),
                        'parent'   => $page->getParentIndex(),
                        'width'    => $page->getWidth(),
                        'height'   => $page->getHeight(),
                        'content'  => $page->getContent(),
                        'annots'   => $page->getAnnots(),
                        'fonts'    => $page->getFonts(),
                        'xObjects' => $page->getXObjects()
                    ];
                    break;
                case 'stream':
                    if (!isset($this->objectMap['streams'])) {
                        $this->objectMap['streams'] = [];
                    }

                    $streamObject = PdfObject\StreamObject::parse($stream);
                    $streamObject->setImported(true);

                    // NOTE(review): unlike the other entry types, 'stream' here holds
                    // the parsed object and not the raw text. This has always been the
                    // stored value (the loop variable used to be overwritten before the
                    // array was built) and is kept so getObjectMap() consumers see no change.
                    $this->objectMap['streams'][$streamObject->getIndex()] = [
                        'stream' => $streamObject,
                        'object' => $streamObject,
                        'index'  => $streamObject->getIndex()
                    ];
                    break;
            }
        }
    }

    /**
     * Map the fonts, if any
     *
     * For every font reference on every mapped page, looks up the mapped
     * stream entry with the same index and, if its text contains a /BaseFont
     * key, records the font name, index and reference in $this->fonts.
     * Identical entries are recorded once.
     *
     * Font references without a mapped stream entry are skipped.
     *
     * A previous revision also collected /FontFile references, but only ever
     * acted on object number 13 and discarded the parser it built, so that
     * pass had no effect on the parser state and has been removed.
     *
     * @return void
     */
    protected function mapFonts()
    {
        if (empty($this->objectMap['pages']) || empty($this->objectMap['streams'])) {
            return;
        }

        foreach ($this->objectMap['pages'] as $page) {
            if (empty($page['fonts']) || !is_array($page['fonts'])) {
                continue;
            }

            foreach ($page['fonts'] as $i => $font) {
                if (!isset($this->objectMap['streams'][$i]['stream'])) {
                    continue;
                }

                // The map stores the parsed object under 'stream' (see mapObjects()),
                // so make the string conversion explicit before searching it.
                $streamText = (string)$this->objectMap['streams'][$i]['stream'];
                $keyPos     = strpos($streamText, '/BaseFont');
                if ($keyPos === false) {
                    continue;
                }

                // Everything after the "/BaseFont" key (9 characters long)
                $fontName = trim(substr($streamText, $keyPos + 9));
                if (substr($fontName, 0, 1) == '/') {
                    $fontName = substr($fontName, 1);
                }

                // The name ends at the next key ('/') or at the end of the
                // dictionary ('>'), whichever comes first. If neither is
                // present the remainder is used as-is.
                $fontName = substr($fontName, 0, strcspn($fontName, '/>'));

                $f = [
                    'name'  => trim($fontName),
                    'index' => $i,
                    'ref'   => $font
                ];

                if (!in_array($f, $this->fonts, true)) {
                    $this->fonts[] = $f;
                }
            }
        }
    }

    /**
     * Filter pages
     *
     * Reduces the parent object's kids to the requested pages and removes the
     * other pages from the object map. Page numbers are 1-based positions in
     * the parent's kids list; numbers with no matching kid are ignored.
     *
     * @param  mixed $pages Page number or array of page numbers to keep
     * @return void
     */
    protected function filterPages($pages)
    {
        // Nothing can be filtered without a mapped parent (page tree) object
        if (!isset($this->objectMap['parent']['object'])) {
            return;
        }

        $pages  = (!is_array($pages)) ? [$pages] : $pages;
        $parent = $this->objectMap['parent']['object'];
        $kids   = $parent->getKids();
        $keep   = [];

        foreach ($pages as $page) {
            // Skip values that cannot be used as a page number
            if (!is_numeric($page)) {
                continue;
            }
            $position = (int)$page - 1;
            if (isset($kids[$position])) {
                $keep[] = $kids[$position];
            }
        }

        $parent->setKids($keep);
        $this->objectMap['parent']['count'] = count($keep);
        $this->objectMap['parent']['kids']  = $keep;

        foreach ($kids as $kid) {
            // $keep only holds values taken from $kids, so a strict comparison is safe
            if (!in_array($kid, $keep, true) && isset($this->objectMap['pages'][$kid])) {
                unset($this->objectMap['pages'][$kid]);
            }
        }
    }

    /**
     * Get the objects for import
     *
     * Collects the root, parent, info and stream objects from the object
     * map, keyed by object index. Page entries are not included here; parse()
     * imports them separately through the page instances it creates.
     *
     * @return array
     */
    protected function getObjects()
    {
        $objects = [];

        foreach ($this->objectMap as $type => $object) {
            if (($type == 'root') || ($type == 'parent') || ($type == 'info')) {
                $objects[$object['index']] = $object['object'];
            } elseif ($type == 'streams') {
                foreach ($object as $obj) {
                    // 'object' and 'stream' hold the same parsed instance for stream entries
                    $objects[$obj['index']] = $obj['object'];
                }
            }
        }

        return $objects;
    }

}