Commit | Line | Data |
---|---|---|
5c386472 DW |
1 | <?php\r |
2 | //\r | |
858d457d | 3 | // FPDI - Version 1.5.4\r |
5c386472 | 4 | //\r |
858d457d | 5 | // Copyright 2004-2015 Setasign - Jan Slabon\r |
5c386472 DW |
6 | //\r |
7 | // Licensed under the Apache License, Version 2.0 (the "License");\r | |
8 | // you may not use this file except in compliance with the License.\r | |
9 | // You may obtain a copy of the License at\r | |
10 | //\r | |
11 | // http://www.apache.org/licenses/LICENSE-2.0\r | |
12 | //\r | |
13 | // Unless required by applicable law or agreed to in writing, software\r | |
14 | // distributed under the License is distributed on an "AS IS" BASIS,\r | |
15 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\r | |
16 | // See the License for the specific language governing permissions and\r | |
17 | // limitations under the License.\r | |
18 | //\r | |
19 | \r | |
858d457d AG |
20 | /**\r |
21 | * Class pdf_parser\r | |
22 | */\r | |
23 | class pdf_parser\r | |
24 | {\r | |
25 | /**\r | |
26 | * Type constant\r | |
27 | *\r | |
28 | * @var integer\r | |
29 | */\r | |
30 | const TYPE_NULL = 0;\r | |
31 | \r | |
32 | /**\r | |
33 | * Type constant\r | |
34 | *\r | |
35 | * @var integer\r | |
36 | */\r | |
37 | const TYPE_NUMERIC = 1;\r | |
38 | \r | |
39 | /**\r | |
40 | * Type constant\r | |
41 | *\r | |
42 | * @var integer\r | |
43 | */\r | |
44 | const TYPE_TOKEN = 2;\r | |
45 | \r | |
46 | /**\r | |
47 | * Type constant\r | |
48 | *\r | |
49 | * @var integer\r | |
50 | */\r | |
51 | const TYPE_HEX = 3;\r | |
52 | \r | |
53 | /**\r | |
54 | * Type constant\r | |
55 | *\r | |
56 | * @var integer\r | |
57 | */\r | |
58 | const TYPE_STRING = 4;\r | |
59 | \r | |
60 | /**\r | |
61 | * Type constant\r | |
62 | *\r | |
63 | * @var integer\r | |
64 | */\r | |
65 | const TYPE_DICTIONARY = 5;\r | |
66 | \r | |
67 | /**\r | |
68 | * Type constant\r | |
69 | *\r | |
70 | * @var integer\r | |
71 | */\r | |
72 | const TYPE_ARRAY = 6;\r | |
73 | \r | |
74 | /**\r | |
75 | * Type constant\r | |
76 | *\r | |
77 | * @var integer\r | |
78 | */\r | |
79 | const TYPE_OBJDEC = 7;\r | |
80 | \r | |
81 | /**\r | |
82 | * Type constant\r | |
83 | *\r | |
84 | * @var integer\r | |
85 | */\r | |
86 | const TYPE_OBJREF = 8;\r | |
87 | \r | |
88 | /**\r | |
89 | * Type constant\r | |
90 | *\r | |
91 | * @var integer\r | |
92 | */\r | |
93 | const TYPE_OBJECT = 9;\r | |
94 | \r | |
95 | /**\r | |
96 | * Type constant\r | |
97 | *\r | |
98 | * @var integer\r | |
99 | */\r | |
100 | const TYPE_STREAM = 10;\r | |
101 | \r | |
102 | /**\r | |
103 | * Type constant\r | |
104 | *\r | |
105 | * @var integer\r | |
106 | */\r | |
107 | const TYPE_BOOLEAN = 11;\r | |
108 | \r | |
109 | /**\r | |
110 | * Type constant\r | |
111 | *\r | |
112 | * @var integer\r | |
113 | */\r | |
114 | const TYPE_REAL = 12;\r | |
115 | \r | |
116 | /**\r | |
117 | * Defines the number of bytes, counted from the end of a PDF document, in which the "startxref" keyword is searched.\r | |
118 | *\r | |
119 | * @var int\r | |
120 | */\r | |
121 | static public $searchForStartxrefLength = 5500;\r | |
122 | \r | |
123 | /**\r | |
124 | * Filename\r | |
125 | *\r | |
126 | * @var string\r | |
127 | */\r | |
128 | public $filename;\r | |
129 | \r | |
130 | /**\r | |
131 | * File resource\r | |
132 | *\r | |
133 | * @var resource\r | |
134 | */\r | |
135 | protected $_f;\r | |
136 | \r | |
137 | /**\r | |
138 | * PDF Context\r | |
139 | *\r | |
140 | * @var pdf_context\r | |
141 | */\r | |
142 | protected $_c;\r | |
143 | \r | |
144 | /**\r | |
145 | * xref-Data\r | |
146 | *\r | |
147 | * @var array\r | |
148 | */\r | |
149 | protected $_xref;\r | |
150 | \r | |
151 | /**\r | |
152 | * Data of the Root object\r | |
153 | *\r | |
154 | * @var array\r | |
155 | */\r | |
156 | protected $_root;\r | |
157 | \r | |
158 | /**\r | |
159 | * PDF version of the loaded document\r | |
160 | *\r | |
161 | * @var string\r | |
162 | */\r | |
163 | protected $_pdfVersion;\r | |
164 | \r | |
165 | /**\r | |
166 | * For reading encrypted documents and xref/object streams are in use\r | |
167 | *\r | |
168 | * @var boolean\r | |
169 | */\r | |
170 | protected $_readPlain = true;\r | |
171 | \r | |
172 | /**\r | |
173 | * The current read object\r | |
174 | *\r | |
175 | * @var array\r | |
176 | */\r | |
177 | protected $_currentObj;\r | |
178 | \r | |
/**
 * Opens a PDF document and reads the structures the parser depends on.
 *
 * The steps run in a fixed order: open the file for binary reading, read the
 * PDF version, create the pdf_context on the file handle, read the
 * cross-reference data, reject encrypted documents and load the /Root object.
 *
 * @param string $filename Source filename
 * @throws InvalidArgumentException If the file cannot be opened.
 * @throws Exception If reading the xref data, the encryption check or reading the root fails.
 */
public function __construct($filename)
{
    $this->filename = $filename;

    // fopen() warnings are silenced; a failed open is reported by the exception below.
    $handle = @fopen($this->filename, 'rb');
    $this->_f = $handle;

    if (!$handle) {
        throw new InvalidArgumentException(sprintf('Cannot open %s !', $filename));
    }

    $this->getPdfVersion();

    // Load the context class on demand.
    if (class_exists('pdf_context') === false) {
        require_once('pdf_context.php');
    }
    $this->_c = new pdf_context($this->_f);

    // Cross-reference data, starting at the offset announced by "startxref".
    $this->_xref = array();
    $xrefOffset = $this->_findXref();
    $this->_readXref($this->_xref, $xrefOffset);

    // Encrypted documents are rejected.
    $this->getEncryption();

    // Document catalog.
    $this->_readRoot();
}
212 | \r | |
/**
 * Destructor.
 *
 * Releases the file handle by delegating to closeFile().
 */
public function __destruct()
{
    $this->closeFile();
}
220 | \r | |
/**
 * Closes the file handle, if one is still open.
 *
 * Calling this method again after the handle was closed does nothing.
 */
public function closeFile()
{
    // Nothing to do when the handle is missing or no longer a resource.
    if (!isset($this->_f) || !is_resource($this->_f)) {
        return;
    }

    fclose($this->_f);
    unset($this->_f);
}
231 | \r | |
/**
 * Rejects encrypted documents.
 *
 * The document counts as encrypted when the trailer dictionary read into the
 * xref data holds an /Encrypt entry. Nothing is returned.
 *
 * @throws Exception If the trailer contains an /Encrypt entry.
 */
public function getEncryption()
{
    $isEncrypted = isset($this->_xref['trailer'][1]['/Encrypt']);

    if ($isEncrypted) {
        throw new Exception('File is encrypted!');
    }
}
243 | \r | |
/**
 * Returns the PDF version of the document.
 *
 * On the first call the first 16 bytes of the file are read and searched for
 * a "digit.digit" sequence; the match is cached for later calls. When no such
 * sequence is found nothing is cached and the stored value is returned as is.
 *
 * @return string
 */
public function getPdfVersion()
{
    // Serve the cached value once the version is known.
    if ($this->_pdfVersion !== null) {
        return $this->_pdfVersion;
    }

    fseek($this->_f, 0);
    $header = fread($this->_f, 16);

    preg_match('/\d\.\d/', $header, $found);
    if (isset($found[0])) {
        $this->_pdfVersion = $found[0];
    }

    return $this->_pdfVersion;
}
260 | \r | |
/**
 * Reads the /Root dictionary.
 *
 * The trailer's /Root entry has to be an indirect reference; it is resolved
 * and the result is stored in $this->_root.
 *
 * @throws Exception If the trailer has no /Root entry, if the entry is not an
 *                   indirect reference, or if resolving it fails.
 */
protected function _readRoot()
{
    // isset() also covers a missing trailer or a missing /Root entry, so such
    // documents end in the exception below without undefined-index warnings.
    if (!isset($this->_xref['trailer'][1]['/Root'][0])
        || $this->_xref['trailer'][1]['/Root'][0] != self::TYPE_OBJREF
    ) {
        throw new Exception('Wrong Type of Root-Element! Must be an indirect reference');
    }

    $this->_root = $this->resolveObject($this->_xref['trailer'][1]['/Root']);
}
272 | \r | |
/**
 * Finds the byte offset of the xref table.
 *
 * The last self::$searchForStartxrefLength bytes of the file (or the whole
 * file when it is shorter) are searched for the last "startxref" keyword.
 * When that keyword is absent, the misspelled "startref" is tried. The first
 * run of digits after the keyword is returned.
 *
 * @return integer
 * @throws Exception If neither keyword or no number after it can be found.
 */
protected function _findXref()
{
    $windowSize = self::$searchForStartxrefLength;

    // Seeking fails when the file is shorter than the window: read from the start then.
    if (fseek($this->_f, -$windowSize, SEEK_END) === -1) {
        fseek($this->_f, 0);
    }

    $tail = fread($this->_f, $windowSize);

    // Position directly behind the last occurrence of the keyword.
    $keywordEnd = false;
    foreach (array('startxref', 'startref') as $keyword) {
        $keywordStart = strrpos($tail, $keyword);
        if ($keywordStart !== false) {
            $keywordEnd = $keywordStart + strlen($keyword);
            break;
        }
    }

    if ($keywordEnd === false) {
        throw new Exception('Unable to find "startxref" keyword.');
    }

    $afterKeyword = substr($tail, $keywordEnd);

    if (!preg_match('/\s*(\d+).*$/s', $afterKeyword, $matches)) {
        throw new Exception('Unable to find pointer to xref table.');
    }

    return (int) $matches[1];
}
308 | \r | |
/**
 * Reads a cross-reference table and all older tables linked through /Prev.
 *
 * Starting at $offset, each table is parsed up to its "trailer" keyword and
 * the trailer dictionary is read. While the trailer holds a /Prev entry, the
 * table at that offset is processed as well. The tables are walked in a loop
 * and every processed offset is remembered, so a /Prev chain that leads back
 * to an already processed table is reported instead of being followed forever.
 *
 * $result receives:
 *  - 'xrefLocation': position of the "xref" keyword of the first table read
 *  - 'maxObject':    highest "first object + count" value of any subsection
 *  - 'xref':         [object number][generation] => byte offset for "n"
 *                    entries, null otherwise; existing entries are kept
 *  - 'trailer':      trailer value of the first table read
 *
 * @param array $result Array of xref table entries
 * @param integer $offset of xref table
 * @return boolean Always true; failures are reported through exceptions.
 * @throws Exception If a table or its trailer keyword cannot be found, the
 *                   table is malformed, or the /Prev chain is circular.
 */
protected function _readXref(&$result, $offset)
{
    // Offsets of the tables processed so far (guards against circular /Prev chains).
    $visitedOffsets = array();

    while (true) {
        $visitedOffsets[(int) $offset] = true;

        // Start some bytes before the offset to cope with slightly wrong pointers.
        $tempPos = $offset - min(20, $offset);
        fseek($this->_f, $tempPos);

        $data = fread($this->_f, 100);

        $xrefPos = strrpos($data, 'xref');

        if ($xrefPos === false) {
            // No keyword: check whether an object declaration starts at the offset.
            $this->_c->reset($offset);
            $xrefStreamObjDec = $this->_readValue($this->_c);

            if (is_array($xrefStreamObjDec)
                && isset($xrefStreamObjDec[0])
                && $xrefStreamObjDec[0] == self::TYPE_OBJDEC
            ) {
                throw new Exception(
                    sprintf(
                        'This document (%s) probably uses a compression technique which is not supported by the ' .
                        'free parser shipped with FPDI. (See https://www.setasign.com/fpdi-pdf-parser for more details)',
                        $this->filename
                    )
                );
            }

            throw new Exception('Unable to find xref table.');
        }

        // Only the first table read defines the location and initialises the counter.
        if (!isset($result['xrefLocation'])) {
            $result['xrefLocation'] = $tempPos + $xrefPos;
            $result['maxObject'] = 0;
        }

        $cycles = -1;
        $bytesPerCycle = 100;

        // Continue directly after the "xref" keyword.
        $tempPos = $tempPos + $xrefPos + 4;
        fseek($this->_f, $tempPos);
        $data = fread($this->_f, $bytesPerCycle);

        // Read in chunks until the "trailer" keyword shows up. The search offset
        // lags behind the data read so a keyword split across chunks is found.
        while (($trailerPos = strpos($data, 'trailer', max($bytesPerCycle * $cycles++, 0))) === false
            && !feof($this->_f)
        ) {
            $data .= fread($this->_f, $bytesPerCycle);
        }

        if ($trailerPos === false) {
            throw new Exception('Trailer keyword not found after xref table');
        }

        $data = ltrim(substr($data, 0, $trailerPos));

        // Detect the line ending from the first 100 bytes of the table.
        $found = preg_match_all("/(\r\n|\n|\r)/", substr($data, 0, 100), $m);
        if ($found === 0) {
            throw new Exception('Xref table seems to be corrupted.');
        }

        if (count(array_unique($m[0])) > 1) {
            // Mixed line endings: split on any of them.
            $lines = preg_split("/(\r\n|\n|\r)/", $data, -1, PREG_SPLIT_NO_EMPTY);
        } else {
            $lines = explode($m[0][0], $data);
        }

        // Object number the next entry line belongs to.
        $start = 1;

        foreach ($lines as $rawLine) {
            $line = trim($rawLine);
            if (!$line) {
                continue;
            }

            $pieces = explode(' ', $line);

            switch (count($pieces)) {
                case 2:
                    // Subsection header: "<first object number> <entry count>"
                    $start = (int) $pieces[0];
                    $end = $start + (int) $pieces[1];
                    if ($end > $result['maxObject']) {
                        $result['maxObject'] = $end;
                    }
                    break;

                case 3:
                    // Entry: "<offset> <generation> <n|f>"
                    if (!isset($result['xref'][$start])) {
                        $result['xref'][$start] = array();
                    }

                    // Entries that are already known are kept.
                    $gen = (int) $pieces[1];
                    if (!array_key_exists($gen, $result['xref'][$start])) {
                        $result['xref'][$start][$gen] = $pieces[2] == 'n' ? (int) $pieces[0] : null;
                    }
                    $start++;
                    break;

                default:
                    throw new Exception('Unexpected data in xref table');
            }
        }

        // The trailer value follows the 7 characters of the "trailer" keyword.
        $this->_c->reset($tempPos + $trailerPos + 7);
        $trailer = $this->_readValue($this->_c);

        if (!isset($result['trailer'])) {
            $result['trailer'] = $trailer;
        }

        if (!isset($trailer[1]['/Prev'])) {
            break;
        }

        // Continue with the previous table, unless it was processed before.
        $offset = $trailer[1]['/Prev'][1];
        if (isset($visitedOffsets[(int) $offset])) {
            throw new Exception('Circular /Prev reference found in xref table.');
        }
    }

    return true;
}
429 | \r | |
/**
 * Reads a PDF value.
 *
 * The value starts with $token or, when none is given, with the next token of
 * the context. The result is an array whose first element is a TYPE_*
 * constant:
 *  - array(TYPE_HEX, string)            content between "<" and ">"
 *  - array(TYPE_DICTIONARY, array)      key token => value
 *  - array(TYPE_ARRAY, array)           list of values
 *  - array(TYPE_STRING, string)         content between the outer brackets
 *  - array(TYPE_STREAM, string)         raw stream data
 *  - array(TYPE_OBJDEC, int, int)       "<number> <generation> obj"
 *  - array(TYPE_OBJREF, int, int)       "<number> <generation> R"
 *  - array(TYPE_NUMERIC, int) or array(TYPE_REAL, float)
 *  - array(TYPE_BOOLEAN, bool), array(TYPE_NULL), array(TYPE_TOKEN, string)
 *
 * @param pdf_context $c
 * @param string $token A token
 * @return mixed The value array, or false when the data ends prematurely.
 * @throws Exception If no line break follows a "stream" keyword.
 */
protected function _readValue(&$c, $token = null)
{
    if (is_null($token)) {
        $token = $this->_readToken($c);
    }

    if ($token === false) {
        return false;
    }

    switch ($token) {
        case '<':
            // Hex string: everything up to the closing ">".
            $searchFrom = $c->offset;

            // Load more data as long as the terminator is not in the buffer.
            while (($closing = strpos($c->buffer, '>', $searchFrom)) === false) {
                if (!$c->increaseLength()) {
                    return false;
                }
            }

            $hex = substr($c->buffer, $c->offset, $closing - $c->offset);
            $c->offset = $closing + 1;

            return array(self::TYPE_HEX, $hex);

        case '<<':
            // Dictionary: key/value pairs up to the closing ">>".
            $entries = array();

            for (;;) {
                $key = $this->_readToken($c);
                if ($key === '>>') {
                    break;
                }

                if ($key === false) {
                    return false;
                }

                $value = $this->_readValue($c);
                if ($value === false) {
                    return false;
                }

                // The key had no value: the dictionary end was read in its place.
                if ($value[0] == self::TYPE_TOKEN && $value[1] == '>>') {
                    $entries[$key] = array(self::TYPE_NULL);
                    break;
                }

                $entries[$key] = $value;
            }

            return array(self::TYPE_DICTIONARY, $entries);

        case '[':
            // Array: values up to the closing "]".
            $items = array();

            while (($itemToken = $this->_readToken($c)) !== ']') {
                if ($itemToken === false) {
                    return false;
                }

                $value = $this->_readValue($c, $itemToken);
                if ($value === false) {
                    return false;
                }

                $items[] = $value;
            }

            return array(self::TYPE_ARRAY, $items);

        case '(':
            // Literal string: ends at the bracket matching the opening one.
            $cursor = $c->offset;
            $depth = 1;

            do {
                while ($depth != 0 && $cursor < $c->length) {
                    $char = $c->buffer[$cursor];
                    if ($char === '(') {
                        $depth++;
                    } elseif ($char === ')') {
                        $depth--;
                    } elseif ($char === '\\') {
                        // Skip the escaped character.
                        $cursor++;
                    }
                    $cursor++;
                }
            } while ($depth != 0 && $c->increaseLength());

            // $cursor points behind the closing bracket, which is not part of the string.
            $string = substr($c->buffer, $c->offset, $cursor - $c->offset - 1);
            $c->offset = $cursor;

            return array(self::TYPE_STRING, $string);

        case 'stream':
            // Absolute position directly behind the "stream" keyword.
            $bufferStart = $c->getPos() - strlen($c->buffer);
            $startPos = $bufferStart + $c->offset;

            $c->reset($startPos);

            // Move forward to the first line break character.
            while ($c->buffer[0] !== "\n" && $c->buffer[0] !== "\r") {
                $c->reset(++$startPos);
                if ($c->ensureContent() === false) {
                    throw new Exception(
                        'Unable to parse stream data. No newline followed the stream keyword.'
                    );
                }
            }

            // Number of line break bytes in front of the stream data (1, or 2 for CR LF).
            $lineBreakBytes = 0;
            if ($c->buffer[0] == "\n" || $c->buffer[0] == "\r") {
                $lineBreakBytes++;
            }
            if ($c->buffer[1] == "\n" && $c->buffer[0] != "\n") {
                $lineBreakBytes++;
            }

            // The length comes from the dictionary of the object currently being
            // read; it may be given as an indirect reference.
            if ($this->_currentObj[1][1]['/Length'][0] == self::TYPE_OBJREF) {
                $lengthObject = $this->resolveObject($this->_currentObj[1][1]['/Length']);
                $length = $lengthObject[1][1];
            } else {
                $length = $this->_currentObj[1][1]['/Length'][1];
            }

            $dataStart = $startPos + $lineBreakBytes;

            if ($length > 0) {
                $c->reset($dataStart, $length);
                $streamData = $c->buffer;
            } else {
                $streamData = '';
            }

            $c->reset($dataStart + $length);
            $endToken = $this->_readToken($c);

            if ($endToken != 'endstream') {
                // No error is raised here: reading simply continues at a new
                // offset, 9 bytes (strlen("endstream")) behind the data.
                $c->reset($dataStart + $length + 9);
            }

            return array(self::TYPE_STREAM, $streamData);

        default:
            if (!is_numeric($token)) {
                if ($token == 'true' || $token == 'false') {
                    return array(self::TYPE_BOOLEAN, $token == 'true');
                }

                if ($token == 'null') {
                    return array(self::TYPE_NULL);
                }

                // Any other token is returned as it is.
                return array(self::TYPE_TOKEN, $token);
            }

            // A number may be the start of "<number> <generation> obj" or
            // "<number> <generation> R": look ahead up to two tokens.
            $second = $this->_readToken($c);
            if ($second !== false) {
                if (is_numeric($second)) {
                    $third = $this->_readToken($c);
                    if ($third !== false) {
                        if ($third == 'obj') {
                            return array(self::TYPE_OBJDEC, (int) $token, (int) $second);
                        }
                        if ($third == 'R') {
                            return array(self::TYPE_OBJREF, (int) $token, (int) $second);
                        }

                        // Plain number: hand the look-ahead tokens back.
                        $c->stack[] = $third;
                    }
                }

                $c->stack[] = $second;
            }

            if ($token === (string) ((int) $token)) {
                return array(self::TYPE_NUMERIC, (int) $token);
            }

            return array(self::TYPE_REAL, (float) $token);
    }
}
640 | \r | |
/**
 * Resolves an object.
 *
 * An indirect reference (TYPE_OBJREF) is looked up in the xref data and the
 * object is read from the file. The result is an array with TYPE_OBJECT (or
 * TYPE_STREAM when the third element read is a stream) at index 0, the keys
 * 'obj' and 'gen', followed by the values read up to "endobj". Any other
 * array is returned unchanged.
 *
 * The read position of the context is saved before the object is read and
 * restored afterwards, so objects can be resolved while another one is being
 * read (e.g. to determine the length of a stream).
 *
 * @param array $objSpec The object-data
 * @return array|boolean False if $objSpec is not an array.
 * @throws Exception If the object is unknown to the xref data or cannot be
 *                   found at or near the recorded position.
 */
public function resolveObject($objSpec)
{
    $c = $this->_c;

    // Exit if we get invalid data
    if (!is_array($objSpec)) {
        return false;
    }

    // Everything but a reference is already resolved.
    if (!isset($objSpec[0]) || $objSpec[0] != self::TYPE_OBJREF) {
        return $objSpec;
    }

    if (!isset($this->_xref['xref'][$objSpec[1]][$objSpec[2]])) {
        throw new Exception(
            sprintf("Unable to find object (%s, %s) at expected location.", $objSpec[1], $objSpec[2])
        );
    }

    // Remember the current position; it is restored after the object was read.
    $oldPos = $c->getPos();

    // Reposition the context and load the object header.
    $c->reset($this->_xref['xref'][$objSpec[1]][$objSpec[2]]);

    $header = $this->_readValue($c);

    // A failed read (false) counts as a mismatch instead of being indexed like an array.
    $headerMatches = is_array($header)
        && $header[0] == self::TYPE_OBJDEC
        && $header[1] == $objSpec[1]
        && $header[2] == $objSpec[2];

    if (!$headerMatches) {
        // The recorded offset is not exact: look for the header in the loaded
        // buffer. A plain substring search is used, so the searched text never
        // has to be escaped for a pattern.
        $toSearchFor = $objSpec[1] . ' ' . $objSpec[2] . ' obj';
        $headerPos = strpos($c->buffer, $toSearchFor);

        if ($headerPos === false) {
            throw new Exception(
                sprintf("Unable to find object (%s, %s) at expected location.", $objSpec[1], $objSpec[2])
            );
        }

        $c->offset = $headerPos + strlen($toSearchFor);
        // reset stack
        $c->stack = array();
    }

    // The object number and generation are stored along with the data.
    $result = array(
        self::TYPE_OBJECT,
        'obj' => $objSpec[1],
        'gen' => $objSpec[2]
    );

    // Bound by reference: _readValue() reads the /Length of a stream from the
    // dictionary that gets appended to $result in the loop below.
    $this->_currentObj =& $result;

    // Read values until the end-of-object marker is reached.
    while (true) {
        $value = $this->_readValue($c);
        if ($value === false || count($result) > 4) {
            // in this case the parser couldn't find an "endobj" so we break here
            break;
        }

        if ($value[0] == self::TYPE_TOKEN && $value[1] === 'endobj') {
            break;
        }

        $result[] = $value;
    }

    $c->reset($oldPos);

    if (isset($result[2][0]) && $result[2][0] == self::TYPE_STREAM) {
        $result[0] = self::TYPE_STREAM;
    }

    return $result;
}
734 | \r | |
/**
 * Reads a token from the context.
 *
 * Tokens pushed back onto the context's stack are returned first. Otherwise
 * white-space is skipped and one of the following is returned:
 *  - a single delimiter: "[", "]", "(", ")", "<" or ">"
 *  - a double delimiter: "<<" or ">>"
 *  - any other run of characters up to the next white-space or delimiter
 * Comments ("%" up to the end of the line) are skipped.
 *
 * @param pdf_context $c
 * @return mixed The token as a string, or false if no more data is available.
 */
protected function _readToken($c)
{
    // Tokens handed back by _readValue() are served first.
    if (count($c->stack)) {
        return array_pop($c->stack);
    }

    // Strip away any whitespace
    do {
        if (!$c->ensureContent()) {
            return false;
        }
        $c->offset += strspn($c->buffer, "\x20\x0A\x0C\x0D\x09\x00", $c->offset);
    } while ($c->offset >= $c->length - 1);

    // Get the first character of the token
    $char = $c->buffer[$c->offset++];

    switch ($char) {
        case '[':
        case ']':
        case '(':
        case ')':
            // Array or literal string delimiter
            return $char;

        case '<':
        case '>':
            // Hex string delimiter, or dictionary delimiter when doubled
            if ($c->buffer[$c->offset] == $char) {
                if (!$c->ensureContent()) {
                    return false;
                }
                $c->offset++;
                return $char . $char;
            }

            return $char;

        case '%':
            // Comment: continue behind the next line break.
            $searchFrom = $c->offset;

            while (preg_match("/(\r\n|\r|\n)/", $c->buffer, $m, PREG_OFFSET_CAPTURE, $searchFrom) === 0) {
                // No line break in the buffer yet: load more data.
                if (!$c->increaseLength()) {
                    return false;
                }
            }

            $c->offset = $m[0][1] + strlen($m[0][0]);

            return $this->_readToken($c);

        default:
            // "Another" type of token (probably a dictionary entry or a
            // numeric value): find its end and return it.
            if (!$c->ensureContent()) {
                return false;
            }

            while (true) {
                // Length of the token behind its first character
                $pos = strcspn($c->buffer, "\x20%[]<>()/\x0A\x0C\x0D\x09\x00", $c->offset);

                if ($c->offset + $pos <= $c->length - 1) {
                    break;
                }

                // The token may span beyond the end of the current buffer, so
                // more data is loaded and the length is determined again. When
                // no more data can be loaded the token ends with the data;
                // without this check the loop would never terminate.
                if (!$c->increaseLength()) {
                    break;
                }
            }

            // $c->offset points behind the first character already.
            $result = substr($c->buffer, $c->offset - 1, $pos + 1);

            $c->offset += $pos;

            return $result;
    }
}
849 | \r | |
/**
 * Un-filters a stream object.
 *
 * The filters named by the /Filter entry of the stream dictionary (a single
 * name, an array of names, or a reference to either) are applied in order to
 * the stream data found in $obj[2][1].
 *
 * @param array $obj
 * @return string The decoded stream data; the raw data if no filter is set.
 * @throws Exception If a filter is not supported, zlib functions are missing,
 *                   or the data cannot be decompressed.
 */
protected function _unFilterStream($obj)
{
    $filters = array();

    if (isset($obj[1][1]['/Filter'])) {
        $filterSpec = $obj[1][1]['/Filter'];

        // The entry may be an indirect reference.
        if ($filterSpec[0] == self::TYPE_OBJREF) {
            $resolved = $this->resolveObject($filterSpec);
            $filterSpec = $resolved[1];
        }

        if ($filterSpec[0] == self::TYPE_TOKEN) {
            // A single filter name
            $filters[] = $filterSpec;
        } elseif ($filterSpec[0] == self::TYPE_ARRAY) {
            // A list of filter names
            $filters = $filterSpec[1];
        }
    }

    $stream = $obj[2][1];

    foreach ($filters as $filter) {
        switch ($filter[1]) {
            case '/FlateDecode':
            case '/Fl':
                if (!function_exists('gzuncompress')) {
                    throw new Exception(
                        sprintf('To handle %s filter, please compile php with zlib support.', $filter[1])
                    );
                }

                $compressed = $stream;
                $stream = (strlen($stream) > 0) ? @gzuncompress($stream) : '';

                if ($stream === false) {
                    // Fallback: drop one leading byte per attempt (8 at most)
                    // and try to inflate the remainder as raw deflate data.
                    $attempts = 0;
                    while ($attempts < 8 && ($stream === false || strlen($stream) < strlen($compressed))) {
                        $compressed = substr($compressed, 1);
                        $stream = @gzinflate($compressed);
                        $attempts++;
                    }

                    if ($stream === false) {
                        throw new Exception('Error while decompressing stream.');
                    }
                }
                break;

            case '/LZWDecode':
                if (!class_exists('FilterLZW')) {
                    require_once('filters/FilterLZW.php');
                }
                $lzw = new FilterLZW();
                $stream = $lzw->decode($stream);
                break;

            case '/ASCII85Decode':
                if (!class_exists('FilterASCII85')) {
                    require_once('filters/FilterASCII85.php');
                }
                $ascii85 = new FilterASCII85();
                $stream = $ascii85->decode($stream);
                break;

            case '/ASCIIHexDecode':
                if (!class_exists('FilterASCIIHexDecode')) {
                    require_once('filters/FilterASCIIHexDecode.php');
                }
                $asciiHex = new FilterASCIIHexDecode();
                $stream = $asciiHex->decode($stream);
                break;

            case null:
                // Entry without a filter name: nothing to decode.
                break;

            default:
                throw new Exception(sprintf('Unsupported Filter: %s', $filter[1]));
        }
    }

    return $stream;
}
858d457d | 934 | } |