3 // FPDI - Version 1.5.4
\r
5 // Copyright 2004-2015 Setasign - Jan Slabon
\r
7 // Licensed under the Apache License, Version 2.0 (the "License");
\r
8 // you may not use this file except in compliance with the License.
\r
9 // You may obtain a copy of the License at
\r
11 // http://www.apache.org/licenses/LICENSE-2.0
\r
13 // Unless required by applicable law or agreed to in writing, software
\r
14 // distributed under the License is distributed on an "AS IS" BASIS,
\r
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
\r
16 // See the License for the specific language governing permissions and
\r
17 // limitations under the License.
\r
30 const TYPE_NULL = 0;
\r
37 const TYPE_NUMERIC = 1;
\r
44 const TYPE_TOKEN = 2;
\r
58 const TYPE_STRING = 4;
\r
65 const TYPE_DICTIONARY = 5;
\r
72 const TYPE_ARRAY = 6;
\r
79 const TYPE_OBJDEC = 7;
\r
86 const TYPE_OBJREF = 8;
\r
93 const TYPE_OBJECT = 9;
\r
100 const TYPE_STREAM = 10;
\r
107 const TYPE_BOOLEAN = 11;
\r
114 const TYPE_REAL = 12;
\r
117 * Define the number of bytes at the end of a PDF document in which the "startxref" keyword is searched for.
\r
121 static public $searchForStartxrefLength = 5500;
\r
152 * Data of the Root object
\r
159 * PDF version of the loaded document
\r
163 protected $_pdfVersion;
\r
166 * For reading encrypted documents and documents in which xref/object streams are in use
\r
170 protected $_readPlain = true;
\r
173 * The current read object
\r
177 protected $_currentObj;
\r
182 * @param string $filename Source filename
\r
183 * @throws InvalidArgumentException
\r
185 public function __construct($filename)
\r
187 $this->filename = $filename;
\r
189 $this->_f = @fopen($this->filename, 'rb');
\r
192 throw new InvalidArgumentException(sprintf('Cannot open %s !', $filename));
\r
195 $this->getPdfVersion();
\r
197 if (!class_exists('pdf_context')) {
\r
198 require_once('pdf_context.php');
\r
200 $this->_c = new pdf_context($this->_f);
\r
203 $this->_xref = array();
\r
204 $this->_readXref($this->_xref, $this->_findXref());
\r
206 // Check for Encryption
\r
207 $this->getEncryption();
\r
210 $this->_readRoot();
\r
216 public function __destruct()
\r
218 $this->closeFile();
\r
222 * Close the opened file
\r
224 public function closeFile()
\r
226 if (isset($this->_f) && is_resource($this->_f)) {
\r
233 * Check Trailer for Encryption
\r
235 * @throws Exception
\r
237 public function getEncryption()
\r
239 if (isset($this->_xref['trailer'][1]['/Encrypt'])) {
\r
240 throw new Exception('File is encrypted!');
\r
249 public function getPdfVersion()
\r
251 if ($this->_pdfVersion === null) {
\r
252 fseek($this->_f, 0);
\r
253 preg_match('/\d\.\d/', fread($this->_f, 16), $m);
\r
255 $this->_pdfVersion = $m[0];
\r
258 return $this->_pdfVersion;
\r
262 * Read the /Root dictionary
\r
264 protected function _readRoot()
\r
266 if ($this->_xref['trailer'][1]['/Root'][0] != self::TYPE_OBJREF) {
\r
267 throw new Exception('Wrong Type of Root-Element! Must be an indirect reference');
\r
270 $this->_root = $this->resolveObject($this->_xref['trailer'][1]['/Root']);
\r
274 * Find the xref table
\r
277 * @throws Exception
\r
279 protected function _findXref()
\r
281 $toRead = self::$searchForStartxrefLength;
\r
283 $stat = fseek($this->_f, -$toRead, SEEK_END);
\r
284 if ($stat === -1) {
\r
285 fseek($this->_f, 0);
\r
288 $data = fread($this->_f, $toRead);
\r
290 $keywordPos = strpos(strrev($data), strrev('startxref'));
\r
291 if (false === $keywordPos) {
\r
292 $keywordPos = strpos(strrev($data), strrev('startref'));
\r
295 if (false === $keywordPos) {
\r
296 throw new Exception('Unable to find "startxref" keyword.');
\r
299 $pos = strlen($data) - $keywordPos;
\r
300 $data = substr($data, $pos);
\r
302 if (!preg_match('/\s*(\d+).*$/s', $data, $matches)) {
\r
303 throw new Exception('Unable to find pointer to xref table.');
\r
306 return (int) $matches[1];
\r
310 * Read the xref table
\r
312 * @param array $result Array of xref table entries
\r
313 * @param integer $offset Byte offset of the xref table
\r
315 * @throws Exception
\r
317 protected function _readXref(&$result, $offset)
\r
319 $tempPos = $offset - min(20, $offset);
\r
320 fseek($this->_f, $tempPos); // set some bytes backwards to fetch corrupted docs
\r
322 $data = fread($this->_f, 100);
\r
324 $xrefPos = strrpos($data, 'xref');
\r
326 if ($xrefPos === false) {
\r
327 $this->_c->reset($offset);
\r
328 $xrefStreamObjDec = $this->_readValue($this->_c);
\r
330 if (is_array($xrefStreamObjDec) && isset($xrefStreamObjDec[0]) && $xrefStreamObjDec[0] == self::TYPE_OBJDEC) {
\r
331 throw new Exception(
\r
333 'This document (%s) probably uses a compression technique which is not supported by the ' .
\r
334 'free parser shipped with FPDI. (See https://www.setasign.com/fpdi-pdf-parser for more details)',
\r
339 throw new Exception('Unable to find xref table.');
\r
343 if (!isset($result['xrefLocation'])) {
\r
344 $result['xrefLocation'] = $tempPos + $xrefPos;
\r
345 $result['maxObject'] = 0;
\r
349 $bytesPerCycle = 100;
\r
351 fseek($this->_f, $tempPos = $tempPos + $xrefPos + 4); // set the handle directly after the "xref"-keyword
\r
352 $data = fread($this->_f, $bytesPerCycle);
\r
354 while (($trailerPos = strpos($data, 'trailer', max($bytesPerCycle * $cycles++, 0))) === false && !feof($this->_f)) {
\r
355 $data .= fread($this->_f, $bytesPerCycle);
\r
358 if ($trailerPos === false) {
\r
359 throw new Exception('Trailer keyword not found after xref table');
\r
362 $data = ltrim(substr($data, 0, $trailerPos));
\r
365 $found = preg_match_all("/(\r\n|\n|\r)/", substr($data, 0, 100), $m); // check the first 100 bytes for line breaks
\r
366 if ($found === 0) {
\r
367 throw new Exception('Xref table seems to be corrupted.');
\r
369 $differentLineEndings = count(array_unique($m[0]));
\r
370 if ($differentLineEndings > 1) {
\r
371 $lines = preg_split("/(\r\n|\n|\r)/", $data, -1, PREG_SPLIT_NO_EMPTY);
\r
373 $lines = explode($m[0][0], $data);
\r
376 $data = $differentLineEndings = $m = null;
\r
377 unset($data, $differentLineEndings, $m);
\r
379 $linesCount = count($lines);
\r
383 for ($i = 0; $i < $linesCount; $i++) {
\r
384 $line = trim($lines[$i]);
\r
386 $pieces = explode(' ', $line);
\r
387 $c = count($pieces);
\r
390 $start = (int)$pieces[0];
\r
391 $end = $start + (int)$pieces[1];
\r
392 if ($end > $result['maxObject'])
\r
393 $result['maxObject'] = $end;
\r
396 if (!isset($result['xref'][$start]))
\r
397 $result['xref'][$start] = array();
\r
399 if (!array_key_exists($gen = (int) $pieces[1], $result['xref'][$start])) {
\r
400 $result['xref'][$start][$gen] = $pieces[2] == 'n' ? (int) $pieces[0] : null;
\r
405 throw new Exception('Unexpected data in xref table');
\r
410 $lines = $pieces = $line = $start = $end = $gen = null;
\r
411 unset($lines, $pieces, $line, $start, $end, $gen);
\r
413 $this->_c->reset($tempPos + $trailerPos + 7);
\r
414 $trailer = $this->_readValue($this->_c);
\r
416 if (!isset($result['trailer'])) {
\r
417 $result['trailer'] = $trailer;
\r
420 if (isset($trailer[1]['/Prev'])) {
\r
421 $this->_readXref($result, $trailer[1]['/Prev'][1]);
\r
431 * Reads a PDF value
\r
433 * @param pdf_context $c
\r
434 * @param string $token A token
\r
436 * @throws Exception
\r
438 protected function _readValue(&$c, $token = null)
\r
440 if (is_null($token)) {
\r
441 $token = $this->_readToken($c);
\r
444 if ($token === false) {
\r
450 // This is a hex string.
\r
451 // Read the value, then the terminator
\r
457 $match = strpos($c->buffer, '>', $pos);
\r
459 // If you can't find it, try
\r
460 // reading more data from the stream
\r
462 if ($match === false) {
\r
463 if (!$c->increaseLength()) {
\r
470 $result = substr($c->buffer, $c->offset, $match - $c->offset);
\r
471 $c->offset = $match + 1;
\r
473 return array (self::TYPE_HEX, $result);
\r
478 // This is a dictionary.
\r
482 // Recurse into this function until we reach
\r
483 // the end of the dictionary.
\r
484 while (($key = $this->_readToken($c)) !== '>>') {
\r
485 if ($key === false) {
\r
489 if (($value = $this->_readValue($c)) === false) {
\r
493 // Catch missing value
\r
494 if ($value[0] == self::TYPE_TOKEN && $value[1] == '>>') {
\r
495 $result[$key] = array(self::TYPE_NULL);
\r
499 $result[$key] = $value;
\r
502 return array (self::TYPE_DICTIONARY, $result);
\r
505 // This is an array.
\r
509 // Recurse into this function until we reach
\r
510 // the end of the array.
\r
511 while (($token = $this->_readToken($c)) !== ']') {
\r
512 if ($token === false) {
\r
516 if (($value = $this->_readValue($c, $token)) === false) {
\r
520 $result[] = $value;
\r
523 return array (self::TYPE_ARRAY, $result);
\r
526 // This is a string
\r
531 for (; $openBrackets != 0 && $pos < $c->length; $pos++) {
\r
532 switch (ord($c->buffer[$pos])) {
\r
539 case 0x5C: // backslash
\r
543 } while($openBrackets != 0 && $c->increaseLength());
\r
545 $result = substr($c->buffer, $c->offset, $pos - $c->offset - 1);
\r
548 return array (self::TYPE_STRING, $result);
\r
551 $tempPos = $c->getPos() - strlen($c->buffer);
\r
552 $tempOffset = $c->offset;
\r
554 $c->reset($startPos = $tempPos + $tempOffset);
\r
556 // Find the first "newline"
\r
557 while ($c->buffer[0] !== chr(10) && $c->buffer[0] !== chr(13)) {
\r
558 $c->reset(++$startPos);
\r
559 if ($c->ensureContent() === false) {
\r
560 throw new Exception(
\r
561 'Unable to parse stream data. No newline followed the stream keyword.'
\r
566 $e = 0; // ensure line breaks in front of the stream
\r
567 if ($c->buffer[0] == chr(10) || $c->buffer[0] == chr(13))
\r
569 if ($c->buffer[1] == chr(10) && $c->buffer[0] != chr(10))
\r
572 if ($this->_currentObj[1][1]['/Length'][0] == self::TYPE_OBJREF) {
\r
573 $tmpLength = $this->resolveObject($this->_currentObj[1][1]['/Length']);
\r
574 $length = $tmpLength[1][1];
\r
576 $length = $this->_currentObj[1][1]['/Length'][1];
\r
580 $c->reset($startPos + $e, $length);
\r
586 $c->reset($startPos + $e + $length);
\r
587 $endstream = $this->_readToken($c);
\r
589 if ($endstream != 'endstream') {
\r
590 $c->reset($startPos + $e + $length + 9); // 9 = strlen("endstream")
\r
591 // We don't throw an error here because the next
\r
592 // round trip will start at a new offset
\r
595 return array(self::TYPE_STREAM, $v);
\r
598 if (is_numeric($token)) {
\r
599 // A numeric token. Make sure that
\r
600 // it is not part of something else.
\r
601 if (($tok2 = $this->_readToken($c)) !== false) {
\r
602 if (is_numeric($tok2)) {
\r
604 // Two numeric tokens in a row.
\r
605 // In this case, we're probably in
\r
606 // front of either an object reference
\r
607 // or an object specification.
\r
608 // Determine the case and return the data
\r
609 if (($tok3 = $this->_readToken($c)) !== false) {
\r
612 return array(self::TYPE_OBJDEC, (int)$token, (int)$tok2);
\r
614 return array(self::TYPE_OBJREF, (int)$token, (int)$tok2);
\r
616 // If we get to this point, that numeric value up
\r
617 // there was just a numeric value. Push the extra
\r
618 // tokens back into the stack and return the value.
\r
619 array_push($c->stack, $tok3);
\r
623 array_push($c->stack, $tok2);
\r
626 if ($token === (string)((int)$token))
\r
627 return array(self::TYPE_NUMERIC, (int)$token);
\r
629 return array(self::TYPE_REAL, (float)$token);
\r
630 } else if ($token == 'true' || $token == 'false') {
\r
631 return array(self::TYPE_BOOLEAN, $token == 'true');
\r
632 } else if ($token == 'null') {
\r
633 return array(self::TYPE_NULL);
\r
635 // Just a token. Return it.
\r
636 return array(self::TYPE_TOKEN, $token);
\r
642 * Resolve an object
\r
644 * @param array $objSpec The object-data
\r
645 * @return array|boolean
\r
646 * @throws Exception
\r
648 public function resolveObject($objSpec)
\r
652 // Exit if we get invalid data
\r
653 if (!is_array($objSpec)) {
\r
657 if ($objSpec[0] == self::TYPE_OBJREF) {
\r
659 // This is a reference, resolve it
\r
660 if (isset($this->_xref['xref'][$objSpec[1]][$objSpec[2]])) {
\r
662 // Save current file position
\r
663 // This is needed if you want to resolve
\r
664 // references while you're reading another object
\r
665 // (e.g.: if you need to determine the length
\r
668 $oldPos = $c->getPos();
\r
670 // Reposition the file pointer and
\r
671 // load the object header.
\r
673 $c->reset($this->_xref['xref'][$objSpec[1]][$objSpec[2]]);
\r
675 $header = $this->_readValue($c);
\r
677 if ($header[0] != self::TYPE_OBJDEC || $header[1] != $objSpec[1] || $header[2] != $objSpec[2]) {
\r
678 $toSearchFor = $objSpec[1] . ' ' . $objSpec[2] . ' obj';
\r
679 if (preg_match('/' . $toSearchFor . '/', $c->buffer)) {
\r
680 $c->offset = strpos($c->buffer, $toSearchFor) + strlen($toSearchFor);
\r
682 $c->stack = array();
\r
684 throw new Exception(
\r
685 sprintf("Unable to find object (%s, %s) at expected location.", $objSpec[1], $objSpec[2])
\r
690 // If we're being asked to store all the information
\r
691 // about the object, we add the object ID and generation
\r
692 // number for later use
\r
695 'obj' => $objSpec[1],
\r
696 'gen' => $objSpec[2]
\r
699 $this->_currentObj =& $result;
\r
701 // Now simply read the object data until
\r
702 // we encounter an end-of-object marker
\r
704 $value = $this->_readValue($c);
\r
705 if ($value === false || count($result) > 4) {
\r
706 // in this case the parser couldn't find an "endobj" so we break here
\r
710 if ($value[0] == self::TYPE_TOKEN && $value[1] === 'endobj') {
\r
714 $result[] = $value;
\r
717 $c->reset($oldPos);
\r
719 if (isset($result[2][0]) && $result[2][0] == self::TYPE_STREAM) {
\r
720 $result[0] = self::TYPE_STREAM;
\r
724 throw new Exception(
\r
725 sprintf("Unable to find object (%s, %s) at expected location.", $objSpec[1], $objSpec[2])
\r
736 * Reads a token from the context
\r
738 * @param pdf_context $c
\r
741 protected function _readToken($c)
\r
743 // If there is a token available
\r
744 // on the stack, pop it out and
\r
747 if (count($c->stack)) {
\r
748 return array_pop($c->stack);
\r
751 // Strip away any whitespace
\r
754 if (!$c->ensureContent()) {
\r
757 $c->offset += strspn($c->buffer, "\x20\x0A\x0C\x0D\x09\x00", $c->offset);
\r
758 } while ($c->offset >= $c->length - 1);
\r
760 // Get the first character in the stream
\r
762 $char = $c->buffer[$c->offset++];
\r
771 // This is either an array or literal string
\r
772 // delimiter, Return it
\r
779 // This could either be a hex string or
\r
780 // dictionary delimiter. Determine the
\r
781 // appropriate case and return the token
\r
783 if ($c->buffer[$c->offset] == $char) {
\r
784 if (!$c->ensureContent()) {
\r
788 return $char . $char;
\r
795 // This is a comment - jump over it!
\r
799 $match = preg_match("/(\r\n|\r|\n)/", $c->buffer, $m, PREG_OFFSET_CAPTURE, $pos);
\r
800 if ($match === 0) {
\r
801 if (!$c->increaseLength()) {
\r
808 $c->offset = $m[0][1] + strlen($m[0][0]);
\r
810 return $this->_readToken($c);
\r
815 // This is "another" type of token (probably
\r
816 // a dictionary entry or a numeric value)
\r
817 // Find the end and return it.
\r
819 if (!$c->ensureContent()) {
\r
825 // Determine the length of the token
\r
827 $pos = strcspn($c->buffer, "\x20%[]<>()/\x0A\x0C\x0D\x09\x00", $c->offset);
\r
829 if ($c->offset + $pos <= $c->length - 1) {
\r
832 // If the script reaches this point,
\r
833 // the token may span beyond the end
\r
834 // of the current buffer. Therefore,
\r
835 // we increase the size of the buffer
\r
836 // and try again--just to be safe.
\r
838 $c->increaseLength();
\r
842 $result = substr($c->buffer, $c->offset - 1, $pos + 1);
\r
844 $c->offset += $pos;
\r
851 * Un-filter a stream object
\r
853 * @param array $obj
\r
855 * @throws Exception
\r
857 protected function _unFilterStream($obj)
\r
859 $filters = array();
\r
861 if (isset($obj[1][1]['/Filter'])) {
\r
862 $filter = $obj[1][1]['/Filter'];
\r
864 if ($filter[0] == pdf_parser::TYPE_OBJREF) {
\r
865 $tmpFilter = $this->resolveObject($filter);
\r
866 $filter = $tmpFilter[1];
\r
869 if ($filter[0] == pdf_parser::TYPE_TOKEN) {
\r
870 $filters[] = $filter;
\r
871 } else if ($filter[0] == pdf_parser::TYPE_ARRAY) {
\r
872 $filters = $filter[1];
\r
876 $stream = $obj[2][1];
\r
878 foreach ($filters AS $filter) {
\r
879 switch ($filter[1]) {
\r
880 case '/FlateDecode':
\r
882 if (function_exists('gzuncompress')) {
\r
883 $oStream = $stream;
\r
884 $stream = (strlen($stream) > 0) ? @gzuncompress($stream) : '';
\r
886 throw new Exception(
\r
887 sprintf('To handle %s filter, please compile php with zlib support.', $filter[1])
\r
891 if ($stream === false) {
\r
893 while ($tries < 8 && ($stream === false || strlen($stream) < strlen($oStream))) {
\r
894 $oStream = substr($oStream, 1);
\r
895 $stream = @gzinflate($oStream);
\r
899 if ($stream === false) {
\r
900 throw new Exception('Error while decompressing stream.');
\r
905 if (!class_exists('FilterLZW')) {
\r
906 require_once('filters/FilterLZW.php');
\r
908 $decoder = new FilterLZW();
\r
909 $stream = $decoder->decode($stream);
\r
911 case '/ASCII85Decode':
\r
912 if (!class_exists('FilterASCII85')) {
\r
913 require_once('filters/FilterASCII85.php');
\r
915 $decoder = new FilterASCII85();
\r
916 $stream = $decoder->decode($stream);
\r
918 case '/ASCIIHexDecode':
\r
919 if (!class_exists('FilterASCIIHexDecode')) {
\r
920 require_once('filters/FilterASCIIHexDecode.php');
\r
922 $decoder = new FilterASCIIHexDecode();
\r
923 $stream = $decoder->decode($stream);
\r
928 throw new Exception(sprintf('Unsupported Filter: %s', $filter[1]));
\r