source-class-Com.Tecnick.Pdf.Parser.Parser

It appears that you are using AdBlocking software. The cost of running this website is covered by advertisements. If you like it, please feel free to donate a small amount of money to secure the future of this website.
  1:   2:   3:   4:   5:   6:   7:   8:   9:  10:  11:  12:  13:  14:  15:  16:  17:  18:  19:  20:  21:  22:  23:  24:  25:  26:  27:  28:  29:  30:  31:  32:  33:  34:  35:  36:  37:  38:  39:  40:  41:  42:  43:  44:  45:  46:  47:  48:  49:  50:  51:  52:  53:  54:  55:  56:  57:  58:  59:  60:  61:  62:  63:  64:  65:  66:  67:  68:  69:  70:  71:  72:  73:  74:  75:  76:  77:  78:  79:  80:  81:  82:  83:  84:  85:  86:  87:  88:  89:  90:  91:  92:  93:  94:  95:  96:  97:  98:  99: 100: 101: 102: 103: 104: 105: 106: 107: 108: 109: 110: 111: 112: 113: 114: 115: 116: 117: 118: 119: 120: 121: 122: 123: 124: 125: 126: 127: 128: 129: 130: 131: 132: 133: 134: 135: 136: 137: 138: 139: 140: 141: 142: 143: 144: 145: 146: 147: 148: 149: 150: 151: 152: 153: 154: 155: 156: 157: 158: 159: 160: 161: 162: 163: 164: 165: 166: 167: 168: 169: 170: 171: 172: 173: 174: 175: 176: 177: 178: 179: 180: 181: 182: 183: 184: 185: 186: 187: 188: 189: 190: 191: 192: 193: 194: 195: 196: 197: 198: 199: 200: 201: 202: 203: 204: 205: 206: 207: 208: 209: 210: 211: 212: 213: 214: 215: 216: 217: 218: 219: 220: 221: 222: 223: 224: 225: 226: 227: 228: 229: 230: 231: 232: 233: 234: 235: 236: 237: 238: 239: 240: 241: 242: 243: 244: 245: 246: 247: 248: 249: 250: 251: 252: 253: 254: 255: 256: 257: 258: 259: 260: 261: 262: 263: 264: 265: 266: 267: 268: 269: 270: 271: 272: 273: 274: 275: 276: 277: 278: 279: 280: 281: 282: 283: 284: 285: 286: 287: 288: 289: 290: 291: 
<?php
/**
 * Parser.php
 *
 * @since       2011-05-23
 * @category    Library
 * @package     PdfParser
 * @author      Nicola Asuni <info@tecnick.com>
 * @copyright   2011-2015 Nicola Asuni - Tecnick.com LTD
 * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 * @link        https://github.com/tecnickcom/tc-lib-pdf-parser
 *
 * This file is part of tc-lib-pdf-parser software library.
 */

namespace Com\Tecnick\Pdf\Parser;

use \Com\Tecnick\Pdf\Parser\Exception as PPException;

/**
 * Com\Tecnick\Pdf\Parser\Parser
 *
 * PHP class for parsing PDF documents.
 *
 * @since       2011-05-23
 * @category    Library
 * @package     PdfParser
 * @author      Nicola Asuni <info@tecnick.com>
 * @copyright   2011-2015 Nicola Asuni - Tecnick.com LTD
 * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 * @link        https://github.com/tecnickcom/tc-lib-pdf-parser
 */
class Parser extends \Com\Tecnick\Pdf\Parser\Process\Xref
{
    /**
     * Raw content of the PDF document.
     *
     * Only holds data while parse() is running; it is reset to an empty string afterwards.
     *
     * @var string
     */
    protected $pdfdata = '';

    /**
     * Array of parsed PDF objects, indexed by object reference
     * (object number and generation number separated by an underscore).
     *
     * @var array
     */
    protected $objects = array();

    /**
     * Array of configuration parameters.
     *
     * @var array
     */
    private $cfg = array(
        'ignore_filter_errors'  => false,
    );

    /**
     * Initialize the PDF parser
     *
     * @param array $cfg   Array of configuration parameters:
     *          'ignore_filter_errors' : if true, a filter exception raised while decoding a stream
     *                                   is not propagated; the stream is returned undecoded together
     *                                   with the list of filters that could not be applied.
     */
    public function __construct($cfg = array())
    {
        if (isset($cfg['ignore_filter_errors'])) {
            $this->cfg['ignore_filter_errors'] = (bool)$cfg['ignore_filter_errors'];
        }
    }

    /**
     * Parse a PDF document into an array of objects
     *
     * @param string $data PDF data to parse.
     *
     * @return array Two-element array: the xref data (index 0) and the array of parsed objects (index 1).
     *
     * @throws PPException if the data is empty or does not contain a '%PDF-' header.
     */
    public function parse($data)
    {
        if (empty($data)) {
            throw new PPException('Empty PDF data.');
        }
        // find the pdf header starting position
        if (($trimpos = strpos($data, '%PDF-')) === false) {
            throw new PPException('Invalid PDF data: missing %PDF header.');
        }
        // get PDF content string (anything before the header is discarded)
        $this->pdfdata = substr($data, $trimpos);
        // get xref and trailer data
        $this->xref = $this->getXrefData();
        // parse all document objects
        $this->objects = array();
        foreach ($this->xref['xref'] as $obj => $offset) {
            if (!isset($this->objects[$obj]) && ($offset > 0)) {
                // decode objects with positive offset
                $this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
            }
        }
        // release some memory; the property is reset instead of unset so that it stays
        // defined (as an empty string) for any later access
        $this->pdfdata = '';
        return array($this->xref, $this->objects);
    }

    /**
     * Get content of indirect object.
     *
     * @param string $obj_ref  Object number and generation number separated by underscore character.
     * @param int    $offset   Object offset.
     * @param bool   $decoding If true decode streams.
     *
     * @return array Object data, or array('null', 'null', $offset) when no matching
     *               object header is found at the given offset.
     *
     * @throws PPException if the object reference is not in the "number_generation" format.
     */
    protected function getIndirectObject($obj_ref, $offset = 0, $decoding = true)
    {
        $obj = explode('_', $obj_ref);
        if (count($obj) != 2) {
            // report the original reference: $obj is an array and cannot be concatenated
            throw new PPException('Invalid object reference: '.$obj_ref);
        }
        $offset = (int)$offset;
        if (($offset < 0) || ($offset >= strlen($this->pdfdata))) {
            // an offset outside the document cannot point to an object (and would make
            // strpos() fail on recent PHP versions): treat it as a reference to the null object
            return array('null', 'null', $offset);
        }
        $objref = $obj[0].' '.$obj[1].' obj';
        // ignore leading zeros
        $offset += strspn($this->pdfdata, '0', $offset);
        // strict comparison is required: strpos() returns false when nothing is found,
        // and (false != 0) evaluates to false
        if (strpos($this->pdfdata, $objref, $offset) !== $offset) {
            // an indirect reference to an undefined object shall be considered a reference to the null object
            return array('null', 'null', $offset);
        }
        // starting position of object content
        $offset += strlen($objref);
        // return raw object content
        return $this->getRawIndirectObject($offset, $decoding);
    }

    /**
     * Get the raw elements of an indirect object, starting right after its "N G obj" header.
     *
     * Elements are read with getRawObject() until an 'endobj' element is found
     * or the offset stops advancing.
     *
     * @param int  $offset   Offset of the first element of the object content.
     * @param bool $decoding If true decode streams.
     *
     * @return array List of elements as returned by getRawObject(); when decoding is enabled,
     *               a 'stream' element that follows a dictionary gets the result of
     *               decodeStream() at index 3.
     */
    protected function getRawIndirectObject($offset, $decoding)
    {
        // get array of object content
        $objdata = array();
        $idx = 0; // object main index
        do {
            $oldoffset = $offset;
            // get element
            $element = $this->getRawObject($offset);
            // index 2 of the element holds the offset to continue reading from
            $offset = $element[2];
            // decode stream using stream's dictionary information
            // (the dictionary is the element immediately preceding the stream)
            if ($decoding
                && ($element[0] == 'stream')
                && (isset($objdata[($idx - 1)][0]))
                && ($objdata[($idx - 1)][0] == '<<')
            ) {
                $element[3] = $this->decodeStream($objdata[($idx - 1)][1], $element[1]);
            }
            $objdata[$idx] = $element;
            ++$idx;
        } while (($element[0] != 'endobj') && ($offset != $oldoffset));
        // remove closing delimiter (the last element read)
        array_pop($objdata);
        // return raw object content
        return $objdata;
    }

    /**
     * Get the content of object, resolving indirect object reference if necessary.
     *
     * @param array $obj Object element; index 0 is the element type and, for an 'objref'
     *                   element, index 1 is the object reference.
     *
     * @return array Object data: the referenced indirect object when the reference can be
     *               resolved, otherwise the element itself.
     */
    protected function getObjectVal($obj)
    {
        if ($obj[0] == 'objref') {
            // reference to indirect object
            if (isset($this->objects[$obj[1]])) {
                // this object has been already parsed
                return $this->objects[$obj[1]];
            } elseif (isset($this->xref['xref'][$obj[1]])) {
                // parse new object: the object offsets are stored under the 'xref' key
                $this->objects[$obj[1]] = $this->getIndirectObject($obj[1], $this->xref['xref'][$obj[1]], false);
                return $this->objects[$obj[1]];
            }
        }
        return $obj;
    }

    /**
     * Decode the specified stream.
     *
     * The stream dictionary is scanned for the 'Length' and 'Filter' keys: the stream is
     * truncated to a shorter declared length and then decoded with the listed filters.
     *
     * @param array  $sdic   Stream's dictionary array.
     * @param string $stream Stream to decode.
     *
     * @return array Decoded stream data and remaining filters.
     */
    protected function decodeStream($sdic, $stream)
    {
        // get stream length and filters
        $slength = strlen($stream);
        if ($slength <= 0) {
            return array('', array());
        }
        $filters = array();
        // the dictionary is a flat list: the value of a key is the element that follows it
        foreach ($sdic as $key => $val) {
            if ($val[0] == '/') {
                if (($val[1] == 'Length') && (isset($sdic[($key + 1)])) && ($sdic[($key + 1)][0] == 'numeric')) {
                    // get declared stream length
                    $this->getDeclaredStreamLength($stream, $slength, $sdic, $key);
                } elseif (($val[1] == 'Filter') && (isset($sdic[($key + 1)]))) {
                    $filters = $this->getFilters($filters, $sdic, $key);
                }
            }
        }
        return $this->getDecodedStream($filters, $stream);
    }

    /**
     * Truncate the stream to its declared length.
     *
     * The stream and its length are updated in place, and only when the declared length
     * is a non-negative value shorter than the current stream length.
     *
     * @param string $stream  Stream (passed by reference)
     * @param int    $slength Stream length (passed by reference)
     * @param array  $sdic    Stream's dictionary array.
     * @param int    $key     Index of the 'Length' key in the dictionary array.
     */
    protected function getDeclaredStreamLength(&$stream, &$slength, $sdic, $key)
    {
        // get declared stream length
        $declength = intval($sdic[($key + 1)][1]);
        // a negative length is invalid: it would make substr() cut bytes from the end
        if (($declength >= 0) && ($declength < $slength)) {
            $stream = substr($stream, 0, $declength);
            $slength = $declength;
        }
    }

    /**
     * Get Filters
     *
     * Appends to the given list the filter names found in the value of a 'Filter' key,
     * which may be a single name or an array of names.
     *
     * @param array $filters Array of Filters
     * @param array $sdic    Stream's dictionary array.
     * @param int   $key     Index of the 'Filter' key in the dictionary array.
     *
     * @return array Array of filters
     */
    protected function getFilters($filters, $sdic, $key)
    {
        // resolve indirect object
        $objval = $this->getObjectVal($sdic[($key + 1)]);
        if (isset($objval[0]) && is_array($objval[0])) {
            // a resolved indirect object is a list of elements: the filter is its first element
            $objval = $objval[0];
        }
        if ($objval[0] == '/') {
            // single filter
            $filters[] = $objval[1];
        } elseif ($objval[0] == '[') {
            // array of filters
            foreach ($objval[1] as $flt) {
                if ($flt[0] == '/') {
                    $filters[] = $flt[1];
                }
            }
        }
        return $filters;
    }

    /**
     * Apply the decoding filters to the specified stream.
     *
     * @param array  $filters Array of decoding filters to apply
     * @param string $stream  Stream to decode.
     *
     * @return array Stream data (index 0) and filters that could not be applied (index 1).
     *               When a filter exception is ignored, the stream is returned undecoded
     *               together with the full list of filters.
     *
     * @throws PPException if a filter exception is raised and 'ignore_filter_errors' is false.
     */
    protected function getDecodedStream($filters, $stream)
    {
        // decode the stream
        $errorfilters = array();
        try {
            $filter = new \Com\Tecnick\Pdf\Filter\Filter;
            $stream = $filter->decodeAll($filters, $stream);
        } catch (\Com\Tecnick\Pdf\Filter\Exception $e) {
            if ($this->cfg['ignore_filter_errors']) {
                // $stream still holds the original data: report all filters as not applied
                $errorfilters = $filters;
            } else {
                throw new PPException($e->getMessage());
            }
        }
        return array($stream, $errorfilters);
    }
}
 

© 2004-2017 – Nicola Asuni - Tecnick.com - All rights reserved.
about - disclaimer - privacy