source-class-Com.Tecnick.Pdf.Parser.Process.Xref

It appears that you are using AdBlocking software. The cost of running this website is covered by advertisements. If you like it please feel free to donate a small amount of money to secure the future of this website.
  1: <?php
  2: /**
  3:  * Xref.php
  4:  *
  5:  * @since       2011-05-23
  6:  * @category    Library
  7:  * @package     PdfParser
  8:  * @author      Nicola Asuni <info@tecnick.com>
  9:  * @copyright   2011-2016 Nicola Asuni - Tecnick.com LTD
 10:  * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 11:  * @link        https://github.com/tecnickcom/tc-lib-pdf-parser
 12:  *
 13:  * This file is part of tc-lib-pdf-parser software library.
 14:  */
 15: 
 16: namespace Com\Tecnick\Pdf\Parser\Process;
 17: 
 18: use \Com\Tecnick\Pdf\Parser\Exception as PPException;
 19: 
 20: /**
 21:  * Com\Tecnick\Pdf\Parser\Process\Xref
 22:  *
 23:  * Process XREF
 24:  *
 25:  * @since       2011-05-23
 26:  * @category    Library
 27:  * @package     PdfParser
 28:  * @author      Nicola Asuni <info@tecnick.com>
 29:  * @copyright   2011-2015 Nicola Asuni - Tecnick.com LTD
 30:  * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 31:  * @link        https://github.com/tecnickcom/tc-lib-pdf-parser
 32:  */
 33: abstract class Xref extends \Com\Tecnick\Pdf\Parser\Process\XrefStream
 34: {
 35:     /**
 36:      * XREF data.
 37:      *
 38:      * @var array
 39:      */
 40:     protected $xref = array();
 41: 
 42:     /**
 43:      * Store the processed offsets
 44:      *
 45:      * @var array
 46:      */
 47:     protected $mrkoff = array();
 48: 
 49:     /**
 50:      * Get Cross-Reference (xref) table and trailer data from PDF document data.
 51:      *
 52:      * @param int   $offset Xref offset (if know).
 53:      * @param array $xref   Previous xref array (if any).
 54:      *
 55:      * @return array Xref and trailer data.
 56:      */
 57:     protected function getXrefData($offset = 0, $xref = array())
 58:     {
 59:         if (in_array($offset, $this->mrkoff)) {
 60:             throw new PPException('LOOP: this XRef offset has been already processed');
 61:         }
 62:         $this->mrkoff[] = $offset;
 63:         if ($offset == 0) {
 64:             // find last startxref
 65:             if (preg_match_all(
 66:                 '/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i',
 67:                 $this->pdfdata,
 68:                 $matches,
 69:                 PREG_SET_ORDER,
 70:                 $offset
 71:             ) == 0) {
 72:                 throw new PPException('Unable to find startxref');
 73:             }
 74:             $matches = array_pop($matches);
 75:             $startxref = $matches[1];
 76:         } elseif (($pos = strpos($this->pdfdata, 'xref', $offset)) <= ($offset + 4)) {
 77:             // Already pointing at the xref table
 78:             $startxref = $pos;
 79:         } elseif (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
 80:             // Cross-Reference Stream object
 81:             $startxref = $offset;
 82:         } elseif (preg_match(
 83:             '/[\r\n]startxref[\s]*[\r\n]+([0-9]+)[\s]*[\r\n]+%%EOF/i',
 84:             $this->pdfdata,
 85:             $matches,
 86:             PREG_OFFSET_CAPTURE,
 87:             $offset
 88:         )) {
 89:             // startxref found
 90:             $startxref = $matches[1][0];
 91:         } else {
 92:             throw new PPException('Unable to find startxref');
 93:         }
 94:         // check xref position
 95:         if (strpos($this->pdfdata, 'xref', $startxref) == $startxref) {
 96:             // Cross-Reference
 97:             $xref = $this->decodeXref($startxref, $xref);
 98:         } else {
 99:             // Cross-Reference Stream
100:             $xref = $this->decodeXrefStream($startxref, $xref);
101:         }
102:         if (empty($xref)) {
103:             throw new PPException('Unable to find xref');
104:         }
105:         return $xref;
106:     }
107: 
108:     /**
109:      * Decode the Cross-Reference section
110:      *
111:      * @param int   $startxref Offset at which the xref section starts (position of the 'xref' keyword).
112:      * @param array $xref      Previous xref array (if any).
113:      *
114:      * @return array Xref and trailer data.
115:      */
116:     protected function decodeXref($startxref, $xref = array())
117:     {
118:         $startxref += 4; // 4 is the length of the word 'xref'
119:         // skip initial white space chars:
120:         // \x00 null (NUL)
121:         // \x09 horizontal tab (HT)
122:         // \x0A line feed (LF)
123:         // \x0C form feed (FF)
124:         // \x0D carriage return (CR)
125:         // \x20 space (SP)
126:         $offset = $startxref + strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $startxref);
127:         // initialize object number
128:         $obj_num = 0;
129:         // search for cross-reference entries or subsection
130:         while (preg_match(
131:             '/([0-9]+)[\x20]([0-9]+)[\x20]?([nf]?)(\r\n|[\x20]?[\r\n])/',
132:             $this->pdfdata,
133:             $matches,
134:             PREG_OFFSET_CAPTURE,
135:             $offset
136:         ) > 0) {
137:             if ($matches[0][1] != $offset) {
138:                 // we are on another section
139:                 break;
140:             }
141:             $offset += strlen($matches[0][0]);
142:             if ($matches[3][0] == 'n') {
143:                 // create unique object index: [object number]_[generation number]
144:                 $index = $obj_num.'_'.intval($matches[2][0]);
145:                 // check if object already exist
146:                 if (!isset($xref['xref'][$index])) {
147:                     // store object offset position
148:                     $xref['xref'][$index] = intval($matches[1][0]);
149:                 }
150:                 ++$obj_num;
151:             } elseif ($matches[3][0] == 'f') {
152:                 ++$obj_num;
153:             } else {
154:                 // object number (index)
155:                 $obj_num = intval($matches[1][0]);
156:             }
157:         }
158:         // get trailer data
159:         if (!preg_match('/trailer[\s]*<<(.*)>>/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
160:             throw new PPException('Unable to find trailer');
161:         }
162:         return $this->getTrailerData($xref, $matches);
163:     }
164: 
165:     /**
166:      * Decode the Cross-Reference section
167:      *
168:      * @param array $xref    Previous xref array (if any).
169:      * @param array $matches Matches containing traile sections
170:      *
171:      * @return array Xref and trailer data.
172:      */
173:     protected function getTrailerData($xref, $matches)
174:     {
175:         $trailer_data = $matches[1][0];
176:         if (!isset($xref['trailer']) || empty($xref['trailer'])) {
177:             // get only the last updated version
178:             $xref['trailer'] = array();
179:             // parse trailer_data
180:             if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
181:                 $xref['trailer']['size'] = intval($matches[1]);
182:             }
183:             if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
184:                 $xref['trailer']['root'] = intval($matches[1]).'_'.intval($matches[2]);
185:             }
186:             if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
187:                 $xref['trailer']['encrypt'] = intval($matches[1]).'_'.intval($matches[2]);
188:             }
189:             if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
190:                 $xref['trailer']['info'] = intval($matches[1]).'_'.intval($matches[2]);
191:             }
192:             if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
193:                 $xref['trailer']['id'] = array();
194:                 $xref['trailer']['id'][0] = $matches[1];
195:                 $xref['trailer']['id'][1] = $matches[2];
196:             }
197:         }
198:         if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
199:             // get previous xref
200:             $xref = $this->getXrefData(intval($matches[1]), $xref);
201:         }
202:         return $xref;
203:     }
204: 
205:     /**
206:      * Decode the Cross-Reference Stream section
207:      *
208:      * @param int   $startxref Offset at which the xref section starts.
209:      * @param array $xref      Previous xref array (if any).
210:      *
211:      * @return array Xref and trailer data.
212:      */
213:     protected function decodeXrefStream($startxref, $xref = array())
214:     {
215:         // try to read Cross-Reference Stream
216:         $xrefobj = $this->getRawObject($startxref);
217:         $xrefcrs = $this->getIndirectObject($xrefobj[1], $startxref, true);
218:         if (!isset($xref['trailer']) || empty($xref['trailer'])) {
219:             // get only the last updated version
220:             $xref['trailer'] = array();
221:             $filltrailer = true;
222:         } else {
223:             $filltrailer = false;
224:         }
225:         if (!isset($xref['xref'])) {
226:             $xref['xref'] = array();
227:         }
228:         $valid_crs = false;
229:         $columns = 0;
230:         $sarr = $xrefcrs[0][1];
231:         if (!is_array($sarr)) {
232:             $sarr = array();
233:         }
234:         $wbt = array();
235:         $index_first = null;
236:         $prevxref = null;
237:         $this->processXrefType($sarr, $xref, $wbt, $index_first, $prevxref, $columns, $valid_crs, $filltrailer);
238:         // decode data
239:         if ($valid_crs && isset($xrefcrs[1][3][0])) {
240:             // number of bytes in a row
241:             $rowlen = ($columns + 1);
242:             // convert the stream into an array of integers
243:             $sdata = unpack('C*', $xrefcrs[1][3][0]);
244:             // split the rows
245:             $sdata = array_chunk($sdata, $rowlen);
246:             // initialize decoded array
247:             $ddata = array();
248:             // initialize first row with zeros
249:             $prev_row = array_fill(0, $rowlen, 0);
250:             $this->pngUnpredictor($sdata, $ddata, $columns, $prev_row);
251:             // complete decoding
252:             $sdata = array();
253:             $this->processDdata($sdata, $ddata, $wbt);
254:             $ddata = array();
255:             // fill xref
256:             if ($index_first !== null) {
257:                 $obj_num = $index_first;
258:             } else {
259:                 $obj_num = 0;
260:             }
261:             $this->processObjIndexes($xref, $obj_num, $sdata);
262:         } // end decoding data
263:         if ($prevxref != null) {
264:             // get previous xref
265:             $xref = $this->getXrefData($prevxref, $xref);
266:         }
267:         return $xref;
268:     }
269: 
270:     /**
271:      * Process ddata
272:      *
273:      * @param array $sdata
274:      * @param array $ddata
275:      * @param array $wbt
276:      */
277:     protected function processDdata(&$sdata, $ddata, $wbt)
278:     {
279:         // for every row
280:         foreach ($ddata as $key => $row) {
281:             // initialize new row
282:             $sdata[$key] = array(0, 0, 0);
283:             if ($wbt[0] == 0) {
284:                 // default type field
285:                 $sdata[$key][0] = 1;
286:             }
287:             $idx = 0; // count bytes in the row
288:             // for every column
289:             for ($col = 0; $col < 3; ++$col) {
290:                 // for every byte on the column
291:                 for ($byte = 0; $byte < $wbt[$col]; ++$byte) {
292:                     if (isset($row[$idx])) {
293:                         $sdata[$key][$col] += ($row[$idx] << (($wbt[$col] - 1 - $byte) * 8));
294:                     }
295:                     ++$idx;
296:                 }
297:             }
298:         }
299:     }
300: }
301: 
 

© 2004-2017 – Nicola Asuni - Tecnick.com - All rights reserved.
about - disclaimer - privacy