source-class-Com.Tecnick.Pdf.Parser.Process.RawObject

It appears that you are using ad-blocking software. The cost of running this website is covered by advertisements. If you like it, please feel free to donate a small amount of money to secure the future of this website.
  1: <?php
  2: /**
  3:  * RawObject.php
  4:  *
  5:  * @since       2011-05-23
  6:  * @category    Library
  7:  * @package     PdfParser
  8:  * @author      Nicola Asuni <info@tecnick.com>
  9:  * @copyright   2011-2015 Nicola Asuni - Tecnick.com LTD
 10:  * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 11:  * @link        https://github.com/tecnickcom/tc-lib-pdf-parser
 12:  *
 13:  * This file is part of tc-lib-pdf-parser software library.
 14:  */
 15: 
 16: namespace Com\Tecnick\Pdf\Parser\Process;
 17: 
 18: use \Com\Tecnick\Pdf\Parser\Exception as PPException;
 19: 
 20: /**
 21:  * Com\Tecnick\Pdf\Parser\Process\RawObject
 22:  *
 23:  * Process Raw Objects
 24:  *
 25:  * @since       2011-05-23
 26:  * @category    Library
 27:  * @package     PdfParser
 28:  * @author      Nicola Asuni <info@tecnick.com>
 29:  * @copyright   2011-2015 Nicola Asuni - Tecnick.com LTD
 30:  * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 31:  * @link        https://github.com/tecnickcom/tc-lib-pdf-parser
 32:  */
 33: abstract class RawObject
 34: {
 35:     /**
 36:      * Get object type, raw value and offset to next object
 37:      *
 38:      * @param int $offset Object offset.
 39:      *
 40:      * @return array Array containing object type, raw value and offset to next object
 41:      */
 42:     protected function getRawObject($offset = 0)
 43:     {
 44:         $objtype = ''; // object type to be returned
 45:         $objval = ''; // object value to be returned
 46:         // skip initial white space chars:
 47:         // \x00 null (NUL)
 48:         // \x09 horizontal tab (HT)
 49:         // \x0A line feed (LF)
 50:         // \x0C form feed (FF)
 51:         // \x0D carriage return (CR)
 52:         // \x20 space (SP)
 53:         $offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
 54:         // get first char
 55:         $char = $this->pdfdata[$offset];
 56:         if ($char == '%') { // \x25 PERCENT SIGN
 57:             // skip comment and search for next token
 58:             $next = strcspn($this->pdfdata, "\r\n", $offset);
 59:             if ($next > 0) {
 60:                 $offset += $next;
 61:                 return $this->getRawObject($offset);
 62:             }
 63:         }
 64:         // map symbols with corresponding processing methods
 65:         $map = array(
 66:             '/' => 'Solidus',     // \x2F SOLIDUS
 67:             '(' => 'Parenthesis', // \x28 LEFT PARENTHESIS
 68:             ')' => 'Parenthesis', // \x29 RIGHT PARENTHESIS
 69:             '[' => 'Bracket',     // \x5B LEFT SQUARE BRACKET
 70:             ']' => 'Bracket',     // \x5D RIGHT SQUARE BRACKET
 71:             '<' => 'Angular',     // \x3C LESS-THAN SIGN
 72:             '>' => 'Angular',     // \x3E GREATER-THAN SIGN
 73:         );
 74:         if (isset($map[$char])) {
 75:             $method = 'process'.$map[$char];
 76:             $this->$method($char, $offset, $objtype, $objval);
 77:         } else {
 78:             if ($this->processDefaultName($offset, $objtype, $objval) === false) {
 79:                 $this->processDefault($offset, $objtype, $objval);
 80:             }
 81:         }
 82:         return array($objtype, $objval, $offset);
 83:     }
 84: 
 85:     /**
 86:      * Process name object
 87:      * \x2F SOLIDUS
 88:      *
 89:      * @param string $char    Symbol to process
 90:      * @param int    $offset  Offset
 91:      * @param string $objtype Object type
 92:      * @param string $objval  Object content
 93:      */
 94:     protected function processSolidus($char, &$offset, &$objtype, &$objval)
 95:     {
 96:         $objtype = $char;
 97:         ++$offset;
 98:         if (preg_match(
 99:             '/^([^\x00\x09\x0a\x0c\x0d\x20\s\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25]+)/',
100:             substr($this->pdfdata, $offset, 256),
101:             $matches
102:         ) == 1
103:         ) {
104:             $objval = $matches[1]; // unescaped value
105:             $offset += strlen($objval);
106:         }
107:     }
108: 
109:     /**
110:      * Process literal string object
111:      * \x28 LEFT PARENTHESIS and \x29 RIGHT PARENTHESIS
112:      *
113:      * @param string $char    Symbol to process
114:      * @param int    $offset  Offset
115:      * @param string $objtype Object type
116:      * @param string $objval  Object content
117:      */
118:     protected function processParenthesis($char, &$offset, &$objtype, &$objval)
119:     {
120:         $objtype = $char;
121:         ++$offset;
122:         $strpos = $offset;
123:         if ($char == '(') {
124:             $open_bracket = 1;
125:             while ($open_bracket > 0) {
126:                 if (!isset($this->pdfdata[$strpos])) {
127:                     break;
128:                 }
129:                 $chr = $this->pdfdata[$strpos];
130:                 switch ($chr) {
131:                     case '\\':
132:                         // REVERSE SOLIDUS (5Ch) (Backslash)
133:                         // skip next character
134:                         ++$strpos;
135:                         break;
136:                     case '(':
137:                         // LEFT PARENHESIS (28h)
138:                         ++$open_bracket;
139:                         break;
140:                     case ')':
141:                         // RIGHT PARENTHESIS (29h)
142:                         --$open_bracket;
143:                         break;
144:                 }
145:                 ++$strpos;
146:             }
147:             $objval = substr($this->pdfdata, $offset, ($strpos - $offset - 1));
148:             $offset = $strpos;
149:         }
150:     }
151: 
152:     /**
153:      * Process array content
154:      * \x5B LEFT SQUARE BRACKET and \x5D RIGHT SQUARE BRACKET
155:      *
156:      * @param string $char    Symbol to process
157:      * @param int    $offset  Offset
158:      * @param string $objtype Object type
159:      * @param string $objval  Object content
160:      */
161:     protected function processBracket($char, &$offset, &$objtype, &$objval)
162:     {
163:         // array object
164:         $objtype = $char;
165:         ++$offset;
166:         if ($char == '[') {
167:             // get array content
168:             $objval = array();
169:             do {
170:                 // get element
171:                 $element = $this->getRawObject($offset);
172:                 $offset = $element[2];
173:                 $objval[] = $element;
174:             } while ($element[0] != ']');
175:             // remove closing delimiter
176:             array_pop($objval);
177:         }
178:     }
179: 
180:     /**
181:      * Process \x3C LESS-THAN SIGN and \x3E GREATER-THAN SIGN
182:      *
183:      * @param string $char    Symbol to process
184:      * @param int    $offset  Offset
185:      * @param string $objtype Object type
186:      * @param string $objval  Object content
187:      */
188:     protected function processAngular($char, &$offset, &$objtype, &$objval)
189:     {
190:         if (isset($this->pdfdata[($offset + 1)]) && ($this->pdfdata[($offset + 1)] == $char)) {
191:             // dictionary object
192:             $objtype = $char.$char;
193:             $offset += 2;
194:             if ($char == '<') {
195:                 // get array content
196:                 $objval = array();
197:                 do {
198:                     // get element
199:                     $element = $this->getRawObject($offset);
200:                     $offset = $element[2];
201:                     $objval[] = $element;
202:                 } while ($element[0] != '>>');
203:                 // remove closing delimiter
204:                 array_pop($objval);
205:             }
206:         } else {
207:             // hexadecimal string object
208:             $objtype = $char;
209:             ++$offset;
210:             if (($char == '<')
211:                 && (preg_match(
212:                     '/^([0-9A-Fa-f\x09\x0a\x0c\x0d\x20]+)>/iU',
213:                     substr($this->pdfdata, $offset),
214:                     $matches
215:                 ) == 1)
216:                 ) {
217:                 // remove white space characters
218:                 $objval = strtr($matches[1], "\x09\x0a\x0c\x0d\x20", '');
219:                 $offset += strlen($matches[0]);
220:             } elseif (($endpos = strpos($this->pdfdata, '>', $offset)) !== false) {
221:                 $offset = $endpos + 1;
222:             }
223:         }
224:     }
225: 
226:     /**
227:      * Process default
228:      *
229:      * @param int    $offset  Offset
230:      * @param string $objtype Object type
231:      * @param string $objval  Object content
232:      *
233:      * @return bool True in case of match, flase otherwise
234:      */
235:     protected function processDefaultName(&$offset, &$objtype, &$objval)
236:     {
237:         $status = false;
238:         if (substr($this->pdfdata, $offset, 6) == 'endobj') {
239:             // indirect object
240:             $objtype = 'endobj';
241:             $offset += 6;
242:             $status = true;
243:         } elseif (substr($this->pdfdata, $offset, 4) == 'null') {
244:             // null object
245:             $objtype = 'null';
246:             $offset += 4;
247:             $objval = 'null';
248:             $status = true;
249:         } elseif (substr($this->pdfdata, $offset, 4) == 'true') {
250:             // boolean true object
251:             $objtype = 'boolean';
252:             $offset += 4;
253:             $objval = 'true';
254:             $status = true;
255:         } elseif (substr($this->pdfdata, $offset, 5) == 'false') {
256:             // boolean false object
257:             $objtype = 'boolean';
258:             $offset += 5;
259:             $objval = 'false';
260:             $status = true;
261:         } elseif (substr($this->pdfdata, $offset, 6) == 'stream') {
262:             // start stream object
263:             $objtype = 'stream';
264:             $offset += 6;
265:             if (preg_match('/^([\r]?[\n])/isU', substr($this->pdfdata, $offset), $matches) == 1) {
266:                 $offset += strlen($matches[0]);
267:                 if (preg_match(
268:                     '/(endstream)[\x09\x0a\x0c\x0d\x20]/isU',
269:                     substr($this->pdfdata, $offset),
270:                     $matches,
271:                     PREG_OFFSET_CAPTURE
272:                 ) == 1) {
273:                     $objval = substr($this->pdfdata, $offset, $matches[0][1]);
274:                     $offset += $matches[1][1];
275:                 }
276:             }
277:             $status = true;
278:         } elseif (substr($this->pdfdata, $offset, 9) == 'endstream') {
279:             // end stream object
280:             $objtype = 'endstream';
281:             $offset += 9;
282:             $status = true;
283:         }
284:         return $status;
285:     }
286: 
287:     /**
288:      * Process default
289:      *
290:      * @param int    $offset  Offset
291:      * @param string $objtype Object type
292:      * @param string $objval  Object content
293:      */
294:     protected function processDefault(&$offset, &$objtype, &$objval)
295:     {
296:         if (preg_match(
297:             '/^([0-9]+)[\s]+([0-9]+)[\s]+R/iU',
298:             substr($this->pdfdata, $offset, 33),
299:             $matches
300:         ) == 1) {
301:             // indirect object reference
302:             $objtype = 'objref';
303:             $offset += strlen($matches[0]);
304:             $objval = intval($matches[1]).'_'.intval($matches[2]);
305:         } elseif (preg_match(
306:             '/^([0-9]+)[\s]+([0-9]+)[\s]+obj/iU',
307:             substr($this->pdfdata, $offset, 33),
308:             $matches
309:         ) == 1) {
310:             // object start
311:             $objtype = 'obj';
312:             $objval = intval($matches[1]).'_'.intval($matches[2]);
313:             $offset += strlen($matches[0]);
314:         } elseif (($numlen = strspn($this->pdfdata, '+-.0123456789', $offset)) > 0) {
315:             // numeric object
316:             $objtype = 'numeric';
317:             $objval = substr($this->pdfdata, $offset, $numlen);
318:             $offset += $numlen;
319:         }
320:     }
321: }
322: 
 

© 2004-2017 – Nicola Asuni - Tecnick.com - All rights reserved.
about - disclaimer - privacy