1: <?php
2: /**
3: * StepX.php
4: *
5: * @since 2011-05-23
6: * @category Library
7: * @package Unicode
8: * @author Nicola Asuni <info@tecnick.com>
9: * @copyright 2011-2015 Nicola Asuni - Tecnick.com LTD
10: * @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
11: * @link https://github.com/tecnickcom/tc-lib-unicode
12: *
13: * This file is part of tc-lib-unicode software library.
14: */
15:
16: namespace Com\Tecnick\Unicode\Bidi;
17:
18: use \Com\Tecnick\Unicode\Bidi\StepP;
19: use \Com\Tecnick\Unicode\Data\Type as UniType;
20: use \Com\Tecnick\Unicode\Data\Constant as UniConstant;
21:
22: /**
23: * Com\Tecnick\Unicode\Bidi\StepX
24: *
25: * @since 2015-07-13
26: * @category Library
27: * @package Unicode
28: * @author Nicola Asuni <info@tecnick.com>
29: * @copyright 2011-2015 Nicola Asuni - Tecnick.com LTD
30: * @license http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
31: * @link https://github.com/tecnickcom/tc-lib-unicode
32: */
class StepX
{
    /**
     * Maximum explicit embedding level (max_depth).
     * Levels from 0 through MAX_DEPTH, inclusive, are valid while applying the X rules.
     */
    const MAX_DEPTH = 125;

    /**
     * Directional Status Stack.
     * Each entry is an array with the keys 'cel' (embedding level), 'dos' (directional
     * override status: 'NI', 'L' or 'R') and 'dis' (directional isolate status);
     * entries pushed by setDss() also carry 'ord' (the initiator code point).
     *
     * @var array
     */
    protected $dss = array();

    /**
     * Overflow Isolate Count
     *
     * @var int
     */
    protected $oic = 0;

    /**
     * Overflow Embedding Count
     *
     * @var int
     */
    protected $oec = 0;

    /**
     * Valid Isolate Count
     *
     * @var int
     */
    protected $vic = 0;

    /**
     * Array of characters data to return.
     * Each item has the keys 'pos', 'char', 'level', 'type' and 'otype' (see pushChar()).
     *
     * @var array
     */
    protected $chardata = array();

    /**
     * Array of UTF-8 codepoints
     *
     * @var array
     */
    protected $ordarr = array();

    /**
     * X Steps for Bidirectional algorithm
     * Explicit Levels and Directions
     *
     * The whole input is processed immediately; the result is available via getChrData().
     *
     * @param array $ordarr Array of UTF-8 codepoints
     * @param int   $pel    Paragraph embedding level
     */
    public function __construct($ordarr, $pel)
    {
        $this->ordarr = $ordarr;
        $this->chardata = array();
        // X1. At the beginning of a paragraph, perform the following steps:
        //     - Set the stack to empty.
        //     - Push onto the stack an entry consisting of the paragraph embedding level,
        //       a neutral directional override status, and a false directional isolate status.
        $this->dss = array(
            array(
                'cel' => $pel,
                'dos' => 'NI',
                'dis' => false
            )
        );
        //     - Set the overflow isolate count to zero.
        $this->oic = 0;
        //     - Set the overflow embedding count to zero.
        $this->oec = 0;
        //     - Set the valid isolate count to zero.
        $this->vic = 0;
        //     - Process each character iteratively, applying rules X2 through X8.
        //       Only embedding levels from 0 through max_depth are valid in this phase.
        //       (Note that in the resolution of levels in rules I1 and I2,
        //       the maximum embedding level of max_depth+1 can be reached.)
        $this->processX();
    }

    /**
     * Returns the processed array
     *
     * @return array
     */
    public function getChrData()
    {
        return $this->chardata;
    }

    /**
     * Calculate the least even number greater than the given one
     *
     * @param int $num Number to process
     *
     * @return int
     */
    protected function getLEven($num)
    {
        return (2 + $num - ($num % 2));
    }

    /**
     * Calculate the least odd number greater than the given one
     *
     * @param int $num Number to process
     *
     * @return int
     */
    protected function getLOdd($num)
    {
        return (1 + $num + ($num % 2));
    }

    /**
     * Apply the explicit-level rules to every code point of the input, in order
     */
    protected function processX()
    {
        foreach ($this->ordarr as $key => $ord) {
            $this->processXcase($key, $ord);
        }
    }

    /**
     * Dispatch a single code point to the rule (X2 to X7) that handles it
     *
     * @param int $pos Original character position in the input string
     * @param int $ord Char code
     *
     * @SuppressWarnings(PHPMD.CyclomaticComplexity)
     */
    protected function processXcase($pos, $ord)
    {
        // Snapshot of the last stack entry, taken before this character alters the stack.
        $edss = end($this->dss);
        switch ($ord) {
            case UniConstant::RLE:
                // X2
                $this->setDss($this->getLOdd($edss['cel']), UniConstant::RLE, 'NI');
                break;
            case UniConstant::LRE:
                // X3
                $this->setDss($this->getLEven($edss['cel']), UniConstant::LRE, 'NI');
                break;
            case UniConstant::RLO:
                // X4
                $this->setDss($this->getLOdd($edss['cel']), UniConstant::RLO, 'R');
                break;
            case UniConstant::LRO:
                // X5
                $this->setDss($this->getLEven($edss['cel']), UniConstant::LRO, 'L');
                break;
            case UniConstant::RLI:
                // X5a
                // The initiator itself is emitted at the level of the enclosing scope,
                // so it must be recorded before the new entry is pushed.
                $this->processChar($pos, $ord, $edss);
                $this->setDss($this->getLOdd($edss['cel']), UniConstant::RLI, 'NI', true, true, 1);
                break;
            case UniConstant::LRI:
                // X5b
                $this->processChar($pos, $ord, $edss);
                $this->setDss($this->getLEven($edss['cel']), UniConstant::LRI, 'NI', true, true, 1);
                break;
            case UniConstant::FSI:
                // X5c
                $this->processChar($pos, $ord, $edss);
                $this->processFsiCase($pos, $edss);
                break;
            case UniConstant::PDI:
                // X6a
                $this->processPdiCase($pos, $ord, $edss);
                break;
            case UniConstant::PDF:
                // X7
                $this->processPdfCase($edss);
                break;
            default:
                // X6
                $this->processChar($pos, $ord, $edss);
                break;
        }
    }

    /**
     * Push a new entry on the directional status stack, or record an overflow (X2 to X5c)
     *
     * @param int    $cel     Embedding Level of the new entry
     * @param int    $ord     Char code of the initiator
     * @param string $dos     Directional override status ('NI', 'L' or 'R')
     * @param bool   $dis     Directional isolate status
     * @param bool   $isolate True if Isolate initiator
     * @param int    $ivic    increment for the valid isolate count
     */
    protected function setDss($cel, $ord, $dos, $dis = false, $isolate = false, $ivic = 0)
    {
        // X2 to X5
        //     - Compute the least odd|even embedding level greater than the embedding level of the last entry
        //       on the directional status stack.
        //     - If this new level would be valid, and the overflow isolate count and overflow embedding
        //       count are both zero, then this RLE is valid. Push an entry consisting of the new embedding
        //       level, neutral|left|right directional override status, and false directional isolate status onto the
        //       directional status stack.
        //     - Otherwise, this is an overflow RLE. If the overflow isolate count is zero, increment the
        //       overflow embedding|isolate count by one. Leave all other variables unchanged.
        //
        // A level equal to MAX_DEPTH is still valid; only levels above it overflow.
        if (($cel > self::MAX_DEPTH) || ($this->oic != 0) || ($this->oec != 0)) {
            if ($isolate) {
                ++$this->oic;
            } elseif ($this->oic == 0) {
                ++$this->oec;
            }
            return;
        }
        $this->vic += $ivic;
        $this->dss[] = array(
            'ord' => $ord,
            'cel' => $cel,
            'dos' => $dos,
            'dis' => $dis
        );
    }

    /**
     * Append a character entry to the output data
     *
     * The resolved 'type' is the override status of the given stack entry when that
     * status is not neutral ('NI'); otherwise it is the character's own type.
     * When the code point is missing from the type table, the override status is
     * used as its original type as well.
     *
     * @param int   $pos  Original character position in the input string
     * @param int   $ord  Char code
     * @param array $edss Last entry in the Directional Status Stack
     */
    protected function pushChar($pos, $ord, $edss)
    {
        $unitype = (isset(UniType::$uni[$ord]) ? UniType::$uni[$ord] : $edss['dos']);
        $this->chardata[] = array(
            'pos'   => $pos,
            'char'  => $ord,
            'level' => $edss['cel'],
            'type'  => (($edss['dos'] !== 'NI') ? $edss['dos'] : $unitype),
            'otype' => $unitype // original type
        );
    }

    /**
     * Process normal char (X6)
     *
     * Characters whose type is B or BN are skipped and never reach the output data.
     *
     * @param int   $pos  Original character position in the input string
     * @param int   $ord  Char code
     * @param array $edss Last entry in the Directional Status Stack
     */
    protected function processChar($pos, $ord, $edss)
    {
        // X6. For all types besides B, BN, RLE, LRE, RLO, LRO, PDF, RLI, LRI, FSI, and PDI:
        //     - Set the current character’s embedding level to the embedding level
        //       of the last entry on the directional status stack.
        //     - Whenever the directional override status of the last entry on the directional status stack
        //       is not neutral, reset the current character type according to the directional override
        //       status of the last entry on the directional status stack.
        if (isset(UniType::$uni[$ord]) && ((UniType::$uni[$ord] == 'B') || (UniType::$uni[$ord] == 'BN'))) {
            return;
        }
        $this->pushChar($pos, $ord, $edss);
    }

    /**
     * Process the PDF type character (X7)
     *
     * @param array $edss Last entry in the Directional Status Stack
     */
    protected function processPdfCase($edss)
    {
        // X7. With each PDF, perform the following steps:
        //     - If the overflow isolate count is greater than zero, do nothing. (This PDF is within the
        //       scope of an overflow isolate initiator. It either matches and terminates the scope of an
        //       overflow embedding initiator within that overflow isolate, or does not match any
        //       embedding initiator.)
        if ($this->oic > 0) {
            return;
        }
        //     - Otherwise, if the overflow embedding count is greater than zero, decrement it by one.
        //       (This PDF matches and terminates the scope of an overflow embedding initiator that is not
        //       within the scope of an overflow isolate initiator.)
        if ($this->oec > 0) {
            --$this->oec;
            return;
        }
        //     - Otherwise, if the directional isolate status of the last entry on the directional status
        //       stack is false, and the directional status stack contains at least two entries, pop the
        //       last entry from the directional status stack. (This PDF matches and terminates the scope
        //       of a valid embedding initiator. Since the stack has at least two entries, this pop does
        //       not leave the stack empty.)
        if (($edss['dis'] === false) && (count($this->dss) > 1)) {
            array_pop($this->dss);
        }
        //     - Otherwise, do nothing. (This PDF does not match any embedding initiator.)
    }

    /**
     * Process the PDI type character (X6a)
     *
     * NOTE(review): the two early returns below leave without calling pushChar(), so a PDI
     * that matches an overflow isolate initiator, or matches no initiator at all, is not
     * added to the output data, although the rule text quoted at the end of this method
     * says "In all cases". Confirm whether dropping those PDIs is intentional.
     *
     * @param int   $pos  Original character position in the input string
     * @param int   $ord  Char code
     * @param array $edss Last entry in the Directional Status Stack
     */
    protected function processPdiCase($pos, $ord, $edss)
    {
        // X6a. With each PDI, perform the following steps:
        //      - If the overflow isolate count is greater than zero, this PDI matches an overflow isolate
        //        initiator. Decrement the overflow isolate count by one.
        if ($this->oic > 0) {
            --$this->oic;
            return;
        }
        //      - Otherwise, if the valid isolate count is zero, this PDI does not match any isolate
        //        initiator, valid or overflow. Do nothing.
        if ($this->vic == 0) {
            return;
        }
        //      - Otherwise, this PDI matches a valid isolate initiator. Perform the following steps:
        //        - Reset the overflow embedding count to zero. (This terminates the scope of those overflow
        //          embedding initiators within the scope of the matched isolate initiator whose scopes have
        //          not been terminated by a matching PDF, and which thus lack a matching PDF.)
        $this->oec = 0;
        //        - While the directional isolate status of the last entry on the stack is false, pop the
        //          last entry from the directional status stack. (This terminates the scope of those valid
        //          embedding initiators within the scope of the matched isolate initiator whose scopes have
        //          not been terminated by a matching PDF, and which thus lack a matching PDF. Given that the
        //          valid isolate count is non-zero, the directional status stack before this step is
        //          executed must contain an entry with directional isolate status true, and thus after this
        //          step is executed the last entry on the stack will indeed have a true directional isolate
        //          status, i.e. represent the scope of the matched isolate initiator. This cannot be the
        //          stack's first entry, which always belongs to the paragraph level and has a false
        //          directional status, so there is at least one more entry below it on the stack.)
        while (($edss['dis'] === false) && (count($this->dss) > 1)) {
            array_pop($this->dss);
            $edss = end($this->dss);
        }
        //        - Pop the last entry from the directional status stack and decrement the valid isolate
        //          count by one. (This terminates the scope of the matched isolate initiator. Since the
        //          preceding step left the stack with at least two entries, this pop does not leave the
        //          stack empty.)
        array_pop($this->dss);
        $edss = end($this->dss);
        --$this->vic;
        //      - In all cases, look up the last entry on the directional status stack left after the
        //        steps above and:
        //        - Set the PDI’s level to the entry's embedding level.
        //        - If the entry's directional override status is not neutral, reset the current character type
        //          from PDI to L if the override status is left-to-right, and to R if the override status is
        //          right-to-left.
        $this->pushChar($pos, $ord, $edss);
    }

    /**
     * Process the FSI type character (X5c)
     *
     * NOTE(review): $pos is used as a numeric offset into the code point array, so the
     * array is presumably a zero-based list — confirm against the caller. The slice also
     * starts at the FSI itself and extends to the end of the paragraph rather than to the
     * matching PDI; whether StepP accounts for this is not visible from this class.
     *
     * @param int   $pos  Original character position in the input string
     * @param array $edss Last entry in the Directional Status Stack
     */
    protected function processFsiCase($pos, $edss)
    {
        // X5c. With each FSI, apply rules P2 and P3 to the sequence of characters between the FSI and its
        //      matching PDI, or if there is no matching PDI, the end of the paragraph, as if this sequence
        //      of characters were a paragraph. If these rules decide on paragraph embedding level 1, treat
        //      the FSI as an RLI in rule X5a. Otherwise, treat it as an LRI in rule X5b.
        $stepp = new StepP(array_slice($this->ordarr, $pos));
        if ($stepp->getPel() == 0) {
            $this->setDss($this->getLEven($edss['cel']), UniConstant::LRI, 'NI', true, true, 1);
        } else {
            $this->setDss($this->getLOdd($edss['cel']), UniConstant::RLI, 'NI', true, true, 1);
        }
    }
}
406: