source-class-Com.Tecnick.Unicode.Bidi.StepN

It appears that you are using ad-blocking software. The cost of running this website is covered by advertisements. If you like it, please feel free to donate a small amount of money to secure the future of this website.
  1: <?php
  2: /**
  3:  * StepN.php
  4:  *
  5:  * @since       2011-05-23
  6:  * @category    Library
  7:  * @package     Unicode
  8:  * @author      Nicola Asuni <info@tecnick.com>
  9:  * @copyright   2011-2015 Nicola Asuni - Tecnick.com LTD
 10:  * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 11:  * @link        https://github.com/tecnickcom/tc-lib-unicode
 12:  *
 13:  * This file is part of tc-lib-unicode software library.
 14:  */
 15: 
 16: namespace Com\Tecnick\Unicode\Bidi;
 17: 
 18: use \Com\Tecnick\Unicode\Data\Bracket as UniBracket;
 19: 
 20: /**
 21:  * Com\Tecnick\Unicode\Bidi\StepN
 22:  *
 23:  * @since       2015-07-13
 24:  * @category    Library
 25:  * @package     Unicode
 26:  * @author      Nicola Asuni <info@tecnick.com>
 27:  * @copyright   2011-2015 Nicola Asuni - Tecnick.com LTD
 28:  * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 29:  * @link        https://github.com/tecnickcom/tc-lib-unicode
 30:  */
 31: class StepN extends \Com\Tecnick\Unicode\Bidi\StepBase
 32: {
 33:     /**
 34:      * List or bracket pairs positions
 35:      *
 36:      * @var array
 37:      */
 38:     protected $brackets= array();
 39: 
 40:     /**
 41:      * Stack used to store bracket positions
 42:      *
 43:      * @var array
 44:      */
 45:     protected $bstack= array();
 46: 
 47:     /**
 48:      * Process N steps
 49:      * Resolving Neutral and Isolate Formatting Types
 50:      *
 51:      * Neutral and isolate formatting (i.e. NI) characters are resolved one isolating run sequence at a time.
 52:      * Its results are that all NIs become either R or L. Generally, NIs take on the direction of the surrounding text.
 53:      * In case of a conflict, they take on the embedding direction.
 54:      * At isolating run sequence boundaries where the type of the character on the other side of the boundary
 55:      * is required, the type assigned to sos or eos is used.
 56:      *
 57:      * Bracket pairs within an isolating run sequence are processed as units so that both the opening and the closing
 58:      * paired bracket in a pair resolve to the same direction. Note that this rule is applied based on the current
 59:      * bidirectional character type of each paired bracket and not the original type, as this could have changed under
 60:      * X6. The current bidirectional character type may also have changed under a previous iteration of the for loop in
 61:      * N0 in the case of nested bracket pairs.
 62:      */
 63:     protected function process()
 64:     {
 65:         $this->processStep('getBracketPairs');
 66:         $this->processN0();
 67:         $this->processStep('processN1');
 68:         $this->processStep('processN2');
 69:     }
 70: 
 71:     /**
 72:      * BD16. Find all bracket pairs
 73:      */
 74:     protected function getBracketPairs($idx)
 75:     {
 76:         $char = $this->seq['item'][$idx]['char'];
 77:         if (isset(UniBracket::$open[$char])) {
 78:             // process open bracket
 79:             if ($char == 0x3008) {
 80:                 $char = 0x2329;
 81:             }
 82:             $this->bstack[] = array($idx, $char);
 83:         } elseif (isset(UniBracket::$close[$char])) {
 84:             // process closign bracket
 85:             if ($char == 0x3009) {
 86:                 $char = 0x232A;
 87:             }
 88:             // find matching opening bracket
 89:             $tmpstack = $this->bstack;
 90:             while (!empty($tmpstack)) {
 91:                 $item = array_pop($tmpstack);
 92:                 if ($char == UniBracket::$open[$item[1]]) {
 93:                     $this->brackets[$item[0]] = $idx;
 94:                     $this->bstack = $tmpstack;
 95:                 }
 96:             }
 97:         }
 98:         // Sort the list of pairs of text positions in ascending order
 99:         // based on the text position of the opening paired bracket.
100:         ksort($this->brackets);
101:     }
102: 
103:     /**
104:      * Return the normalized chat type for the N0 step
105:      * Within this scope, bidirectional types EN and AN are treated as R.
106:      *
107:      * @param string $type Char type
108:      *
109:      * @return string
110:      */
111:     protected function getN0Type($type)
112:     {
113:         return ((($type == 'AN') || ($type == 'EN')) ? 'R' : $type);
114:     }
115: 
116:     /**
117:      * N0. Process bracket pairs in an isolating run sequence sequentially in the logical order of the text positions
118:      *     of the opening paired brackets.
119:      */
120:     protected function processN0()
121:     {
122:         $odir = (($this->seq['edir'] == 'L') ? 'R' : 'L');
123:         // For each bracket-pair element in the list of pairs of text positions
124:         foreach ($this->brackets as $open => $close) {
125:             if ($this->processInsideBrackets($open, $close, $odir)) {
126:                 for ($jdx = ($open - 1); $jdx >= 0; --$jdx) {
127:                     $btype = $this->getN0Type($this->seq['item'][$jdx]['type']);
128:                     if ($btype == $odir) {
129:                         // 1. If the preceding strong type is also opposite the embedding direction,
130:                         //    context is established, so set the type for both brackets in the pair to that direction.
131:                         $this->setBracketsType($open, $close, $odir);
132:                         break;
133:                     } elseif ($btype == $this->seq['edir']) {
134:                         // 2. Otherwise set the type for both brackets in the pair to the embedding direction.
135:                         $this->setBracketsType($open, $close, $this->seq['edir']);
136:                         break;
137:                     }
138:                 }
139:                 if ($jdx < 0) {
140:                     $this->setBracketsType($open, $close, $this->seq['sos']);
141:                 }
142:             }
143:             // d. Otherwise, there are no strong types within the bracket pair. Therefore, do not set the type for that
144:             //    bracket pair. Note that if the enclosed text contains no strong types the bracket pairs will both
145:             //    resolve to the same level when resolved individually using rules N1 and N2.
146:         }
147:     }
148: 
149:     /**
150:      * Inspect the bidirectional types of the characters enclosed within the bracket pair.
151:      *
152:      * @param int    $open  Open bracket entry
153:      * @param int    $close Close bracket entry
154:      * @param string $odir  Opposite direction (L or R)
155:      *
156:      * @return bool True if type has not been found
157:      */
158:     protected function processInsideBrackets($open, $close, $odir)
159:     {
160:         $opposite = false;
161:         // a. Inspect the bidirectional types of the characters enclosed within the bracket pair.
162:         for ($jdx = ($open + 1); $jdx < $close; ++$jdx) {
163:             $btype = $this->getN0Type($this->seq['item'][$jdx]['type']);
164:             // b. If any strong type (either L or R) matching the embedding direction is found,
165:             // set the type for both brackets in the pair to match the embedding direction.
166:             if ($btype == $this->seq['edir']) {
167:                 $this->setBracketsType($open, $close, $this->seq['edir']);
168:                 break;
169:             } elseif ($btype == $odir) {
170:                 // c. Otherwise, if there is a strong type it must be opposite the embedding direction.
171:                 $opposite = true;
172:             }
173:         }
174:         // Therefore, test for an established context with a preceding strong type by checking backwards before
175:         // the opening paired bracket until the first strong type (L, R, or sos) is found.
176:         return (($jdx == $close) && $opposite);
177:     }
178: 
179:     /**
180:      * Set the brackets type
181:      *
182:      * @param int    $open  Open bracket entry
183:      * @param int    $close Close bracket entry
184:      * @param string $type  Type
185:      *
186:      * @return bool True if type has not been found
187:      */
188:     protected function setBracketsType($open, $close, $type)
189:     {
190:         $this->seq['item'][$open]['type'] = $type;
191:         $this->seq['item'][$close]['type'] = $type;
192: 
193:         // Any number of characters that had original bidirectional character type NSM
194:         // prior to the application of W1 that immediately follow a paired bracket which
195:         // changed to L or R under N0 should change to match the type of their preceding bracket.
196:         $next = ($close + 1);
197:         while (isset($this->seq['item'][$next]['otype']) && ($this->seq['item'][$next]['otype'] == 'NSM')) {
198:             $this->seq['item'][$next]['type'] = $type;
199:             ++$next;
200:         }
201:     }
202: 
203:     /**
204:      * N1. A sequence of NIs takes the direction of the surrounding strong text if the text on both sides has the same
205:      *     direction. European and Arabic numbers act as if they were R in terms of their influence on NIs.
206:      *     The start-of-sequence (sos) and end-of-sequence (eos) types are used at isolating run sequence boundaries.
207:      *
208:      * @param int $idx Current character position
209:      */
210:     protected function processN1($idx)
211:     {
212:         if ($this->seq['item'][$idx]['type'] == 'NI') {
213:             $bdx = ($idx - 1);
214:             $prev = $this->processN1prev($bdx);
215:             if (empty($prev)) {
216:                 return;
217:             }
218:             $jdx = $this->getNextN1Char($idx);
219:             $next = $this->processN1next($jdx);
220:             if (empty($next)) {
221:                 return;
222:             }
223:             if ($next == $prev) {
224:                 for ($bdx = $idx; (($bdx < $jdx) && ($bdx < $this->seq['length'])); ++$bdx) {
225:                     $this->seq['item'][$bdx]['type'] = $next;
226:                 }
227:             }
228:         }
229:     }
230: 
231:     /**
232:      * Get the next direction
233:      *
234:      * @param int $bdx Position of the preceding character
235:      *
236:      * @return string Previous position
237:      */
238:     protected function processN1prev(&$bdx)
239:     {
240:         if ($bdx < 0) {
241:             $bdx = 0;
242:             return $this->seq['sos'];
243:         }
244:         if (in_array($this->seq['item'][$bdx]['type'], array('R','AN','EN'))) {
245:             return 'R';
246:         }
247:         if ($this->seq['item'][$bdx]['type'] == 'L') {
248:             return 'L';
249:         }
250:         return '';
251:     }
252: 
253:     /**
254:      * Get the next direction
255:      *
256:      * @param int $jdx Position of the next character
257:      *
258:      * @return string Previous position
259:      */
260:     protected function processN1next(&$jdx)
261:     {
262:         if ($jdx >= $this->seq['length']) {
263:             $jdx = $this->seq['length'];
264:             return $this->seq['eos'];
265:         }
266:         if (in_array($this->seq['item'][$jdx]['type'], array('R','AN','EN'))) {
267:             return 'R';
268:         }
269:         if ($this->seq['item'][$jdx]['type'] == 'L') {
270:             return 'L';
271:         }
272:         return '';
273:     }
274: 
275:     /**
276:      * Return the index of the next valid char for N1
277:      *
278:      * @param int $idx Start index
279:      *
280:      * @return int
281:      */
282:     protected function getNextN1Char($idx)
283:     {
284:         $jdx = ($idx + 1);
285:         while (($jdx < $this->seq['length']) && ($this->seq['item'][$jdx]['type'] == 'NI')) {
286:             ++$jdx;
287:         }
288:         return $jdx;
289:     }
290: 
291:     /**
292:      * N2. Any remaining NIs take the embedding direction.
293:      *
294:      * @param int $idx Current character position
295:      */
296:     protected function processN2($idx)
297:     {
298:         if ($this->seq['item'][$idx]['type'] == 'NI') {
299:             $this->seq['item'][$idx]['type'] = $this->seq['edir'];
300:         }
301:     }
302: }
303: 
 

© 2004-2017 – Nicola Asuni - Tecnick.com - All rights reserved.
about - disclaimer - privacy