source-class-Com.Tecnick.Unicode.Bidi.StepXten

It appears that you are using ad-blocking software. The cost of running this website is covered by advertisements. If you like it, please feel free to donate a small amount of money to secure the future of this website.
  1: <?php
  2: /**
  3:  * StepXten.php
  4:  *
  5:  * @since       2011-05-23
  6:  * @category    Library
  7:  * @package     Unicode
  8:  * @author      Nicola Asuni <info@tecnick.com>
  9:  * @copyright   2011-2015 Nicola Asuni - Tecnick.com LTD
 10:  * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 11:  * @link        https://github.com/tecnickcom/tc-lib-unicode
 12:  *
 13:  * This file is part of tc-lib-unicode software library.
 14:  */
 15: 
 16: namespace Com\Tecnick\Unicode\Bidi;
 17: 
 18: use \Com\Tecnick\Unicode\Data\Constant as UniConstant;
 19: 
 20: /**
 21:  * Com\Tecnick\Unicode\Bidi\StepXten
 22:  *
 23:  * @since       2015-07-13
 24:  * @category    Library
 25:  * @package     Unicode
 26:  * @author      Nicola Asuni <info@tecnick.com>
 27:  * @copyright   2011-2015 Nicola Asuni - Tecnick.com LTD
 28:  * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 29:  * @link        https://github.com/tecnickcom/tc-lib-unicode
 30:  */
 31: class StepXten
 32: {
 33:     /**
 34:      * Array of characters data to return
 35:      *
 36:      * @var array
 37:      */
 38:     protected $chardata = array();
 39: 
 40:     /**
 41:      * Paragraph Embedding Level
 42:      *
 43:      * @var int
 44:      */
 45:     protected $pel = 0;
 46: 
 47:     /**
 48:      * Number of characters
 49:      *
 50:      * @var int
 51:      */
 52:     protected $numchars = 0;
 53: 
 54:     /**
 55:      * Array of Level Run sequences
 56:      *
 57:      * @var array
 58:      */
 59:     protected $runseq = array();
 60: 
 61:     /**
 62:      * Number of Level Run sequences
 63:      *
 64:      * @var int
 65:      */
 66:     protected $numrunseq = 0;
 67: 
 68:     /**
 69:      * Array of Isolated Level Run sequences
 70:      *
 71:      * @var array
 72:      */
 73:     protected $ilrs = array();
 74: 
 75:     /**
 76:      * X Steps for Bidirectional algorithm
 77:      *
 78:      * @param array  $chardata  Array of UTF-8 codepoints
 79:      * @param int    $pel       Paragraph Embedding Level
 80:      */
 81:     public function __construct($chardata, $pel)
 82:     {
 83:         $this->chardata = $chardata;
 84:         $this->numchars = count($chardata);
 85:         $this->pel = $pel;
 86:         $this->setIsolatedLevelRunSequences();
 87:     }
 88: 
 89:     /**
 90:      * Get the Isolated Run Sequences
 91:      *
 92:      * @return array
 93:      */
 94:     public function getIsolatedLevelRunSequences()
 95:     {
 96:         return $this->ilrs;
 97:     }
 98: 
 99:     /**
100:      * Get the embedded direction (L or R)
101:      *
102:      * @param int $level
103:      *
104:      * @return string
105:      */
106:     protected function getEmbeddedDirection($level)
107:     {
108:         return ((($level % 2) == 0) ? 'L' : 'R');
109:     }
110: 
111:     /**
112:      * Set Level Run Sequences
113:      */
114:     protected function setLevelRunSequences()
115:     {
116:         $start = 0;
117:         while ($start < $this->numchars) {
118:             $end = ($start + 1);
119:             while (($end < $this->numchars) && ($this->chardata[$end]['level'] == $this->chardata[$start]['level'])) {
120:                 ++$end;
121:             }
122:             --$end;
123:             $this->runseq[] = array(
124:                 'start' => $start,
125:                 'end'   => $end,
126:                 'e'     => $this->chardata[$start]['level']
127:             );
128:             ++$this->numrunseq;
129:             $start = ($end + 1);
130:         }
131:     }
132: 
133:     /**
134:      * returns true if the input char is an Isolate Initiator
135:      *
136:      * @return bool
137:      */
138:     protected function isIsolateInitiator($ord)
139:     {
140:         return (($ord == UniConstant::RLI) || ($ord == UniConstant::LRI) || ($ord == UniConstant::FSI));
141:     }
142: 
143:     /**
144:      * Set level Isolated Level Run Sequences
145:      *
146:      * @return array
147:      */
148:     protected function setIsolatedLevelRunSequences()
149:     {
150:         $this->setLevelRunSequences();
151:         $numiso = 0;
152:         foreach ($this->runseq as $idx => $seq) {
153:             // Create a new level run sequence, and initialize it to contain just that level run
154:             $isorun = array(
155:                 'e'      => $seq['e'],
156:                 'edir'   => $this->getEmbeddedDirection($seq['e']), // embedded direction
157:                 'start'  => $seq['start'], // position of the first char
158:                 'end'    => $seq['end'],   // position of the last char
159:                 'length' => ($seq['end'] - $seq['start'] + 1),
160:                 'sos'    => '', // start-of-sequence
161:                 'eos'    => '', // end-of-sequence
162:                 'item'   => array()
163:             );
164:             for ($jdx = 0; $jdx < $isorun['length']; ++$jdx) {
165:                 $isorun['item'][$jdx] = $this->chardata[($seq['start'] + $jdx)];
166:             }
167:             $endchar = $isorun['item'][($jdx - 1)]['char'];
168: 
169:             // While the level run currently last in the sequence ends with an isolate initiator that has a
170:             // matching PDI, append the level run containing the matching PDI to the sequence.
171:             // (Note that this matching PDI must be the first character of its level run.)
172:             $pdimatch = -1;
173:             if ($this->isIsolateInitiator($endchar)) {
174:                 // find the next sequence with the same level that starts with a PDI
175:                 for ($kdx = ($idx + 1); $kdx < $this->numrunseq; ++$kdx) {
176:                     if (($this->runseq[$kdx]['e'] == $isorun['e'])
177:                         && ($this->chardata[$this->runseq[$kdx]['start']]['char'] == UniConstant::PDI)
178:                     ) {
179:                         $pdimatch = $this->runseq[$kdx]['start'];
180:                         $this->chardata[$pdimatch]['pdimatch'] = $numiso;
181:                         break;
182:                     }
183:                 }
184:             }
185: 
186:             // For each level run in the paragraph whose first character is not a PDI,
187:             // or is a PDI that does not match any isolate initiator
188:             if (isset($this->chardata[$seq['start']]['pdimatch'])) {
189:                 $parent = $this->chardata[$seq['start']]['pdimatch'];
190:                 $this->ilrs[$parent]['item'] = array_merge($this->ilrs[$parent]['item'], $isorun['item']);
191:                 $this->ilrs[$parent]['length'] += $isorun['length'];
192:                 $this->ilrs[$parent]['end'] += $isorun['end'];
193:                 if ($pdimatch >= 0) {
194:                     $this->chardata[$pdimatch]['pdimatch'] = $parent;
195:                 }
196:             } else {
197:                 $this->ilrs[$numiso] = $isorun;
198:                 ++$numiso;
199:             }
200:         }
201:         $this->setStartEndOfSequence();
202:     }
203: 
204:     /**
205:      * Determine the start-of-sequence (sos) and end-of-sequence (eos) types, either L or R,
206:      * for each isolating run sequence.
207:      */
208:     protected function setStartEndOfSequence()
209:     {
210:         foreach ($this->ilrs as $key => $seq) {
211:             // For sos, compare the level of the first character in the sequence with the level of the character
212:             // preceding it in the paragraph (not counting characters removed by X9), and if there is none,
213:             // with the paragraph embedding level.
214:             $lev = $seq['item'][0]['level'];
215:             if ($seq['start'] == 0) {
216:                 $prev = $this->pel;
217:             } else {
218:                 $lastchr = $this->chardata[($seq['start'] - 1)];
219:                 $prev = $lastchr['level'];
220:             }
221:             $this->ilrs[$key]['sos'] = $this->getEmbeddedDirection(($prev > $lev) ? $prev : $lev);
222: 
223:             // For eos, compare the level of the last character in the sequence with the level of the character
224:             // following it in the paragraph (not counting characters removed by X9), and if there is none or the
225:             // last character of the sequence is an isolate initiator (lacking a matching PDI), with the paragraph
226:             // embedding level.
227:             $lastchr = end($seq['item']);
228:             $lev = $lastchr['level'];
229:             if (!isset($this->chardata[($seq['end'] + 1)]['level']) || $this->isIsolateInitiator($lastchr['char'])) {
230:                 $next = $this->pel;
231:             } else {
232:                 $next = $this->chardata[($seq['end'] + 1)]['level'];
233:             }
234:             $this->ilrs[$key]['eos'] = $this->getEmbeddedDirection(($next > $lev) ? $next : $lev);
235:             
236:             // If the higher level is odd, the sos or eos is R; otherwise, it is L.
237:         }
238:     }
239: }
240: 
 

© 2004-2017 – Nicola Asuni - Tecnick.com - All rights reserved.
about - disclaimer - privacy