source-class-Com.Tecnick.Unicode.Bidi

It appears that you are using ad-blocking software. The cost of running this website is covered by advertisements. If you like it, please feel free to donate a small amount of money to secure the future of this website.
  1: <?php
  2: /**
  3:  * Bidi.php
  4:  *
  5:  * @since       2011-05-23
  6:  * @category    Library
  7:  * @package     Unicode
  8:  * @author      Nicola Asuni <info@tecnick.com>
  9:  * @copyright   2011-2015 Nicola Asuni - Tecnick.com LTD
 10:  * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 11:  * @link        https://github.com/tecnickcom/tc-lib-unicode
 12:  *
 13:  * This file is part of tc-lib-unicode software library.
 14:  */
 15: 
 16: namespace Com\Tecnick\Unicode;
 17: 
 18: use \Com\Tecnick\Unicode\Exception as UnicodeException;
 19: 
 20: use \Com\Tecnick\Unicode\Convert;
 21: use \Com\Tecnick\Unicode\Bidi\StepP;
 22: use \Com\Tecnick\Unicode\Bidi\StepX;
 23: use \Com\Tecnick\Unicode\Bidi\StepXten;
 24: use \Com\Tecnick\Unicode\Bidi\StepW;
 25: use \Com\Tecnick\Unicode\Bidi\StepN;
 26: use \Com\Tecnick\Unicode\Bidi\StepI;
 27: use \Com\Tecnick\Unicode\Bidi\Shaping;
 28: use \Com\Tecnick\Unicode\Bidi\StepL;
 29: use \Com\Tecnick\Unicode\Data\Pattern as UniPattern;
 30: use \Com\Tecnick\Unicode\Data\Type as UniType;
 31: use \Com\Tecnick\Unicode\Data\Constant as UniConstant;
 32: 
 33: /**
 34:  * Com\Tecnick\Unicode\Bidi
 35:  *
 36:  * @since       2015-07-13
 37:  * @category    Library
 38:  * @package     Unicode
 39:  * @author      Nicola Asuni <info@tecnick.com>
 40:  * @copyright   2011-2015 Nicola Asuni - Tecnick.com LTD
 41:  * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 42:  * @link        https://github.com/tecnickcom/tc-lib-unicode
 43:  */
 44: class Bidi
 45: {
 46:     /**
 47:      * String to process
 48:      *
 49:      * @var string
 50:      */
 51:     protected $str = '';
 52: 
 53:     /**
 54:      * Array of UTF-8 chars
 55:      *
 56:      * @var array
 57:      */
 58:     protected $chrarr = array();
 59: 
 60:     /**
 61:      * Array of UTF-8 codepoints
 62:      *
 63:      * @var array
 64:      */
 65:     protected $ordarr = array();
 66: 
 67:     /**
 68:      * Processed string
 69:      *
 70:      * @var string
 71:      */
 72:     protected $bidistr = '';
 73: 
 74:     /**
 75:      * Array of processed UTF-8 chars
 76:      *
 77:      * @var array
 78:      */
 79:     protected $bidichrarr = array();
 80: 
 81:     /**
 82:      * Array of processed UTF-8 codepoints
 83:      *
 84:      * @var array
 85:      */
 86:     protected $bidiordarr = array();
 87: 
 88:     /**
 89:      * If true force processign the string in RTL mode
 90:      *
 91:      * @var bool
 92:      */
 93:     protected $forcertl = false;
 94: 
 95:     /**
 96:      * If true enable shaping
 97:      *
 98:      * @var bool
 99:      */
100:     protected $shaping = true;
101: 
102:     /**
103:      * True if the string contains arabic characters
104:      *
105:      * @var bool
106:      */
107:     protected $arabic = false;
108: 
109:     /**
110:      * Array of character data
111:      *
112:      * @var array
113:      */
114:     protected $chardata = array();
115: 
116:     /**
117:      * Convert object
118:      *
119:      * @var Convert
120:      */
121:     protected $conv;
122: 
123:     /**
124:      * Reverse the RLT substrings using the Bidirectional Algorithm
125:      * http://unicode.org/reports/tr9/
126:      *
127:      * @param string $str      String to convert (if null it will be generated from $chrarr or $ordarr)
128:      * @param array  $chrarr   Array of UTF-8 chars (if empty it will be generated from $str or $ordarr)
129:      * @param array  $ordarr   Array of UTF-8 codepoints (if empty it will be generated from $str or $chrarr)
130:      * @param mixed  $forcertl If 'R' forces RTL, if 'L' forces LTR
131:      * @param bool   $shaping  If true enable the shaping algorithm
132:      */
133:     public function __construct($str = null, $chrarr = null, $ordarr = null, $forcertl = false, $shaping = true)
134:     {
135:         if (($str === null) && empty($chrarr) && empty($ordarr)) {
136:             throw new UnicodeException('empty input');
137:         }
138:         $this->conv = new Convert();
139:         $this->setInput($str, $chrarr, $ordarr, $forcertl);
140: 
141:         if (!$this->isRtlMode()) {
142:             $this->bidistr = $this->str;
143:             $this->bidichrarr = $this->chrarr;
144:             $this->bidiordarr = $this->ordarr;
145:             return;
146:         }
147: 
148:         $this->shaping = ($shaping && $this->arabic);
149: 
150:         $this->process();
151:     }
152:     
153: 
154:     /**
155:      * Set Input data
156:      *
157:      * @param string $str      String to convert (if null it will be generated from $chrarr or $ordarr)
158:      * @param array  $chrarr   Array of UTF-8 chars (if empty it will be generated from $str or $ordarr)
159:      * @param array  $ordarr   Array of UTF-8 codepoints (if empty it will be generated from $str or $chrarr)
160:      * @param mixed  $forcertl If 'R' forces RTL, if 'L' forces LTR
161:      */
162:     protected function setInput($str = null, $chrarr = null, $ordarr = null, $forcertl = false)
163:     {
164:         if ($str === null) {
165:             if (empty($chrarr)) {
166:                 $chrarr = $this->conv->ordArrToChrArr($ordarr);
167:             }
168:             $str = implode($chrarr);
169:         } elseif (empty($chrarr)) {
170:             $chrarr = $this->conv->strToChrArr($str);
171:         }
172:         if (empty($ordarr)) {
173:             $ordarr = $this->conv->chrArrToOrdArr($chrarr);
174:         }
175: 
176:         $this->str = $str;
177:         $this->chrarr = $chrarr;
178:         $this->ordarr = $ordarr;
179:         $this->forcertl = (($forcertl === false) ? false : strtoupper($forcertl[0]));
180:     }
181: 
182:     /**
183:      * Returns the processed array of UTF-8 codepoints
184:      *
185:      * @return array
186:      */
187:     public function getOrdArray()
188:     {
189:         return $this->bidiordarr;
190:     }
191: 
192:     /**
193:      * Returns the processed array of UTF-8 chars
194:      *
195:      * @return array
196:      */
197:     public function getChrArray()
198:     {
199:         if (empty($this->bidichrarr)) {
200:             $this->bidichrarr = $this->conv->ordArrToChrArr($this->bidiordarr);
201:         }
202:         return $this->bidichrarr;
203:     }
204: 
205:     /**
206:      * Returns the number of characters in the processed string
207:      *
208:      * @return int
209:      */
210:     public function getNumChars()
211:     {
212:         return count($this->getChrArray());
213:     }
214: 
215:     /**
216:      * Returns the processed string
217:      *
218:      * @return string
219:      */
220:     public function getString()
221:     {
222:         if (empty($this->bidistr)) {
223:             $this->bidistr = implode($this->getChrArray());
224:         }
225:         return $this->bidistr;
226:     }
227: 
228:     /**
229:      * Returns an array with processed chars as keys
230:      *
231:      * @return array
232:      */
233:     public function getCharKeys()
234:     {
235:         return array_fill_keys(array_values($this->bidiordarr), true);
236:     }
237: 
238:     /**
239:      * P1. Split the text into separate paragraphs.
240:      *     A paragraph separator is kept with the previous paragraph.
241:      *
242:      * @return array
243:      */
244:     protected function getParagraphs()
245:     {
246:         
247:         $paragraph = array(0 => array());
248:         $pdx = 0; // paragraphs index
249:         foreach ($this->ordarr as $ord) {
250:             $paragraph[$pdx][] = $ord;
251:             if (isset(UniType::$uni[$ord]) && (UniType::$uni[$ord] == 'B')) {
252:                 ++$pdx;
253:                 $paragraph[$pdx] = array();
254:             }
255:         }
256:         return $paragraph;
257:     }
258: 
259:     /**
260:      * Process the string
261:      */
262:     protected function process()
263:     {
264:         // split the text into separate paragraphs.
265:         $paragraph = $this->getParagraphs();
266: 
267:         // Within each paragraph, apply all the other rules of this algorithm.
268:         foreach ($paragraph as $par) {
269:             $pel = $this->getPel($par);
270:             $stepx = new StepX($par, $pel);
271:             $stepx10 = new StepXten($stepx->getChrData(), $pel);
272:             $ilrs = $stepx10->getIsolatedLevelRunSequences();
273:             $chardata = array();
274:             foreach ($ilrs as $seq) {
275:                 $stepw = new StepW($seq);
276:                 $stepn = new StepN($stepw->getSequence());
277:                 $stepi = new StepI($stepn->getSequence());
278:                 $seq = $stepi->getSequence();
279:                 if ($this->shaping) {
280:                     $shaping = new Shaping($seq);
281:                     $seq = $shaping->getSequence();
282:                 }
283:                 $chardata = array_merge($chardata, $seq['item']);
284:             }
285:             $stepl = new StepL($chardata, $pel, (isset($seq['maxlevel']) ? $seq['maxlevel'] : 0));
286:             $chardata = $stepl->getChrData();
287:             foreach ($chardata as $chd) {
288:                 $this->bidiordarr[] = $chd['char'];
289:             }
290:             // add back the paragraph separators
291:             $lastchar = end($par);
292:             if (isset(UniType::$uni[$lastchar]) && (UniType::$uni[$lastchar] == 'B')) {
293:                 $this->bidiordarr[] = $lastchar;
294:             }
295:         }
296:     }
297: 
298:     /**
299:      * Get the paragraph embedding level
300:      *
301:      * @param array $par Paragraph
302:      *
303:      * @return int
304:      */
305:     protected function getPel($par)
306:     {
307:         if ($this->forcertl === 'R') {
308:             return 1;
309:         }
310:         if ($this->forcertl === 'L') {
311:             return 0;
312:         }
313:         $stepp = new StepP($par);
314:         return $stepp->getPel();
315:     }
316: 
317:     /**
318:      * Check if the input string contains RTL characters to process
319:      *
320:      * @return boolean
321:      */
322:     protected function isRtlMode()
323:     {
324:         $this->arabic = preg_match(UniPattern::ARABIC, $this->str);
325:         return (($this->forcertl !== false) || $this->arabic || preg_match(UniPattern::RTL, $this->str));
326:     }
327: }
328: 
 

© 2004-2017 – Nicola Asuni - Tecnick.com - All rights reserved.
about - disclaimer - privacy