source-class-Com.Tecnick.Unicode.Bidi

It appears that you are using ad-blocking software. The cost of running this website is covered by advertisements. If you like it, please feel free to donate a small amount of money to secure the future of this website.
  1:   2:   3:   4:   5:   6:   7:   8:   9:  10:  11:  12:  13:  14:  15:  16:  17:  18:  19:  20:  21:  22:  23:  24:  25:  26:  27:  28:  29:  30:  31:  32:  33:  34:  35:  36:  37:  38:  39:  40:  41:  42:  43:  44:  45:  46:  47:  48:  49:  50:  51:  52:  53:  54:  55:  56:  57:  58:  59:  60:  61:  62:  63:  64:  65:  66:  67:  68:  69:  70:  71:  72:  73:  74:  75:  76:  77:  78:  79:  80:  81:  82:  83:  84:  85:  86:  87:  88:  89:  90:  91:  92:  93:  94:  95:  96:  97:  98:  99: 100: 101: 102: 103: 104: 105: 106: 107: 108: 109: 110: 111: 112: 113: 114: 115: 116: 117: 118: 119: 120: 121: 122: 123: 124: 125: 126: 127: 128: 129: 130: 131: 132: 133: 134: 135: 136: 137: 138: 139: 140: 141: 142: 143: 144: 145: 146: 147: 148: 149: 150: 151: 152: 153: 154: 155: 156: 157: 158: 159: 160: 161: 162: 163: 164: 165: 166: 167: 168: 169: 170: 171: 172: 173: 174: 175: 176: 177: 178: 179: 180: 181: 182: 183: 184: 185: 186: 187: 188: 189: 190: 191: 192: 193: 194: 195: 196: 197: 198: 199: 200: 201: 202: 203: 204: 205: 206: 207: 208: 209: 210: 211: 212: 213: 214: 215: 216: 217: 218: 219: 220: 221: 222: 223: 224: 225: 226: 227: 228: 229: 230: 231: 232: 233: 234: 235: 236: 237: 238: 239: 240: 241: 242: 243: 244: 245: 246: 247: 248: 249: 250: 251: 252: 253: 254: 255: 256: 257: 258: 259: 260: 261: 262: 263: 264: 265: 266: 267: 268: 269: 270: 271: 272: 273: 274: 275: 276: 277: 278: 279: 280: 281: 282: 283: 284: 285: 286: 287: 288: 289: 290: 291: 292: 293: 294: 295: 296: 297: 298: 299: 300: 301: 302: 303: 304: 305: 306: 307: 308: 309: 310: 311: 312: 313: 314: 315: 316: 317: 318: 319: 320: 321: 322: 323: 324: 325: 326: 327: 328: 
<?php
/**
 * Bidi.php
 *
 * @since       2011-05-23
 * @category    Library
 * @package     Unicode
 * @author      Nicola Asuni <info@tecnick.com>
 * @copyright   2011-2015 Nicola Asuni - Tecnick.com LTD
 * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 * @link        https://github.com/tecnickcom/tc-lib-unicode
 *
 * This file is part of tc-lib-unicode software library.
 */

namespace Com\Tecnick\Unicode;

use \Com\Tecnick\Unicode\Exception as UnicodeException;

use \Com\Tecnick\Unicode\Convert;
use \Com\Tecnick\Unicode\Bidi\StepP;
use \Com\Tecnick\Unicode\Bidi\StepX;
use \Com\Tecnick\Unicode\Bidi\StepXten;
use \Com\Tecnick\Unicode\Bidi\StepW;
use \Com\Tecnick\Unicode\Bidi\StepN;
use \Com\Tecnick\Unicode\Bidi\StepI;
use \Com\Tecnick\Unicode\Bidi\Shaping;
use \Com\Tecnick\Unicode\Bidi\StepL;
use \Com\Tecnick\Unicode\Data\Pattern as UniPattern;
use \Com\Tecnick\Unicode\Data\Type as UniType;
use \Com\Tecnick\Unicode\Data\Constant as UniConstant;

/**
 * Com\Tecnick\Unicode\Bidi
 *
 * @since       2015-07-13
 * @category    Library
 * @package     Unicode
 * @author      Nicola Asuni <info@tecnick.com>
 * @copyright   2011-2015 Nicola Asuni - Tecnick.com LTD
 * @license     http://www.gnu.org/copyleft/lesser.html GNU-LGPL v3 (see LICENSE.TXT)
 * @link        https://github.com/tecnickcom/tc-lib-unicode
 */
class Bidi
{
    /**
     * String to process
     *
     * @var string
     */
    protected $str = '';

    /**
     * Array of UTF-8 chars
     *
     * @var array
     */
    protected $chrarr = array();

    /**
     * Array of UTF-8 codepoints
     *
     * @var array
     */
    protected $ordarr = array();

    /**
     * Processed string
     *
     * @var string
     */
    protected $bidistr = '';

    /**
     * Array of processed UTF-8 chars
     *
     * @var array
     */
    protected $bidichrarr = array();

    /**
     * Array of processed UTF-8 codepoints
     *
     * @var array
     */
    protected $bidiordarr = array();

    /**
     * Forced direction: false when nothing is forced, otherwise the upper-cased
     * first character of the value given by the caller ('R' forces RTL, 'L' forces LTR).
     *
     * @var mixed
     */
    protected $forcertl = false;

    /**
     * If true enable shaping
     *
     * @var bool
     */
    protected $shaping = true;

    /**
     * True if the string contains arabic characters
     *
     * @var bool
     */
    protected $arabic = false;

    /**
     * Array of character data
     *
     * @var array
     */
    protected $chardata = array();

    /**
     * Convert object
     *
     * @var Convert
     */
    protected $conv;

    /**
     * Reverse the RTL substrings using the Bidirectional Algorithm
     * http://unicode.org/reports/tr9/
     *
     * When no direction is forced and the string matches neither the ARABIC nor the RTL pattern,
     * the input is copied to the output unchanged and the algorithm is not run.
     *
     * @param string $str      String to convert (if null it will be generated from $chrarr or $ordarr)
     * @param array  $chrarr   Array of UTF-8 chars (if empty it will be generated from $str or $ordarr)
     * @param array  $ordarr   Array of UTF-8 codepoints (if empty it will be generated from $str or $chrarr)
     * @param mixed  $forcertl If 'R' forces RTL, if 'L' forces LTR, false to force nothing
     * @param bool   $shaping  If true enable the shaping algorithm (only applied to arabic text)
     *
     * @throws UnicodeException if $str is null and both $chrarr and $ordarr are empty
     */
    public function __construct($str = null, $chrarr = null, $ordarr = null, $forcertl = false, $shaping = true)
    {
        if (($str === null) && empty($chrarr) && empty($ordarr)) {
            throw new UnicodeException('empty input');
        }
        $this->conv = new Convert();
        $this->setInput($str, $chrarr, $ordarr, $forcertl);

        if (!$this->isRtlMode()) {
            // Nothing to reorder: the processed data is the input data.
            $this->bidistr = $this->str;
            $this->bidichrarr = $this->chrarr;
            $this->bidiordarr = $this->ordarr;
            return;
        }

        // isRtlMode() has just set $this->arabic; shaping is only performed on arabic text.
        $this->shaping = ($shaping && $this->arabic);

        $this->process();
    }

    /**
     * Set Input data
     *
     * Whichever of the three representations are missing are derived from the ones provided.
     *
     * @param string $str      String to convert (if null it will be generated from $chrarr or $ordarr)
     * @param array  $chrarr   Array of UTF-8 chars (if empty it will be generated from $str or $ordarr)
     * @param array  $ordarr   Array of UTF-8 codepoints (if empty it will be generated from $str or $chrarr)
     * @param mixed  $forcertl If 'R' forces RTL, if 'L' forces LTR, false to force nothing
     */
    protected function setInput($str = null, $chrarr = null, $ordarr = null, $forcertl = false)
    {
        if ($str === null) {
            if (empty($chrarr)) {
                $chrarr = $this->conv->ordArrToChrArr($ordarr);
            }
            $str = implode($chrarr);
        } elseif (empty($chrarr)) {
            $chrarr = $this->conv->strToChrArr($str);
        }
        if (empty($ordarr)) {
            $ordarr = $this->conv->chrArrToOrdArr($chrarr);
        }

        $this->str = $str;
        $this->chrarr = $chrarr;
        $this->ordarr = $ordarr;

        if ($forcertl === false) {
            $this->forcertl = false;
        } else {
            // isset() guards the offset access, so an empty string or a non-indexable value
            // (true, null, int, ...) yields '' without an "uninitialized offset" warning and
            // without passing null to strtoupper(). Any non-false value, '' included, still
            // makes isRtlMode() report that the string has to be processed.
            $this->forcertl = (isset($forcertl[0]) ? strtoupper($forcertl[0]) : '');
        }
    }

    /**
     * Returns the processed array of UTF-8 codepoints
     *
     * @return array
     */
    public function getOrdArray()
    {
        return $this->bidiordarr;
    }

    /**
     * Returns the processed array of UTF-8 chars
     *
     * The array is built on first request from the processed codepoints and then cached.
     *
     * @return array
     */
    public function getChrArray()
    {
        if (empty($this->bidichrarr)) {
            $this->bidichrarr = $this->conv->ordArrToChrArr($this->bidiordarr);
        }
        return $this->bidichrarr;
    }

    /**
     * Returns the number of characters in the processed string
     *
     * @return int
     */
    public function getNumChars()
    {
        return count($this->getChrArray());
    }

    /**
     * Returns the processed string
     *
     * The string is built on first request from the processed chars and then cached.
     *
     * @return string
     */
    public function getString()
    {
        if (empty($this->bidistr)) {
            $this->bidistr = implode($this->getChrArray());
        }
        return $this->bidistr;
    }

    /**
     * Returns an array with the processed codepoints as keys and true as value
     *
     * @return array
     */
    public function getCharKeys()
    {
        return array_fill_keys(array_values($this->bidiordarr), true);
    }

    /**
     * P1. Split the text into separate paragraphs.
     *     A paragraph separator is kept with the previous paragraph.
     *
     * When the text ends with a paragraph separator the last returned paragraph is an empty array.
     *
     * @return array Array of paragraphs, each one being an array of codepoints
     */
    protected function getParagraphs()
    {
        $paragraph = array(0 => array());
        $pdx = 0; // paragraphs index
        foreach ($this->ordarr as $ord) {
            $paragraph[$pdx][] = $ord;
            if (isset(UniType::$uni[$ord]) && (UniType::$uni[$ord] == 'B')) {
                // the separator closes the current paragraph and opens a new one
                ++$pdx;
                $paragraph[$pdx] = array();
            }
        }
        return $paragraph;
    }

    /**
     * Process the string
     *
     * Runs the bidirectional algorithm steps on each paragraph and appends the reordered
     * codepoints to $this->bidiordarr.
     */
    protected function process()
    {
        // split the text into separate paragraphs.
        $paragraph = $this->getParagraphs();

        // Within each paragraph, apply all the other rules of this algorithm.
        foreach ($paragraph as $par) {
            if (empty($par)) {
                // Empty paragraph (e.g. the one following a trailing separator): nothing to output.
                continue;
            }
            $pel = $this->getPel($par);
            $stepx = new StepX($par, $pel);
            $stepx10 = new StepXten($stepx->getChrData(), $pel);
            $ilrs = $stepx10->getIsolatedLevelRunSequences();
            $chardata = array();
            // Highest 'maxlevel' reported by any sequence of THIS paragraph. It is reset for
            // every paragraph and aggregated over all the sequences, so that the reordering
            // step does not receive only the value of whichever sequence came last.
            $maxlevel = 0;
            foreach ($ilrs as $seq) {
                $stepw = new StepW($seq);
                $stepn = new StepN($stepw->getSequence());
                $stepi = new StepI($stepn->getSequence());
                $seq = $stepi->getSequence();
                if ($this->shaping) {
                    $shaping = new Shaping($seq);
                    $seq = $shaping->getSequence();
                }
                if (isset($seq['maxlevel']) && ($seq['maxlevel'] > $maxlevel)) {
                    $maxlevel = $seq['maxlevel'];
                }
                $chardata = array_merge($chardata, $seq['item']);
            }
            $stepl = new StepL($chardata, $pel, $maxlevel);
            $chardata = $stepl->getChrData();
            foreach ($chardata as $chd) {
                $this->bidiordarr[] = $chd['char'];
            }
            // add back the paragraph separators
            $lastchar = end($par);
            if (isset(UniType::$uni[$lastchar]) && (UniType::$uni[$lastchar] == 'B')) {
                $this->bidiordarr[] = $lastchar;
            }
        }
    }

    /**
     * Get the paragraph embedding level
     *
     * A forced direction takes precedence ('R' => 1, 'L' => 0);
     * otherwise the level is the one computed by StepP for the paragraph.
     *
     * @param array $par Paragraph
     *
     * @return int
     */
    protected function getPel($par)
    {
        if ($this->forcertl === 'R') {
            return 1;
        }
        if ($this->forcertl === 'L') {
            return 0;
        }
        $stepp = new StepP($par);
        return $stepp->getPel();
    }

    /**
     * Check if the input string contains RTL characters to process
     *
     * As a side effect it sets $this->arabic, which the constructor uses to decide about shaping.
     *
     * @return bool True if a direction is forced or the string matches the ARABIC or RTL pattern
     */
    protected function isRtlMode()
    {
        // preg_match() returns 1, 0 or false: normalise it to the bool the property is declared as.
        $this->arabic = (bool) preg_match(UniPattern::ARABIC, $this->str);
        return (($this->forcertl !== false) || $this->arabic || (bool) preg_match(UniPattern::RTL, $this->str));
    }
}
 

© 2004-2017 – Nicola Asuni - Tecnick.com - All rights reserved.
about - disclaimer - privacy