[ Index ] |
|
Code source de DokuWiki 2006-11-06 |
<?php
/**
 * UTF8 helper functions
 *
 * @license    LGPL (http://www.gnu.org/copyleft/lesser.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */

/**
 * Check for mb_string support
 *
 * UTF8_MBSTRING is set to 1 when mb_substr() exists and UTF8_NOMBSTRING has
 * not been defined, otherwise to 0. A value defined before this file is
 * loaded is left untouched.
 */
if(!defined('UTF8_MBSTRING')){
    if(function_exists('mb_substr') && !defined('UTF8_NOMBSTRING')){
        define('UTF8_MBSTRING',1);
    }else{
        define('UTF8_MBSTRING',0);
    }
}

if(UTF8_MBSTRING){ mb_internal_encoding('UTF-8'); }


/**
 * URL-Encode a filename to allow unicode characters
 *
 * Slashes are not encoded.
 *
 * When the second parameter is true the string is returned unchanged if it
 * consists only of letters, digits and the characters / _ - . % - this makes
 * it safe to run the function multiple times on the same string
 * (default is true).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode
 * @param  string  $file filename to encode
 * @param  boolean $safe skip encoding when the name already looks encoded
 * @return string
 */
function utf8_encodeFN($file,$safe=true){
    if($safe && preg_match('#^[a-zA-Z0-9/_\-.%]+$#',$file)){
        return $file;
    }
    $file = urlencode($file);
    // keep directory separators readable
    $file = str_replace('%2F','/',$file);
    return $file;
}

/**
 * URL-Decode a filename
 *
 * This is just a wrapper around urldecode
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode
 * @param  string $file encoded filename
 * @return string
 */
function utf8_decodeFN($file){
    return urldecode($file);
}

/**
 * Checks if a string contains 7bit ASCII only
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $str
 * @return boolean true if no byte above 127 is present
 */
function utf8_isASCII($str){
    // byte-wise match (no /u flag): any byte with the high bit set disqualifies
    return !preg_match('/[\x80-\xFF]/',(string)$str);
}

/**
 * Strips all highbyte chars
 *
 * Returns a pure ASCII7 string
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $str
 * @return string the input with every byte above 127 removed
 */
function utf8_strip($str){
    // byte-wise replace (no /u flag)
    return preg_replace('/[\x80-\xFF]/','',(string)$str);
}

/**
 * Tries to detect if a string is in Unicode encoding
 *
 * @author <bmorel@ssi.fr>
 * @link
http://www.php.net/manual/en/function.utf8-encode.php
 *
 * Only the byte structure is checked (lead byte followed by the right number
 * of continuation bytes, including the obsolete 5 and 6 byte forms). Overlong
 * forms and surrogates are not rejected.
 */
function utf8_check($Str) {
    $len = strlen($Str);
    for ($i=0; $i<$len; $i++) {
        $b = ord($Str[$i]);
        if ($b < 0x80) continue; # 0bbbbbbb
        elseif (($b & 0xE0) == 0xC0) $n=1; # 110bbbbb
        elseif (($b & 0xF0) == 0xE0) $n=2; # 1110bbbb
        elseif (($b & 0xF8) == 0xF0) $n=3; # 11110bbb
        elseif (($b & 0xFC) == 0xF8) $n=4; # 111110bb
        elseif (($b & 0xFE) == 0xFC) $n=5; # 1111110b
        else return false; # Does not match any model
        for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
            if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80))
                return false;
        }
    }
    return true;
}

/**
 * Unicode aware replacement for strlen()
 *
 * utf8_decode() converts characters that are not in ISO-8859-1
 * to '?', which, for the purpose of counting, is alright - It's
 * even faster than mb_strlen.
 *
 * @author <chernyshevsky at hotmail dot com>
 * @see    strlen()
 * @see    utf8_decode()
 * @param  string $string
 * @return int number of characters
 */
function utf8_strlen($string){
    return strlen(utf8_decode($string));
}

/**
 * UTF-8 aware alternative to substr
 *
 * Return part of a string given character offset (and optionally length)
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @author Chris Smith <chris@jalakai.co.uk>
 * @param  string  $str
 * @param  integer $offset number of UTF-8 characters offset (from left)
 * @param  integer $length (optional) length in UTF-8 characters from offset
 * @return mixed string or FALSE if failure
 */
function utf8_substr($str, $offset, $length = null) {
    if(UTF8_MBSTRING){
        if( $length === null ){
            return mb_substr($str, $offset);
        }else{
            return mb_substr($str, $offset, $length);
        }
    }

    /*
     * Notes:
     *
     * no mb string support, so we'll use pcre regex's with 'u' flag
     * pcre only supports repetitions of less than 65536, in order to accept up to MAXINT values for
     * offset and length, we'll repeat a group of 65535 characters when needed (ok, up to MAXINT-65536)
     *
     * substr documentation states false can be returned in some cases (e.g. offset > string length)
     * mb_substr never returns false, it will return an empty string instead.
     *
     * calculating the number of characters in the string is a relatively expensive operation, so
     * we only carry it out when necessary. It isn't necessary for +ve offsets and no specified length
     */

    // cast parameters to appropriate types to avoid multiple notices/warnings
    $str = (string)$str;                          // generates E_NOTICE for PHP4 objects, but not PHP5 objects
    $offset = (int)$offset;
    if (!is_null($length)) $length = (int)$length;

    // handle trivial cases
    if ($length === 0) return '';
    if ($offset < 0 && $length < 0 && $length < $offset) return '';

    $offset_pattern = '';
    $length_pattern = '';

    // normalise -ve offsets (we could use a tail anchored pattern, but they are horribly slow!)
    if ($offset < 0) {
        $strlen = strlen(utf8_decode($str));        // see notes
        $offset = $strlen + $offset;
        if ($offset < 0) $offset = 0;
    }

    // establish a pattern for offset, a non-captured group equal in length to offset
    if ($offset > 0) {
        $Ox = (int)($offset/65535);
        $Oy = $offset%65535;

        if ($Ox) $offset_pattern = '(?:.{65535}){'.$Ox.'}';
        $offset_pattern = '^(?:'.$offset_pattern.'.{'.$Oy.'})';
    } else {
        $offset_pattern = '^';                      // offset == 0; just anchor the pattern
    }

    // establish a pattern for length
    if (is_null($length)) {
        $length_pattern = '(.*)$';                  // the rest of the string
    } else {

        if (!isset($strlen)) $strlen = strlen(utf8_decode($str));    // see notes
        if ($offset > $strlen) return '';           // another trivial case

        if ($length > 0) {

            $length = min($strlen-$offset, $length);  // reduce any length that would go passed the end of the string

            $Lx = (int)($length/65535);
            $Ly = $length%65535;

            // +ve length requires ... a captured group of length characters
            if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
            $length_pattern = '('.$length_pattern.'.{'.$Ly.'})';

        } else if ($length < 0) {

            if ($length < ($offset - $strlen)) return '';

            $Lx = (int)((-$length)/65535);
            $Ly = (-$length)%65535;

            // -ve length requires ... capture everything except a group of -length characters
            //                         anchored at the tail-end of the string
            if ($Lx) $length_pattern = '(?:.{65535}){'.$Lx.'}';
            $length_pattern = '(.*)(?:'.$length_pattern.'.{'.$Ly.'})$';
        }
    }

    if (!preg_match('#'.$offset_pattern.$length_pattern.'#us',$str,$match)) return '';
    return $match[1];
}

/**
 * Unicode aware replacement for substr_replace()
 *
 * Negative $start and $length values are counted from the end of the string
 * as in substr_replace(). Note that the default $length is 0 (insert without
 * removing anything), unlike substr_replace().
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    substr_replace()
 * @param  string  $string
 * @param  string  $replacement
 * @param  integer $start  character offset where the replacement begins
 * @param  integer $length number of characters to replace
 * @return string
 */
function utf8_substr_replace($string, $replacement, $start , $length=0 ){
    $strlen = utf8_strlen($string);
    $start  = (int)$start;
    $length = (int)$length;

    // translate end-relative values into absolute, in-range ones
    if($start < 0)       $start = max(0, $strlen + $start);
    if($start > $strlen) $start = $strlen;
    if($length < 0)      $length = max(0, $strlen - $start + $length);

    $ret = '';
    if($start>0) $ret .= utf8_substr($string, 0, $start);
    $ret .= $replacement;
    $ret .= utf8_substr($string, $start+$length);
    return $ret;
}

/**
 * Unicode aware replacement for explode
 *
 * @TODO   support third limit arg
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    explode();
 * @param  string $sep delimiter, must not be empty
 * @param  string $str string to split
 * @return mixed array of parts, FALSE on an empty delimiter
 */
function utf8_explode($sep, $str) {
    if ( $sep == '' ) {
        trigger_error('Empty delimiter',E_USER_WARNING);
        return FALSE;
    }

    return preg_split('!'.preg_quote($sep,'!').'!u',$str);
}

/**
 * Unicode aware replacement for str_replace()
 *
 * Search and replacement strings are both taken literally.
 *
 * @todo   support PHP5 count (fourth arg)
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    str_replace();
 * @param  mixed  $s   string or array of strings to search for
 * @param  mixed  $r   string or array of replacement strings
 * @param  string $str subject
 * @return string
 */
function utf8_str_replace($s,$r,$str){
    if(!is_array($s)){
        $s = '!'.preg_quote($s,'!').'!u';
    }else{
        foreach ($s as $k => $v) {
            // the delimiter has to be quoted here as well
            $s[$k] = '!'.preg_quote($v,'!').'!u';
        }
    }

    // keep preg_replace() from reading $n or \n in the replacement as backreferences
    if(is_array($r)){
        foreach ($r as $k => $v) {
            $r[$k] = addcslashes($v,'\\$');
        }
    }else{
        $r = addcslashes($r,'\\$');
    }

    return preg_replace($s,$r,$str);
}

/**
 * Unicode aware
replacement for ltrim()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    ltrim()
 * @param  string $str
 * @param  string $charlist characters to strip; empty means plain ltrim()
 * @return string
 */
function utf8_ltrim($str,$charlist=''){
    if($charlist == '') return ltrim($str);

    //quote charlist for use in a characterclass
    $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\\$1',$charlist);

    return preg_replace('/^['.$charlist.']+/u','',$str);
}

/**
 * Unicode aware replacement for rtrim()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    rtrim()
 * @param  string $str
 * @param  string $charlist characters to strip; empty means plain rtrim()
 * @return string
 */
function utf8_rtrim($str,$charlist=''){
    if($charlist == '') return rtrim($str);

    //quote charlist for use in a characterclass
    $charlist = preg_replace('!([\\\\\\-\\]\\[/^])!','\\\\$1',$charlist);

    return preg_replace('/['.$charlist.']+$/u','',$str);
}

/**
 * Unicode aware replacement for trim()
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    trim()
 * @param  string $str
 * @param  string $charlist characters to strip; empty means plain trim()
 * @return string
 */
function utf8_trim($str,$charlist='') {
    if($charlist == '') return trim($str);

    return utf8_ltrim(utf8_rtrim($str,$charlist),$charlist);
}


/**
 * This is a unicode aware replacement for strtolower()
 *
 * Uses mb_string extension if available
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    strtolower()
 * @see    utf8_strtoupper()
 * @param  string $string
 * @return string
 */
function utf8_strtolower($string){
    if(UTF8_MBSTRING) return mb_strtolower($string,'utf-8');

    global $UTF8_UPPER_TO_LOWER;
    $uni = utf8_to_unicode($string);
    $cnt = count($uni);
    for ($i=0; $i < $cnt; $i++){
        // isset() avoids notices for code points without a case mapping
        if(isset($UTF8_UPPER_TO_LOWER[$uni[$i]])){
            $uni[$i] = $UTF8_UPPER_TO_LOWER[$uni[$i]];
        }
    }
    return unicode_to_utf8($uni);
}

/**
 * This is a unicode aware replacement for strtoupper()
 *
 * Uses mb_string extension if available
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    strtoupper()
 * @see    utf8_strtolower()
 * @param  string $string
 * @return string
 */
function utf8_strtoupper($string){
    if(UTF8_MBSTRING) return mb_strtoupper($string,'utf-8');

    global $UTF8_LOWER_TO_UPPER;
    $uni = utf8_to_unicode($string);
    $cnt = count($uni);
    for ($i=0; $i < $cnt; $i++){
        // isset() avoids notices for code points without a case mapping
        if(isset($UTF8_LOWER_TO_UPPER[$uni[$i]])){
            $uni[$i] = $UTF8_LOWER_TO_UPPER[$uni[$i]];
        }
    }
    return unicode_to_utf8($uni);
}

/**
 * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
 *
 * Use the optional parameter to just deaccent lower ($case = -1) or upper ($case = 1)
 * letters. Default is to deaccent both cases ($case = 0)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string  $string
 * @param  integer $case -1 lower case only, 1 upper case only, 0 both
 * @return string
 */
function utf8_deaccent($string,$case=0){
    if($case <= 0){
        global $UTF8_LOWER_ACCENTS;
        $string = str_replace(array_keys($UTF8_LOWER_ACCENTS),array_values($UTF8_LOWER_ACCENTS),$string);
    }
    if($case >= 0){
        global $UTF8_UPPER_ACCENTS;
        $string = str_replace(array_keys($UTF8_UPPER_ACCENTS),array_values($UTF8_UPPER_ACCENTS),$string);
    }
    return $string;
}

/**
 * Romanize a non-latin string
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $string
 * @return string
 */
function utf8_romanize($string){
    if(utf8_isASCII($string)) return $string; //nothing to do

    global $UTF8_ROMANIZATION;
    return strtr($string,$UTF8_ROMANIZATION);
}

/**
 * Removes special characters (nonalphanumeric) from a UTF-8 string
 *
 * This function adds the controlchars 0x00 to 0x19 to the array of
 * stripped chars (they are not included in $UTF8_SPECIAL_CHARS)
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $string     The UTF8 string to strip of special chars
 * @param  string $repl       Replace special with this string
 * @param  string $additional Additional chars to strip (used in regexp char class)
 * @return string
 */
function utf8_stripspecials($string,$repl='',$additional=''){
    global $UTF8_SPECIAL_CHARS2;

    // the quoted character class is built once per request
    static $specials = null;
    if(is_null($specials)){
        $specials = preg_quote($UTF8_SPECIAL_CHARS2, '/');
    }

    return preg_replace('/['.$additional.'\x00-\x19'.$specials.']/u',$repl,$string);
}

/**
 * This is an Unicode aware replacement for strpos
 *
 * Uses mb_string extension if available
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    strpos()
 * @param  string  $haystack
 * @param  string  $needle
 * @param  integer $offset character offset to start searching from
 * @return mixed character position of the first match or FALSE
 */
function utf8_strpos($haystack, $needle,$offset=0) {
    if(UTF8_MBSTRING) return mb_strpos($haystack,$needle,$offset,'utf-8');

    if(!$offset){
        // the character length of everything before the first needle is its position
        $ar = utf8_explode($needle, $haystack);
        if ( count($ar) > 1 ) {
            return utf8_strlen($ar[0]);
        }
        return false;
    }else{
        if ( !is_int($offset) ) {
            trigger_error('Offset must be an integer',E_USER_WARNING);
            return false;
        }

        $haystack = utf8_substr($haystack, $offset);

        if ( false !== ($pos = utf8_strpos($haystack,$needle))){
            return $pos + $offset;
        }
        return false;
    }
}

/**
 * Encodes UTF-8 characters to HTML entities
 *
 * Only two byte sequences (U+0080 to U+07FF) are converted to numeric
 * entities; all other bytes are copied unchanged.
 *
 * @author <vpribish at shopping dot com>
 * @link   http://www.php.net/manual/en/function.utf8-decode.php
 * @param  string $str
 * @return string
 */
function utf8_tohtml ($str) {
    $ret  = '';
    $max  = strlen($str);
    $last = 0;  // index of the first byte not yet copied to $ret
    for ($i=0; $i<$max; $i++) {
        $c1 = ord($str[$i]);
        // 110x xxxx is the lead byte of a two byte sequence; a lead byte
        // at the very end of the string has no partner and is copied as is
        if (($c1 >> 5) == 6 && ($i+1) < $max) {
            $ret .= substr($str, $last, $i-$last); // append all the regular characters we've passed
            $c2   = ord($str[++$i]);               // the trailing byte
            // 5 payload bits from the lead byte, 6 from the trailing byte
            $ret .= '&#' . ((($c1 & 0x1F) << 6) | ($c2 & 0x3F)) . ';';
            $last = $i+1;
        }
    }
    return $ret . substr($str, $last); // append the last batch of regular characters
}

/**
 * Takes an UTF-8 string and returns an array of ints representing the
 * Unicode characters. Astral planes are supported ie. the ints in the
 * output can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates
 * are not allowed.
 *
 * If $strict is set to true the function returns false if the input
 * string isn't a valid UTF-8 octet sequence and raises a PHP error at
 * level E_USER_WARNING. Without $strict, illegal bytes are skipped and
 * illegal code points (overlong forms, surrogates, out of range values)
 * are still added to the output.
 *
 * Note: this function has been modified slightly in this library to
 * trigger errors on encountering bad bytes
 *
 * @author <hsivonen@iki.fi>
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @param  string  $str    UTF-8 encoded string
 * @param  boolean $strict Check for invalid sequences?
 * @return mixed array of unicode code points or FALSE if UTF-8 invalid
 * @see    unicode_to_utf8
 * @link   http://hsivonen.iki.fi/php-utf8/
 * @link   http://sourceforge.net/projects/phputf8/
 */
function utf8_to_unicode($str,$strict=false) {
    $mState = 0;     // cached expected number of octets after the current octet
                     // until the beginning of the next UTF8 character sequence
    $mUcs4  = 0;     // cached Unicode character
    $mBytes = 1;     // cached expected number of octets in the current sequence

    $out = array();

    $len = strlen($str);

    for($i = 0; $i < $len; $i++) {

        $in = ord($str[$i]);

        if ( $mState == 0) {

            // When mState is zero we expect either a US-ASCII character or a
            // multi-octet sequence.
            if (0 == (0x80 & ($in))) {
                // US-ASCII, pass straight through.
                $out[] = $in;
                $mBytes = 1;

            } else if (0xC0 == (0xE0 & ($in))) {
                // First octet of 2 octet sequence
                $mUcs4 = ($in);
                $mUcs4 = ($mUcs4 & 0x1F) << 6;
                $mState = 1;
                $mBytes = 2;

            } else if (0xE0 == (0xF0 & ($in))) {
                // First octet of 3 octet sequence
                $mUcs4 = ($in);
                $mUcs4 = ($mUcs4 & 0x0F) << 12;
                $mState = 2;
                $mBytes = 3;

            } else if (0xF0 == (0xF8 & ($in))) {
                // First octet of 4 octet sequence
                $mUcs4 = ($in);
                $mUcs4 = ($mUcs4 & 0x07) << 18;
                $mState = 3;
                $mBytes = 4;

            } else if (0xF8 == (0xFC & ($in))) {
                /* First octet of 5 octet sequence.
                 *
                 * This is illegal because the encoded codepoint must be either
                 * (a) not the shortest form or
                 * (b) outside the Unicode range of 0-0x10FFFF.
                 * Rather than trying to resynchronize, we will carry on until the end
                 * of the sequence and let the later error handling code catch it.
                 */
                $mUcs4 = ($in);
                $mUcs4 = ($mUcs4 & 0x03) << 24;
                $mState = 4;
                $mBytes = 5;

            } else if (0xFC == (0xFE & ($in))) {
                // First octet of 6 octet sequence, see comments for 5 octet sequence.
                $mUcs4 = ($in);
                $mUcs4 = ($mUcs4 & 1) << 30;
                $mState = 5;
                $mBytes = 6;

            } elseif($strict) {
                /* Current octet is neither in the US-ASCII range nor a legal first
                 * octet of a multi-octet sequence.
                 */
                trigger_error(
                        'utf8_to_unicode: Illegal sequence identifier '.
                            'in UTF-8 at byte '.$i,
                        E_USER_WARNING
                    );
                return FALSE;

            }

        } else {

            // When mState is non-zero, we expect a continuation of the multi-octet
            // sequence
            if (0x80 == (0xC0 & ($in))) {

                // Legal continuation.
                $shift = ($mState - 1) * 6;
                $tmp = $in;
                $tmp = ($tmp & 0x0000003F) << $shift;
                $mUcs4 |= $tmp;

                /**
                 * End of the multi-octet sequence. mUcs4 now contains the final
                 * Unicode codepoint to be output
                 */
                if (0 == --$mState) {

                    /*
                     * Check for illegal sequences and codepoints.
                     */
                    // From Unicode 3.1, non-shortest form is illegal
                    if (((2 == $mBytes) && ($mUcs4 < 0x0080)) ||
                        ((3 == $mBytes) && ($mUcs4 < 0x0800)) ||
                        ((4 == $mBytes) && ($mUcs4 < 0x10000)) ||
                        (4 < $mBytes) ||
                        // From Unicode 3.2, surrogate characters are illegal
                        (($mUcs4 & 0xFFFFF800) == 0xD800) ||
                        // Codepoints outside the Unicode range are illegal
                        ($mUcs4 > 0x10FFFF)) {

                        if($strict){
                            trigger_error(
                                    'utf8_to_unicode: Illegal sequence or codepoint '.
                                        'in UTF-8 at byte '.$i,
                                    E_USER_WARNING
                                );

                            return FALSE;
                        }

                    }

                    if (0xFEFF != $mUcs4) {
                        // BOM is legal but we don't want to output it
                        $out[] = $mUcs4;
                    }

                    //initialize UTF8 cache
                    $mState = 0;
                    $mUcs4  = 0;
                    $mBytes = 1;
                }

            } elseif($strict) {
                /**
                 *((0xC0 & (*in) != 0x80) && (mState != 0))
                 * Incomplete multi-octet sequence.
                 */
                trigger_error(
                        'utf8_to_unicode: Incomplete multi-octet '.
                        ' sequence in UTF-8 at byte '.$i,
                        E_USER_WARNING
                    );

                return FALSE;
            }
        }
    }
    return $out;
}

/**
 * Takes an array of ints representing the Unicode characters and returns
 * a UTF-8 string. Astral planes are supported ie. the ints in the
 * input can be > 0xFFFF. Occurrances of the BOM are ignored. Surrogates
 * are not allowed.
*
 * If $strict is set to true the function returns false if the input
 * array contains ints that represent surrogates or are outside the
 * Unicode range and raises a PHP error at level E_USER_WARNING.
 * Without $strict such values are silently skipped.
 *
 * Note: the string is built by plain concatenation, so no output buffer
 * is left open when the function bails out early in strict mode.
 *
 * @param  array   $arr    unicode code points representing a string
 * @param  boolean $strict Check for invalid sequences?
 * @return mixed UTF-8 string or FALSE if array contains invalid code points
 * @author <hsivonen@iki.fi>
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    utf8_to_unicode
 * @link   http://hsivonen.iki.fi/php-utf8/
 * @link   http://sourceforge.net/projects/phputf8/
 */
function unicode_to_utf8($arr,$strict=false) {
    if (!is_array($arr)) return '';
    $result = '';

    foreach (array_keys($arr) as $k) {
        $cp = $arr[$k];

        # negative values and values beyond the Unicode range
        if ($cp < 0 || $cp > 0x10ffff) {

            if($strict){
                trigger_error(
                    'unicode_to_utf8: Codepoint out of Unicode range '.
                    'at index: '.$k.', value: '.$cp,
                    E_USER_WARNING
                    );
                return FALSE;
            }

        # ASCII range (including control chars)
        } else if ($cp <= 0x007f) {

            $result .= chr($cp);

        # 2 byte sequence
        } else if ($cp <= 0x07ff) {

            $result .= chr(0xc0 | ($cp >> 6));
            $result .= chr(0x80 | ($cp & 0x003f));

        # Byte order mark (skip)
        } else if ($cp == 0xFEFF) {

            // nop -- zap the BOM

        # Test for illegal surrogates
        } else if ($cp >= 0xD800 && $cp <= 0xDFFF) {

            // found a surrogate
            if($strict){
                trigger_error(
                    'unicode_to_utf8: Illegal surrogate '.
                    'at index: '.$k.', value: '.$cp,
                    E_USER_WARNING
                    );
                return FALSE;
            }

        # 3 byte sequence
        } else if ($cp <= 0xffff) {

            $result .= chr(0xe0 | ($cp >> 12));
            $result .= chr(0x80 | (($cp >> 6) & 0x003f));
            $result .= chr(0x80 | ($cp & 0x003f));

        # 4 byte sequence
        } else {

            $result .= chr(0xf0 | ($cp >> 18));
            $result .= chr(0x80 | (($cp >> 12) & 0x3f));
            $result .= chr(0x80 | (($cp >> 6) & 0x3f));
            $result .= chr(0x80 | ($cp & 0x3f));
        }
    }

    return $result;
}

/**
 * UTF-8 to UTF-16BE conversion.
 *
 * Without mb_string the string is decoded with utf8_to_unicode() and
 * code points above 0xFFFF are written as surrogate pairs.
 *
 * @param  string  $str UTF-8 string (passed by reference, not modified)
 * @param  boolean $bom prepend the big endian byte order mark?
 * @return string UTF-16BE encoded string
 */
function utf8_to_utf16be(&$str, $bom = false) {
    $out = $bom ? "\xFE\xFF" : '';
    if(UTF8_MBSTRING) return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');

    $uni = utf8_to_unicode($str);
    foreach($uni as $cp){
        if($cp > 0xFFFF){
            // pack('n') keeps 16 bit only, so split into high and low surrogate
            $cp  -= 0x10000;
            $out .= pack('nn', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
        }else{
            $out .= pack('n',$cp);
        }
    }
    return $out;
}

/**
 * UTF-16BE to UTF-8 conversion.
*
 * This function decodes big endian UTF-16 input and returns the UTF-8
 * equivalent. Surrogate pairs are combined into a single code point
 * before encoding; unpaired surrogates are dropped.
 *
 * @param  string $str UTF-16BE string (passed by reference, not modified)
 * @return string UTF-8 encoded string
 */
function utf16be_to_utf8(&$str) {
    $units = unpack('n*',$str);
    if(!is_array($units)) return '';

    $uni  = array();
    $high = 0;   // pending high surrogate, 0 if none
    foreach($units as $unit){
        if($unit >= 0xD800 && $unit <= 0xDBFF){
            $high = $unit;
            continue;
        }
        if($high && $unit >= 0xDC00 && $unit <= 0xDFFF){
            $uni[] = 0x10000 + (($high & 0x3FF) << 10) + ($unit & 0x3FF);
        }else{
            $uni[] = $unit;
        }
        $high = 0;
    }
    return unicode_to_utf8($uni);
}

/**
 * Replace bad bytes with an alternative character
 *
 * ASCII character is recommended for replacement char
 *
 * PCRE Pattern to locate bad bytes in a UTF-8 string
 * Comes from W3 FAQ: Multilingual Forms
 * Note: modified to include full ASCII range including control chars
 *
 * @author Harry Fuecks <hfuecks@gmail.com>
 * @see    http://www.w3.org/International/questions/qa-forms-utf-8
 * @param  string $str     string to search
 * @param  string $replace string to replace bad bytes with (defaults to the
 *                         empty string, i.e. bad bytes are removed) - use ASCII
 * @return string
 */
function utf8_bad_replace($str, $replace = '') {
    $UTF8_BAD =
     '([\x00-\x7F]'.                          # ASCII (including control chars)
     '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
     '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # excluding overlongs
     '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
     '|\xED[\x80-\x9F][\x80-\xBF]'.           # excluding surrogates
     '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # planes 1-3
     '|[\xF1-\xF3][\x80-\xBF]{3}'.            # planes 4-15
     '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # plane 16
     '|(.{1}))';                              # invalid byte

    $str    = (string)$str;
    $len    = strlen($str);
    $pos    = 0;
    $result = '';

    // walk the string with a match offset instead of cutting off the
    // consumed head each round; \G pins every match to the current position
    while ($pos < $len && preg_match('/\G'.$UTF8_BAD.'/S', $str, $matches, 0, $pos)) {
        if ( !isset($matches[2])) {
            $result .= $matches[0];   // a valid character, keep it
        } else {
            $result .= $replace;      // only the catch-all group matched: bad byte
        }
        $pos += strlen($matches[0]);
    }
    return $result;
}

/**
 * adjust a byte index into a utf8 string to a utf8 character boundary
 *
 * @param $str   string   utf8 character string
 * @param $i     int      byte index into $str
 * @param $next  bool     direction to search for boundary,
 *                           false = up (current character)
 *                           true = down (next character)
 *
 * @return int            byte index into $str now pointing to a utf8 character boundary
 *
 * @author       chris smith <chris@jalakai.co.uk>
 */
function utf8_correctIdx(&$str,$i,$next=false) {

    if ($i <= 0) return 0;

    $limit = strlen($str);
    if ($i>=$limit) return $limit;

    // bytes of the form 10xxxxxx are continuation bytes, i.e. not a boundary
    if ($next) {
        while (($i<$limit) && ((ord($str[$i]) & 0xC0) == 0x80)) $i++;
    } else {
        while ($i && ((ord($str[$i]) & 0xC0) == 0x80)) $i--;
    }

    return $i;
}

// only needed if no mb_string available
if(!UTF8_MBSTRING){

    /**
     * UTF-8 Case lookup table
     *
     * This lookuptable maps the code points of lower case letters to the
     * code points of their corresponding upper case letters
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    global $UTF8_LOWER_TO_UPPER;
    $UTF8_LOWER_TO_UPPER = array(
        0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042,
        0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100,
        0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393,
        0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 0x0113=>0x0112, 0x043C=>0x041C,
        0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F,
        0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E,
        0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3,
        0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 0x0449=>0x0429, 0x014B=>0x014A,
        0x0431=>0x0411, 0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9,
        0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 0x015D=>0x015C,
        0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4,
        0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164,
        0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156,
        0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118,
        0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128,
        0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428,
        0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055,
        0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A,
        0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC,
        0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0,
        0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D,
        0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0,
        0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5,
        0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA,
        0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045,
        0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F,
        0x044C=>0x042C, 0x1EF3=>0x1EF2, 0x0068=>0x0048,
        0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6,
        0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407,
        0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395,
        0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396,
        0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051,
        0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 0x0105=>0x0104, 0x0458=>0x0408,
        0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F,
        0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126,
        0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C,
        0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E,
        0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB,
        0x0446=>0x0426, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421,
        0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A,
        0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102,
        0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9,
        0x0123=>0x0122,
    );

    /**
     * UTF-8 Case lookup table
     *
     * This lookuptable maps the code points of upper case letters to the
     * code points of their corresponding lower case letters (it does so by
     * flipping $UTF8_LOWER_TO_UPPER)
     *
     * @author Andreas Gohr <andi@splitbrain.org>
     */
    global $UTF8_UPPER_TO_LOWER;
    $UTF8_UPPER_TO_LOWER = array_flip($UTF8_LOWER_TO_UPPER);

} // end of case lookup tables


/**
 * UTF-8 lookup table for lower case accented letters
 *
 * This lookuptable defines replacements for accented characters from the ASCII-7
 * range. This are lower case letters only.
*
 * The keys are UTF-8 encoded characters, the values their ASCII replacements.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
global $UTF8_LOWER_ACCENTS;
$UTF8_LOWER_ACCENTS = array(
    'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o',
    'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k',
    'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o',
    'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o',
    'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c',
    'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't',
    'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l',
    'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z',
    'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't',
    'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o',
    'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j',
    'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o',
    'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g',
    'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a',
    'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e',
);

/**
 * UTF-8 lookup table for upper case accented letters
 *
 * This lookuptable defines replacements for accented characters from the ASCII-7
 * range. This are upper case letters only.
*
 * The keys are UTF-8 encoded characters, the values their ASCII replacements.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
global $UTF8_UPPER_ACCENTS;
$UTF8_UPPER_ACCENTS = array(
    'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O',
    'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K',
    'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O',
    'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O',
    'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C',
    'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T',
    'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L',
    'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z',
    'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T',
    'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O',
    'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J',
    'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O',
    'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G',
    'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A',
    'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae', 'Ĕ' => 'E',
);

/**
 * UTF-8 array of common special characters
 *
 * This array should contain all special characters (not a letter or digit)
 * defined in the various local charsets - it's not a complete list of non-alphanum
 * characters in UTF-8. It's not perfect but should match most cases of special
 * chars.
 *
 * The controlchars 0x00 to 0x19 are _not_ included in this array. The space 0x20 is!
 * These chars are _not_ in the array either: _ (0x5f), : 0x3a, .
0x2e, - 0x2d, * 0x2a 979 * 980 * @author Andreas Gohr <andi@splitbrain.org> 981 * @see utf8_stripspecials() 982 */ 983 global $UTF8_SPECIAL_CHARS; 984 $UTF8_SPECIAL_CHARS = array( 985 0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023, 986 0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002b, 0x002c, 987 0x002f, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b, 988 0x005c, 0x005d, 0x005e, 0x0060, 0x007b, 0x007c, 0x007d, 0x007e, 989 0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088, 990 0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092, 991 0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c, 992 0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6, 993 0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0, 994 0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba, 995 0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9, 996 0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384, 997 0x0385, 0x0387, 0x03b2, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1, 998 0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc, 999 0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c, 1000 0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651, 1001 0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015, 1002 0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022, 1003 0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab, 1004 0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193, 1005 0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202, 1006 0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212, 1007 0x2215, 
0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229, 1008 0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265, 1009 0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310, 1010 0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514, 1011 0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553, 1012 0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d, 1013 0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567, 1014 0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590, 1015 0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7, 1016 0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702, 1017 0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f, 1018 0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719, 1019 0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723, 1020 0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e, 1021 0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738, 1022 0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742, 1023 0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d, 1024 0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c, 1025 0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f, 1026 0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e, 1027 0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8, 1028 0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3, 1029 0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd, 1030 0x27be, 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 
0xf8d9, 0xf8da, 0xf8db, 0xf8dc, 1031 0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6, 1032 0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0, 1033 0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa, 1034 0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d, 1035 ); 1036 1037 // utf8 version of above data 1038 global $UTF8_SPECIAL_CHARS2; 1039 $UTF8_SPECIAL_CHARS2 = 1040 ' !"#$%&\'()+,/;<=>?@[\]^`{|}~€Â‚ƒ„…†‡ˆ‰Š‹ŒÂŽ‘’“”•�'. 1041 '�—˜™š›œÂžŸ ¡¢£¤¥¦§¨©ª«¬Â®¯°±²³´µ¶·¸¹º»¼½�'. 1042 '�¿×÷ˇ˘˙˚˛˜ËÌ€Ị̀̃̉΄΅·βφϑϒϕϖְֱֲֳִֵֶַָֹֻּֽ־ֿ�'. 1043 '�×ׂ׃׳״،؛؟ـًٌÙÙŽÙÙّْ٪฿‌â€â€Žâ€â€“—―‗‘’‚“â€ï¿½'. 1044 '��†‡•…‰′″‹›â„₧₪₫€№℘™Ωℵâ†â†‘→↓↔↕↵'. 1045 'â‡â‡‘⇒⇓⇔∀∂∃∅∆∇∈∉∋âˆâˆ‘−∕∗∙√âˆâˆžâˆ ∧∨�'. 1046 '�∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅âŒâŒ ⌡〈〉⑩─�'. 1047 '��┌â”└┘├┤┬┴┼â•║╒╓╔╕╖╗╘╙╚╛╜â•╞╟╠'. 1048 '╡╢╣╤╥╦╧╨╩╪╫╬▀▄█▌â–░▒▓■▲▼◆◊â—�'. 1049 '�★☎☛☞♠♣♥♦âœâœ‚✃✄✆✇✈✉✌âœâœŽâœâœâœ‘✒✓✔✕�'. 1050 '��✗✘✙✚✛✜âœâœžâœŸâœ ✡✢✣✤✥✦✧✩✪✫✬âœâœ®âœ¯âœ°âœ±'. 1051 '✲✳✴✵✶✷✸✹✺✻✼✽✾✿â€ââ‚âƒâ„â…â†â‡âˆâ‰âŠâ‹ï¿½'. 1052 '�âââ‘â’â–â˜â™âšâ›âœââžâ¡â¢â£â¤â¥â¦â§â¿âž‰âž“➔➘➙➚�'. 1053 '��➜âžâžžâžŸâž ➡➢➣➤➥➦➧➨➩➪➫➬âžâž®âž¯âž±âž²âž³âž´âžµâž¶'. 1054 '➷➸➹➺➻➼➽➾ï£ï£žï£Ÿï£ �'. 1055 '�ï£ï£®ï£¯ï£°ï£±ï£²ï£³ï£´ï£µï£¶ï£·ï£¸ï£¹ï£ºï£»ï£¼ï£½ï£¾ï¹¼ï¹½'; 1056 1057 /** 1058 * Romanization lookup table 1059 * 1060 * This lookup tables provides a way to transform strings written in a language 1061 * different from the ones based upon latin letters into plain ASCII. 1062 * 1063 * Please note: this is not a scientific transliteration table. It only works 1064 * oneway from nonlatin to ASCII and it works by simple character replacement 1065 * only. Specialities of each language are not supported. 
1066 * 1067 * @author Andreas Gohr <andi@splitbrain.org> 1068 * @author Vitaly Blokhin <vitinfo@vitn.com> 1069 * @link http://www.uconv.com/translit.htm 1070 * @author Bisqwit <bisqwit@iki.fi> 1071 * @link http://kanjidict.stc.cx/hiragana.php?src=2 1072 * @link http://www.translatum.gr/converter/greek-transliteration.htm 1073 * @link http://en.wikipedia.org/wiki/Royal_Thai_General_System_of_Transcription 1074 * @link http://www.btranslations.com/resources/romanization/korean.asp 1075 */ 1076 global $UTF8_ROMANIZATION; 1077 $UTF8_ROMANIZATION = array( 1078 //russian cyrillic 1079 'а'=>'a','Ð'=>'A','б'=>'b','Б'=>'B','в'=>'v','Ð’'=>'V','г'=>'g','Г'=>'G', 1080 'д'=>'d','Д'=>'D','е'=>'e','Е'=>'E','Ñ‘'=>'jo','Ð'=>'Jo','ж'=>'zh','Ж'=>'Zh', 1081 'з'=>'z','З'=>'Z','и'=>'i','И'=>'I','й'=>'j','Й'=>'J','к'=>'k','К'=>'K', 1082 'л'=>'l','Л'=>'L','м'=>'m','М'=>'M','н'=>'n','Ð'=>'N','о'=>'o','О'=>'O', 1083 'п'=>'p','П'=>'P','Ñ€'=>'r','Ð '=>'R','Ñ'=>'s','С'=>'S','Ñ‚'=>'t','Т'=>'T', 1084 'у'=>'u','У'=>'U','Ñ„'=>'f','Ф'=>'F','Ñ…'=>'x','Ð¥'=>'X','ц'=>'c','Ц'=>'C', 1085 'ч'=>'ch','Ч'=>'Ch','ш'=>'sh','Ш'=>'Sh','щ'=>'sch','Щ'=>'Sch','ÑŠ'=>'', 1086 'Ъ'=>'','Ñ‹'=>'y','Ы'=>'Y','ÑŒ'=>'','Ь'=>'','Ñ'=>'eh','Ð'=>'Eh','ÑŽ'=>'ju', 1087 'Ю'=>'Ju','Ñ'=>'ja','Я'=>'Ja', 1088 // Ukrainian cyrillic 1089 'Ò'=>'Gh','Ò‘'=>'gh','Є'=>'Je','Ñ”'=>'je','І'=>'I','Ñ–'=>'i','Ї'=>'Ji','Ñ—'=>'ji', 1090 // Georgian 1091 'áƒ'=>'a','ბ'=>'b','გ'=>'g','დ'=>'d','ე'=>'e','ვ'=>'v','ზ'=>'z','თ'=>'th', 1092 'ი'=>'i','კ'=>'p','ლ'=>'l','მ'=>'m','ნ'=>'n','áƒ'=>'o','პ'=>'p','ჟ'=>'zh', 1093 'რ'=>'r','ს'=>'s','ტ'=>'t','უ'=>'u','ფ'=>'ph','ქ'=>'kh','ღ'=>'gh','ყ'=>'q', 1094 'შ'=>'sh','ჩ'=>'ch','ც'=>'c','ძ'=>'dh','წ'=>'w','áƒ'=>'j','ხ'=>'x','ჯ'=>'jh', 1095 'ჰ'=>'xh', 1096 //Sanskrit 1097 'अ'=>'a','आ'=>'ah','इ'=>'i','ई'=>'ih','उ'=>'u','ऊ'=>'uh','ऋ'=>'ry', 1098 'ॠ'=>'ryh','ऌ'=>'ly','ॡ'=>'lyh','à¤'=>'e','à¤'=>'ay','ओ'=>'o','औ'=>'aw', 1099 'अं'=>'amh','अः'=>'aq','क'=>'k','ख'=>'kh','ग'=>'g','घ'=>'gh','ङ'=>'nh', 1100 
'च'=>'c','छ'=>'ch','ज'=>'j','à¤'=>'jh','ञ'=>'ny','ट'=>'tq','ठ'=>'tqh', 1101 'ड'=>'dq','ढ'=>'dqh','ण'=>'nq','त'=>'t','थ'=>'th','द'=>'d','ध'=>'dh', 1102 'न'=>'n','प'=>'p','फ'=>'ph','ब'=>'b','à¤'=>'bh','म'=>'m','य'=>'z','र'=>'r', 1103 'ल'=>'l','व'=>'v','श'=>'sh','ष'=>'sqh','स'=>'s','ह'=>'x', 1104 //Hebrew 1105 '×'=>'a', 'ב'=>'b','×’'=>'g','ד'=>'d','×”'=>'h','ו'=>'v','×–'=>'z','×—'=>'kh','ט'=>'th', 1106 '×™'=>'y','ך'=>'h','×›'=>'k','ל'=>'l','×'=>'m','מ'=>'m','ן'=>'n','× '=>'n', 1107 'ס'=>'s','×¢'=>'ah','×£'=>'f','פ'=>'p','×¥'=>'c','צ'=>'c','×§'=>'q','ר'=>'r', 1108 'ש'=>'sh','ת'=>'t', 1109 //Arabic 1110 'ا'=>'a','ب'=>'b','ت'=>'t','Ø«'=>'th','ج'=>'g','Ø'=>'xh','Ø®'=>'x','د'=>'d', 1111 'ذ'=>'dh','ر'=>'r','ز'=>'z','س'=>'s','Ø´'=>'sh','ص'=>'s\'','ض'=>'d\'', 1112 'Ø·'=>'t\'','ظ'=>'z\'','ع'=>'y','غ'=>'gh','Ù'=>'f','Ù‚'=>'q','Ùƒ'=>'k', 1113 'Ù„'=>'l','Ù…'=>'m','Ù†'=>'n','Ù‡'=>'x\'','Ùˆ'=>'u','ÙŠ'=>'i', 1114 1115 // Japanese hiragana 1116 'ã‚'=>'a','ãˆ'=>'e','ã„'=>'i','ãŠ'=>'o','ã†'=>'u','ã°'=>'ba','ã¹'=>'be', 1117 'ã³'=>'bi','ã¼'=>'bo','ã¶'=>'bu','ã—'=>'ci','ã '=>'da','ã§'=>'de','ã¢'=>'di', 1118 'ã©'=>'do','ã¥'=>'du','ãµã'=>'fa','ãµã‡'=>'fe','ãµãƒ'=>'fi','ãµã‰'=>'fo', 1119 'ãµ'=>'fu','ãŒ'=>'ga','ã’'=>'ge','ãŽ'=>'gi','ã”'=>'go','ã'=>'gu','ã¯'=>'ha', 1120 'ã¸'=>'he','ã²'=>'hi','ã»'=>'ho','ãµ'=>'hu','ã˜ã‚ƒ'=>'ja','ã˜ã‡'=>'je', 1121 'ã˜'=>'ji','ã˜ã‚‡'=>'jo','ã˜ã‚…'=>'ju','ã‹'=>'ka','ã‘'=>'ke','ã'=>'ki', 1122 'ã“'=>'ko','ã'=>'ku','ら'=>'la','れ'=>'le','り'=>'li','ã‚'=>'lo','ã‚‹'=>'lu', 1123 'ã¾'=>'ma','ã‚'=>'me','ã¿'=>'mi','ã‚‚'=>'mo','ã‚€'=>'mu','ãª'=>'na','ã'=>'ne', 1124 'ã«'=>'ni','ã®'=>'no','ã¬'=>'nu','ã±'=>'pa','ãº'=>'pe','ã´'=>'pi','ã½'=>'po', 1125 'ã·'=>'pu','ら'=>'ra','れ'=>'re','り'=>'ri','ã‚'=>'ro','ã‚‹'=>'ru','ã•'=>'sa', 1126 'ã›'=>'se','ã—'=>'si','ã'=>'so','ã™'=>'su','ãŸ'=>'ta','ã¦'=>'te','ã¡'=>'ti', 1127 'ã¨'=>'to','ã¤'=>'tu','ヴã'=>'va','ヴã‡'=>'ve','ヴãƒ'=>'vi','ヴã‰'=>'vo', 1128 'ヴ'=>'vu','ã‚'=>'wa','ã†ã‡'=>'we','ã†ãƒ'=>'wi','ã‚’'=>'wo','ã‚„'=>'ya','ã„ã‡'=>'ye', 1129 
'ã„'=>'yi','よ'=>'yo','ゆ'=>'yu','ã–'=>'za','ãœ'=>'ze','ã˜'=>'zi','ãž'=>'zo', 1130 'ãš'=>'zu','ã³ã‚ƒ'=>'bya','ã³ã‡'=>'bye','ã³ãƒ'=>'byi','ã³ã‚‡'=>'byo','ã³ã‚…'=>'byu', 1131 'ã¡ã‚ƒ'=>'cha','ã¡ã‡'=>'che','ã¡'=>'chi','ã¡ã‚‡'=>'cho','ã¡ã‚…'=>'chu','ã¡ã‚ƒ'=>'cya', 1132 'ã¡ã‡'=>'cye','ã¡ãƒ'=>'cyi','ã¡ã‚‡'=>'cyo','ã¡ã‚…'=>'cyu','ã§ã‚ƒ'=>'dha','ã§ã‡'=>'dhe', 1133 'ã§ãƒ'=>'dhi','ã§ã‚‡'=>'dho','ã§ã‚…'=>'dhu','ã©ã'=>'dwa','ã©ã‡'=>'dwe','ã©ãƒ'=>'dwi', 1134 'ã©ã‰'=>'dwo','ã©ã…'=>'dwu','ã¢ã‚ƒ'=>'dya','ã¢ã‡'=>'dye','ã¢ãƒ'=>'dyi','ã¢ã‚‡'=>'dyo', 1135 'ã¢ã‚…'=>'dyu','ã¢'=>'dzi','ãµã'=>'fwa','ãµã‡'=>'fwe','ãµãƒ'=>'fwi','ãµã‰'=>'fwo', 1136 'ãµã…'=>'fwu','ãµã‚ƒ'=>'fya','ãµã‡'=>'fye','ãµãƒ'=>'fyi','ãµã‚‡'=>'fyo','ãµã‚…'=>'fyu', 1137 'ãŽã‚ƒ'=>'gya','ãŽã‡'=>'gye','ãŽãƒ'=>'gyi','ãŽã‚‡'=>'gyo','ãŽã‚…'=>'gyu','ã²ã‚ƒ'=>'hya', 1138 'ã²ã‡'=>'hye','ã²ãƒ'=>'hyi','ã²ã‚‡'=>'hyo','ã²ã‚…'=>'hyu','ã˜ã‚ƒ'=>'jya','ã˜ã‡'=>'jye', 1139 'ã˜ãƒ'=>'jyi','ã˜ã‚‡'=>'jyo','ã˜ã‚…'=>'jyu','ãゃ'=>'kya','ãã‡'=>'kye','ããƒ'=>'kyi', 1140 'ãょ'=>'kyo','ãã‚…'=>'kyu','りゃ'=>'lya','りã‡'=>'lye','りãƒ'=>'lyi','りょ'=>'lyo', 1141 'りゅ'=>'lyu','ã¿ã‚ƒ'=>'mya','ã¿ã‡'=>'mye','ã¿ãƒ'=>'myi','ã¿ã‚‡'=>'myo','ã¿ã‚…'=>'myu', 1142 'ã‚“'=>'n','ã«ã‚ƒ'=>'nya','ã«ã‡'=>'nye','ã«ãƒ'=>'nyi','ã«ã‚‡'=>'nyo','ã«ã‚…'=>'nyu', 1143 'ã´ã‚ƒ'=>'pya','ã´ã‡'=>'pye','ã´ãƒ'=>'pyi','ã´ã‚‡'=>'pyo','ã´ã‚…'=>'pyu','りゃ'=>'rya', 1144 'りã‡'=>'rye','りãƒ'=>'ryi','りょ'=>'ryo','りゅ'=>'ryu','ã—ゃ'=>'sha','ã—ã‡'=>'she', 1145 'ã—'=>'shi','ã—ょ'=>'sho','ã—ã‚…'=>'shu','ã™ã'=>'swa','ã™ã‡'=>'swe','ã™ãƒ'=>'swi', 1146 'ã™ã‰'=>'swo','ã™ã…'=>'swu','ã—ゃ'=>'sya','ã—ã‡'=>'sye','ã—ãƒ'=>'syi','ã—ょ'=>'syo', 1147 'ã—ã‚…'=>'syu','ã¦ã‚ƒ'=>'tha','ã¦ã‡'=>'the','ã¦ãƒ'=>'thi','ã¦ã‚‡'=>'tho','ã¦ã‚…'=>'thu', 1148 'ã¤ã‚ƒ'=>'tsa','ã¤ã‡'=>'tse','ã¤ãƒ'=>'tsi','ã¤ã‚‡'=>'tso','ã¤'=>'tsu','ã¨ã'=>'twa', 1149 'ã¨ã‡'=>'twe','ã¨ãƒ'=>'twi','ã¨ã‰'=>'two','ã¨ã…'=>'twu','ã¡ã‚ƒ'=>'tya','ã¡ã‡'=>'tye', 1150 'ã¡ãƒ'=>'tyi','ã¡ã‚‡'=>'tyo','ã¡ã‚…'=>'tyu','ヴゃ'=>'vya','ヴã‡'=>'vye','ヴãƒ'=>'vyi', 1151 
'ヴょ'=>'vyo','ヴゅ'=>'vyu','ã†ã'=>'wha','ã†ã‡'=>'whe','ã†ãƒ'=>'whi','ã†ã‰'=>'who', 1152 'ã†ã…'=>'whu','ã‚‘'=>'wye','ã‚'=>'wyi','ã˜ã‚ƒ'=>'zha','ã˜ã‡'=>'zhe','ã˜ãƒ'=>'zhi', 1153 'ã˜ã‚‡'=>'zho','ã˜ã‚…'=>'zhu','ã˜ã‚ƒ'=>'zya','ã˜ã‡'=>'zye','ã˜ãƒ'=>'zyi','ã˜ã‚‡'=>'zyo', 1154 'ã˜ã‚…'=>'zyu', 1155 // Japanese katakana 1156 'ã‚¢'=>'a','エ'=>'e','イ'=>'i','オ'=>'o','ウ'=>'u','ãƒ'=>'ba','ベ'=>'be','ビ'=>'bi', 1157 'ボ'=>'bo','ブ'=>'bu','ã‚·'=>'ci','ダ'=>'da','デ'=>'de','ヂ'=>'di','ド'=>'do', 1158 'ヅ'=>'du','ファ'=>'fa','フェ'=>'fe','フィ'=>'fi','フォ'=>'fo','フ'=>'fu','ガ'=>'ga', 1159 'ゲ'=>'ge','ã‚®'=>'gi','ã‚´'=>'go','ã‚°'=>'gu','ãƒ'=>'ha','ヘ'=>'he','ヒ'=>'hi','ホ'=>'ho', 1160 'フ'=>'hu','ジャ'=>'ja','ジェ'=>'je','ジ'=>'ji','ジョ'=>'jo','ジュ'=>'ju','ã‚«'=>'ka', 1161 'ケ'=>'ke','ã‚'=>'ki','コ'=>'ko','ク'=>'ku','ラ'=>'la','レ'=>'le','リ'=>'li','ãƒ'=>'lo', 1162 'ル'=>'lu','マ'=>'ma','メ'=>'me','ミ'=>'mi','モ'=>'mo','ム'=>'mu','ナ'=>'na','ãƒ'=>'ne', 1163 'ニ'=>'ni','ノ'=>'no','ヌ'=>'nu','パ'=>'pa','ペ'=>'pe','ピ'=>'pi','ãƒ'=>'po','プ'=>'pu', 1164 'ラ'=>'ra','レ'=>'re','リ'=>'ri','ãƒ'=>'ro','ル'=>'ru','サ'=>'sa','ã‚»'=>'se','ã‚·'=>'si', 1165 'ソ'=>'so','ス'=>'su','ã‚¿'=>'ta','テ'=>'te','ãƒ'=>'ti','ト'=>'to','ツ'=>'tu','ヴァ'=>'va', 1166 'ヴェ'=>'ve','ヴィ'=>'vi','ヴォ'=>'vo','ヴ'=>'vu','ワ'=>'wa','ウェ'=>'we','ウィ'=>'wi', 1167 'ヲ'=>'wo','ヤ'=>'ya','イェ'=>'ye','イ'=>'yi','ヨ'=>'yo','ユ'=>'yu','ã‚¶'=>'za','ゼ'=>'ze', 1168 'ジ'=>'zi','ゾ'=>'zo','ズ'=>'zu','ビャ'=>'bya','ビェ'=>'bye','ビィ'=>'byi','ビョ'=>'byo', 1169 'ビュ'=>'byu','ãƒãƒ£'=>'cha','ãƒã‚§'=>'che','ãƒ'=>'chi','ãƒãƒ§'=>'cho','ãƒãƒ¥'=>'chu', 1170 'ãƒãƒ£'=>'cya','ãƒã‚§'=>'cye','ãƒã‚£'=>'cyi','ãƒãƒ§'=>'cyo','ãƒãƒ¥'=>'cyu','デャ'=>'dha', 1171 'デェ'=>'dhe','ディ'=>'dhi','デョ'=>'dho','デュ'=>'dhu','ドァ'=>'dwa','ドェ'=>'dwe', 1172 'ドィ'=>'dwi','ドォ'=>'dwo','ドゥ'=>'dwu','ヂャ'=>'dya','ヂェ'=>'dye','ヂィ'=>'dyi', 1173 'ヂョ'=>'dyo','ヂュ'=>'dyu','ヂ'=>'dzi','ファ'=>'fwa','フェ'=>'fwe','フィ'=>'fwi', 1174 'フォ'=>'fwo','フゥ'=>'fwu','フャ'=>'fya','フェ'=>'fye','フィ'=>'fyi','フョ'=>'fyo', 1175 'フュ'=>'fyu','ギャ'=>'gya','ギェ'=>'gye','ギィ'=>'gyi','ギョ'=>'gyo','ギュ'=>'gyu', 1176 
'ヒャ'=>'hya','ヒェ'=>'hye','ヒィ'=>'hyi','ヒョ'=>'hyo','ヒュ'=>'hyu','ジャ'=>'jya', 1177 'ジェ'=>'jye','ジィ'=>'jyi','ジョ'=>'jyo','ジュ'=>'jyu','ã‚ャ'=>'kya','ã‚ã‚§'=>'kye', 1178 'ã‚ã‚£'=>'kyi','ã‚ョ'=>'kyo','ã‚ュ'=>'kyu','リャ'=>'lya','リェ'=>'lye','リィ'=>'lyi', 1179 'リョ'=>'lyo','リュ'=>'lyu','ミャ'=>'mya','ミェ'=>'mye','ミィ'=>'myi','ミョ'=>'myo', 1180 'ミュ'=>'myu','ン'=>'n','ニャ'=>'nya','ニェ'=>'nye','ニィ'=>'nyi','ニョ'=>'nyo', 1181 'ニュ'=>'nyu','ピャ'=>'pya','ピェ'=>'pye','ピィ'=>'pyi','ピョ'=>'pyo','ピュ'=>'pyu', 1182 'リャ'=>'rya','リェ'=>'rye','リィ'=>'ryi','リョ'=>'ryo','リュ'=>'ryu','シャ'=>'sha', 1183 'ã‚·ã‚§'=>'she','ã‚·'=>'shi','ショ'=>'sho','シュ'=>'shu','スァ'=>'swa','スェ'=>'swe', 1184 'スィ'=>'swi','スォ'=>'swo','スゥ'=>'swu','シャ'=>'sya','ã‚·ã‚§'=>'sye','ã‚·ã‚£'=>'syi', 1185 'ショ'=>'syo','シュ'=>'syu','テャ'=>'tha','テェ'=>'the','ティ'=>'thi','テョ'=>'tho', 1186 'テュ'=>'thu','ツャ'=>'tsa','ツェ'=>'tse','ツィ'=>'tsi','ツョ'=>'tso','ツ'=>'tsu', 1187 'トァ'=>'twa','トェ'=>'twe','トィ'=>'twi','トォ'=>'two','トゥ'=>'twu','ãƒãƒ£'=>'tya', 1188 'ãƒã‚§'=>'tye','ãƒã‚£'=>'tyi','ãƒãƒ§'=>'tyo','ãƒãƒ¥'=>'tyu','ヴャ'=>'vya','ヴェ'=>'vye', 1189 'ヴィ'=>'vyi','ヴョ'=>'vyo','ヴュ'=>'vyu','ウァ'=>'wha','ウェ'=>'whe','ウィ'=>'whi', 1190 'ウォ'=>'who','ウゥ'=>'whu','ヱ'=>'wye','ヰ'=>'wyi','ジャ'=>'zha','ジェ'=>'zhe', 1191 'ジィ'=>'zhi','ジョ'=>'zho','ジュ'=>'zhu','ジャ'=>'zya','ジェ'=>'zye','ジィ'=>'zyi', 1192 'ジョ'=>'zyo','ジュ'=>'zyu', 1193 1194 // "Greeklish" 1195 'Γ'=>'G','Δ'=>'E','Θ'=>'Th','Λ'=>'L','Ξ'=>'X','Î '=>'P','Σ'=>'S','Φ'=>'F','Ψ'=>'Ps', 1196 'γ'=>'g','δ'=>'e','θ'=>'th','λ'=>'l','ξ'=>'x','Ï€'=>'p','σ'=>'s','φ'=>'f','ψ'=>'ps', 1197 1198 // Thai 1199 'à¸'=>'k','ข'=>'kh','ฃ'=>'kh','ค'=>'kh','ฅ'=>'kh','ฆ'=>'kh','ง'=>'ng','จ'=>'ch', 1200 'ฉ'=>'ch','ช'=>'ch','ซ'=>'s','ฌ'=>'ch','à¸'=>'y','ฎ'=>'d','à¸'=>'t','à¸'=>'th', 1201 'ฑ'=>'d','ฒ'=>'th','ณ'=>'n','ด'=>'d','ต'=>'t','ถ'=>'th','ท'=>'th','ธ'=>'th', 1202 'น'=>'n','บ'=>'b','ป'=>'p','ผ'=>'ph','à¸'=>'f','พ'=>'ph','ฟ'=>'f','ภ'=>'ph', 1203 'ม'=>'m','ย'=>'y','ร'=>'r','ฤ'=>'rue','ฤๅ'=>'rue','ล'=>'l','ฦ'=>'lue', 1204 
'ฦๅ'=>'lue','ว'=>'w','ศ'=>'s','ษ'=>'s','ส'=>'s','ห'=>'h','ฬ'=>'l','ฮ'=>'h', 1205 'ะ'=>'a','–ั'=>'a','รร'=>'a','า'=>'a','รร'=>'an','ำ'=>'am','–ิ'=>'i','–ี'=>'i', 1206 '–ึ'=>'ue','–ื'=>'ue','–ุ'=>'u','–ู'=>'u','เะ'=>'e','เ–็'=>'e','เ'=>'e','à¹à¸°'=>'ae', 1207 'à¹'=>'ae','โะ'=>'o','โ'=>'o','เาะ'=>'o','à¸'=>'o','เà¸à¸°'=>'oe','เ–ิ'=>'oe', 1208 'เà¸'=>'oe','เ–ียะ'=>'ia','เ–ีย'=>'ia','เ–ืà¸à¸°'=>'uea','เ–ืà¸'=>'uea','–ัวะ'=>'ua', 1209 '–ัว'=>'ua','ว'=>'ua','ใ'=>'ai','ไ'=>'ai','–ัย'=>'ai','ไย'=>'ai','าย'=>'ai', 1210 'เา'=>'ao','าว'=>'ao','–ุย'=>'ui','โย'=>'oi','à¸à¸¢'=>'oi','เย'=>'oei','เ–ืà¸à¸¢'=>'ueai', 1211 'วย'=>'uai','–ิว'=>'io','เ–็ว'=>'eo','เว'=>'eo','à¹â€“็ว'=>'aeo','à¹à¸§'=>'aeo', 1212 'เ–ียว'=>'iao', 1213 1214 // Korean 1215 'ㄱ'=>'k','ã…‹'=>'kh','ㄲ'=>'kk','ã„·'=>'t','ã…Œ'=>'th','ㄸ'=>'tt','ã…‚'=>'p', 1216 'ã…'=>'ph','ã…ƒ'=>'pp','ã…ˆ'=>'c','ã…Š'=>'ch','ã…‰'=>'cc','ã……'=>'s','ã…†'=>'ss', 1217 'ã…Ž'=>'h','ã…‡'=>'ng','ã„´'=>'n','ㄹ'=>'l','ã…'=>'m', 'ã…'=>'a','ã…“'=>'e','ã…—'=>'o', 1218 'ã…œ'=>'wu','ã…¡'=>'u','ã…£'=>'i','ã…'=>'ay','ã…”'=>'ey','ã…š'=>'oy','ã…˜'=>'wa','ã…'=>'we', 1219 'ã…Ÿ'=>'wi','ã…™'=>'way','ã…ž'=>'wey','ã…¢'=>'uy','ã…‘'=>'ya','ã…•'=>'ye','ã…›'=>'oy', 1220 'ã… '=>'yu','ã…’'=>'yay','ã…–'=>'yey', 1221 ); 1222 1223 //Setup VIM: ex: et ts=2 enc=utf-8 : 1224
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Tue Apr 3 20:47:31 2007 | par Balluche grâce à PHPXref 0.7 |