[ Index ] |
|
Code source de eZ Publish 3.9.0 |
<?php
//
// Definition of eZUTF8Codec class
//
// SOFTWARE NAME: eZ publish
// SOFTWARE RELEASE: 3.9.0
// BUILD VERSION: 17785
// COPYRIGHT NOTICE: Copyright (C) 1999-2006 eZ systems AS
// SOFTWARE LICENSE: GNU General Public License v2.0
// NOTICE: >
//   This program is free software; you can redistribute it and/or
//   modify it under the terms of version 2.0 of the GNU General
//   Public License as published by the Free Software Foundation.
//
//   This program is distributed in the hope that it will be useful,
//   but WITHOUT ANY WARRANTY; without even the implied warranty of
//   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
//   GNU General Public License for more details.
//
//   You should have received a copy of version 2.0 of the GNU General
//   Public License along with this program; if not, write to the Free
//   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
//   MA 02110-1301, USA.
//
//

/*!
  \class eZUTF8Codec ezutf8codec.php
  \ingroup eZI18N
  \brief Converter for utf8 and 32bit unicode

  Allows for conversion from utf8 characters to 32bit unicode values,
  and vice versa.

  The original (pre RFC 3629) UTF-8 layout with sequences of up to six
  bytes, covering 31 bit values, is supported in both directions.

  \note The class is written in PHP 4 style: the methods marked \static
        do not use \c $this and are called as eZUTF8Codec::method().
*/

class eZUTF8Codec
{
    /*!
     Initializes utf8 codec.
    */
    function eZUTF8Codec()
    {
    }

    /*!
     Converts an UTF8 string into Unicode values and returns an array with the values.

     Byte sequences which cannot be decoded (invalid lead byte, broken or
     truncated continuation bytes, over-long encodings) produce no entry in
     the result. The scan continues after the number of bytes reported by
     fromUtf8(), or after one byte when fromUtf8() reports nothing.

     \param $str The UTF8 encoded string.
     \return An array of integers, one for each decoded character.
    */
    function convertStringToUnicode( $str )
    {
        $unicodeValues = array();
        $strLen = strlen( $str );
        $offset = 0;
        while ( $offset < $strLen )
        {
            // fromUtf8() leaves the length untouched for an unknown lead byte,
            // presetting it to 1 guarantees that the scan always advances.
            $charLen = 1;
            $unicodeValue = eZUTF8Codec::fromUtf8( $str, $offset, $charLen );
            if ( $unicodeValue !== false )
                $unicodeValues[] = $unicodeValue;
            $offset += $charLen;
        }
        return $unicodeValues;
    }

    /*!
     Converts an array with Unicode values into an UTF8 string and returns it.

     \param $unicodeValues Array of integer Unicode values.
     \return The UTF8 string, or \c false if \a $unicodeValues is not an array.
    */
    function convertUnicodeToString( $unicodeValues )
    {
        if ( !is_array( $unicodeValues ) )
            return false;
        $text = '';
        foreach ( $unicodeValues as $unicodeValue )
        {
            $text .= eZUTF8Codec::toUTF8( $unicodeValue );
        }
        return $text;
    }

    /*!
     \static
     Converts the 32 bit integer $char_code to a utf8 string representing the Unicode character.

     The shortest sequence able to hold the value is used: 1 byte for 7 bit
     values, then 2, 3, 4, 5 and 6 bytes for 11, 16, 21, 26 and 31 bit values.
     Values which do not fit in 26 bits (including negative ones) end up in
     the 6 byte form.

     \param $char_code The Unicode value to encode.
     \return The UTF8 byte sequence as a string.
    */
    function &toUTF8( $char_code )
    {
        // The masks are written as ~0x.. so that they stay plain integers on
        // 32 bit builds, where a literal such as 0xffffff80 becomes a float.
        if ( ( $char_code & ~0x7f ) == 0 ) // 7 bit
        {
            $char = chr( $char_code );
        }
        else if ( ( $char_code & ~0x7ff ) == 0 ) // 11 bit
        {
            $char = ( chr( 0xc0 | ( ( $char_code >> 6 ) & 0x1f ) ) .
                      chr( 0x80 | ( $char_code & 0x3f ) ) );
        }
        else if ( ( $char_code & ~0xffff ) == 0 ) // 16 bit
        {
            $char = ( chr( 0xe0 | ( ( $char_code >> 12 ) & 0x0f ) ) .
                      chr( 0x80 | ( ( $char_code >> 6 ) & 0x3f ) ) .
                      chr( 0x80 | ( $char_code & 0x3f ) ) );
        }
        else if ( ( $char_code & ~0x1fffff ) == 0 ) // 21 bit
        {
            $char = ( chr( 0xf0 | ( ( $char_code >> 18 ) & 0x07 ) ) .
                      chr( 0x80 | ( ( $char_code >> 12 ) & 0x3f ) ) .
                      chr( 0x80 | ( ( $char_code >> 6 ) & 0x3f ) ) .
                      chr( 0x80 | ( $char_code & 0x3f ) ) );
        }
        else if ( ( $char_code & ~0x3ffffff ) == 0 ) // 26 bit
        {
            // This branch used to fall through into the 31 bit one, which
            // replaced the 5 byte result with an over-long 6 byte sequence.
            $char = ( chr( 0xf8 | ( ( $char_code >> 24 ) & 0x03 ) ) .
                      chr( 0x80 | ( ( $char_code >> 18 ) & 0x3f ) ) .
                      chr( 0x80 | ( ( $char_code >> 12 ) & 0x3f ) ) .
                      chr( 0x80 | ( ( $char_code >> 6 ) & 0x3f ) ) .
                      chr( 0x80 | ( $char_code & 0x3f ) ) );
        }
        else // 31 bit
        {
            $char = ( chr( 0xfc | ( ( $char_code >> 30 ) & 0x01 ) ) .
                      chr( 0x80 | ( ( $char_code >> 24 ) & 0x3f ) ) .
                      chr( 0x80 | ( ( $char_code >> 18 ) & 0x3f ) ) .
                      chr( 0x80 | ( ( $char_code >> 12 ) & 0x3f ) ) .
                      chr( 0x80 | ( ( $char_code >> 6 ) & 0x3f ) ) .
                      chr( 0x80 | ( $char_code & 0x3f ) ) );
        }
        return $char;
    }

    /*!
     \static
     Converts the first utf8 char in the string $multi_char to a 32 bit integer.
     $offs is the offset in the string.
     $len will contain the length of utf8 char in the string which can be used to
     find the next char.

     $len is set as soon as the lead byte has been recognized, also when
     decoding fails afterwards. It is left untouched when the lead byte is not
     a valid start of a sequence (0x80-0xbf, 0xfe, 0xff) or when $offs lies
     outside of the string.

     \return The Unicode value, or \c false when
             - $offs is outside of the string,
             - the lead byte is invalid,
             - the sequence is truncated or has a byte which is not a
               continuation byte (10xxxxxx),
             - the sequence is over-long, i.e. the value would have fitted
               in a shorter sequence.
    */
    function &fromUtf8( $multi_char, $offs, &$len )
    {
        $char_code = false;
        $byteCount = strlen( $multi_char );
        if ( $offs < 0 or $offs >= $byteCount )
            return $char_code;

        $lead = ord( $multi_char[$offs] );
        if ( ( $lead & 0x80 ) == 0x00 ) // 7 bit, 1 char
        {
            $len = 1;
            $char_code = $lead;
            return $char_code;
        }

        // Find the sequence length, the payload bits of the lead byte and the
        // smallest value which really requires a sequence of that length.
        if ( ( $lead & 0xe0 ) == 0xc0 ) // 11 bit, 2 chars
        {
            $sequenceLength = 2;
            $leadMask = 0x1f;
            $minimum = 0x80;
        }
        else if ( ( $lead & 0xf0 ) == 0xe0 ) // 16 bit, 3 chars
        {
            $sequenceLength = 3;
            $leadMask = 0x0f;
            $minimum = 0x800;
        }
        else if ( ( $lead & 0xf8 ) == 0xf0 ) // 21 bit, 4 chars
        {
            $sequenceLength = 4;
            $leadMask = 0x07;
            $minimum = 0x10000;
        }
        else if ( ( $lead & 0xfc ) == 0xf8 ) // 26 bit, 5 chars
        {
            $sequenceLength = 5;
            $leadMask = 0x03;
            $minimum = 0x200000;
        }
        else if ( ( $lead & 0xfe ) == 0xfc ) // 31 bit, 6 chars
        {
            $sequenceLength = 6;
            $leadMask = 0x01;
            $minimum = 0x4000000;
        }
        else
        {
            // Stray continuation byte or 0xfe/0xff, $len is not changed.
            return $char_code;
        }

        $len = $sequenceLength;
        if ( $offs + $sequenceLength > $byteCount )
            return $char_code; // Truncated sequence, do not read past the end

        $value = $lead & $leadMask;
        for ( $i = 1; $i < $sequenceLength; ++$i )
        {
            $byte = ord( $multi_char[$offs + $i] );
            if ( ( $byte & 0xc0 ) != 0x80 )
                return $char_code;
            $value = ( $value << 6 ) | ( $byte & 0x3f );
        }

        // Illegal multibyte if the value should have used fewer chars. The old
        // code used '==' instead of '=' here, so such values slipped through.
        if ( $value >= $minimum )
            $char_code = $value;
        return $char_code;
    }

    /*!
     \static
     \return a reference to the table which maps the upper six bits of a
             byte (byte >> 2) to the length of the UTF8 sequence started by
             that byte. Continuation bytes (0x80-0xbf) map to 0.

     The table is created on first use and kept in
     $GLOBALS["eZUTF8LengthTable"].
    */
    function &utf8LengthTable()
    {
        $table =& $GLOBALS["eZUTF8LengthTable"];
        if ( !is_array( $table ) )
        {
            $table = array( 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00-0x3f
                            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40-0x7f
                            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80-0xbf
                            2, 2, 2, 2, 2, 2, 2, 2,                         // 0xc0-0xdf
                            3, 3, 3, 3,                                     // 0xe0-0xef
                            4, 4,                                           // 0xf0-0xf7
                            5,                                              // 0xf8-0xfb
                            6 );                                            // 0xfc-0xff
        }
        return $table;
    }

    /*!
     \static
     \param $str The UTF8 string, passed by reference.
     \param $pos Byte offset of the first byte of the character.
     \return the number of bytes of the character starting at \a $pos,
             taken from utf8LengthTable(). 0 is returned if the byte at
             \a $pos is a continuation byte.
    */
    function characterByteLength( &$str, $pos )
    {
        $table =& eZUTF8Codec::utf8LengthTable();
        $leadByte = ord( $str[$pos] );
        return $table[( $leadByte >> 2 ) & 0x3f];
    }

    /*!
     \static
     \param $str The UTF8 string, passed by reference.
     \return the number of characters in \a $str.

     Only lead bytes are inspected, the continuation bytes are skipped
     without validation. A continuation byte found where a character should
     start is counted as one character of one byte; previously the scan
     position did not advance in that case and the loop never ended.
    */
    function strlen( &$str )
    {
        $table =& eZUTF8Codec::utf8LengthTable();
        $byteCount = strlen( $str );
        $charCount = 0;
        for ( $i = 0; $i < $byteCount; ++$charCount )
        {
            $charLen = $table[( ord( $str[$i] ) >> 2 ) & 0x3f];
            // The table holds 0 for continuation bytes, always step forward.
            $i += ( $charLen > 0 ) ? $charLen : 1;
        }
        return $charCount;
    }

    /*!
     \static
     \return a unique instance of the UTF8 codec.

     The instance is kept in $GLOBALS["eZUTF8CodecInstance"].
    */
    function &instance()
    {
        $instance =& $GLOBALS["eZUTF8CodecInstance"];
        // get_class() returns the lowercased name in PHP 4 and the declared
        // name in PHP 5 and later, so the comparison must ignore the case.
        // It must also not be called before an object has been stored.
        if ( !is_object( $instance ) or
             strtolower( get_class( $instance ) ) != "ezutf8codec" )
        {
            $instance = new eZUTF8Codec();
        }
        return $instance;
    }
}

?>
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Sat Feb 24 10:30:04 2007 | par Balluche grâce à PHPXref 0.7 |