| [ Index ] |
|
Code source de eGroupWare 1.2.106-2 |
<?php

include_once 'XML/WBXML.php';
include_once 'XML/WBXML/DTDManager.php';
include_once 'XML/WBXML/ContentHandler.php';

/**
 * $Horde: framework/XML_WBXML/WBXML/Decoder.php,v 1.36 2006/01/01 21:10:25 jan Exp $
 *
 * Copyright 2003-2006 Anthony Mills <amills@pyramid6.com>
 *
 * See the enclosed file COPYING for license information (LGPL). If you
 * did not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
 *
 * From Binary XML Content Format Specification Version 1.3, 25 July
 * 2001 found at http://www.wapforum.org
 *
 * Decodes a WBXML byte string and forwards the decoded events
 * (startElement / endElement / characters) to a content handler.
 *
 * @package XML_WBXML
 */
class XML_WBXML_Decoder extends XML_WBXML_ContentHandler {

    /**
     * Document Public Identifier type
     * 1 mb_u_int32 well known type
     * 2 string table
     * from spec but converted into a string.
     *
     * Document Public Identifier
     * Used with dpiType.
     */
    var $_dpi;

    /**
     * String table as defined in 5.7. Stored as the raw byte string read
     * from the document; entries are addressed by byte offset.
     */
    var $_stringTable = '';

    /**
     * Content handler.
     * Currently just outputs raw XML.
     */
    var $_ch;

    /**
     * DTD used to translate tag tokens.
     */
    var $_tagDTD;

    /**
     * Attribute DTD that was active before a code page switch inside an
     * attribute list; false/unset when no switch is pending.
     */
    var $_prevAttributeDTD;

    /**
     * DTD used to translate attribute tokens.
     */
    var $_attributeDTD;

    /**
     * State variables.
     */
    var $_tagStack = array();
    var $_isAttribute;
    var $_isData = false;

    var $_error = false;

    /**
     * The DTD Manager.
     *
     * @var XML_WBXML_DTDManager
     */
    var $_dtdManager;

    /**
     * The string position.
     *
     * @var integer
     */
    var $_strpos;

    /**
     * Constructor.
     *
     * Declared before the PHP 4 style constructor below so that PHP 5+
     * picks this one up without a "redefining constructor" notice.
     */
    function __construct()
    {
        $this->_dtdManager = new XML_WBXML_DTDManager();
    }

    /**
     * PHP 4 style constructor, kept for backward compatibility.
     */
    function XML_WBXML_Decoder()
    {
        $this->__construct();
    }

    /**
     * Sets the contentHandler that will receive the output of the
     * decoding.
     *
     * @param XML_WBXML_ContentHandler $ch  The contentHandler
     */
    function setContentHandler(&$ch)
    {
        $this->_ch = &$ch;
    }

    /**
     * Return one byte from the input stream and advance the read position.
     *
     * @param string $input  The WBXML input string.
     *
     * @return integer  The byte value, or 0 when reading past the end of
     *                  the input.
     */
    function getByte($input)
    {
        $pos = $this->_strpos++;
        if ($pos >= strlen($input)) {
            // Same result as ord('') on an out-of-range offset, without
            // the "uninitialized string offset" diagnostic.
            return 0;
        }

        return ord($input[$pos]);
    }

    /**
     * Takes a WBXML input document and returns decoded XML.
     * However the preferred and more efficient method is to
     * use decode() rather than decodeToString() and have an
     * appropriate contentHandler deal with the decoded data.
     *
     * @param string $wbxml  The WBXML document to decode.
     *
     * @return string  The decoded XML document, or the PEAR_Error returned
     *                 by decode().
     */
    function decodeToString($wbxml)
    {
        // Rebind (rather than assign through) $_ch so that a handler
        // variable previously bound by setContentHandler() is not
        // overwritten in the caller's scope.
        $handler = new XML_WBXML_ContentHandler();
        $this->_ch = &$handler;

        $r = $this->decode($wbxml);
        if (is_a($r, 'PEAR_Error')) {
            return $r;
        }

        return $this->_ch->getOutput();
    }

    /**
     * Takes a WBXML input document and decodes it.
     * Decoding result is directly passed to the contentHandler.
     * A contenthandler must be set using setContentHandler
     * prior to invocation of this method
     *
     * @param string $wbxml  The WBXML document to decode.
     *
     * @return mixed  True on success or PEAR_Error.
     */
    function decode($wbxml)
    {
        // Fix for Nokia Series 60 which seem to send empty data block
        // sometimes.
        $length = strlen($wbxml);
        if ($length === 0) {
            return true;
        }

        $this->_error = false; // reset state
        $this->_strpos = 0;

        if (empty($this->_ch)) {
            return $this->raiseError('No Contenthandler defined.');
        }

        // Get Version Number from Section 5.4
        // version = u_int8
        // currently 1, 2 or 3
        $this->_wbxmlVersion = $this->getVersionNumber($wbxml);

        // Get Document Public Identifier from Section 5.5
        // publicid = mb_u_int32 | (zero index)
        // zero = u_int8
        // Containing the value zero (0)
        // The actual DPI is determined after the String Table is read.
        $dpiStruct = $this->getDocumentPublicIdentifier($wbxml);

        // Get Charset from 5.6
        // charset = mb_u_int32
        $this->_charset = $this->getCharset($wbxml);

        // Get String Table from 5.7
        // strb1 = length *byte
        $this->retrieveStringTable($wbxml);

        // Resolve the Document Public Identifier from Section 5.5.
        $this->_dpi = $this->getDocumentPublicIdentifierImpl($dpiStruct['dpiType'],
                                                             $dpiStruct['dpiNumber']);

        // Now the real fun begins.
        // From Sections 5.2 and 5.8

        // Fresh DTD manager for this document.
        $this->_dtdManager = new XML_WBXML_DTDManager();

        // Get the starting DTD.
        $this->_tagDTD = $this->_dtdManager->getInstance($this->_dpi);

        if (!$this->_tagDTD) {
            return $this->raiseError('No DTD found for '
                                     . $this->_dpi . '/'
                                     . $dpiStruct['dpiNumber']);
        }

        $this->_attributeDTD = $this->_tagDTD;

        // Consume one token at a time until the input is exhausted or a
        // token handler records an error.
        while (empty($this->_error) && $this->_strpos < $length) {
            $this->_decode($wbxml);
        }
        if (!empty($this->_error)) {
            return $this->_error;
        }

        return true;
    }

    /**
     * Reads the WBXML version byte (Section 5.4).
     *
     * @param string $input  The WBXML input string.
     *
     * @return integer  The version byte.
     */
    function getVersionNumber($input)
    {
        return $this->getByte($input);
    }

    /**
     * Reads the raw Document Public Identifier (Section 5.5).
     *
     * @param string $input  The WBXML input string.
     *
     * @return array  'dpiType' is 1 when 'dpiNumber' is a well known
     *                identifier, 2 when 'dpiNumber' is a string table
     *                offset.
     */
    function getDocumentPublicIdentifier($input)
    {
        $i = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
        if ($i == 0) {
            return array('dpiType' => 2,
                         'dpiNumber' => $this->getByte($input));
        }

        return array('dpiType' => 1,
                     'dpiNumber' => $i);
    }

    /**
     * Converts the raw Document Public Identifier into its string form.
     * Must be called after the string table has been read.
     *
     * @param integer $dpiType    1 (well known) or 2 (string table).
     * @param integer $dpiNumber  Identifier number or string table offset.
     *
     * @return string  The Document Public Identifier.
     */
    function getDocumentPublicIdentifierImpl($dpiType, $dpiNumber)
    {
        if ($dpiType == 1) {
            return XML_WBXML::getDPIString($dpiNumber);
        }

        return $this->getStringTableEntry($dpiNumber);
    }

    /**
     * Returns the character encoding. Only default character
     * encodings from J2SE are supported. From
     * http://www.iana.org/assignments/character-sets and
     * http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html
     *
     * @param string $input  The WBXML input string.
     */
    function getCharset($input)
    {
        $cs = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
        return XML_WBXML::getCharsetString($cs);
    }

    /**
     * Retrieves the string table.
     * The string table consists of an mb_u_int32 length
     * and then length bytes forming the table.
     * References to the string table refer to the
     * starting position of the (null terminated)
     * string in this table.
     *
     * @param string $input  The WBXML input string.
     */
    function retrieveStringTable($input)
    {
        $size = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
        $this->_stringTable = substr($input, $this->_strpos, $size);
        $this->_strpos += $size;
    }

    /**
     * Returns the null terminated string that starts at the given byte
     * offset of the string table. Does not touch the read position.
     *
     * On an offset outside the table an error is recorded in $_error and
     * an empty string is returned.
     *
     * @param integer $index  Byte offset into the string table.
     *
     * @return string  The entry, without its terminating null byte.
     */
    function getStringTableEntry($index)
    {
        if ($index >= strlen($this->_stringTable)) {
            $this->_error =
                $this->_ch->raiseError('Invalid offset ' . $index
                                       . ' value encountered around position '
                                       . $this->_strpos
                                       . '. Broken wbxml?');
            return '';
        }

        $end = strpos($this->_stringTable, "\0", $index);
        if ($end === false) {
            // Unterminated last entry: take everything up to the end of
            // the table.
            return substr($this->_stringTable, $index);
        }
        if ($end == $index) {
            return '';
        }

        return substr($this->_stringTable, $index, $end - $index);
    }

    /**
     * Decodes the next token of the document body and forwards the
     * result to the content handler.
     *
     * @param string $input  The WBXML input string.
     */
    function _decode($input)
    {
        $token = $this->getByte($input);
        $str = '';

        switch ($token) {
        case XML_WBXML_GLOBAL_TOKEN_STR_I:
            // Section 5.8.4.1
            $str = $this->termstr($input);
            $this->_ch->characters($str);
            break;

        case XML_WBXML_GLOBAL_TOKEN_STR_T:
            // Section 5.8.4.1
            $x = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
            $str = $this->getStringTableEntry($x);
            $this->_ch->characters($str);
            break;

        case XML_WBXML_GLOBAL_TOKEN_EXT_I_0:
        case XML_WBXML_GLOBAL_TOKEN_EXT_I_1:
        case XML_WBXML_GLOBAL_TOKEN_EXT_I_2:
            // Section 5.8.4.2
            $str = $this->termstr($input);
            $this->_ch->characters($str);
            break;

        case XML_WBXML_GLOBAL_TOKEN_EXT_T_0:
        case XML_WBXML_GLOBAL_TOKEN_EXT_T_1:
        case XML_WBXML_GLOBAL_TOKEN_EXT_T_2:
            // Section 5.8.4.2
            $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
            $this->_ch->characters($str);
            break;

        case XML_WBXML_GLOBAL_TOKEN_EXT_0:
        case XML_WBXML_GLOBAL_TOKEN_EXT_1:
        case XML_WBXML_GLOBAL_TOKEN_EXT_2:
            // Section 5.8.4.2
            $extension = $this->getByte($input);
            $this->_ch->characters($extension);
            break;

        case XML_WBXML_GLOBAL_TOKEN_ENTITY:
            // Section 5.8.4.3
            // entity() yields hexadecimal digits, so this has to be a
            // hexadecimal character reference (&#x...;).
            $entity = $this->entity(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
            $this->_ch->characters('&#x' . $entity . ';');
            break;

        case XML_WBXML_GLOBAL_TOKEN_PI:
            // Section 5.8.4.4
            // Processing instructions are not supported; the token is
            // skipped.
            break;

        case XML_WBXML_GLOBAL_TOKEN_LITERAL:
            // Section 5.8.4.5
            $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
            $this->parseTag($input, $str, false, false);
            break;

        case XML_WBXML_GLOBAL_TOKEN_LITERAL_A:
            // Section 5.8.4.5
            $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
            $this->parseTag($input, $str, true, false);
            break;

        case XML_WBXML_GLOBAL_TOKEN_LITERAL_AC:
            // Section 5.8.4.5
            $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
            $this->parseTag($input, $str, true, true);
            break;

        case XML_WBXML_GLOBAL_TOKEN_LITERAL_C:
            // Section 5.8.4.5
            $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
            $this->parseTag($input, $str, false, true);
            break;

        case XML_WBXML_GLOBAL_TOKEN_OPAQUE:
            // Section 5.8.4.6
            $size = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
            $b = substr($input, $this->_strpos, $size);
            $this->_strpos += $size;

            // Opaque data inside a <data> element may or may not be
            // a nested wbxml document (for example devinf data).
            // We find out by checking the first byte of the data: if it
            // is 10 or lower we treat it as the version number of a
            // wbxml document and run a new decoder instance on it.
            if ($this->_isData && ord($b) <= 10) {
                $decoder = new XML_WBXML_Decoder();
                $decoder->setContentHandler($this->_ch);
                $s = $decoder->decode($b);
                // @todo: FIXME the original authors noted that Nokia
                // DevInf data can't be decoded yet; errors of the nested
                // decoder are nevertheless propagated here.
                if (is_a($s, 'PEAR_Error')) {
                    $this->_error = $s;
                    return;
                }
            } else {
                // Normal opaque behaviour: just copy the raw data.
                $this->_ch->characters($b);
            }
            break;

        case XML_WBXML_GLOBAL_TOKEN_END:
            // Section 5.8.4.7.1
            $str = $this->endTag();
            break;

        case XML_WBXML_GLOBAL_TOKEN_SWITCH_PAGE:
            // Section 5.8.4.7.2
            $codePage = $this->getByte($input);
            $this->switchElementCodePage($codePage);
            break;

        default:
            // Section 5.8.2
            // Section 5.8.3
            // Bit 7 flags attributes, bit 6 flags content, the low six
            // bits are the tag identity.
            $hasAttributes = (($token & 0x80) != 0);
            $hasContent = (($token & 0x40) != 0);
            $realToken = $token & 0x3F;
            $str = $this->getTag($realToken);

            $this->parseTag($input, $str, $hasAttributes, $hasContent);

            // Store if we're inside a Data tag (token 0x0f). This may
            // contain an additional enclosed wbxml document on which we
            // have to run a separate decoder.
            $this->_isData = ($realToken == 0x0f);
            break;
        }
    }

    /**
     * Emits the start of an element, reading its attributes first when
     * present. Elements without content are closed immediately; elements
     * with content are pushed on the tag stack until their END token.
     *
     * @param string $input           The WBXML input string.
     * @param string $tag             The element name.
     * @param boolean $hasAttributes  Whether an attribute list follows.
     * @param boolean $hasContent     Whether the element has content.
     */
    function parseTag($input, $tag, $hasAttributes, $hasContent)
    {
        $attrs = array();
        if ($hasAttributes) {
            $attrs = $this->getAttributes($input);
        }

        $this->_ch->startElement($this->getCurrentURI(), $tag, $attrs);

        if ($hasContent) {
            // Remember the open element so that endTag() can close it.
            $this->_tagStack[] = $tag;
        } else {
            $this->_ch->endElement($this->getCurrentURI(), $tag);
        }
    }

    /**
     * Closes the most recently opened element.
     *
     * @return string  The name of the closed element, 'Unknown' when the
     *                 tag stack is empty.
     */
    function endTag()
    {
        if (count($this->_tagStack)) {
            $tag = array_pop($this->_tagStack);
        } else {
            $tag = 'Unknown';
        }

        $this->_ch->endElement($this->getCurrentURI(), $tag);

        return $tag;
    }

    /**
     * Reads an attribute list up to and including its END token.
     *
     * @param string $input  The WBXML input string.
     *
     * @return array  List of array('attribute' => name, 'value' => value).
     */
    function getAttributes($input)
    {
        $this->startGetAttributes();
        $hasMoreAttributes = true;

        $attrs = array();
        $attr = null;
        $value = null;
        $token = null;

        while ($hasMoreAttributes) {
            $token = $this->getByte($input);

            switch ($token) {
            // Attribute specified.
            case XML_WBXML_GLOBAL_TOKEN_LITERAL:
                // Section 5.8.4.5
                if (isset($attr)) {
                    $attrs[] = array('attribute' => $attr,
                                     'value' => $value);
                    // Start the next attribute with a clean value.
                    $value = null;
                }

                $attr = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
                break;

            // Value specified.
            case XML_WBXML_GLOBAL_TOKEN_EXT_I_0:
            case XML_WBXML_GLOBAL_TOKEN_EXT_I_1:
            case XML_WBXML_GLOBAL_TOKEN_EXT_I_2:
                // Section 5.8.4.2
                $value .= $this->termstr($input);
                break;

            case XML_WBXML_GLOBAL_TOKEN_EXT_T_0:
            case XML_WBXML_GLOBAL_TOKEN_EXT_T_1:
            case XML_WBXML_GLOBAL_TOKEN_EXT_T_2:
                // Section 5.8.4.2
                $value .= $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
                break;

            case XML_WBXML_GLOBAL_TOKEN_EXT_0:
            case XML_WBXML_GLOBAL_TOKEN_EXT_1:
            case XML_WBXML_GLOBAL_TOKEN_EXT_2:
                // Section 5.8.4.2
                $value .= $input[$this->_strpos++];
                break;

            case XML_WBXML_GLOBAL_TOKEN_ENTITY:
                // Section 5.8.4.3
                // NOTE(review): this appends the bare hexadecimal digits
                // returned by entity(); presumably a character reference
                // is wanted here - confirm against the content handler.
                $value .= $this->entity(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
                break;

            case XML_WBXML_GLOBAL_TOKEN_STR_I:
                // Section 5.8.4.1
                $value .= $this->termstr($input);
                break;

            case XML_WBXML_GLOBAL_TOKEN_STR_T:
                // Section 5.8.4.1
                $value .= $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
                break;

            case XML_WBXML_GLOBAL_TOKEN_OPAQUE:
                // Section 5.8.4.6
                $size = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
                // Third argument of substr() is a length, not an end
                // offset.
                $b = substr($input, $this->_strpos, $size);
                $this->_strpos += $size;

                $value .= $b;
                break;

            case XML_WBXML_GLOBAL_TOKEN_END:
                // Section 5.8.4.7.1
                $hasMoreAttributes = false;
                if (isset($attr)) {
                    $attrs[] = array('attribute' => $attr,
                                     'value' => $value);
                }
                break;

            case XML_WBXML_GLOBAL_TOKEN_SWITCH_PAGE:
                // Section 5.8.4.7.2
                $codePage = $this->getByte($input);
                // Remember the DTD that was active on entry so it can be
                // restored once the attribute list is finished.
                if (!$this->_prevAttributeDTD) {
                    $this->_prevAttributeDTD = $this->_attributeDTD;
                }

                $this->switchAttributeCodePage($codePage);
                break;

            default:
                // NOTE(review): the WBXML spec describes attribute start
                // tokens as < 128 and attribute value tokens as >= 128,
                // which looks like the opposite of this test - confirm
                // against the DTD classes before changing.
                if ($token > 128) {
                    if (isset($attr)) {
                        $attrs[] = array('attribute' => $attr,
                                         'value' => $value);
                        // Start the next attribute with a clean value.
                        $value = null;
                    }
                    $attr = $this->_attributeDTD->toAttribute($token);
                } else {
                    // Value.
                    $value .= $this->_attributeDTD->toAttribute($token);
                }
                break;
            }
        }

        // Restore the attribute DTD only when a code page switch inside
        // this attribute list actually saved one.
        if ($this->_prevAttributeDTD) {
            $this->_attributeDTD = $this->_prevAttributeDTD;
            $this->_prevAttributeDTD = false;
        }

        $this->stopGetAttributes();

        return $attrs;
    }

    /**
     * Marks the decoder as being inside an attribute list.
     */
    function startGetAttributes()
    {
        $this->_isAttribute = true;
    }

    /**
     * Marks the decoder as having left the attribute list.
     */
    function stopGetAttributes()
    {
        $this->_isAttribute = false;
    }

    /**
     * Returns the URI of the DTD selected by the current state.
     *
     * NOTE(review): the tag DTD is used while inside an attribute list
     * and the attribute DTD otherwise; this looks swapped but is kept as
     * is - confirm the intent.
     *
     * @return string  The URI reported by the selected DTD.
     */
    function getCurrentURI()
    {
        if ($this->_isAttribute) {
            return $this->_tagDTD->getURI();
        }

        return $this->_attributeDTD->getURI();
    }

    /**
     * Sends character data to the content handler.
     *
     * @param string $str  The character data.
     */
    function writeString($str)
    {
        $this->_ch->characters($str);
    }

    /**
     * Translates a tag token into its name using the current tag DTD.
     *
     * @param integer $tag  The tag token (without the attribute/content
     *                      flag bits).
     *
     * @return string  The tag name.
     */
    function getTag($tag)
    {
        // Should know which state it is in.
        return $this->_tagDTD->toTagStr($tag);
    }

    /**
     * Translates an attribute via the current attribute DTD.
     *
     * @param mixed $attribute  Passed through to the DTD's
     *                          toAttributeInt().
     *
     * @return mixed  Whatever the DTD's toAttributeInt() returns.
     */
    function getAttribute($attribute)
    {
        // Should know which state it is in.
        return $this->_attributeDTD->toAttributeInt($attribute);
    }

    /**
     * Switches tag and attribute translation to another code page.
     *
     * @param integer $codePage  The code page number.
     */
    function switchElementCodePage($codePage)
    {
        $this->_tagDTD = $this->_dtdManager->getInstance($this->_tagDTD->toCodePageStr($codePage));
        $this->switchAttributeCodePage($codePage);
    }

    /**
     * Switches attribute translation to another code page.
     *
     * @param integer $codePage  The code page number.
     */
    function switchAttributeCodePage($codePage)
    {
        $this->_attributeDTD = $this->_dtdManager->getInstance($this->_attributeDTD->toCodePageStr($codePage));
    }

    /**
     * Return the hex version of the base 10 $entity.
     *
     * @param integer $entity  The character code.
     *
     * @return string  Hexadecimal digits, without any prefix.
     */
    function entity($entity)
    {
        return dechex($entity);
    }

    /**
     * Reads a null terminated string starting at the current read
     * position and moves the position behind the terminator.
     *
     * @param string $input  The WBXML input string.
     *
     * @return string  The string without its terminating null byte. When
     *                 the terminator is missing, everything up to the end
     *                 of the input.
     */
    function termstr($input)
    {
        $start = $this->_strpos;
        $length = strlen($input);

        if ($start >= $length) {
            // Nothing left to read; still account for one consumed byte.
            $this->_strpos = $start + 1;
            return '';
        }

        $end = strpos($input, "\0", $start);
        if ($end === false) {
            // Unterminated string: consume the rest of the input.
            $this->_strpos = $length + 1;
            return substr($input, $start);
        }

        $this->_strpos = $end + 1;
        if ($end == $start) {
            return '';
        }

        return substr($input, $start, $end - $start);
    }

}
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
| Généré le : Sun Feb 25 17:20:01 2007 | par Balluche grâce à PHPXref 0.7 |