[ Index ] |
|
Code source de Horde 3.1.3 |
<?php

include_once 'XML/WBXML.php';
include_once 'XML/WBXML/DTDManager.php';
include_once 'XML/WBXML/ContentHandler.php';

/**
 * $Horde: framework/XML_WBXML/WBXML/Decoder.php,v 1.22.10.8 2006/01/01 21:28:45 jan Exp $
 *
 * Copyright 2003-2006 Anthony Mills <amills@pyramid6.com>
 *
 * See the enclosed file COPYING for license information (LGPL). If you
 * did not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
 *
 * From Binary XML Content Format Specification Version 1.3, 25 July
 * 2001 found at http://www.wapforum.org
 *
 * Decodes a WBXML document held in a string and reports the decoded
 * elements and character data to a content handler.
 *
 * @package XML_WBXML
 */
class XML_WBXML_Decoder extends XML_WBXML_ContentHandler {

    /**
     * Document Public Identifier type
     * 1 mb_u_int32 well known type
     * 2 string table
     * from spec but converted into a string.
     *
     * Document Public Identifier
     * Used with dpiType.
     */
    var $_dpi;

    /**
     * String table as defined in 5.7. Holds the raw bytes of the table;
     * entries are null terminated strings addressed by byte offset.
     *
     * @var string
     */
    var $_stringTable = '';

    /**
     * Content handler that receives the decoded output.
     */
    var $_ch;

    /**
     * DTD used to translate tag tokens.
     */
    var $_tagDTD;

    /**
     * Attribute DTD saved while a code page switch inside an attribute
     * list is in effect; restored when the attribute list ends.
     */
    var $_prevAttributeDTD;

    /**
     * DTD used to translate attribute tokens.
     */
    var $_attributeDTD;

    /**
     * State variables.
     */
    var $_tagStack = array();
    var $_isAttribute;
    var $_isData = false;

    /**
     * Error raised during decoding, or false if none occurred.
     */
    var $_error = false;

    /**
     * WBXML version byte read from the document header.
     *
     * @var integer
     */
    var $_wbxmlVersion;

    /**
     * Charset as returned by XML_WBXML::getCharsetString().
     */
    var $_charset;

    /**
     * The DTD Manager.
     *
     * @var XML_WBXML_DTDManager
     */
    var $_dtdManager;

    /**
     * The string position.
     *
     * @var integer
     */
    var $_strpos;

    /**
     * Constructor.
     */
    function XML_WBXML_Decoder()
    {
        // Plain assignment: the "=& new" form is rejected by newer PHP
        // versions.
        $this->_dtdManager = new XML_WBXML_DTDManager();
    }

    /**
     * Sets the contentHandler that will receive the output of the
     * decoding.
     *
     * @param XML_WBXML_ContentHandler $ch  The contentHandler
     */
    function setContentHandler(&$ch)
    {
        $this->_ch = &$ch;
    }

    /**
     * Return one byte from the input stream and advance the read
     * position.
     *
     * @param string $input  The WBXML input string.
     *
     * @return integer  The byte value, or 0 when the read position is
     *                  past the end of the input.
     */
    function getByte($input)
    {
        $pos = $this->_strpos++;
        if ($pos >= strlen($input)) {
            // Reading an out-of-range offset used to yield ord('') == 0
            // plus a PHP notice; return the same value without it.
            return 0;
        }
        return ord($input[$pos]);
    }

    /**
     * Takes a WBXML input document and returns decoded XML.
     * However the preferred and more efficient method is to
     * use decode() rather than decodeToString() and have an
     * appropriate contentHandler deal with the decoded data.
     *
     * Note that this replaces any content handler previously set with
     * setContentHandler().
     *
     * @param string $wbxml  The WBXML document to decode.
     *
     * @return string  The decoded XML document, or the PEAR_Error
     *                 returned by decode().
     */
    function decodeToString($wbxml)
    {
        $this->_ch = new XML_WBXML_ContentHandler();

        $r = $this->decode($wbxml);
        if (is_a($r, 'PEAR_Error')) {
            return $r;
        }
        return $this->_ch->getOutput();
    }

    /**
     * Takes a WBXML input document and decodes it.
     * Decoding result is directly passed to the contentHandler.
     * A contenthandler must be set using setContentHandler
     * prior to invocation of this method
     *
     * @param string $wbxml  The WBXML document to decode.
     *
     * @return mixed  True on success or PEAR_Error.
     */
    function decode($wbxml)
    {
        // Reset state.
        $this->_error = false;
        $this->_strpos = 0;

        if (empty($this->_ch)) {
            return $this->raiseError('No Contenthandler defined.');
        }

        // Get Version Number from Section 5.4
        // version = u_int8
        // currently 1, 2 or 3
        $this->_wbxmlVersion = $this->getVersionNumber($wbxml);

        // Get Document Public Identifier from Section 5.5
        // publicid = mb_u_int32 | (zero index)
        // zero = u_int8
        // Containing the value zero (0)
        // The actual DPI is determined after the String Table is read.
        $dpiStruct = $this->getDocumentPublicIdentifier($wbxml);

        // Get Charset from 5.6
        // charset = mb_u_int32
        $this->_charset = $this->getCharset($wbxml);

        // Get String Table from 5.7
        // strb1 = length *byte
        $this->retrieveStringTable($wbxml);

        // Resolve the Document Public Identifier from Section 5.5 now
        // that the string table is available.
        $this->_dpi = $this->getDocumentPublicIdentifierImpl($dpiStruct['dpiType'],
                                                             $dpiStruct['dpiNumber']);

        // Now the real fun begins.
        // From Sections 5.2 and 5.8

        // Start every document with a fresh DTD manager.
        $this->_dtdManager = new XML_WBXML_DTDManager();

        // Get the starting DTD.
        $this->_tagDTD = $this->_dtdManager->getInstance($this->_dpi);

        if (!$this->_tagDTD) {
            return $this->raiseError('No DTD found for '
                                     . $this->_dpi . '/'
                                     . $dpiStruct['dpiNumber']);
        }

        $this->_attributeDTD = $this->_tagDTD;

        $length = strlen($wbxml);
        while (empty($this->_error) && $this->_strpos < $length) {
            $this->_decode($wbxml);
        }
        if (!empty($this->_error)) {
            return $this->_error;
        }
        return true;
    }

    /**
     * Reads the version byte (Section 5.4).
     *
     * @param string $input  The WBXML input string.
     *
     * @return integer  The version byte.
     */
    function getVersionNumber($input)
    {
        return $this->getByte($input);
    }

    /**
     * Reads the raw Document Public Identifier (Section 5.5).
     *
     * @param string $input  The WBXML input string.
     *
     * @return array  Hash with 'dpiType' (1 = well known number,
     *                2 = string table offset) and 'dpiNumber'.
     */
    function getDocumentPublicIdentifier($input)
    {
        $i = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
        if ($i == 0) {
            // publicid = zero index, where index is an mb_u_int32 (not
            // a single byte), so offsets above 127 decode correctly.
            return array('dpiType' => 2,
                         'dpiNumber' => XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
        }

        return array('dpiType' => 1,
                     'dpiNumber' => $i);
    }

    /**
     * Converts the raw Document Public Identifier into a string.
     *
     * @param integer $dpiType    1 for a well known number, otherwise
     *                            a string table offset.
     * @param integer $dpiNumber  The number or string table offset.
     *
     * @return string  The Document Public Identifier.
     */
    function getDocumentPublicIdentifierImpl($dpiType, $dpiNumber)
    {
        if ($dpiType == 1) {
            return XML_WBXML::getDPIString($dpiNumber);
        }

        return $this->getStringTableEntry($dpiNumber);
    }

    /**
     * Returns the character encoding. Only default character
     * encodings from J2SE are supported. From
     * http://www.iana.org/assignments/character-sets and
     * http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html
     *
     * @param string $input  The WBXML input string.
     */
    function getCharset($input)
    {
        $cs = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
        return XML_WBXML::getCharsetString($cs);
    }

    /**
     * Retrieves the string table.
     * The string table consists of an mb_u_int32 length
     * and then length bytes forming the table.
     * References to the string table refer to the
     * starting position of the (null terminated)
     * string in this table.
     *
     * @param string $input  The WBXML input string.
     */
    function retrieveStringTable($input)
    {
        $size = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
        // The cast keeps the table a string even where substr() reports
        // failure with false.
        $this->_stringTable = (string)substr($input, $this->_strpos, $size);
        $this->_strpos += $size;
    }

    /**
     * Returns the null terminated string starting at the given offset
     * of the string table. Does not modify the read position.
     *
     * @param integer $index  Byte offset into the string table.
     *
     * @return string  The entry, or '' (with $this->_error set) if the
     *                 offset lies outside the table.
     */
    function getStringTableEntry($index)
    {
        $length = strlen($this->_stringTable);
        if ($index >= $length) {
            $this->_error =
                $this->_ch->raiseError('Invalid offset ' . $index
                                       . ' value encountered around position '
                                       . $this->_strpos
                                       . '. Broken wbxml?');
            return '';
        }

        // Collect bytes up to the terminating null byte or the end of
        // the table, whichever comes first.
        $str = '';
        while ($index < $length) {
            $ch = $this->_stringTable[$index++];
            if (ord($ch) == 0) {
                break;
            }
            $str .= $ch;
        }

        return $str;
    }

    /**
     * Decodes the token at the current read position and forwards the
     * result to the content handler.
     *
     * @param string $input  The WBXML input string.
     */
    function _decode($input)
    {
        $token = $this->getByte($input);

        switch ($token) {
        case XML_WBXML_GLOBAL_TOKEN_STR_I:
            // Section 5.8.4.1
            $str = $this->termstr($input);
            $this->_ch->characters($str);
            break;

        case XML_WBXML_GLOBAL_TOKEN_STR_T:
            // Section 5.8.4.1
            $x = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
            $str = $this->getStringTableEntry($x);
            $this->_ch->characters($str);
            break;

        case XML_WBXML_GLOBAL_TOKEN_EXT_I_0:
        case XML_WBXML_GLOBAL_TOKEN_EXT_I_1:
        case XML_WBXML_GLOBAL_TOKEN_EXT_I_2:
            // Section 5.8.4.2
            $str = $this->termstr($input);
            $this->_ch->characters($str);
            break;

        case XML_WBXML_GLOBAL_TOKEN_EXT_T_0:
        case XML_WBXML_GLOBAL_TOKEN_EXT_T_1:
        case XML_WBXML_GLOBAL_TOKEN_EXT_T_2:
            // Section 5.8.4.2
            $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
            $this->_ch->characters($str);
            break;

        case XML_WBXML_GLOBAL_TOKEN_EXT_0:
        case XML_WBXML_GLOBAL_TOKEN_EXT_1:
        case XML_WBXML_GLOBAL_TOKEN_EXT_2:
            // Section 5.8.4.2
            $extension = $this->getByte($input);
            $this->_ch->characters($extension);
            break;

        case XML_WBXML_GLOBAL_TOKEN_ENTITY:
            // Section 5.8.4.3
            // UCS-4 character encoding?
            // NOTE(review): entity() returns hexadecimal digits but the
            // reference is emitted with a decimal '&#' prefix; confirm
            // whether '&#x' was intended.
            $entity = $this->entity(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
            $this->_ch->characters('&#' . $entity . ';');
            break;

        case XML_WBXML_GLOBAL_TOKEN_PI:
            // Section 5.8.4.4
            // Processing instructions are unsupported and ignored.
            break;

        case XML_WBXML_GLOBAL_TOKEN_LITERAL:
            // Section 5.8.4.5
            $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
            $this->parseTag($input, $str, false, false);
            break;

        case XML_WBXML_GLOBAL_TOKEN_LITERAL_A:
            // Section 5.8.4.5
            $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
            $this->parseTag($input, $str, true, false);
            break;

        case XML_WBXML_GLOBAL_TOKEN_LITERAL_AC:
            // Section 5.8.4.5
            $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
            $this->parseTag($input, $str, true, true);
            break;

        case XML_WBXML_GLOBAL_TOKEN_LITERAL_C:
            // Section 5.8.4.5
            $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
            $this->parseTag($input, $str, false, true);
            break;

        case XML_WBXML_GLOBAL_TOKEN_OPAQUE:
            // Section 5.8.4.6
            $size = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
            $b = substr($input, $this->_strpos, $size);
            $this->_strpos += $size;

            // Opaque data inside a <data> element may or may not be
            // a nested wbxml document (for example devinf data).
            // We find out by checking the first byte of the data: a
            // small value is taken to be the version number of a wbxml
            // document, and a new decoder instance is run on it.
            // NOTE(review): the test accepts any first byte <= 10,
            // although only versions 1, 2 and 3 are expected.
            if ($this->_isData && ord($b) <= 10) {
                $decoder = new XML_WBXML_Decoder();
                $decoder->setContentHandler($this->_ch);
                $s = $decoder->decode($b);
                if (is_a($s, 'PEAR_Error')) {
                    $this->_error = $s;
                    return;
                }
            } else {
                // Normal opaque behaviour: just copy the raw data.
                $this->_ch->characters($b);
            }
            break;

        case XML_WBXML_GLOBAL_TOKEN_END:
            // Section 5.8.4.7.1
            $this->endTag();
            break;

        case XML_WBXML_GLOBAL_TOKEN_SWITCH_PAGE:
            // Section 5.8.4.7.2
            $codePage = $this->getByte($input);
            $this->switchElementCodePage($codePage);
            break;

        default:
            // Section 5.8.2
            // Section 5.8.3
            $hasAttributes = (($token & 0x80) != 0);
            $hasContent = (($token & 0x40) != 0);
            $realToken = $token & 0x3F;
            $str = $this->getTag($realToken);

            $this->parseTag($input, $str, $hasAttributes, $hasContent);

            // Store if we're inside a Data tag. This may contain an
            // additional enclosed wbxml document on which we have to
            // run a separate decoder.
            $this->_isData = ($realToken == 0x0f);
            break;
        }
    }

    /**
     * Reports the start of an element, and also its end if the element
     * has no content.
     *
     * @param string $input           The WBXML input string.
     * @param string $tag             The element name.
     * @param boolean $hasAttributes  Whether an attribute list follows.
     * @param boolean $hasContent     Whether the element has content.
     */
    function parseTag($input, $tag, $hasAttributes, $hasContent)
    {
        $attrs = array();
        if ($hasAttributes) {
            $attrs = $this->getAttributes($input);
        }

        $this->_ch->startElement($this->getCurrentURI(), $tag, $attrs);

        if ($hasContent) {
            // Remember the tag so the matching END token can close it.
            $this->_tagStack[] = $tag;
        } else {
            $this->_ch->endElement($this->getCurrentURI(), $tag);
        }
    }

    /**
     * Closes the most recently opened element.
     *
     * @return string  The name of the closed tag, or 'Unknown' if no
     *                 element was open.
     */
    function endTag()
    {
        if (count($this->_tagStack)) {
            $tag = array_pop($this->_tagStack);
        } else {
            $tag = 'Unknown';
        }

        $this->_ch->endElement($this->getCurrentURI(), $tag);

        return $tag;
    }

    /**
     * Reads an attribute list up to its END token.
     *
     * @param string $input  The WBXML input string.
     *
     * @return array  List of hashes with the keys 'attribute' and
     *                'value'.
     */
    function getAttributes($input)
    {
        $this->startGetAttributes();

        $attrs = array();
        $attr = null;
        $value = null;
        $length = strlen($input);

        // Bounding the loop by the input length guards against
        // truncated input that never supplies the END token.
        while ($this->_strpos < $length) {
            $token = $this->getByte($input);

            if ($token == XML_WBXML_GLOBAL_TOKEN_END) {
                // Section 5.8.4.7.1
                break;
            }

            switch ($token) {
            // Attribute specified.
            case XML_WBXML_GLOBAL_TOKEN_LITERAL:
                // Section 5.8.4.5
                if (isset($attr)) {
                    $attrs[] = array('attribute' => $attr,
                                     'value' => $value);
                    // Start the next attribute with an empty value.
                    $value = null;
                }

                $attr = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
                break;

            // Value specified.
            case XML_WBXML_GLOBAL_TOKEN_EXT_I_0:
            case XML_WBXML_GLOBAL_TOKEN_EXT_I_1:
            case XML_WBXML_GLOBAL_TOKEN_EXT_I_2:
                // Section 5.8.4.2
                $value .= $this->termstr($input);
                break;

            case XML_WBXML_GLOBAL_TOKEN_EXT_T_0:
            case XML_WBXML_GLOBAL_TOKEN_EXT_T_1:
            case XML_WBXML_GLOBAL_TOKEN_EXT_T_2:
                // Section 5.8.4.2
                $value .= $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
                break;

            case XML_WBXML_GLOBAL_TOKEN_EXT_0:
            case XML_WBXML_GLOBAL_TOKEN_EXT_1:
            case XML_WBXML_GLOBAL_TOKEN_EXT_2:
                // Section 5.8.4.2
                $value .= $input[$this->_strpos++];
                break;

            case XML_WBXML_GLOBAL_TOKEN_ENTITY:
                // Section 5.8.4.3
                $value .= $this->entity(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
                break;

            case XML_WBXML_GLOBAL_TOKEN_STR_I:
                // Section 5.8.4.1
                $value .= $this->termstr($input);
                break;

            case XML_WBXML_GLOBAL_TOKEN_STR_T:
                // Section 5.8.4.1
                $value .= $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
                break;

            case XML_WBXML_GLOBAL_TOKEN_OPAQUE:
                // Section 5.8.4.6
                $size = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
                // The third argument of substr() is a length, not an
                // end offset.
                $value .= substr($input, $this->_strpos, $size);
                $this->_strpos += $size;
                break;

            case XML_WBXML_GLOBAL_TOKEN_SWITCH_PAGE:
                // Section 5.8.4.7.2
                $codePage = $this->getByte($input);
                if (!$this->_prevAttributeDTD) {
                    // Remember the DTD to restore after this list.
                    $this->_prevAttributeDTD = $this->_attributeDTD;
                }

                $this->switchAttributeCodePage($codePage);
                break;

            default:
                // NOTE(review): the WBXML specification assigns values
                // below 128 to attribute start tokens and 128 or above
                // to attribute value tokens; this test appears to be
                // the other way round. Left unchanged pending a check
                // against the DTD classes' toAttribute().
                if ($token > 128) {
                    if (isset($attr)) {
                        $attrs[] = array('attribute' => $attr,
                                         'value' => $value);
                        // Start the next attribute with an empty value.
                        $value = null;
                    }
                    $attr = $this->_attributeDTD->toAttribute($token);
                } else {
                    // Value.
                    $value .= $this->_attributeDTD->toAttribute($token);
                }
                break;
            }
        }

        // Flush the attribute that was being assembled when the list
        // ended.
        if (isset($attr)) {
            $attrs[] = array('attribute' => $attr,
                             'value' => $value);
        }

        // Undo a code page switch made inside the attribute list. Only
        // restore when a DTD was actually saved; otherwise the current
        // attribute DTD would be overwritten with an empty value.
        if ($this->_prevAttributeDTD) {
            $this->_attributeDTD = $this->_prevAttributeDTD;
            $this->_prevAttributeDTD = false;
        }

        $this->stopGetAttributes();

        return $attrs;
    }

    /**
     * Marks the start of attribute list processing.
     */
    function startGetAttributes()
    {
        $this->_isAttribute = true;
    }

    /**
     * Marks the end of attribute list processing.
     */
    function stopGetAttributes()
    {
        $this->_isAttribute = false;
    }

    /**
     * Returns the URI of the DTD currently in effect.
     *
     * NOTE(review): the tag DTD is used while attributes are being
     * read and the attribute DTD otherwise, which looks reversed.
     * Confirm the intended behaviour before changing it.
     *
     * @return string  The URI reported by the selected DTD.
     */
    function getCurrentURI()
    {
        if ($this->_isAttribute) {
            return $this->_tagDTD->getURI();
        } else {
            return $this->_attributeDTD->getURI();
        }
    }

    /**
     * Passes character data to the content handler.
     *
     * @param string $str  The character data.
     */
    function writeString($str)
    {
        $this->_ch->characters($str);
    }

    /**
     * Translates a tag token using the current tag DTD.
     *
     * @param integer $tag  The tag token.
     *
     * @return string  The result of the DTD's toTagStr().
     */
    function getTag($tag)
    {
        // Should know which state it is in.
        return $this->_tagDTD->toTagStr($tag);
    }

    /**
     * Translates an attribute using the current attribute DTD.
     *
     * @param string $attribute  The attribute.
     *
     * @return mixed  The result of the DTD's toAttributeInt().
     */
    function getAttribute($attribute)
    {
        // Should know which state it is in.
        return $this->_attributeDTD->toAttributeInt($attribute);
    }

    /**
     * Switches both the tag and the attribute DTD to a code page.
     *
     * @param integer $codePage  The code page number.
     */
    function switchElementCodePage($codePage)
    {
        $this->_tagDTD = &$this->_dtdManager->getInstance($this->_tagDTD->toCodePageStr($codePage));
        $this->switchAttributeCodePage($codePage);
    }

    /**
     * Switches the attribute DTD to a code page.
     *
     * @param integer $codePage  The code page number.
     */
    function switchAttributeCodePage($codePage)
    {
        $this->_attributeDTD = &$this->_dtdManager->getInstance($this->_attributeDTD->toCodePageStr($codePage));
    }

    /**
     * Return the hex version of the base 10 $entity.
     *
     * @param integer $entity  The entity number.
     *
     * @return string  Hexadecimal digits without any prefix.
     */
    function entity($entity)
    {
        return dechex($entity);
    }

    /**
     * Reads a null terminated string from the current read position
     * and moves the read position past the terminator.
     *
     * @param string $input  The WBXML input string.
     *
     * @return string  The string without its terminator. If the input
     *                 ends before a terminator is found, everything up
     *                 to the end of the input is returned.
     */
    function termstr($input)
    {
        $length = strlen($input);
        $str = '';

        while ($this->_strpos < $length) {
            $ch = $input[$this->_strpos++];
            if (ord($ch) == 0) {
                break;
            }
            $str .= $ch;
        }

        return $str;
    }

}
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Sun Feb 25 18:01:28 2007 | par Balluche grâce à PHPXref 0.7 |