[ Index ]
 

Code source de Horde 3.1.3

Accédez au Source d'autres logiciels libres. Soutenez Angelica Josefina !

title

Body

[fermer]

/lib/XML/WBXML/ -> Decoder.php (source)

   1  <?php
   2  
   3  include_once 'XML/WBXML.php';
   4  include_once 'XML/WBXML/DTDManager.php';
   5  include_once 'XML/WBXML/ContentHandler.php';
   6  
   7  /**
   8   * $Horde: framework/XML_WBXML/WBXML/Decoder.php,v 1.22.10.8 2006/01/01 21:28:45 jan Exp $
   9   *
  10   * Copyright 2003-2006 Anthony Mills <amills@pyramid6.com>
  11   *
  12   * See the enclosed file COPYING for license information (LGPL).  If you
  13   * did not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
  14   *
  15   * From Binary XML Content Format Specification Version 1.3, 25 July
  16   * 2001 found at http://www.wapforum.org
  17   *
  18   * @package XML_WBXML
  19   */
  20  class XML_WBXML_Decoder extends XML_WBXML_ContentHandler {
  21  
  22      /**
  23       * Document Public Identifier type
  24       * 1 mb_u_int32 well known type
  25       * 2 string table
  26       * from spec but converted into a string.
  27       *
  28       * Document Public Identifier
  29       * Used with dpiType.
  30       */
  31      var $_dpi;
  32  
  33      /**
  34       * String table as defined in 5.7
  35       */
  36      var $_stringTable = array();
  37  
  38      /**
  39       * Content handler.
  40       * Currently just outputs raw XML.
  41       */
  42      var $_ch;
  43  
  44      var $_tagDTD;
  45  
  46      var $_prevAttributeDTD;
  47  
  48      var $_attributeDTD;
  49  
  50      /**
  51       * State variables.
  52       */
  53      var $_tagStack = array();
  54      var $_isAttribute;
  55      var $_isData = false;
  56  
  57      var $_error = false;
  58  
  59      /**
  60       * The DTD Manager.
  61       *
  62       * @var XML_WBXML_DTDManager
  63       */
  64      var $_dtdManager;
  65  
  66      /**
  67       * The string position.
  68       *
  69       * @var integer
  70       */
  71      var $_strpos;
  72  
  73      /**
  74       * Constructor.
  75       */
  76      function XML_WBXML_Decoder()
  77      {
  78          $this->_dtdManager = &new XML_WBXML_DTDManager();
  79      }
  80  
  81      /**
  82       * Sets the contentHandler that will receive the output of the
  83       * decoding.
  84       *
  85       * @param XML_WBXML_ContentHandler $ch The contentHandler
  86       */
  87      function setContentHandler(&$ch)
  88      {
  89          $this->_ch = &$ch;
  90      }
  91  
  92      /**
  93       * Return one byte from the input stream.
  94       *
  95       * @param string $input  The WBXML input string.
  96       */
  97      function getByte($input)
  98      {
  99          return ord($input{$this->_strpos++});
 100      }
 101  
 102      /**
 103       * Takes a WBXML input document and returns decoded XML.
 104       * However the preferred and more effecient method is to
 105       * use decode() rather than decodeToString() and have an
 106       * appropriate contentHandler deal with the decoded data.
 107       *
 108       * @param string $wbxml  The WBXML document to decode.
 109       *
 110       * @return string  The decoded XML document.
 111       */
 112      function decodeToString($wbxml)
 113      {
 114          $this->_ch = &new XML_WBXML_ContentHandler();
 115  
 116          $r = $this->decode($wbxml);
 117          if (is_a($r, 'PEAR_Error')) {
 118              return $r;
 119          }
 120          return $this->_ch->getOutput();
 121      }
 122  
 123      /**
 124       * Takes a WBXML input document and decodes it.
 125       * Decoding result is directly passed to the contentHandler.
 126       * A contenthandler must be set using setContentHandler
 127       * prior to invocation of this method
 128       *
 129       * @param string $wbxml  The WBXML document to decode.
 130       *
 131       * @return mixed  True on success or PEAR_Error.
 132       */
 133      function decode($wbxml)
 134      {
 135          $this->_error = false; // reset state
 136  
 137          $this->_strpos = 0;
 138  
 139          if (empty($this->_ch)) {
 140              return $this->raiseError('No Contenthandler defined.');
 141          }
 142  
 143          // Get Version Number from Section 5.4
 144          // version = u_int8
 145          // currently 1, 2 or 3
 146          $this->_wbxmlVersion = $this->getVersionNumber($wbxml);
 147  
 148          // Get Document Public Idetifier from Section 5.5
 149          // publicid = mb_u_int32 | (zero index)
 150          // zero = u_int8
 151          // Containing the value zero (0)
 152          // The actual DPI is determined after the String Table is read.
 153          $dpiStruct = $this->getDocumentPublicIdentifier($wbxml);
 154  
 155          // Get Charset from 5.6
 156          // charset = mb_u_int32
 157          $this->_charset = $this->getCharset($wbxml);
 158  
 159          // Get String Table from 5.7
 160          // strb1 = length *byte
 161          $this->retrieveStringTable($wbxml);
 162  
 163          // Get Document Public Idetifier from Section 5.5.
 164          $this->_dpi = $this->getDocumentPublicIdentifierImpl($dpiStruct['dpiType'],
 165                                                               $dpiStruct['dpiNumber'],
 166                                                               $this->_stringTable);
 167  
 168          // Now the real fun begins.
 169          // From Sections 5.2 and 5.8
 170  
 171  
 172          // Default content handler.
 173          $this->_dtdManager = &new XML_WBXML_DTDManager();
 174  
 175          // Get the starting DTD.
 176          $this->_tagDTD = $this->_dtdManager->getInstance($this->_dpi);
 177  
 178          if (!$this->_tagDTD) {
 179              return $this->raiseError('No DTD found for '
 180                               . $this->_dpi . '/'
 181                               . $dpiStruct['dpiNumber']);
 182          }
 183  
 184          $this->_attributeDTD = $this->_tagDTD;
 185  
 186          while (empty($this->_error) && $this->_strpos < strlen($wbxml)) {
 187              $this->_decode($wbxml);
 188          }
 189          if (!empty($this->_error)) {
 190              return $this->_error;
 191          }
 192          return true;
 193      }
 194  
 195      function getVersionNumber($input)
 196      {
 197          return $this->getByte($input);
 198      }
 199  
 200      function getDocumentPublicIdentifier($input)
 201      {
 202          $i = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
 203          if ($i == 0) {
 204              return array('dpiType' => 2,
 205                           'dpiNumber' => $this->getByte($input));
 206          } else {
 207              return array('dpiType' => 1,
 208                           'dpiNumber' => $i);
 209          }
 210      }
 211  
 212      function getDocumentPublicIdentifierImpl($dpiType, $dpiNumber)
 213      {
 214          if ($dpiType == 1) {
 215              return XML_WBXML::getDPIString($dpiNumber);
 216          } else {
 217              return $this->getStringTableEntry($dpiNumber);
 218          }
 219      }
 220  
 221      /**
 222       * Returns the character encoding. Only default character
 223       * encodings from J2SE are supported.  From
 224       * http://www.iana.org/assignments/character-sets and
 225       * http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html
 226       */
 227      function getCharset($input)
 228      {
 229          $cs = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
 230          return XML_WBXML::getCharsetString($cs);
 231      }
 232  
 233      /**
 234       * Retrieves the string table.
 235       * The string table consists of an mb_u_int32 length
 236       * and then length bytes forming the table.
 237       * References to the string table refer to the
 238       * starting position of the (null terminated)
 239       * string in this table.
 240       */
 241      function retrieveStringTable($input)
 242      {
 243          $size = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
 244          $this->_stringTable = substr($input, $this->_strpos, $size);
 245          $this->_strpos += $size;
 246          // print "stringtable($size):" . $this->_stringTable ."\n";
 247      }
 248  
 249      function getStringTableEntry($index)
 250      {
 251          if ($index >= strlen($this->_stringTable)) {
 252              $this->_error =
 253                  $this->_ch->raiseError('Invalid offset ' . $index
 254                                       . ' value encountered around position '
 255                                       . $this->_strpos
 256                                       . '. Broken wbxml?');
 257              return '';
 258          }
 259  
 260          // copy of method termstr but without modification of this->_strpos
 261  
 262          $str = '#'; // must start with nonempty string to allow array access
 263  
 264          $i = 0;
 265          $ch = $this->_stringTable[$index++];
 266          if (ord($ch) == 0) {
 267              return ''; // don't return '#'
 268          }
 269  
 270          while (ord($ch) != 0) {
 271              $str[$i++] = $ch;
 272              if ($index >= strlen($this->_stringTable)) {
 273                  break;
 274              }
 275              $ch = $this->_stringTable[$index++];
 276          }
 277          // print "string table entry: $str\n";
 278          return $str;
 279  
 280      }
 281  
 282      function _decode($input)
 283      {
 284          $token = $this->getByte($input);
 285          $str = '';
 286  
 287          // print "position: " . $this->_strpos . " token: " . $token . " str10: " . substr($input, $this->_strpos, 10) . "\n"; // @todo: remove debug output
 288  
 289          switch ($token) {
 290          case XML_WBXML_GLOBAL_TOKEN_STR_I:
 291              // Section 5.8.4.1
 292              $str = $this->termstr($input);
 293              $this->_ch->characters($str);
 294              // print "str:$str\n"; // @TODO Remove debug code
 295              break;
 296  
 297          case XML_WBXML_GLOBAL_TOKEN_STR_T:
 298              // Section 5.8.4.1
 299              $x = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
 300              $str = $this->getStringTableEntry($x);
 301              $this->_ch->characters($str);
 302              break;
 303  
 304          case XML_WBXML_GLOBAL_TOKEN_EXT_I_0:
 305          case XML_WBXML_GLOBAL_TOKEN_EXT_I_1:
 306          case XML_WBXML_GLOBAL_TOKEN_EXT_I_2:
 307              // Section 5.8.4.2
 308              $str = $this->termstr($input);
 309              $this->_ch->characters($str);
 310              break;
 311  
 312          case XML_WBXML_GLOBAL_TOKEN_EXT_T_0:
 313          case XML_WBXML_GLOBAL_TOKEN_EXT_T_1:
 314          case XML_WBXML_GLOBAL_TOKEN_EXT_T_2:
 315              // Section 5.8.4.2
 316              $str = $this->getStringTableEnty(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 317              $this->_ch->characters($str);
 318              break;
 319  
 320          case XML_WBXML_GLOBAL_TOKEN_EXT_0:
 321          case XML_WBXML_GLOBAL_TOKEN_EXT_1:
 322          case XML_WBXML_GLOBAL_TOKEN_EXT_2:
 323              // Section 5.8.4.2
 324              $extension = $this->getByte($input);
 325              $this->_ch->characters($extension);
 326              break;
 327  
 328          case XML_WBXML_GLOBAL_TOKEN_ENTITY:
 329              // Section 5.8.4.3
 330              // UCS-4 chracter encoding?
 331              $entity = $this->entity(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 332  
 333              $this->_ch->characters('&#' . $entity . ';');
 334              break;
 335  
 336          case XML_WBXML_GLOBAL_TOKEN_PI:
 337              // Section 5.8.4.4
 338              // throw new IOException
 339              // die("WBXML global token processing instruction(PI, " + token + ") is unsupported!\n");
 340              break;
 341  
 342          case XML_WBXML_GLOBAL_TOKEN_LITERAL:
 343              // Section 5.8.4.5
 344              $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 345              $this->parseTag($input, $str, false, false);
 346              break;
 347  
 348          case XML_WBXML_GLOBAL_TOKEN_LITERAL_A:
 349              // Section 5.8.4.5
 350              $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 351              $this->parseTag($input, $str, true, false);
 352              break;
 353  
 354          case XML_WBXML_GLOBAL_TOKEN_LITERAL_AC:
 355              // Section 5.8.4.5
 356              $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 357              $this->parseTag($input, $string, true, true);
 358              break;
 359  
 360          case XML_WBXML_GLOBAL_TOKEN_LITERAL_C:
 361              // Section 5.8.4.5
 362              $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 363              $this->parseTag($input, $str, false, true);
 364              break;
 365  
 366          case XML_WBXML_GLOBAL_TOKEN_OPAQUE:
 367              // Section 5.8.4.6
 368              $size = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
 369              // print "opaque of size $size\n"; // @todo remove debug
 370              $b = substr($input, $this->_strpos, $size);
 371              $this->_strpos += $size;
 372  
 373              // opaque data inside a <data> element may or may not be
 374              // a nested wbxml document (for example devinf data).
 375              // We find out by checking the first byte of the data: if it's
 376              // 1, 2 or 3 we expect it to be the version number of a wbxml
 377              // document and thus start a new wbxml decoder instance on it.
 378  
 379              if ($this->_isData && ord($b) <= 10) {
 380                  $decoder = &new XML_WBXML_Decoder(true);
 381                  $decoder->setContentHandler($this->_ch);
 382                  $s = $decoder->decode($b);
 383          //                /* // @todo: FIXME currently we can't decode Nokia
 384                  // DevInf data. So ignore error for the time beeing.
 385          if (is_a($s, 'PEAR_Error')) {
 386                      $this->_error = $s;
 387                      return;
 388                  }
 389                  // */
 390                  // $this->_ch->characters($s);
 391              } else {
 392                  /* normal opaque behaviour: just copy the raw data: */
 393                  $this->_ch->characters( $b);
 394              }
 395  
 396              // old approach to deal with opaque data inside ContentHandler:
 397              // FIXME Opaque is used by SYNCML.  Opaque data that depends on the context
 398              // if (contentHandler instanceof OpaqueContentHandler) {
 399              //     ((OpaqueContentHandler)contentHandler).opaque(b);
 400              // } else {
 401              //     String str = new String(b, 0, size, charset);
 402              //     char[] chars = str.toCharArray();
 403  
 404              //     contentHandler.characters(chars, 0, chars.length);
 405              // }
 406  
 407              break;
 408  
 409          case XML_WBXML_GLOBAL_TOKEN_END:
 410              // Section 5.8.4.7.1
 411              $str = $this->endTag();
 412              break;
 413  
 414          case XML_WBXML_GLOBAL_TOKEN_SWITCH_PAGE:
 415              // Section 5.8.4.7.2
 416              $codePage = $this->getByte($input);
 417              // print "switch to codepage $codePage\n"; // @todo: remove debug code
 418              $this->switchElementCodePage($codePage);
 419              break;
 420  
 421          default:
 422              // Section 5.8.2
 423              // Section 5.8.3
 424              $hasAttributes = (($token & 0x80) != 0);
 425              $hasContent = (($token & 0x40) != 0);
 426              $realToken = $token & 0x3F;
 427              $str = $this->getTag($realToken);
 428  
 429              // print "element:$str\n"; // @TODO Remove debug code
 430              $this->parseTag($input, $str, $hasAttributes, $hasContent);
 431  
 432              if ($realToken == 0x0f) {
 433                  // store if we're inside a Data tag. This may contain
 434                  // an additional enclosed wbxml document on which we have
 435                  // to run a seperate encoder
 436                  $this->_isData = true;
 437              } else {
 438                  $this->_isData = false;
 439              }
 440              break;
 441          }
 442      }
 443  
 444      function parseTag($input, $tag, $hasAttributes, $hasContent)
 445      {
 446          $attrs = array();
 447          if ($hasAttributes) {
 448              $attrs = $this->getAttributes($input);
 449          }
 450  
 451          $this->_ch->startElement($this->getCurrentURI(), $tag, $attrs);
 452  
 453          if ($hasContent) {
 454              // FIXME I forgot what does this does. Not sure if this is
 455              // right?
 456              $this->_tagStack[] = $tag;
 457          } else {
 458              $this->_ch->endElement($this->getCurrentURI(), $tag);
 459          }
 460      }
 461  
 462      function endTag()
 463      {
 464          if (count($this->_tagStack)) {
 465              $tag = array_pop($this->_tagStack);
 466          } else {
 467              $tag = 'Unknown';
 468          }
 469  
 470          $this->_ch->endElement($this->getCurrentURI(), $tag);
 471  
 472          return $tag;
 473      }
 474  
 475      function getAttributes($input)
 476      {
 477          $this->startGetAttributes();
 478          $hasMoreAttributes = true;
 479  
 480          $attrs = array();
 481          $attr = null;
 482          $value = null;
 483          $token = null;
 484  
 485          while ($hasMoreAttributes) {
 486              $token = $this->getByte($input);
 487  
 488              switch ($token) {
 489              // Attribute specified.
 490              case XML_WBXML_GLOBAL_TOKEN_LITERAL:
 491                  // Section 5.8.4.5
 492                  if (isset($attr)) {
 493                      $attrs[] = array('attribute' => $attr,
 494                                       'value' => $value);
 495                  }
 496  
 497                  $attr = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 498                  break;
 499  
 500              // Value specified.
 501              case XML_WBXML_GLOBAL_TOKEN_EXT_I_0:
 502              case XML_WBXML_GLOBAL_TOKEN_EXT_I_1:
 503              case XML_WBXML_GLOBAL_TOKEN_EXT_I_2:
 504                  // Section 5.8.4.2
 505                  $value .= $this->termstr($input);
 506                  break;
 507  
 508              case XML_WBXML_GLOBAL_TOKEN_EXT_T_0:
 509              case XML_WBXML_GLOBAL_TOKEN_EXT_T_1:
 510              case XML_WBXML_GLOBAL_TOKEN_EXT_T_2:
 511                  // Section 5.8.4.2
 512                  $value .= $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 513                  break;
 514  
 515              case XML_WBXML_GLOBAL_TOKEN_EXT_0:
 516              case XML_WBXML_GLOBAL_TOKEN_EXT_1:
 517              case XML_WBXML_GLOBAL_TOKEN_EXT_2:
 518                  // Section 5.8.4.2
 519                  $value .= $input[$this->_strpos++];
 520                  break;
 521  
 522              case XML_WBXML_GLOBAL_TOKEN_ENTITY:
 523                  // Section 5.8.4.3
 524                  $value .= $this->entity(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 525                  break;
 526  
 527              case XML_WBXML_GLOBAL_TOKEN_STR_I:
 528                  // Section 5.8.4.1
 529                  $value .= $this->termstr($input);
 530                  break;
 531  
 532              case XML_WBXML_GLOBAL_TOKEN_STR_T:
 533                  // Section 5.8.4.1
 534                  $value .= $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 535                  break;
 536  
 537              case XML_WBXML_GLOBAL_TOKEN_OPAQUE:
 538                  // Section 5.8.4.6
 539                  $size = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
 540                  $b = substr($input, $this->_strpos, $this->_strpos + $size);
 541                  $this->_strpos += $size;
 542  
 543                  $value .= $b;
 544                  break;
 545  
 546              case XML_WBXML_GLOBAL_TOKEN_END:
 547                  // Section 5.8.4.7.1
 548                  $hasMoreAttributes = false;
 549                  if (isset($attr)) {
 550                      $attrs[] = array('attribute' => $attr,
 551                                       'value' => $value);
 552                  }
 553                  break;
 554  
 555              case XML_WBXML_GLOBAL_TOKEN_SWITCH_PAGE:
 556                  // Section 5.8.4.7.2
 557                  $codePage = $this->getByte($input);
 558                  if (!$this->_prevAttributeDTD) {
 559                      $this->_prevAttributeDTD = $this->_attributeDTD;
 560                  }
 561  
 562                  $this->switchAttributeCodePage($codePage);
 563                  break;
 564  
 565              default:
 566                  if ($token > 128) {
 567                      if (isset($attr)) {
 568                          $attrs[] = array('attribute' => $attr,
 569                                           'value' => $value);
 570                      }
 571                      $attr = $this->_attributeDTD->toAttribute($token);
 572                  } else {
 573                      // Value.
 574                      $value .= $this->_attributeDTD->toAttribute($token);
 575                  }
 576                  break;
 577              }
 578          }
 579  
 580          if (!$this->_prevAttributeDTD) {
 581              $this->_attributeDTD = $this->_prevAttributeDTD;
 582              $this->_prevAttributeDTD = false;
 583          }
 584  
 585          $this->stopGetAttributes();
 586      }
 587  
 588      function startGetAttributes()
 589      {
 590          $this->_isAttribute = true;
 591      }
 592  
 593      function stopGetAttributes()
 594      {
 595          $this->_isAttribute = false;
 596      }
 597  
 598      function getCurrentURI()
 599      {
 600          if ($this->_isAttribute) {
 601              return $this->_tagDTD->getURI();
 602          } else {
 603              return $this->_attributeDTD->getURI();
 604          }
 605      }
 606  
 607      function writeString($str)
 608      {
 609          $this->_ch->characters($str);
 610      }
 611  
 612      function getTag($tag)
 613      {
 614          // Should know which state it is in.
 615          return $this->_tagDTD->toTagStr($tag);
 616      }
 617  
 618      function getAttribute($attribute)
 619      {
 620          // Should know which state it is in.
 621          $this->_attributeDTD->toAttributeInt($attribute);
 622      }
 623  
 624      function switchElementCodePage($codePage)
 625      {
 626          $this->_tagDTD = &$this->_dtdManager->getInstance($this->_tagDTD->toCodePageStr($codePage));
 627          $this->switchAttributeCodePage($codePage);
 628      }
 629  
 630      function switchAttributeCodePage($codePage)
 631      {
 632          $this->_attributeDTD = &$this->_dtdManager->getInstance($this->_attributeDTD->toCodePageStr($codePage));
 633      }
 634  
 635      /**
 636       * Return the hex version of the base 10 $entity.
 637       */
 638      function entity($entity)
 639      {
 640          return dechex($entity);
 641      }
 642  
 643      /**
 644       * Reads a null terminated string.
 645       */
 646      function termstr($input)
 647      {
 648          $str = '#'; // must start with nonempty string to allow array access
 649          $i = 0;
 650          $ch = $input[$this->_strpos++];
 651          if (ord($ch) == 0) {
 652              return ''; // don't return '#'
 653          }
 654          while (ord($ch) != 0) {
 655              $str[$i++] = $ch;
 656              $ch = $input[$this->_strpos++];
 657          }
 658  
 659          return $str;
 660      }
 661  
 662  }
 663  


Généré le : Sun Feb 25 18:01:28 2007 par Balluche grâce à PHPXref 0.7