[ Index ]
 

Code source de eGroupWare 1.2.106-2

Accédez au Source d'autres logiciels libresSoutenez Angelica Josefina !

title

Body

[fermer]

/phpgwapi/inc/horde/XML/WBXML/ -> Decoder.php (source)

   1  <?php
   2  
   3  include_once 'XML/WBXML.php';
   4  include_once 'XML/WBXML/DTDManager.php';
   5  include_once 'XML/WBXML/ContentHandler.php';
   6  
   7  /**
   8   * $Horde: framework/XML_WBXML/WBXML/Decoder.php,v 1.36 2006/01/01 21:10:25 jan Exp $
   9   *
  10   * Copyright 2003-2006 Anthony Mills <amills@pyramid6.com>
  11   *
  12   * See the enclosed file COPYING for license information (LGPL).  If you
  13   * did not receive this file, see http://www.fsf.org/copyleft/lgpl.html.
  14   *
  15   * From Binary XML Content Format Specification Version 1.3, 25 July
  16   * 2001 found at http://www.wapforum.org
  17   *
  18   * @package XML_WBXML
  19   */
  20  class XML_WBXML_Decoder extends XML_WBXML_ContentHandler {
  21  
  22      /**
  23       * Document Public Identifier type
  24       * 1 mb_u_int32 well known type
  25       * 2 string table
  26       * from spec but converted into a string.
  27       *
  28       * Document Public Identifier
  29       * Used with dpiType.
  30       */
  31      var $_dpi;
  32  
  33      /**
  34       * String table as defined in 5.7
  35       */
  36      var $_stringTable = array();
  37  
  38      /**
  39       * Content handler.
  40       * Currently just outputs raw XML.
  41       */
  42      var $_ch;
  43  
  44      var $_tagDTD;
  45  
  46      var $_prevAttributeDTD;
  47  
  48      var $_attributeDTD;
  49  
  50      /**
  51       * State variables.
  52       */
  53      var $_tagStack = array();
  54      var $_isAttribute;
  55      var $_isData = false;
  56  
  57      var $_error = false;
  58  
  59      /**
  60       * The DTD Manager.
  61       *
  62       * @var XML_WBXML_DTDManager
  63       */
  64      var $_dtdManager;
  65  
  66      /**
  67       * The string position.
  68       *
  69       * @var integer
  70       */
  71      var $_strpos;
  72  
  73      /**
  74       * Constructor.
  75       */
  76      function XML_WBXML_Decoder()
  77      {
  78          $this->_dtdManager = &new XML_WBXML_DTDManager();
  79      }
  80  
  81      /**
  82       * Sets the contentHandler that will receive the output of the
  83       * decoding.
  84       *
  85       * @param XML_WBXML_ContentHandler $ch The contentHandler
  86       */
  87      function setContentHandler(&$ch) {
  88          $this->_ch = &$ch;
  89      }
  90      /**
  91       * Return one byte from the input stream.
  92       *
  93       * @param string $input  The WBXML input string.
  94       */
  95      function getByte($input)
  96      {
  97          $value =  $input{$this->_strpos++};
  98          $value =  ord($value);
  99          
 100          return $value;
 101      }
 102  
 103      /**
 104       * Takes a WBXML input document and returns decoded XML.
 105       * However the preferred and more effecient method is to
 106       * use decode() rather than decodeToString() and have an
 107       * appropriate contentHandler deal with the decoded data.
 108       *
 109       * @param string $wbxml  The WBXML document to decode.
 110       *
 111       * @return string  The decoded XML document.
 112       */
 113      function decodeToString($wbxml)
 114      {
 115          $this->_ch = &new XML_WBXML_ContentHandler();
 116  
 117          $r = $this->decode($wbxml);
 118          if (is_a($r, 'PEAR_Error')) {
 119              return $r;
 120          }
 121          return $this->_ch->getOutput();
 122      }
 123  
 124      /**
 125       * Takes a WBXML input document and decodes it.
 126       * Decoding result is directly passed to the contentHandler.
 127       * A contenthandler must be set using setContentHandler
 128       * prior to invocation of this method
 129       *
 130       * @param string $wbxml  The WBXML document to decode.
 131       *
 132       * @return mixed  True on success or PEAR_Error.
 133       */
 134      function decode($wbxml)
 135      {
 136      // fix for Nokia Series 60 which seem to send empty data block sometimes
 137      if(strlen($wbxml) == 0) {
 138              return true;
 139      }
 140      
 141          $this->_error = false; // reset state
 142          $this->_strpos = 0;
 143  
 144          if (empty($this->_ch)) {
 145              return $this->raiseError('No Contenthandler defined.');
 146          }
 147  
 148          // Get Version Number from Section 5.4
 149          // version = u_int8
 150          // currently 1, 2 or 3
 151          $this->_wbxmlVersion = $this->getVersionNumber($wbxml);
 152  
 153          // Get Document Public Idetifier from Section 5.5
 154          // publicid = mb_u_int32 | (zero index)
 155          // zero = u_int8
 156          // Containing the value zero (0)
 157          // The actual DPI is determined after the String Table is read.
 158          $dpiStruct = $this->getDocumentPublicIdentifier($wbxml);
 159          // Get Charset from 5.6
 160          // charset = mb_u_int32
 161          $this->_charset = $this->getCharset($wbxml);
 162  
 163          // Get String Table from 5.7
 164          // strb1 = length *byte
 165          $this->retrieveStringTable($wbxml);
 166  
 167          // Get Document Public Idetifier from Section 5.5.
 168          $this->_dpi = $this->getDocumentPublicIdentifierImpl($dpiStruct['dpiType'],
 169                                                               $dpiStruct['dpiNumber'],
 170                                                               $this->_stringTable);
 171  
 172          // Now the real fun begins.
 173          // From Sections 5.2 and 5.8
 174  
 175  
 176          // Default content handler.
 177          $this->_dtdManager = &new XML_WBXML_DTDManager();
 178  
 179          // Get the starting DTD.
 180          $this->_tagDTD = $this->_dtdManager->getInstance($this->_dpi);
 181  
 182          if (!$this->_tagDTD) {
 183              return $this->raiseError('No DTD found for ' 
 184                               . $this->_dpi . '/' 
 185                               . $dpiStruct['dpiNumber']);
 186          }
 187  
 188          $this->_attributeDTD = $this->_tagDTD;
 189  
 190          while (empty($this->_error) && $this->_strpos < strlen($wbxml)) {
 191              $this->_decode($wbxml);
 192          }
 193          if (!empty($this->_error)) {
 194              return $this->_error;
 195          }
 196          return true;
 197      }
 198  
 199      function getVersionNumber($input)
 200      {
 201          return $this->getByte($input);
 202      }
 203  
 204      function getDocumentPublicIdentifier($input)
 205      {
 206          $i = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
 207          if ($i == 0) {
 208              return array('dpiType' => 2,
 209                           'dpiNumber' => $this->getByte($input));
 210          } else {
 211              return array('dpiType' => 1,
 212                           'dpiNumber' => $i);
 213          }
 214      }
 215  
 216      function getDocumentPublicIdentifierImpl($dpiType, $dpiNumber)
 217      {
 218          if ($dpiType == 1) {
 219              return XML_WBXML::getDPIString($dpiNumber);
 220          } else {
 221              return $this->getStringTableEntry($dpiNumber);
 222          }
 223      }
 224  
 225      /**
 226       * Returns the character encoding. Only default character
 227       * encodings from J2SE are supported.  From
 228       * http://www.iana.org/assignments/character-sets and
 229       * http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html
 230       */
 231      function getCharset($input)
 232      {
 233          $cs = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
 234          return XML_WBXML::getCharsetString($cs);
 235      }
 236  
 237      /**
 238       * Retrieves the string table. 
 239       * The string table consists of an mb_u_int32 length
 240       * and then length bytes forming the table.
 241       * References to the string table refer to the
 242       * starting position of the (null terminated)
 243       * string in this table.
 244       */
 245      function retrieveStringTable($input)
 246      {
 247          $size = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
 248          $this->_stringTable = substr($input, $this->_strpos, $size);
 249          $this->_strpos += $size;
 250          // print "stringtable($size):" . $this->_stringTable ."\n";
 251      }
 252  
 253      function getStringTableEntry($index)
 254      {
 255          if ($index >= strlen($this->_stringTable)) {
 256              $this->_error =
 257                  $this->_ch->raiseError('Invalid offset ' . $index
 258                                       . ' value encountered around position '
 259                                       . $this->_strpos
 260                                       . '. Broken wbxml?');
 261              return '';
 262          }
 263  
 264          // copy of method termstr but without modification of this->_strpos
 265  
 266          $str = '#'; // must start with nonempty string to allow array access
 267  
 268          $i = 0;
 269          $ch = $this->_stringTable[$index++];
 270          if (ord($ch) == 0) {
 271              return ''; // don't return '#'
 272          }
 273   
 274          while (ord($ch) != 0) {
 275              $str[$i++] = $ch;
 276              if ($index >= strlen($this->_stringTable)) {
 277                  break;    
 278              }
 279              $ch = $this->_stringTable[$index++];
 280          }
 281          // print "string table entry: $str\n";
 282          return $str;
 283          
 284      }
 285  
 286      function _decode($input)
 287      {
 288          $token = $this->getByte($input);
 289          $str = '';
 290  
 291          #print "position: " . $this->_strpos . " token: " . $token . " str10: " . substr($input, $this->_strpos, 10) . "\n"; // @todo: remove debug output
 292  
 293          switch ($token) {
 294          case XML_WBXML_GLOBAL_TOKEN_STR_I:
 295              // Section 5.8.4.1
 296              $str = $this->termstr($input);
 297              $this->_ch->characters($str);
 298              // print "str:$str\n"; // @TODO Remove debug code
 299              break;
 300  
 301          case XML_WBXML_GLOBAL_TOKEN_STR_T:
 302              // Section 5.8.4.1
 303              $x = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
 304              $str = $this->getStringTableEntry($x);
 305              $this->_ch->characters($str);
 306              break;
 307  
 308          case XML_WBXML_GLOBAL_TOKEN_EXT_I_0:
 309          case XML_WBXML_GLOBAL_TOKEN_EXT_I_1:
 310          case XML_WBXML_GLOBAL_TOKEN_EXT_I_2:
 311              // Section 5.8.4.2
 312              $str = $this->termstr($input);
 313              $this->_ch->characters($str);
 314              break;
 315  
 316          case XML_WBXML_GLOBAL_TOKEN_EXT_T_0:
 317          case XML_WBXML_GLOBAL_TOKEN_EXT_T_1:
 318          case XML_WBXML_GLOBAL_TOKEN_EXT_T_2:
 319              // Section 5.8.4.2
 320              $str = $this->getStringTableEnty(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 321              $this->_ch->characters($str);
 322              break;
 323  
 324          case XML_WBXML_GLOBAL_TOKEN_EXT_0:
 325          case XML_WBXML_GLOBAL_TOKEN_EXT_1:
 326          case XML_WBXML_GLOBAL_TOKEN_EXT_2:
 327              // Section 5.8.4.2
 328              $extension = $this->getByte($input);
 329              $this->_ch->characters($extension);
 330              break;
 331  
 332          case XML_WBXML_GLOBAL_TOKEN_ENTITY:
 333              // Section 5.8.4.3
 334              // UCS-4 chracter encoding?
 335              $entity = $this->entity(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 336  
 337              $this->_ch->characters('&#' . $entity . ';');
 338              break;
 339  
 340          case XML_WBXML_GLOBAL_TOKEN_PI:
 341              // Section 5.8.4.4
 342              // throw new IOException
 343              // die("WBXML global token processing instruction(PI, " + token + ") is unsupported!\n");
 344              break;
 345  
 346          case XML_WBXML_GLOBAL_TOKEN_LITERAL:
 347              // Section 5.8.4.5
 348              $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 349              $this->parseTag($input, $str, false, false);
 350              break;
 351  
 352          case XML_WBXML_GLOBAL_TOKEN_LITERAL_A:
 353              // Section 5.8.4.5
 354              $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 355              $this->parseTag($input, $str, true, false);
 356              break;
 357  
 358          case XML_WBXML_GLOBAL_TOKEN_LITERAL_AC:
 359              // Section 5.8.4.5
 360              $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 361              $this->parseTag($input, $string, true, true);
 362              break;
 363  
 364          case XML_WBXML_GLOBAL_TOKEN_LITERAL_C:
 365              // Section 5.8.4.5
 366              $str = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 367              $this->parseTag($input, $str, false, true);
 368              break;
 369  
 370          case XML_WBXML_GLOBAL_TOKEN_OPAQUE:
 371              // Section 5.8.4.6
 372              $size = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
 373              // print "opaque of size $size\n"; // @todo remove debug
 374              $b = substr($input, $this->_strpos, $size);
 375              #$b = mb_substr($input, $this->_strpos, $size, 'ISO-8859-1');
 376              $this->_strpos += $size;
 377  
 378              // opaque data inside a <data> element may or may not be
 379              // a nested wbxml document (for example devinf data).
 380              // We find out by checking the first byte of the data: if it's
 381              // 1, 2 or 3 we expect it to be the version number of a wbxml
 382              // document and thus start a new wbxml decoder instance on it.
 383  
 384              if ($this->_isData && ord($b) <= 10) {
 385                  $decoder = &new XML_WBXML_Decoder(true);
 386                  $decoder->setContentHandler($this->_ch);
 387                  $s = $decoder->decode($b);
 388          //                /* // @todo: FIXME currently we can't decode Nokia
 389                  // DevInf data. So ignore error for the time beeing.
 390          if (is_a($s, 'PEAR_Error')) {
 391                      $this->_error = $s;
 392                      return;
 393                  }
 394                  // */
 395                  // $this->_ch->characters($s);
 396              } else {
 397                  /* normal opaque behaviour: just copy the raw data: */
 398                  $this->_ch->characters( $b);
 399              }
 400  
 401              // old approach to deal with opaque data inside ContentHandler:
 402              // FIXME Opaque is used by SYNCML.  Opaque data that depends on the context
 403              // if (contentHandler instanceof OpaqueContentHandler) {
 404              //     ((OpaqueContentHandler)contentHandler).opaque(b);
 405              // } else {
 406              //     String str = new String(b, 0, size, charset);
 407              //     char[] chars = str.toCharArray();
 408  
 409              //     contentHandler.characters(chars, 0, chars.length);
 410              // }
 411  
 412              break;
 413  
 414          case XML_WBXML_GLOBAL_TOKEN_END:
 415              // Section 5.8.4.7.1
 416              $str = $this->endTag();
 417              break;
 418  
 419          case XML_WBXML_GLOBAL_TOKEN_SWITCH_PAGE:
 420              // Section 5.8.4.7.2
 421              $codePage = $this->getByte($input);
 422              // print "switch to codepage $codePage\n"; // @todo: remove debug code
 423              $this->switchElementCodePage($codePage);
 424              break;
 425  
 426          default:
 427              // Section 5.8.2
 428              // Section 5.8.3
 429              $hasAttributes = (($token & 0x80) != 0);
 430              $hasContent = (($token & 0x40) != 0);
 431              $realToken = $token & 0x3F;
 432              $str = $this->getTag($realToken);
 433  
 434              // print "element:$str\n"; // @TODO Remove debug code
 435              $this->parseTag($input, $str, $hasAttributes, $hasContent);
 436  
 437              if ($realToken == 0x0f) {
 438                  // store if we're inside a Data tag. This may contain
 439                  // an additional enclosed wbxml document on which we have
 440                  // to run a seperate encoder
 441                  $this->_isData = true;
 442              } else {
 443                  $this->_isData = false;
 444              }
 445              break;
 446          }
 447      }
 448  
 449      function parseTag($input, $tag, $hasAttributes, $hasContent)
 450      {
 451          $attrs = array();
 452          if ($hasAttributes) {
 453              $attrs = $this->getAttributes($input);
 454          }
 455  
 456          $this->_ch->startElement($this->getCurrentURI(), $tag, $attrs);
 457  
 458          if ($hasContent) {
 459              // FIXME I forgot what does this does. Not sure if this is
 460              // right?
 461              $this->_tagStack[] = $tag;
 462          } else {
 463              $this->_ch->endElement($this->getCurrentURI(), $tag);
 464          }
 465      }
 466  
 467      function endTag()
 468      {
 469          if (count($this->_tagStack)) {
 470              $tag = array_pop($this->_tagStack);
 471          } else {
 472              $tag = 'Unknown';
 473          }
 474  
 475          $this->_ch->endElement($this->getCurrentURI(), $tag);
 476  
 477          return $tag;
 478      }
 479  
 480      function getAttributes($input)
 481      {
 482          $this->startGetAttributes();
 483          $hasMoreAttributes = true;
 484  
 485          $attrs = array();
 486          $attr = null;
 487          $value = null;
 488          $token = null;
 489  
 490          while ($hasMoreAttributes) {
 491              $token = $this->getByte($input);
 492  
 493              switch ($token) {
 494              // Attribute specified.
 495              case XML_WBXML_GLOBAL_TOKEN_LITERAL:
 496                  // Section 5.8.4.5
 497                  if (isset($attr)) {
 498                      $attrs[] = array('attribute' => $attr,
 499                                       'value' => $value);
 500                  }
 501  
 502                  $attr = $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 503                  break;
 504  
 505              // Value specified.
 506              case XML_WBXML_GLOBAL_TOKEN_EXT_I_0:
 507              case XML_WBXML_GLOBAL_TOKEN_EXT_I_1:
 508              case XML_WBXML_GLOBAL_TOKEN_EXT_I_2:
 509                  // Section 5.8.4.2
 510                  $value .= $this->termstr($input);
 511                  break;
 512  
 513              case XML_WBXML_GLOBAL_TOKEN_EXT_T_0:
 514              case XML_WBXML_GLOBAL_TOKEN_EXT_T_1:
 515              case XML_WBXML_GLOBAL_TOKEN_EXT_T_2:
 516                  // Section 5.8.4.2
 517                  $value .= $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 518                  break;
 519  
 520              case XML_WBXML_GLOBAL_TOKEN_EXT_0:
 521              case XML_WBXML_GLOBAL_TOKEN_EXT_1:
 522              case XML_WBXML_GLOBAL_TOKEN_EXT_2:
 523                  // Section 5.8.4.2
 524                  $value .= $input[$this->_strpos++];
 525                  break;
 526  
 527              case XML_WBXML_GLOBAL_TOKEN_ENTITY:
 528                  // Section 5.8.4.3
 529                  $value .= $this->entity(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 530                  break;
 531  
 532              case XML_WBXML_GLOBAL_TOKEN_STR_I:
 533                  // Section 5.8.4.1
 534                  $value .= $this->termstr($input);
 535                  break;
 536  
 537              case XML_WBXML_GLOBAL_TOKEN_STR_T:
 538                  // Section 5.8.4.1
 539                  $value .= $this->getStringTableEntry(XML_WBXML::MBUInt32ToInt($input, $this->_strpos));
 540                  break;
 541  
 542              case XML_WBXML_GLOBAL_TOKEN_OPAQUE:
 543                  // Section 5.8.4.6
 544                  $size = XML_WBXML::MBUInt32ToInt($input, $this->_strpos);
 545                  $b = substr($input, $this->_strpos, $this->_strpos + $size);
 546                  $this->_strpos += $size;
 547  
 548                  $value .= $b;
 549                  break;
 550  
 551              case XML_WBXML_GLOBAL_TOKEN_END:
 552                  // Section 5.8.4.7.1
 553                  $hasMoreAttributes = false;
 554                  if (isset($attr)) {
 555                      $attrs[] = array('attribute' => $attr,
 556                                       'value' => $value);
 557                  }
 558                  break;
 559  
 560              case XML_WBXML_GLOBAL_TOKEN_SWITCH_PAGE:
 561                  // Section 5.8.4.7.2
 562                  $codePage = $this->getByte($input);
 563                  if (!$this->_prevAttributeDTD) {
 564                      $this->_prevAttributeDTD = $this->_attributeDTD;
 565                  }
 566  
 567                  $this->switchAttributeCodePage($codePage);
 568                  break;
 569  
 570              default:
 571                  if ($token > 128) {
 572                      if (isset($attr)) {
 573                          $attrs[] = array('attribute' => $attr,
 574                                           'value' => $value);
 575                      }
 576                      $attr = $this->_attributeDTD->toAttribute($token);
 577                  } else {
 578                      // Value.
 579                      $value .= $this->_attributeDTD->toAttribute($token);
 580                  }
 581                  break;
 582              }
 583          }
 584  
 585          if (!$this->_prevAttributeDTD) {
 586              $this->_attributeDTD = $this->_prevAttributeDTD;
 587              $this->_prevAttributeDTD = false;
 588          }
 589  
 590          $this->stopGetAttributes();
 591      }
 592  
 593      function startGetAttributes()
 594      {
 595          $this->_isAttribute = true;
 596      }
 597  
 598      function stopGetAttributes()
 599      {
 600          $this->_isAttribute = false;
 601      }
 602  
 603      function getCurrentURI()
 604      {
 605          if ($this->_isAttribute) {
 606              return $this->_tagDTD->getURI();
 607          } else {
 608              return $this->_attributeDTD->getURI();
 609          }
 610      }
 611  
 612      function writeString($str)
 613      {
 614          $this->_ch->characters($str);
 615      }
 616  
 617      function getTag($tag)
 618      {
 619          // Should know which state it is in.
 620          return $this->_tagDTD->toTagStr($tag);
 621      }
 622  
 623      function getAttribute($attribute)
 624      {
 625          // Should know which state it is in.
 626          $this->_attributeDTD->toAttributeInt($attribute);
 627      }
 628  
 629      function switchElementCodePage($codePage)
 630      {
 631          $this->_tagDTD = &$this->_dtdManager->getInstance($this->_tagDTD->toCodePageStr($codePage));
 632          $this->switchAttributeCodePage($codePage);
 633      }
 634  
 635      function switchAttributeCodePage($codePage)
 636      {
 637          $this->_attributeDTD = &$this->_dtdManager->getInstance($this->_attributeDTD->toCodePageStr($codePage));
 638      }
 639  
 640      /**
 641       * Return the hex version of the base 10 $entity.
 642       */
 643      function entity($entity)
 644      {
 645          return dechex($entity);
 646      }
 647  
 648      /**
 649       * Reads a null terminated string.
 650       */
 651      function termstr($input)
 652      {
 653          $str = '#'; // must start with nonempty string to allow array access 
 654          $i = 0;
 655          $ch = $input[$this->_strpos++];
 656          if (ord($ch) == 0) {
 657              return ''; // don't return '#'
 658          }
 659          while (ord($ch) != 0) {
 660              $str[$i++] = $ch;
 661              $ch = $input[$this->_strpos++];
 662          }
 663  
 664          return $str;
 665      }
 666  
 667  }
 668  


Généré le : Sun Feb 25 17:20:01 2007 par Balluche grâce à PHPXref 0.7