[ Index ]
 

Code source de eZ Publish 3.9.0

Accédez au Source d'autres logiciels libresSoutenez Angelica Josefina !

title

Body

[fermer]

/lib/ezxml/classes/ -> ezxml.php (source)

   1  <?php
   2  //
   3  // $Id$
   4  //
   5  // Definition of eZXML class
   6  //
   7  // Created on: <13-Feb-2002 09:15:42 bf>
   8  //
   9  // SOFTWARE NAME: eZ publish
  10  // SOFTWARE RELEASE: 3.9.0
  11  // BUILD VERSION: 17785
  12  // COPYRIGHT NOTICE: Copyright (C) 1999-2006 eZ systems AS
  13  // SOFTWARE LICENSE: GNU General Public License v2.0
  14  // NOTICE: >
  15  //   This program is free software; you can redistribute it and/or
  16  //   modify it under the terms of version 2.0  of the GNU General
  17  //   Public License as published by the Free Software Foundation.
  18  //
  19  //   This program is distributed in the hope that it will be useful,
  20  //   but WITHOUT ANY WARRANTY; without even the implied warranty of
  21  //   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  22  //   GNU General Public License for more details.
  23  //
  24  //   You should have received a copy of version 2.0 of the GNU General
  25  //   Public License along with this program; if not, write to the Free
  26  //   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  27  //   MA 02110-1301, USA.
  28  //
  29  //
  30  
  31  /*! \file ezxml.php
  32    XML DOM parser.
  33  */
  34  
  35  /*! \defgroup eZXML XML parser and DOM library */
  36  
  37  /*!
  38    \class eZXML ezxml.php
  39    \ingroup eZXML
  40    \brief eZXML handles parsing of well formed XML documents.
  41  
  42    eZXML will create a DOM tree from well formed XML documents.
  43  
  44   \sa eZDOMDocument eZDOMNode
  45  */
  46  
  47  include_once ( "lib/ezutils/classes/ezdebug.php" );
  48  include_once ( "lib/ezxml/classes/ezdomnode.php" );
  49  include_once ( "lib/ezxml/classes/ezdomdocument.php" );
  50  
  51  define( "EZ_NODE_TYPE_ELEMENT", 1 );
  52  define( "EZ_NODE_TYPE_ATTRIBUTE", 2 );
  53  define( "EZ_NODE_TYPE_TEXT", 3 );
  54  define( "EZ_NODE_TYPE_CDATASECTION", 4 );
  55  
  56  class eZXML
  57  {
  58      /*!
  59        Constructor
  60      */
  61      function eZXML( )
  62      {
  63  
  64      }
  65  
  66      /*!
  67        Will return a DOM object tree from the well formed XML.
  68  
  69        $params["SetParentNode"] = false/true : create eZDOMDocument with setParentNode parameter set to true or false.
  70        $params["TrimWhiteSpace"] = false/true : should the XML parser ignore whitespaces between tags.
  71        $params["CharsetConversion"] = false/true : Whether charset conversion is done or not, default is true.
  72        $params["ConvertSpecialChars"] = false/true: whether to convert &lt; &gt; &amp; etc into < > &; default is true.
  73      */
  74      function &domTree( $xmlDoc, $params = array(), $native = false )
  75      {
  76          /* We remove all control chars from the text, although they
  77           * should have not be there in the first place. This is
  78           * iso-8859-1 and UTF-8 safe. Those characters might also never exist
  79           * in an XML document in the first place
  80           * (http://w3.org/TR/2004/REC-xml-20040204/#NT-Char) so it's safe to
  81           * remove them */
  82          $xmlDoc = preg_replace('/[\x00-\x08\x0b-\x0c\x0e-\x1f]/', '', $xmlDoc);
  83  
  84          if ( $native and function_exists( 'domxml_open_mem' ) )
  85          {
  86              $domDocument = domxml_open_mem( $xmlDoc );
  87              return $domDocument;
  88          }
  89  
  90          if ( !isset( $params["TrimWhiteSpace"] ) )
  91              $params["TrimWhiteSpace"] = true;
  92  
  93          if ( !isset( $params["SetParentNode"] ) )
  94              $params["SetParentNode"] = false;
  95  
  96          $schema = false;
  97          if ( isset( $params["Schema"] ) && get_class( $params["Schema"]  ) == "ezschema" )
  98          {
  99              $schema = $params["Schema"];
 100          }
 101          $charset = 'UTF-8';
 102          if ( isset( $params['CharsetConversion'] ) and
 103               !$params['CharsetConversion'] )
 104              $charset = false;
 105          if ( !isset( $params['ConvertSpecialChars'] ) )
 106          {
 107              $params['ConvertSpecialChars'] = true;
 108          }
 109  
 110          $TagStack = array();
 111  
 112          $xmlAttributes = array();
 113  
 114          // strip header
 115          if ( preg_match( "#<\?xml(.*?)\?>#", $xmlDoc, $matches ) )
 116          {
 117              $xmlAttributeText = $matches[1];
 118              $xmlAttributes = $this->parseAttributes( $xmlAttributeText );
 119              for ( $i = 0; $i < count( $xmlAttributes ); ++$i )
 120              {
 121                  $xmlAttribute =& $xmlAttributes[$i];
 122                  if ( $xmlAttribute->name() == 'encoding' )
 123                      $charset = $xmlAttribute->content();
 124                  // This is required due to a bug in an old xml parser
 125                  else if ( $xmlAttribute->name() == 'charset' )
 126                      $charset = $xmlAttribute->content();
 127              }
 128          }
 129          else if ( !preg_match( "#<[a-zA-Z0-9_-]+>#", $xmlDoc ) )
 130          {
 131              $retVal = null;
 132              return $retVal;
 133          }
 134          if ( $charset !== false )
 135          {
 136              include_once ( 'lib/ezi18n/classes/eztextcodec.php' );
 137              $codec =& eZTextCodec::instance( $charset, false, false );
 138              if ( $codec )
 139              {
 140                  $xmlDoc = $codec->convertString( $xmlDoc );
 141              }
 142          }
 143  
 144          $xmlDoc = preg_replace( "#<\?.*?\?>#", "", $xmlDoc );
 145  
 146          // get document version
 147          $xmlDoc = preg_replace( "%<\!DOCTYPE.*?>%is", "", $xmlDoc );
 148  
 149          // convert all newline types to unix newlines
 150          $xmlDoc = preg_replace( "#\n|\r\n|\r#", "\n", $xmlDoc );
 151  
 152          // strip comments
 153          $xmlDoc = $this->stripComments( $xmlDoc );
 154  
 155          // libxml compatible object creation
 156          $domDocument = new eZDOMDocument( '', $params["SetParentNode"] );
 157  
 158          $this->DOMDocument =& $domDocument;
 159          $currentNode =& $domDocument;
 160  
 161          $defaultNamespace = "";
 162  
 163          $pos = 0;
 164          $endTagPos = 0;
 165          while ( $pos < strlen( $xmlDoc ) )
 166          {
 167              $char = $xmlDoc[$pos];
 168              if ( $char == "<" )
 169              {
 170                  // find tag name
 171                  $endTagPos = strpos( $xmlDoc, ">", $pos );
 172  
 173                  // tag name with attributes
 174                  $tagName = substr( $xmlDoc, $pos + 1, $endTagPos - ( $pos + 1 ) );
 175  
 176                  // check if it's an endtag </tagname>
 177                  if ( $tagName[0] == "/" )
 178                  {
 179                      $lastNodeArray = array_pop( $TagStack );
 180                      $lastTag = $lastNodeArray["TagName"];
 181  
 182                      $lastNode =& $lastNodeArray["ParentNodeObject"];
 183  
 184                      unset( $currentNode );
 185                      $currentNode =& $lastNode;
 186  
 187                      $tagName = substr( $tagName, 1, strlen( $tagName ) );
 188  
 189                      // strip out namespace; nameSpace:Name
 190                      $colonPos = strpos( $tagName, ":" );
 191  
 192                      if ( $colonPos > 0 )
 193                          $tagName = substr( $tagName, $colonPos + 1, strlen( $tagName ) );
 194  
 195                      if ( $lastTag != $tagName )
 196                      {
 197                          eZDebug::writeError( "Error parsing XML, unmatched tags $tagName" );
 198                          $retVal = false;
 199                          return $retVal;
 200                      }
 201                      else
 202                      {
 203                          //    print( "endtag name: $tagName ending: $lastTag <br> " );
 204                      }
 205                  }
 206                  else
 207                  {
 208                      $firstSpaceEnd = strpos( $tagName, " " );
 209                      $firstNewlineEnd = strpos( $tagName, "\n" );
 210  
 211                      if ( $firstNewlineEnd != false )
 212                      {
 213                          if ( $firstSpaceEnd != false )
 214                          {
 215                              $tagNameEnd = min( $firstSpaceEnd, $firstNewlineEnd );
 216                          }
 217                          else
 218                          {
 219                              $tagNameEnd = $firstNewlineEnd;
 220                          }
 221                      }
 222                      else
 223                      {
 224                          if ( $firstSpaceEnd != false )
 225                          {
 226                              $tagNameEnd = $firstSpaceEnd;
 227                          }
 228                          else
 229                          {
 230                              $tagNameEnd = 0;
 231                          }
 232                      }
 233  
 234                      if ( $tagNameEnd > 0 )
 235                      {
 236                          $justName = substr( $tagName, 0, $tagNameEnd );
 237                      }
 238                      else
 239                          $justName = $tagName;
 240  
 241  
 242                      // strip out the namespace prefix
 243                      // If $justname contains ![CDATA[ we should not set namespace prefix
 244                      $colonPos = strpos( $justName, "![CDATA[" ) === false ? strpos( $justName, ":" ) : false;
 245  
 246                      $prefix = "";
 247                      if ( $colonPos > 0 )
 248                      {
 249                          $prefix = substr( $justName, 0, $colonPos );
 250                          $justName = substr( $justName, $colonPos + 1, strlen( $justName ) );
 251                      }
 252  
 253  
 254                      // remove trailing / from the name if exists
 255                      if ( $justName[strlen($justName) - 1]  == "/" )
 256                      {
 257                          $justName = substr( $justName, 0, strlen( $justName ) - 1 );
 258                      }
 259  
 260  
 261                      // create the new XML element node
 262                      unset( $subNode );
 263                      $subNode = $domDocument->createElementNode( $justName );
 264  
 265                      // find attributes
 266                      if ( $tagNameEnd > 0 )
 267                      {
 268                          unset( $attributePart );
 269                          $attributePart = substr( $tagName, $tagNameEnd, strlen( $tagName ) );
 270  
 271                          // attributes
 272                          unset( $attr );
 273                          $attr = $this->parseAttributes( $attributePart );
 274  
 275                          if ( $attr != false )
 276                              $subNode->Attributes =& $attr;
 277                      }
 278  
 279                      if ( $prefix != false  )
 280                      {
 281                          $subNode->Prefix = $prefix;
 282  
 283                          // find prefix
 284                          if ( isSet( $this->NamespaceArray[$prefix] ) )
 285                          {
 286                              $subNode->setNamespaceURI( $this->NamespaceArray[$prefix] );
 287                          }
 288                          else
 289                          {
 290                              eZDebug::writeError( "Namespace: $prefix not defined", "eZ xml" );
 291                          }
 292                      }
 293                      else
 294                      {
 295                          // set the default namespace
 296                          if ( isset( $this->NamespaceStack[0] ) )
 297                          {
 298                              $subNode->setNamespaceURI( $this->NamespaceStack[0] );
 299                          }
 300                      }
 301  
 302                      // check for CDATA
 303                      $cdataSection = "";
 304                      $isCDATASection = false;
 305                      $cdataPos = strpos( $xmlDoc, "<![CDATA[", $pos );
 306                      if ( $cdataPos == $pos && $pos > 0)
 307                      {
 308                          $isCDATASection = true;
 309                          $endTagPos = strpos( $xmlDoc, "]]>", $cdataPos );
 310                          if ( $endTagPos == false )
 311                          {
 312                              eZDebug::writeError( "XML parser error: Closing tag \']]>\' for <![CDATA[ not found" , "eZ xml" );
 313                              $endTagPos = strlen($xmlDoc);
 314                          }
 315                          $cdataSection = substr( $xmlDoc, $cdataPos + 9, $endTagPos - ( $cdataPos + 9 ) );
 316  
 317                          // new CDATA node
 318                          $subNode->Name = $subNode->LocalName = "#cdata-section";
 319                          $subNode->Content = $cdataSection;
 320                          $subNode->Type = EZ_NODE_TYPE_CDATASECTION;
 321  
 322                          $pos = $endTagPos;
 323                          $endTagPos += 2;
 324                      }
 325                      else
 326                      {
 327                          // element start tag
 328                          //$subNode->Name = $justName;
 329                          //$subNode->LocalName = $justName;
 330                          //$subNode->Type = EZ_NODE_TYPE_ELEMENT;
 331  
 332                          $domDocument->registerElement( $subNode );
 333                      }
 334  
 335  
 336                      $currentNode->appendChild( $subNode );
 337  
 338  
 339                      // check it it's a oneliner: <tagname /> or a cdata section
 340                      if ( $isCDATASection == false )
 341                          if ( $tagName[strlen($tagName) - 1]  != "/" )
 342                          {
 343                              $TagStack[] = array( "TagName" => $justName, "ParentNodeObject" => &$currentNode );
 344  
 345                              unset( $currentNode );
 346                              $currentNode =& $subNode;
 347                          }
 348                  }
 349              }
 350  
 351              $pos = strpos( $xmlDoc, "<", $pos + 1 );
 352  
 353              if ( $pos == false )
 354              {
 355                  // end of document
 356                  $pos = strlen( $xmlDoc );
 357              }
 358              else
 359              {
 360                  // content tag
 361                  $tagContent = substr( $xmlDoc, $endTagPos + 1, $pos - ( $endTagPos + 1 ) );
 362  
 363                  // Keep the whitespace consistent, parsing back and forward shouldn't change data
 364                  $tagContent = preg_replace( "#[\n]+[\s]*$#", "", $tagContent, 1 );
 365  
 366                  if ( ( $params["TrimWhiteSpace"] == true and trim( $tagContent ) != "" ) or ( $params["TrimWhiteSpace"] == false and $tagContent != "" ) )
 367                  {
 368                      // convert special chars
 369                      if ( $params["ConvertSpecialChars"] == true )
 370                      {
 371                          $tagContent = str_replace("&gt;", ">", $tagContent );
 372                          $tagContent = str_replace("&lt;", "<", $tagContent );
 373                          $tagContent = str_replace("&apos;", "'", $tagContent );
 374                          $tagContent = str_replace("&quot;", '"', $tagContent );
 375                          $tagContent = str_replace("&amp;", "&", $tagContent );
 376                      }
 377  
 378                      unset( $subNode );
 379                      $subNode = $domDocument->createTextNode( $tagContent );
 380  
 381                      $domDocument->registerElement( $subNode );
 382                      $currentNode->appendChild( $subNode );
 383                  }
 384              }
 385          }
 386  
 387          return $domDocument;
 388      }
 389  
 390      /*!
 391        \static
 392        \private
 393      */
 394      function stripComments( &$str )
 395      {
 396          return preg_replace( "#<\!--.*?-->#s", "", $str );
 397      }
 398  
 399      /*!
 400        \private
 401        Parses the attributes. Returns false if no attributes in the supplied string is found.
 402      */
 403      function parseAttributes( $attributeString )
 404      {
 405          $ret = false;
 406  
 407          preg_match_all( "/([a-zA-Z0-9:_-]+\s*=\s*(\"|').*?(\\2))/i",  $attributeString, $attributeArray );
 408  
 409          foreach ( $attributeArray[0] as $attributePart )
 410          {
 411              if ( trim( $attributePart ) != "" && trim( $attributePart ) != "/" )
 412              {
 413                  $attributeNamespaceURI = false;
 414                  $attributePrefix = false;
 415                  $attributeTmpArray = preg_split ("#\s*(=\s*(\"|'))#", $attributePart );
 416  
 417                  $attributeName = $attributeTmpArray[0];
 418  
 419                  // strip out namespace; nameSpace:Name
 420                  $colonPos = strpos( $attributeName, ":" );
 421  
 422                  if ( $colonPos > 0 )
 423                  {
 424                      $attributePrefix = substr( $attributeName, 0, $colonPos );
 425                      $attributeName = substr( $attributeName, $colonPos + 1, strlen( $attributeName ) );
 426                  }
 427                  else
 428                  {
 429                      $attributePrefix = false;
 430                  }
 431  
 432                  $attributeValue = $attributeTmpArray[1];
 433  
 434                  // remove " from value part
 435                  $attributeValue = substr( $attributeValue, 0, strlen( $attributeValue ) - 1);
 436  
 437                  $attributeValue = str_replace( "&gt;", ">", $attributeValue );
 438                  $attributeValue = str_replace( "&lt;", "<", $attributeValue );
 439                  $attributeValue = str_replace( "&apos;", "'", $attributeValue );
 440                  $attributeValue = str_replace( "&quot;", '"', $attributeValue );
 441                  $attributeValue = str_replace( "&amp;", "&", $attributeValue );
 442  
 443                  // check for namespace definition
 444                  if ( $attributePrefix == "xmlns" )
 445                  {
 446                      $attributeNamespaceURI = $attributeValue;
 447                      $this->NamespaceArray[$attributeName] = $attributeValue;
 448  
 449                      $this->DOMDocument->registerNamespaceAlias( $attributeName, $attributeValue );
 450                  }
 451  
 452                  // check for default namespace definition
 453                  if ( $attributeName == "xmlns" )
 454                  {
 455                      $attributeNamespaceURI = $attributeValue;
 456  
 457                      // change the default namespace
 458                      $this->NamespaceStack[] = $attributeNamespaceURI;
 459                  }
 460  
 461                  unset( $attrNode );
 462                  $attrNode = new eZDOMNode();
 463                  $attrNode->Name = $attributeName;
 464  
 465                  if ( $attributePrefix != false && $attributePrefix != "xmlns" )
 466                  {
 467                      $attrNode->Prefix = $attributePrefix;
 468                      $attrNode->LocalName = $attributeName;
 469  
 470                      // find prefix
 471                      if ( isSet( $this->NamespaceArray["$attributePrefix"] ) )
 472                      {
 473                          $attrNode->NamespaceURI = $this->NamespaceArray["$attributePrefix"];
 474                      }
 475                      else
 476                      {
 477                          eZDebug::writeError( "Namespace: $attributePrefix not found", "eZ xml" );
 478                      }
 479                  }
 480                  else if ( $attributePrefix == "xmlns" )
 481                  {
 482                      $attrNode->LocalName = $attributeName;
 483                      $attrNode->NamespaceURI = $attributeNamespaceURI;
 484                      $attrNode->Prefix = $attributePrefix;
 485                  }
 486                  else
 487                  {
 488                      // check for default namespace definition
 489                      if ( $attributeName == "xmlns" )
 490                      {
 491                          $attrNode->LocalName = $attributeName;
 492                          $attrNode->NamespaceURI = $attributeNamespaceURI;
 493                      }
 494                      else
 495                      {
 496                          $attrNode->NamespaceURI = false;
 497                          $attrNode->LocalName = false;
 498                      }
 499                      $attrNode->Prefix = false;
 500                  }
 501  
 502                  $attrNode->Type = EZ_NODE_TYPE_ATTRIBUTE;
 503                  $attrNode->Content = $attributeValue;
 504  
 505  
 506                  $ret[] = $attrNode;
 507  
 508              }
 509          }
 510          return $ret;
 511      }
 512  
 513      /// Contains the namespaces
 514      var $NamespaceStack = array();
 515  
 516      /// Contains the available namespaces
 517      var $NamespaceArray = array();
 518  
 519      /// Contains the current namespace
 520      var $CurrentNameSpace;
 521  
 522      /// Contains a reference to the DOM document object
 523      var $DOMDocument;
 524  }
 525  
 526  ?>


Généré le : Sat Feb 24 10:30:04 2007 par Balluche grâce à PHPXref 0.7