[ Index ]
 

Code source de eZ Publish 3.9.0

Accédez au Source d'autres logiciels libresSoutenez Angelica Josefina !

title

Body

[fermer]

/kernel/classes/datatypes/ezxmltext/ -> ezxmlinputparser.php (source)

   1  <?php
   2  //
   3  // Definition of eZXMLInputParser class
   4  //
   5  // Created on: <27-Mar-2006 15:28:39 ks>
   6  //
   7  // SOFTWARE NAME: eZ publish
   8  // SOFTWARE RELEASE: 3.9.0
   9  // BUILD VERSION: 17785
  10  // COPYRIGHT NOTICE: Copyright (C) 1999-2006 eZ systems AS
  11  // SOFTWARE LICENSE: GNU General Public License v2.0
  12  // NOTICE: >
  13  //   This program is free software; you can redistribute it and/or
  14  //   modify it under the terms of version 2.0  of the GNU General
  15  //   Public License as published by the Free Software Foundation.
  16  //
  17  //   This program is distributed in the hope that it will be useful,
  18  //   but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  //   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20  //   GNU General Public License for more details.
  21  //
  22  //   You should have received a copy of version 2.0 of the GNU General
  23  //   Public License along with this program; if not, write to the Free
  24  //   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  25  //   MA 02110-1301, USA.
  26  //
  27  
  28  /*
  29      Base class for the input parser.
  30      The goal of the parser is XML/HTML analyzing, fixing and transforming.
  31      The input is processed in 2 passes:
  32      - 1st pass: Parsing input, check for syntax errors, build DOM tree.
  33      - 2nd pass: Walking through DOM tree, checking validity by XML schema,
  34                  calling tag handlers to transform the tree.
  35                  
  36      Both passes are controlled by the arrays described below and by user handler functions.
  37  
  38  */
  39  
  40  include_once ( "lib/ezxml/classes/ezxml.php" );
  41  
  42  if ( !class_exists( 'eZXMLSchema' ) )
  43      include_once ( 'kernel/classes/datatypes/ezxmltext/ezxmlschema.php' );
  44  
  45  define( 'EZ_XMLINPUTPARSER_SHOW_NO_ERRORS', 0 );      // default $errorLevel of the eZXMLInputParser constructor
  46  define( 'EZ_XMLINPUTPARSER_SHOW_SCHEMA_ERRORS', 1 );  // NOTE(review): presumably enables schema validation messages only - usage not visible here, confirm
  47  define( 'EZ_XMLINPUTPARSER_SHOW_ALL_ERRORS', 2 );     // parseTag() and setAttributes() record their messages only when errorLevel >= 2
  48  
  49  class eZXMLInputParser
  50  {
  51  
  52      /* $InputTags array contains properties of elements that come from the input.
  53      
  54      Each array element describes a tag that comes from the input. Arrays index is
  55      a tag's name. Each element is an array that may contain the following members:
  56      
  57      'name'        - a string representing a new name of the tag,
  58      'nameHandler' - a name of the function that returns new tag name. Function format:
  59                      function &tagNameHandler( $tagName, &$attributes )
  60                      
  61      If neither of those elements is defined, the original tag's name is used.
  62                      
  63      'noChildren'  - boolean value that determines if this tag could have child tags,
  64                      default value is false.
  65      
  66      Example:
  67      
  68      var $InputTags = array(
  69      
  70          'old-name' => array( 'name' => 'new-name' ),
  71      
  72          'tagname' => array( 'nameHandler' => 'tagNameHandler',
  73                              'noChildren' => true ),
  74                              
  75           ...
  76           
  77           );
  78      */
  79  
  80      var $InputTags = array();
  81  
  82      /*
  83      $OutputTags array contains properties of elements that are produced in the output.
  84      Each array element describes a tag presented in the output. Arrays index is
  85      a tag's name. Each element is an array that may contain the following members:
  86      
  87      'parsingHandler' - "Parsing handler" called at parse pass 1 before processing tag's children.
  88      'initHandler'    - "Init handler" called at pass 2 before processing tag's children.
  89      'structHandler'  - "Structure handler" called at pass 2 after processing tag's children,
  90                         but before schema validity check. It can be used to implement structure
  91                         transformations.
  92      'publishHandler' - "Publish handler" called at pass 2 after schema validity check, so it is called
  93                         in case the element has its guaranteed place in the DOM tree.
  94                         
  95      'attributes'     - an array that describes attributes transformations. Array's index is the
  96                         original name of an attribute, and the value is the new name.
  97      
  98      'requiredInputAttributes' - attributes that are required in the input tag. If any of them is missing,
  99                                  the input is flagged as invalid.
 100                         
 101      Example:
 102      
 103      var $OutputTags = array(
 104      
 105          'custom'    => array( 'parsingHandler' => 'parsingHandlerCustom',
 106                                'initHandler' => 'initHandlerCustom',
 107                                'structHandler' => 'structHandlerCustom',
 108                                'publishHandler' => 'publishHandlerCustom',
 109                                'attributes' => array( 'title' => 'name' ) ),
 110                                
 111          ...
 112      );
 113                       
 114      */
 115  
 116      var $OutputTags = array();
 117  
 118      var $Namespaces = array( 'image' => 'http://ez.no/namespaces/ezpublish3/image/',
 119                               'xhtml' => 'http://ez.no/namespaces/ezpublish3/xhtml/',
 120                               'custom' => 'http://ez.no/namespaces/ezpublish3/custom/' );
 121  
 122      /*!
 123      
 124      The constructor.
 125         
 126      \param $validate   If true, parser quits immediately after validity flag (isInputValid)
 127                         set to false and function 'process' returns false.
 128                         
 129                         If false, parser tries to modify and transform the input automatically
 130                         in order to get the valid result. 
 131      */
 132  
 133      function eZXMLInputParser( $validate = false, $errorLevel = EZ_XMLINPUTPARSER_SHOW_NO_ERRORS, $parseLineBreaks = false,
 134                                 $removeDefaultAttrs = false )
 135      {
 136          $this->quitIfInvalid = $validate;
 137          $this->errorLevel = $errorLevel;
 138  
 139          $this->removeDefaultAttrs = $removeDefaultAttrs;
 140          $this->parseLineBreaks = $parseLineBreaks;
 141  
 142          $this->XMLSchema =& eZXMLSchema::instance();
 143          //$this->getClassesList();
 144  
 145          include_once ( 'lib/version.php' );
 146          $this->eZPublishVersion = eZPublishSDK::majorVersion() + eZPublishSDK::minorVersion() * 0.1;
 147  
 148          $ini =& eZINI::instance( 'ezxml.ini' );
 149          if ( $this->eZPublishVersion >= 3.8 )
 150          {
 151              if ( $ini->hasVariable( 'InputSettings', 'TrimSpaces' ) )
 152              {
 153                  $trimSpaces = $ini->variable( 'InputSettings', 'TrimSpaces' );
 154                  $this->TrimSpaces = $trimSpaces == 'true' ? true : false;
 155              }
 156      
 157              if ( $ini->hasVariable( 'InputSettings', 'AllowMultipleSpaces' ) )
 158              {
 159                  $allowMultipleSpaces = $ini->variable( 'InputSettings', 'AllowMultipleSpaces' );
 160                  $this->AllowMultipleSpaces = $allowMultipleSpaces == 'true' ? true : false;
 161              }
 162          }
 163          else
 164          {
 165              $this->TrimSpaces = true;
 166              $this->AllowMultipleSpaces = false;
 167          }
 168  
 169          if ( $this->eZPublishVersion >= 3.9 )
 170          {
 171              if ( $ini->hasVariable( 'InputSettings', 'AllowNumericEntities' ) )
 172              {
 173                  $allowNumericEntities = $ini->variable( 'InputSettings', 'AllowNumericEntities' );
 174                  $this->AllowNumericEntities = $allowNumericEntities == 'true' ? true : false;
 175              }
 176          }
 177          else
 178          {
 179              $this->AllowNumericEntities = false;
 180          }
 181  
 182      }
 183  
 184      function setDOMDocumentClass( $DOMDocumentClass )
 185      {
 186          $this->DOMDocumentClass = $DOMDocumentClass;
 187      }
 188  
 189      function setParseLineBreaks( $value )
 190      {
 191          $this->parseLineBreaks = $value;
 192      }
 193  
 194      function setRemoveDefaultAttrs( $value )
 195      {
 196          $this->removeDefaultAttrs = $value;
 197      }
 198  
 199      /*!
 200          Call this function to process your input
 201      */
 202      function process( $text, $createRootNode = true )
 203      {
 204          $text = str_replace( "\r", '', $text);
 205          $text = str_replace( "\t", ' ', $text);
 206          if ( !$this->parseLineBreaks )
 207          {
 208              $text = str_replace( "\n", '', $text);
 209          }
 210  
 211          if ( $createRootNode )
 212          {
 213              // Creating root section with namespaces definitions
 214              $this->Document = new $this->DOMDocumentClass( '', true );
 215              $mainSection =& $this->Document->createElement( 'section' );
 216              $this->Document->appendChild( $mainSection );
 217              foreach( $this->Namespaces as $prefix => $value )
 218              {
 219                  $mainSection->setAttributeNS( 'http://www.w3.org/2000/xmlns/', 'xmlns:' . $prefix, $value );
 220              }
 221          }
 222  
 223          // Perform pass 1
 224          // Parsing the source string
 225          $this->performPass1( $text );
 226  
 227          if ( $this->quitIfInvalid && !$this->isInputValid )
 228              return false;
 229  
 230          // Perform pass 2
 231          $this->performPass2();
 232  
 233          if ( $this->quitIfInvalid && !$this->isInputValid )
 234              return false;
 235  
 236          return $this->Document;
 237      }
 238  
 239      /*
 240         Pass 1: Parsing the source HTML string.
 241      */
 242  
 243      function performPass1( &$data )
 244      {
 245          $ret = true;
 246          $pos = 0;
 247  
 248          if ( $this->Document->Root )
 249          {
 250              do
 251              {
 252                  $this->parseTag( $data, $pos, $this->Document->Root );
 253                  if ( $this->quitIfInvalid && !$this->isInputValid )
 254                  {
 255                      $ret = false;
 256                      break;
 257                  }
 258  
 259              }
 260              while( $pos < strlen( $data ) );
 261          }
 262          else
 263          {
 264              $tmp = null;
 265              $this->parseTag( $data, $pos, $tmp );
 266              if ( $this->quitIfInvalid && !$this->isInputValid )
 267              {
 268                  $ret = false;
 269              }
 270          }
 271          return $ret;
 272      }
 273  
 274      // The main recursive function for pass 1
 275  
 276      function parseTag( &$data, &$pos, &$parent )
 277      {
 278          // Find tag, determine it's type, name and attributes.
 279          $initialPos = $pos;
 280  
 281          /*if ( $this->trimSpaces )
 282          {
 283              while( $pos < strlen( $data ) && $data[$pos] == ' ' ) $pos++;
 284          }*/
 285  
 286          if ( $pos >= strlen( $data ) )
 287          {
 288              return true;
 289          }
 290          $tagBeginPos = strpos( $data, '<', $pos );
 291  
 292          if ( $this->parseLineBreaks )
 293          {
 294              // Regard line break as a start tag position
 295              $lineBreakPos = strpos( $data, "\n", $pos );
 296              if ( $lineBreakPos !== false )
 297              {
 298                  if ( $tagBeginPos === false )
 299                      $tagBeginPos = $lineBreakPos;
 300                  else
 301                      $tagBeginPos = min( $tagBeginPos, $lineBreakPos );
 302              }
 303          }
 304  
 305          $tagName = '';
 306          $attributes = null;
 307          // If it doesn't begin with '<' then its a text node.
 308          if ( $tagBeginPos != $pos || $tagBeginPos === false )
 309          {
 310              $pos = $initialPos;
 311              $tagName = $newTagName = '#text';
 312              $noChildren = true;
 313  
 314              if ( !$tagBeginPos )
 315                  $tagBeginPos = strlen( $data );
 316  
 317              $textContent = substr( $data, $pos, $tagBeginPos - $pos );
 318  
 319              $textContent = $this->washText( $textContent );
 320  
 321              $pos = $tagBeginPos;
 322              if ( $textContent === '' )
 323                  return false;
 324          }
 325          // Process closing tag.
 326          elseif ( $data[$tagBeginPos] == '<' && $tagBeginPos + 1 < strlen( $data ) &&
 327                   $data[$tagBeginPos + 1] == '/' )
 328          {
 329              $tagEndPos = strpos( $data, '>', $tagBeginPos + 1 );
 330              if ( $tagEndPos === false )
 331              {
 332                  $pos = $tagBeginPos + 1;
 333  
 334                  $this->isInputValid = false;
 335                  if ( $this->errorLevel >= 2 )
 336                      $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', 'Wrong closing tag' );
 337                  return false;
 338              }
 339  
 340              $pos = $tagEndPos + 1;
 341              $closedTagName = strtolower( trim( substr( $data, $tagBeginPos + 2, $tagEndPos - $tagBeginPos - 2 ) ) );
 342  
 343              // Find matching tag in ParentStack array
 344              $firstLoop = true;
 345              for( $i = count( $this->ParentStack ) - 1; $i >= 0; $i-- )
 346              {
 347                  $parentNames = $this->ParentStack[$i];
 348                  if ( $parentNames[0] == $closedTagName )
 349                  {
 350                      array_pop( $this->ParentStack );
 351                      if ( !$firstLoop )
 352                      {
 353                          $pos = $tagBeginPos;
 354                          return true;
 355                      }
 356                      // If newTagName was '' we don't break children loop
 357                      elseif ( $parentNames[1] !== '' )
 358                          return true;
 359                      else
 360                          return false;
 361                  }
 362                  $firstLoop = false;
 363              }
 364  
 365              $this->isInputValid = false;
 366              if ( $this->errorLevel >= 2 )
 367                  $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', 'Wrong closing tag : &lt;/%1&gt;.', false, array( $closedTagName ) );
 368  
 369              return false;
 370          }
 371          // Insert <br/> instead of linebreaks
 372          elseif ( $this->parseLineBreaks && $data[$tagBeginPos] == "\n" )
 373          {
 374              $newTagName = 'br';
 375              $noChildren = true;
 376              $pos = $tagBeginPos + 1;
 377          }
 378          //  Regular tag: get tag's name and attributes.
 379          else
 380          {
 381              $tagEndPos = strpos( $data, '>', $tagBeginPos );
 382              if ( $tagEndPos === false )
 383              {
 384                  $pos = $tagBeginPos + 1;
 385  
 386                  $this->isInputValid = false;
 387                  if ( $this->errorLevel >= 2 )
 388                      $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', 'Wrong opening tag' );
 389                  return false;
 390              }
 391  
 392              $pos = $tagEndPos + 1;
 393              $tagString = substr( $data, $tagBeginPos + 1, $tagEndPos - $tagBeginPos - 1 );
 394              // Check for final backslash
 395              $noChildren = substr( $tagString, -1, 1 ) == '/' ? true : false;
 396              // Remove final backslash and spaces
 397              $tagString = preg_replace( "/\s*\/$/", "", $tagString );
 398  
 399              $firstSpacePos = strpos( $tagString, ' ' );
 400              if ( $firstSpacePos === false )
 401              {
 402                  $tagName = strtolower( trim( $tagString ) );
 403                  $attributeString = '';
 404              }
 405              else
 406              {
 407                  $tagName = strtolower( substr( $tagString, 0, $firstSpacePos ) );
 408                  $attributeString = substr( $tagString, $firstSpacePos + 1 );
 409                  $attributeString = trim( $attributeString );
 410                  // Parse attribute string
 411                  if ( $attributeString )
 412                      $attributes = $this->parseAttributes( $attributeString );
 413              }
 414  
 415              // Determine tag's name
 416              if ( isset( $this->InputTags[$tagName] ) )
 417              {
 418                  $thisInputTag =& $this->InputTags[$tagName];
 419  
 420                  if ( isset( $thisInputTag['name'] ) )
 421                      $newTagName = $thisInputTag['name'];
 422                  else
 423                      $newTagName =& $this->callInputHandler( 'nameHandler', $tagName, $attributes );
 424              }
 425              else
 426              {
 427                  if ( $this->XMLSchema->exists( $tagName ) )
 428                  {
 429                      $newTagName = $tagName;
 430                  }
 431                  else
 432                  {
 433                      $this->isInputValid = false;
 434                      if ( $this->errorLevel >= 2 )
 435                          $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', "Unknown tag: &lt;%1&gt;.", false, array( $tagName ) );
 436  
 437                      return false;
 438                  }
 439              }
 440  
 441              // Check 'noChildren' property
 442              if ( isset( $thisInputTag['noChildren'] ) )
 443                  $noChildren = true;
 444  
 445              $thisOutputTag =& $this->OutputTags[$newTagName];
 446  
 447              // Implementation of 'autoCloseOn' rule ( Handling of unclosed tags, ex.: <p>, <li> )
 448              if ( isset( $thisOutputTag['autoCloseOn'] ) &&
 449                   $parent &&
 450                   $parent->parentNode &&
 451                   in_array( $parent->nodeName, $thisOutputTag['autoCloseOn'] ) )
 452              {
 453                  // Wrong nesting: auto-close parent and try to re-parse this tag at higher level
 454                  array_pop( $this->ParentStack );
 455                  $pos = $tagBeginPos;
 456                  return true;
 457              }
 458  
 459              // Append to parent stack
 460              if ( !$noChildren && $newTagName !== false )
 461              {
 462                  $this->ParentStack[] = array( $tagName, $newTagName, $attributeString );
 463              }
 464  
 465              if ( !$newTagName )
 466              {
 467                  if ( $newTagName === false )
 468                  {
 469                      $this->isInputValid = false;
 470                      if ( $this->errorLevel >= 2 )
 471                          $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', "Can't convert tag's name: &lt;%1&gt;.", false, array( $tagName ) );
 472                  }
 473                  return false;
 474                  // TODO: return it before and don't append to ParentStack?
 475                  // (need to skip processing closing tag on empty tagname)
 476                  // LATER: no.. this is not very good for data consistance
 477              }
 478  
 479              // wordmatch.ini support
 480              if ( $attributeString )
 481              {
 482                  $this->wordMatchSupport( $newTagName, $attributes, $attributeString );
 483              }
 484          }
 485  
 486          // Create text or normal node.
 487          if ( $newTagName == '#text' )
 488              $element = $this->Document->createTextNode( $textContent );
 489          else
 490              $element =& $this->Document->createElement( $newTagName );
 491  
 492          if ( $attributes )
 493          {
 494              $this->setAttributes( $element, $attributes );
 495          }
 496  
 497          // Append element as a child or set it as root if there is no parent.
 498          if ( $parent )
 499              $parent->appendChild( $element );
 500          else
 501              $this->Document->appendChild( $element );
 502          // php5 TODO : $this->Document->documentElement->appendChild( $element );
 503  
 504          $params = array();
 505          $params[] =& $data;
 506          $params[] =& $pos;
 507          $params[] =& $tagBeginPos;
 508          $result =& $this->callOutputHandler( 'parsingHandler', $element, $params );
 509  
 510          if ( $result === false )
 511          {
 512              // This tag is already parsed in handler
 513              if ( !$noChildren )
 514                  array_pop( $this->ParentStack );
 515              return false;
 516          }
 517  
 518          if ( $this->quitIfInvalid && !$this->isInputValid )
 519              return false;
 520  
 521          // Process children
 522          if ( !$noChildren )
 523          {
 524              do
 525              {
 526                  $parseResult = $this->parseTag( $data, $pos, $element );
 527  
 528                  if ( $this->quitIfInvalid && !$this->isInputValid )
 529                      return false;
 530              }
 531              while( $parseResult !== true );
 532          }
 533  
 534          return false;
 535      }
 536  
 537      /*
 538          Helper functions for pass 1
 539      */
 540  
 541      function parseAttributes( $attributeString )
 542      {
 543          // Convert single quotes to double quotes
 544          $attributeString = preg_replace( "/ +([a-zA-Z0-9:-_#\-]+) *\='(.*?)'/e", "' \\1'.'=\"'.'\\2'.'\"'", ' ' . $attributeString );
 545      
 546          // Convert no quotes to double quotes and remove extra spaces
 547          $attributeString = preg_replace( "/ +([a-zA-Z0-9:-_#\-]+) *\= *([^\s'\"]+)/e", "' \\1'.'=\"'.'\\2'.'\" '", $attributeString );
 548      
 549          // Split by quotes followed by spaces
 550          $attributeArray = preg_split( "#(?<=\") +#", $attributeString );
 551      
 552          $attributes = array();
 553          foreach( $attributeArray as $attrStr )
 554          {
 555              if ( !$attrStr || strlen( $attrStr ) < 4 )
 556                  continue;
 557      
 558              list( $attrName, $attrValue ) = split( '="', $attrStr );
 559      
 560              $attrName = strtolower( trim( $attrName ) );
 561              if ( !$attrName )
 562                  continue;
 563      
 564              $attrValue = substr( $attrValue, 0, -1 );
 565              if ( $attrValue === '' || $attrValue === false )
 566                  continue;
 567      
 568              $attributes[$attrName] = $attrValue;
 569          }
 570      
 571          return $attributes;
 572      }
 573  
 574      function setAttributes( &$element, $attributes )
 575      {
 576          $thisOutputTag =& $this->OutputTags[$element->nodeName];
 577  
 578          foreach( $attributes as $key => $value )
 579          {
 580              // Convert attribute names
 581              if ( isset( $thisOutputTag['attributes'] ) &&
 582                   isset( $thisOutputTag['attributes'][$key] ) )
 583              {
 584                  $qualifiedName = $thisOutputTag['attributes'][$key];
 585              }
 586              else
 587                  $qualifiedName = $key;
 588  
 589              // Filter classes
 590              if ( $qualifiedName == 'class' )
 591              {
 592                  $classesList = $this->XMLSchema->getClassesList( $element->nodeName );
 593                  if ( !in_array( $value, $classesList ) )
 594                  {
 595                      $this->isInputValid = false;
 596                      if ( $this->errorLevel >= 2 )
 597                          $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', "Class '%1' is not allowed for element &lt;%2&gt; (check content.ini).", false, array( $value, $element->nodeName ) );
 598                      continue;
 599                  }
 600              }
 601  
 602              // Create attribute nodes
 603              if ( $qualifiedName )
 604              {
 605                  if ( strpos( $qualifiedName, ':' ) )
 606                  {
 607                      list( $prefix, $name ) = explode( ':', $qualifiedName );
 608                      if ( isset( $this->Namespaces[$prefix] ) )
 609                      {
 610                          $URI = $this->Namespaces[$prefix];
 611                          $element->setAttributeNS( $URI, $qualifiedName, $value );
 612                      }
 613                      else
 614                          eZDebug::writeWarning( "No namespace defined for prefix '$prefix'.", 'eZXML input parser' );
 615                  }
 616                  else
 617                  {
 618                      $element->setAttribute( $qualifiedName, $value );
 619                  }
 620              }
 621          }
 622  
 623          // Check for required attrs are present
 624          if ( isset( $this->OutputTags[$element->nodeName]['requiredInputAttributes'] ) )
 625          {
 626              foreach( $this->OutputTags[$element->nodeName]['requiredInputAttributes'] as $reqAttrName )
 627              {
 628                  $presented = false;
 629                  foreach( $attributes as $key => $value )
 630                  {
 631                      if ( $key == $reqAttrName )
 632                      {
 633                          $presented = true;
 634                          break;
 635                      }
 636                  }
 637                  if ( !$presented )
 638                  {
 639                      $this->isInputValid = false;
 640                      if ( $this->errorLevel >= 2 )
 641                          $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', "Required attribute '%1' is not presented in tag &lt;%2&gt;.",
 642                                                      false, array( $reqAttrName, $element->nodeName ) );
 643                  }
 644              }
 645          }
 646      }
 647  
 648      function washText( $textContent )
 649      {
 650          $textContent = $this->entitiesDecode( $textContent );
 651  
 652          if ( !$this->AllowNumericEntities )
 653              $textContent = $this->convertNumericEntities( $textContent );
 654  
 655          if ( !$this->AllowMultipleSpaces )
 656              $textContent = preg_replace( "/ {2,}/", " ", $textContent );
 657  
 658          return $textContent;
 659      }
 660  
 661      function entitiesDecode( $text )
 662      {
 663          //$text = str_replace( "&amp;", "&", $text );
 664          $text = str_replace( "&#039;", "'", $text );
 665  
 666          $text = str_replace( "&gt;", ">", $text );
 667          $text = str_replace( "&lt;", "<", $text );
 668          $text = str_replace( "&apos;", "'", $text );
 669          $text = str_replace( "&quot;", '"', $text );
 670          $text = str_replace( "&amp;", "&", $text );
 671          $text = str_replace( "&nbsp;", " ", $text );
 672          return $text;
 673      }
 674  
 675      function convertNumericEntities( $text )
 676      {
 677          if ( strlen( $text ) < 4 )
 678          {
 679              return $text;
 680          }
 681          // Convert other HTML entities to the current charset characters.
 682          include_once ( 'lib/ezi18n/classes/eztextcodec.php' );
 683          $codec = eZTextCodec::instance( 'unicode', false );
 684          $pos = 0;
 685          $domString = "";
 686          while ( $pos < strlen( $text ) - 1 )
 687          {
 688              $startPos = $pos;
 689              while( !( $text[$pos] == '&' && $text[$pos + 1] == '#' ) && $pos < strlen( $text ) - 1 )
 690                  $pos++;
 691      
 692              $domString .= substr( $text, $startPos, $pos - $startPos );
 693      
 694              if ( $pos < strlen( $text ) - 1 )
 695              {
 696                  $endPos = strpos( $text, ";", $pos + 2 );
 697                  if ( $endPos === false )
 698                  {
 699                      $convertedText .= '&#';
 700                      $pos += 2;
 701                      continue;
 702                  }
 703      
 704                  $code = substr( $text, $pos + 2, $endPos - ( $pos + 2 ) );
 705                  $char = $codec->convertString( array( $code ) );
 706      
 707                  $pos = $endPos + 1;
 708                  $domString .= $char;
 709              }
 710              else
 711              {
 712                  $domString .= substr( $text, $pos, 2 );
 713              }
 714          }
 715          return $domString;
 716      }
 717  
 718      /*function getClassesList()
 719      {
 720          $ini =& eZINI::instance( 'content.ini' );
 721          foreach( array_keys( $this->OutputTags ) as $tagName )
 722          {
 723              if ( $ini->hasVariable( $tagName, 'AvailableClasses' ) )
 724              {
 725                  $avail = $ini->variable( $tagName, 'AvailableClasses' );
 726                  if ( is_array( $avail ) && count( $avail ) )
 727                      $this->OutputTags[$tagName]['classesList'] = $avail;
 728                  else
 729                      $this->OutputTags[$tagName]['classesList'] = array();
 730              }
 731              else
 732                  $this->OutputTags[$tagName]['classesList'] = array();
 733          }
 734      }*/
 735  
 736      function wordMatchSupport( $newTagName, &$attributes, $attributeString )
 737      {
 738          $ini =& eZINI::instance( 'wordmatch.ini' );
 739          if ( $ini->hasVariable( $newTagName, 'MatchString' ) )
 740          {
 741              $matchArray = $ini->variable( $newTagName, 'MatchString' );
 742              if ( $matchArray )
 743              {
 744                  foreach ( array_keys( $matchArray ) as $key )
 745                  {
 746                      $matchString = $matchArray[$key];
 747                      if (  preg_match( "/$matchString/i", $attributeString ) )
 748                      {
 749                          $attributes['class'] = $key;
 750                          unset( $attributes['style'] );
 751                      }
 752                  }
 753              }
 754          }
 755      }
 756  
 757  
 758      /*
 759          Pass 2: Process the tree, run handlers, rebuild and validate.
 760      */
 761  
 762      function performPass2()
 763      {
 764          $tmp = null;
 765  
 766          //php5 TODO: $this->Document->documentElement;
 767          $this->processSubtree( $this->Document->Root, $tmp );
 768      }
 769  
 770      // main recursive function for pass 2
 771  
 772      function &processSubtree( &$element, &$lastHandlerResult )
 773      {
 774          $ret = null;
 775          $tmp = null;
 776  
 777          //eZDOMNode::writeDebugStr( $element, '$element' );
 778          //eZDOMNode::writeDebugStr( $this->Document->Root, 'root' );
 779  
 780          // Call "Init handler"
 781          $this->callOutputHandler( 'initHandler', $element, $tmp );
 782  
 783          // Process children
 784          if ( $element->hasChildNodes() )
 785          {
 786              // Make another copy of children to save primary structure
 787              // php5 TODO: childNodes->item(), childNodes->length()
 788              $childrenCount = count( $element->Children );
 789              $children = array();
 790              foreach( array_keys( $element->Children ) as $child_key )
 791              {
 792                  $children[] =& $element->Children[$child_key];
 793              }
 794              $lastResult = null;
 795              $newElements = array();
 796              for( $i = 0; $i < $childrenCount; $i++ )
 797              {
 798                  $childReturn =& $this->processSubtree( $children[$i], $lastResult );
 799          if ( isset( $childReturn['result'] ) )
 800                  $lastResult =& $childReturn['result'];
 801                  else
 802                  unset( $lastResult );
 803  
 804              if ( isset( $childReturn['new_elements'] ) )
 805                      $newElements = array_merge( $newElements, $childReturn['new_elements'] );
 806  
 807                  unset( $childReturn );
 808                  if ( $this->quitIfInvalid && !$this->isInputValid )
 809                  {
 810                      return $ret;
 811                  }
 812              }
 813  
 814          // process elements created in children handlers
 815              $this->processNewElements( $newElements );
 816          }
 817  
 818          // Call "Structure handler"
 819          $ret =& $this->callOutputHandler( 'structHandler', $element, $lastHandlerResult );
 820  
 821          // Process by schema and fix tree
 822          if ( !$this->processElementBySchema( $element ) )
 823          {
 824              return $ret;
 825          }
 826  
 827          $tmp = null;
 828          // Call "Publish handler"
 829          $this->callOutputHandler( 'publishHandler', $element, $tmp );
 830  
 831          // Process attributes according to the schema
 832          if( $element->hasAttributes() )
 833          {
 834              if ( !$this->XMLSchema->hasAttributes( $element ) )
 835              {
 836                  $element->removeAttributes();
 837              }
 838              else
 839              {
 840                  $this->processAttributesBySchema( $element );
 841              }
 842          }
 843          return $ret;
 844      }
 845      /*
 846          Helper functions for pass 2
 847      */
 848  
 849      // Check element's schema and fix subtree if needed
 850      function processElementBySchema( &$element, $verbose = true )
 851      {
 852          $parent =& $element->parentNode;
 853          if ( $parent )
 854          {
 855              // If this is a foreign element, remove it
 856              if ( !$this->XMLSchema->exists( $element ) )
 857              {
 858                  if ( $element->nodeName == 'custom' )
 859                  {
 860                      $this->isInputValid = false;
 861                      $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', "Custom tag '%1' is not allowed.",
 862                                                  false, array( $element->getAttribute( 'name' ) ) );
 863                  }
 864                  $parent->removeChild( $element );
 865                  return false;
 866              }
 867  
 868              // Delete if children required and no children
 869              if ( ( $this->XMLSchema->childrenRequired( $element ) || $element->getAttribute( 'children_required' ) )
 870                   && !$element->hasChildNodes() )
 871              {
 872                  $this->isInputValid = false;
 873                  if ( $verbose && $this->errorLevel >= 1 )
 874                  {
 875                      $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', "&lt;%1&gt; tag can't be empty.",
 876                                                  false, array( $element->nodeName ) );
 877                  }
 878                  $parent->removeChild( $element );
 879                  return false;
 880              }
 881  
 882              // Check schema and remove wrong elements
 883              $schemaCheckResult = $this->XMLSchema->check( $parent, $element );
 884              if ( !$schemaCheckResult )
 885              {
 886                  if ( $schemaCheckResult === false )
 887                  {
 888                      // Remove indenting spaces
 889                      if ( $element->Type == EZ_XML_NODE_TEXT && !trim( $element->content() ) )
 890                      {
 891                          $parent->removeChild( $element );
 892                          return false;
 893                      }
 894  
 895                      $this->isInputValid = false;
 896                      if ( $verbose && $this->errorLevel >= 1 )
 897                      {
 898                          $elementName = $element->nodeName == '#text' ? $element->nodeName : '&lt;' . $element->nodeName . '&gt;';
 899                          $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', "%1 is not allowed to be a child of &lt;%2&gt;.",
 900                                                      false, array( $elementName, $parent->nodeName ) );
 901                      }
 902                  }
 903                  $this->fixSubtree( $element, $element );
 904                  return false;
 905              }
 906          }
 907          // TODO: break processing of any node that doesn't have parent
 908          //       and is not a root node.
 909          elseif ( $element->nodeName != 'section' )
 910          {
 911              return false;
 912          }
 913          return true;
 914      }
 915  
 916      // Remove only nodes that don't match schema (recursively)
 917      function fixSubtree( &$element, &$mainChild )
 918      {
 919          $parent =& $element->parentNode;
 920          $mainParent =& $mainChild->parentNode;
 921          if ( $element->hasChildNodes() )
 922          {
 923              foreach( array_keys( $element->Children ) as $child_key )
 924              {
 925                  $child =& $element->Children[$child_key];
 926  
 927                  $element->removeChild( $child );
 928                  // php5 TODO: use child returned by insertBefore (php dom manual)
 929                  $mainParent->insertBefore( $child, $mainChild );
 930  
 931                  if ( !$this->XMLSchema->check( $mainParent, $child ) )
 932                      $this->fixSubtree( $child, $mainChild );
 933              }
 934          }
 935          $parent->removeChild( $element );
 936      }
 937  
 938      function processAttributesBySchema( &$element, $verbose = true )
 939      {
 940          // Remove attributes that don't match schema
 941          $schemaAttributes = $this->XMLSchema->attributes( $element );
 942          if ( $this->eZPublishVersion >= 3.9 )
 943          {
 944              $schemaCustomAttributes = $this->XMLSchema->customAttributes( $element );
 945          }
 946  
 947          $attributes = $element->attributes();
 948          foreach( $attributes as $attr )
 949          {
 950              $allowed = false;
 951              $removeAttr = false;
 952  
 953              // php5 TODO: small letters
 954              if ( $attr->Prefix )
 955                  $fullName = $attr->Prefix . ':' . $attr->LocalName;
 956              else
 957                  $fullName = $attr->LocalName;
 958  
 959              if ( $this->eZPublishVersion >= 3.9 )
 960              {
 961                  // check for allowed custom attributes (3.9)
 962                  if ( $attr->Prefix == 'custom' && in_array( $attr->LocalName, $schemaCustomAttributes ) )
 963                  {
 964                      $allowed = true;
 965                  }
 966                  else
 967                  {
 968                      if ( in_array( $fullName, $schemaAttributes ) )
 969                      {
 970                         $allowed = true;
 971                      }
 972                      elseif ( in_array( $fullName, $schemaCustomAttributes ) )
 973                      {
 974                          // add 'custom' prefix if it is not given
 975                          $allowed = true;
 976                          $removeAttr = true;
 977                          $element->setAttributeNS( $this->Namespaces['custom'], 'custom:' . $fullName, $attr->content() );
 978                      }
 979                  }
 980              }
 981              else
 982              {
 983                  if ( $attr->Prefix == 'custom' ||
 984                       in_array( $fullName, $schemaAttributes ) )
 985                  {
 986                      $allowed = true;
 987                  }
 988              }
 989  
 990              if ( !$allowed )
 991              {
 992                  $removeAttr = true;
 993                  $this->isInputValid = false;
 994                  if ( $verbose && $this->errorLevel >= 1 )
 995                  {
 996                      $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', "Attribute '%1' is not allowed in &lt;%2&gt; element.",
 997                                                          false, array( $fullName, $element->nodeName ) );
 998                  }
 999              }
1000              elseif ( $this->removeDefaultAttrs ) 
1001              {
1002                  // Remove attributes having default values
1003                  $default = $this->XMLSchema->attrDefaultValue( $element->nodeName, $fullName );
1004                  if ( $attr->Content == $default )
1005                  {
1006                      $removeAttr = true;
1007                  }
1008              }
1009  
1010              if ( $removeAttr )
1011              {
1012                  if ( $attr->Prefix )
1013                      $element->removeAttributeNS( $attr->NamespaceURI, $attr->LocalName );
1014                  else
1015                      $element->removeAttribute( $attr->nodeName );
1016              }
1017          }
1018      }
1019  
1020      function &callInputHandler( $handlerName, $tagName, &$attributes )
1021      {
1022          $result = null;
1023          $thisInputTag =& $this->InputTags[$tagName];
1024          if ( isset( $thisInputTag[$handlerName] ) )
1025          {
1026              if ( is_callable( array( $this, $thisInputTag[$handlerName] ) ) )
1027                  eval( '$result =& $this->' . $thisInputTag[$handlerName] . '( $tagName, $attributes );' );
1028              else
1029                  eZDebug::writeWarning( "'$handlerName' input handler for tag <$tagName> doesn't exist: '" . $thisInputTag[$handlerName] . "'.", 'eZXML input parser' );
1030          }
1031          return $result;
1032      }
1033  
1034      function &callOutputHandler( $handlerName, &$element, &$params )
1035      {
1036          $result = null;
1037          $thisOutputTag =& $this->OutputTags[$element->nodeName];
1038          if ( isset( $thisOutputTag[$handlerName] ) )
1039          {
1040              if ( is_callable( array( $this, $thisOutputTag[$handlerName] ) ) )
1041                  eval( '$result =& $this->' . $thisOutputTag[$handlerName] . '( $element, $params );' );
1042              else
1043                  eZDebug::writeWarning( "'$handlerName' output handler for tag <$element->nodeName> doesn't exist: '" . $thisOutputTag[$handlerName] . "'.", 'eZXML input parser' );
1044          }
1045          return $result;
1046      }
1047  
1048      // Creates new element and adds it to array for further post-processing.
1049      // Use this function if you need to process newly created element (check it by schema
1050      // and call 'structure' and 'publish' handlers)
1051      function &createAndPublishElement( $elementName, &$ret )
1052      {
1053          $element =& $this->Document->createElement( $elementName );
1054          //$element->setAttribute( 'ezparser-new-element', 'true' );
1055  
1056      if ( !isset( $ret['new_elements'] ) )
1057               $ret['new_elements'] = array();
1058  
1059      $ret['new_elements'][] =& $element;
1060          return $element;
1061      }
1062  
1063      function processNewElements( &$createdElements )
1064      {
1065          // Call publish handlers for newly created elements
1066          foreach( array_keys( $createdElements ) as $key )
1067          {
1068              $element =& $createdElements[$key];
1069  
1070              $tmp = null;
1071              // Call "Structure handler"
1072              $this->callOutputHandler( 'structHandler', $element, $tmp );
1073  
1074              if ( !$this->processElementBySchema( $element ) )
1075                  continue;
1076  
1077              $tmp2 = null;
1078              // Call "Publish handler"
1079              $this->callOutputHandler( 'publishHandler', $element, $tmp2 );
1080              //$element->removeAttribute( 'ezparser-new-element' );
1081          }
1082      }
1083  
1084      function getMessages()
1085      {
1086          return $this->Messages;
1087      }
1088  
1089      function isValid()
1090      {
1091          return $this->isInputValid;
1092      }
1093  
    // NOTE(review): presumably the class instantiated for $Document - not used
    // in the pass 2 code, confirm against pass 1.
    var $DOMDocumentClass = 'eZDOMDocument';

    // Schema object queried during pass 2 (exists(), check(), attributes(), ...)
    var $XMLSchema;
    // DOM document being processed; pass 2 starts at its Root node
    var $Document;
    // Translated error messages collected during processing, see getMessages()
    var $Messages = array();
    // Compared against 3.9 to decide whether custom attributes are schema-checked
    var $eZPublishVersion;

    // NOTE(review): not used in the pass 2 code - presumably pass 1 state
    var $ParentStack = array();

    // Schema-check messages are recorded only when this is >= 1
    var $errorLevel = 0;

    // Set to false as soon as a check marks the input as invalid, see isValid()
    var $isInputValid = true;
    // When true, processSubtree() stops walking once the input is invalid
    var $quitIfInvalid = false;

    // options that depend on settings
    var $TrimSpaces = true;
    var $AllowMultipleSpaces = false;
    var $AllowNumericEntities = false;

    // options that depend on parameters passed
    var $parseLineBreaks = false;
    // When true, attributes equal to their schema default value are removed
    var $removeDefaultAttrs = false;

    // NOTE(review): not referenced by the pass 2 methods, which pass new
    // elements around via 'new_elements' arrays instead - confirm whether
    // this is still used.
    var $createdElements = array();
1118  }
1119  ?>


Généré le : Sat Feb 24 10:30:04 2007 par Balluche grâce à PHPXref 0.7