[ Index ]
 

Code source de eZ Publish 3.9.0

Accédez au Source d'autres logiciels libresSoutenez Angelica Josefina !

title

Body

[fermer]

/extension/ezdhtml/ezxmltext/common/ -> ezxmlinputparser.php (source)

   1  <?php
   2  //
   3  // Definition of eZXMLInputParser class
   4  //
   5  // Created on: <27-Mar-2006 15:28:39 ks>
   6  //
   7  // SOFTWARE NAME: eZ publish
   8  // SOFTWARE RELEASE: 3.9.0
   9  // BUILD VERSION: 17785
  10  // COPYRIGHT NOTICE: Copyright (C) 1999-2006 eZ systems AS
  11  // SOFTWARE LICENSE: GNU General Public License v2.0
  12  // NOTICE: >
  13  //   This program is free software; you can redistribute it and/or
  14  //   modify it under the terms of version 2.0  of the GNU General
  15  //   Public License as published by the Free Software Foundation.
  16  //
  17  //   This program is distributed in the hope that it will be useful,
  18  //   but WITHOUT ANY WARRANTY; without even the implied warranty of
  19  //   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  20  //   GNU General Public License for more details.
  21  //
  22  //   You should have received a copy of version 2.0 of the GNU General
  23  //   Public License along with this program; if not, write to the Free
  24  //   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
  25  //   MA 02110-1301, USA.
  26  //
  27  
  28  /*
  29      Base class for the input parser.
  30      The goal of the parser is XML/HTML analyzing, fixing and transforming.
  31      The input is processed in 2 passes:
  32      - 1st pass: Parsing input, check for syntax errors, build DOM tree.
  33      - 2nd pass: Walking through DOM tree, checking validity by XML schema,
  34                  calling tag handlers to transform the tree.
  35                  
  36      Both passes are controlled by the arrays described below and user handler functions.
  37  
  38  */
  39  
  40  include_once ( "lib/ezxml/classes/ezxml.php" );
  41  
  42  if ( !class_exists( 'eZXMLSchema' ) )
  43      include_once ( 'kernel/classes/datatypes/ezxmltext/ezxmlschema.php' );
  44  
  45  define( 'EZ_XMLINPUTPARSER_SHOW_NO_ERRORS', 0 );
  46  define( 'EZ_XMLINPUTPARSER_SHOW_SCHEMA_ERRORS', 1 );
  47  define( 'EZ_XMLINPUTPARSER_SHOW_ALL_ERRORS', 2 );
  48  
  49  class eZXMLInputParser
  50  {
  51  
  52      /* $InputTags array contains properties of elements that come from the input.
  53      
  54      Each array element describes a tag that comes from the input. Arrays index is
  55      a tag's name. Each element is an array that may contain the following members:
  56      
  57      'name'        - a string representing a new name of the tag,
  58      'nameHandler' - a name of the function that returns new tag name. Function format:
  59                      function &tagNameHandler( $tagName, &$attributes )
  60                      
  61      If neither of those elements is defined, the original tag's name is used.
  62                      
  63      'noChildren'  - boolean value that determines if this tag could have child tags,
  64                      default value is false.
  65      
  66      Example:
  67      
  68      var $InputTags = array(
  69      
  70          'old-name' => array( 'name' => 'new-name' ),
  71      
  72          'tagname' => array( 'nameHandler' => 'tagNameHandler',
  73                              'noChildren' => true ),
  74                              
  75           ...
  76           
  77           );
  78      */
  79  
  80      var $InputTags = array();
  81  
  82      /*
  83      $OutputTags array contains properties of elements that are produced in the output.
  84      Each array element describes a tag presented in the output. Arrays index is
  85      a tag's name. Each element is an array that may contain the following members:
  86      
  87      'parsingHandler' - "Parsing handler" called at parse pass 1 before processing tag's children.
  88      'initHandler'    - "Init handler" called at pass 2 before processing tag's children.
  89      'structHandler'  - "Structure handler" called at pass 2 after processing tag's children,
  90                         but before schema validity check. It can be used to implement structure
  91                         transformations.
  92      'publishHandler' - "Publish handler" called at pass 2 after schema validity check, so it is called
  93                         in case the element has its guaranteed place in the DOM tree.
  94                         
  95      'attributes'     - an array that describes attributes transformations. Array's index is the
  96                         original name of an attribute, and the value is the new name.
  97      
  98      'requiredInputAttributes' - attributes that are required in the input tag. If any of them is missing,
  99                                  the input is flagged as invalid.
 100                         
 101      Example:
 102      
 103      var $OutputTags = array(
 104      
 105          'custom'    => array( 'parsingHandler' => 'parsingHandlerCustom',
 106                                'initHandler' => 'initHandlerCustom',
 107                                'structHandler' => 'structHandlerCustom',
 108                                'publishHandler' => 'publishHandlerCustom',
 109                                'attributes' => array( 'title' => 'name' ) ),
 110                                
 111          ...
 112      );
 113                       
 114      */
 115  
 116      var $OutputTags = array();
 117  
 118      var $Namespaces = array( 'image' => 'http://ez.no/namespaces/ezpublish3/image/',
 119                               'xhtml' => 'http://ez.no/namespaces/ezpublish3/xhtml/',
 120                               'custom' => 'http://ez.no/namespaces/ezpublish3/custom/' );
 121  
 122      /*!
 123      
 124      The constructor.
 125         
 126      \param $validate   If true, parser quits immediately after validity flag (isInputValid)
 127                         set to false and function 'process' returns false.
 128                         
 129                         If false, parser tries to modify and transform the input automatically
 130                         in order to get the valid result. 
 131      */
 132  
 133      function eZXMLInputParser( $validate = false, $errorLevel = EZ_XMLINPUTPARSER_SHOW_NO_ERRORS, $parseLineBreaks = false,
 134                                 $removeDefaultAttrs = false )
 135      {
 136          $this->quitIfInvalid = $validate;
 137          $this->errorLevel = $errorLevel;
 138  
 139          $this->removeDefaultAttrs = $removeDefaultAttrs;
 140          $this->parseLineBreaks = $parseLineBreaks;
 141  
 142          $this->XMLSchema =& eZXMLSchema::instance();
 143          //$this->getClassesList();
 144  
 145          include_once ( 'lib/version.php' );
 146          $this->eZPublishVersion = eZPublishSDK::majorVersion() + eZPublishSDK::minorVersion() * 0.1;
 147  
 148          $ini =& eZINI::instance( 'ezxml.ini' );
 149          if ( $this->eZPublishVersion >= 3.8 )
 150          {
 151              if ( $ini->hasVariable( 'InputSettings', 'TrimSpaces' ) )
 152              {
 153                  $trimSpaces = $ini->variable( 'InputSettings', 'TrimSpaces' );
 154                  $this->TrimSpaces = $trimSpaces == 'true' ? true : false;
 155              }
 156      
 157              if ( $ini->hasVariable( 'InputSettings', 'AllowMultipleSpaces' ) )
 158              {
 159                  $allowMultipleSpaces = $ini->variable( 'InputSettings', 'AllowMultipleSpaces' );
 160                  $this->AllowMultipleSpaces = $allowMultipleSpaces == 'true' ? true : false;
 161              }
 162          }
 163          else
 164          {
 165              $this->TrimSpaces = true;
 166              $this->AllowMultipleSpaces = false;
 167          }
 168  
 169          if ( $this->eZPublishVersion >= 3.9 )
 170          {
 171              if ( $ini->hasVariable( 'InputSettings', 'AllowNumericEntities' ) )
 172              {
 173                  $allowNumericEntities = $ini->variable( 'InputSettings', 'AllowNumericEntities' );
 174                  $this->AllowNumericEntities = $allowNumericEntities == 'true' ? true : false;
 175              }
 176          }
 177          else
 178          {
 179              $this->AllowNumericEntities = false;
 180          }
 181  
 182      }
 183  
 184      function setDOMDocumentClass( $DOMDocumentClass )
 185      {
 186          $this->DOMDocumentClass = $DOMDocumentClass;
 187      }
 188  
 189      function setParseLineBreaks( $value )
 190      {
 191          $this->parseLineBreaks = $value;
 192      }
 193  
 194      function setRemoveDefaultAttrs( $value )
 195      {
 196          $this->removeDefaultAttrs = $value;
 197      }
 198  
 199      /*!
 200          Call this function to process your input
 201      */
 202      function process( $text, $createRootNode = true )
 203      {
 204          $text = str_replace( "\r", '', $text);
 205          $text = str_replace( "\t", ' ', $text);
 206          if ( !$this->parseLineBreaks )
 207          {
 208              $text = str_replace( "\n", '', $text);
 209          }
 210  
 211          if ( $createRootNode )
 212          {
 213              // Creating root section with namespaces definitions
 214              $this->Document = new $this->DOMDocumentClass( '', true );
 215              $mainSection =& $this->Document->createElement( 'section' );
 216              $this->Document->appendChild( $mainSection );
 217              foreach( $this->Namespaces as $prefix => $value )
 218              {
 219                  $mainSection->setAttributeNS( 'http://www.w3.org/2000/xmlns/', 'xmlns:' . $prefix, $value );
 220              }
 221          }
 222  
 223          // Perform pass 1
 224          // Parsing the source string
 225          $this->performPass1( $text );
 226  
 227          if ( $this->quitIfInvalid && !$this->isInputValid )
 228              return false;
 229  
 230          // Perform pass 2
 231          $this->performPass2();
 232  
 233          if ( $this->quitIfInvalid && !$this->isInputValid )
 234              return false;
 235  
 236          return $this->Document;
 237      }
 238  
 239      /*
 240         Pass 1: Parsing the source HTML string.
 241      */
 242  
 243      function performPass1( &$data )
 244      {
 245          $ret = true;
 246          $pos = 0;
 247  
 248          if ( $this->Document->Root )
 249          {
 250              do
 251              {
 252                  $this->parseTag( $data, $pos, $this->Document->Root );
 253                  if ( $this->quitIfInvalid && !$this->isInputValid )
 254                  {
 255                      $ret = false;
 256                      break;
 257                  }
 258  
 259              }
 260              while( $pos < strlen( $data ) );
 261          }
 262          else
 263          {
 264              $tmp = null;
 265              $this->parseTag( $data, $pos, $tmp );
 266              if ( $this->quitIfInvalid && !$this->isInputValid )
 267              {
 268                  $ret = false;
 269                  break;
 270              }
 271          }
 272          return $ret;
 273      }
 274  
 275      // The main recursive function for pass 1
 276  
 277      function parseTag( &$data, &$pos, &$parent )
 278      {
 279          // Find tag, determine it's type, name and attributes.
 280          $initialPos = $pos;
 281  
 282          /*if ( $this->trimSpaces )
 283          {
 284              while( $pos < strlen( $data ) && $data[$pos] == ' ' ) $pos++;
 285          }*/
 286  
 287          if ( $pos >= strlen( $data ) )
 288          {
 289              return true;
 290          }
 291          $tagBeginPos = strpos( $data, '<', $pos );
 292  
 293          if ( $this->parseLineBreaks )
 294          {
 295              // Regard line break as a start tag position
 296              $lineBreakPos = strpos( $data, "\n", $pos );
 297              if ( $lineBreakPos !== false )
 298              {
 299                  if ( $tagBeginPos === false )
 300                      $tagBeginPos = $lineBreakPos;
 301                  else
 302                      $tagBeginPos = min( $tagBeginPos, $lineBreakPos );
 303              }
 304          }
 305  
 306          $tagName = '';
 307          $attributes = null;
 308          // If it doesn't begin with '<' then its a text node.
 309          if ( $tagBeginPos != $pos || $tagBeginPos === false )
 310          {
 311              $pos = $initialPos;
 312              $tagName = $newTagName = '#text';
 313              $noChildren = true;
 314  
 315              if ( !$tagBeginPos )
 316                  $tagBeginPos = strlen( $data );
 317  
 318              $textContent = substr( $data, $pos, $tagBeginPos - $pos );
 319  
 320              $textContent = $this->washText( $textContent );
 321  
 322              $pos = $tagBeginPos;
 323              if ( $textContent === '' )
 324                  return false;
 325          }
 326          // Process closing tag.
 327          elseif ( $data[$tagBeginPos] == '<' && $tagBeginPos + 1 < strlen( $data ) &&
 328                   $data[$tagBeginPos + 1] == '/' )
 329          {
 330              $tagEndPos = strpos( $data, '>', $tagBeginPos + 1 );
 331              if ( $tagEndPos === false )
 332              {
 333                  $pos = $tagBeginPos + 1;
 334  
 335                  $this->isInputValid = false;
 336                  if ( $this->errorLevel >= 2 )
 337                      $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', 'Wrong closing tag' );
 338                  return false;
 339              }
 340  
 341              $pos = $tagEndPos + 1;
 342              $closedTagName = strtolower( trim( substr( $data, $tagBeginPos + 2, $tagEndPos - $tagBeginPos - 2 ) ) );
 343  
 344              // Find matching tag in ParentStack array
 345              $firstLoop = true;
 346              for( $i = count( $this->ParentStack ) - 1; $i >= 0; $i-- )
 347              {
 348                  $parentNames = $this->ParentStack[$i];
 349                  if ( $parentNames[0] == $closedTagName )
 350                  {
 351                      array_pop( $this->ParentStack );
 352                      if ( !$firstLoop )
 353                      {
 354                          $pos = $tagBeginPos;
 355                          return true;
 356                      }
 357                      // If newTagName was '' we don't break children loop
 358                      elseif ( $parentNames[1] !== '' )
 359                          return true;
 360                      else
 361                          return false;
 362                  }
 363                  $firstLoop = false;
 364              }
 365  
 366              $this->isInputValid = false;
 367              if ( $this->errorLevel >= 2 )
 368                  $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', 'Wrong closing tag : &lt;/%1&gt;.', false, array( $closedTagName ) );
 369  
 370              return false;
 371          }
 372          // Insert <br/> instead of linebreaks
 373          elseif ( $this->parseLineBreaks && $data[$tagBeginPos] == "\n" )
 374          {
 375              $newTagName = 'br';
 376              $noChildren = true;
 377              $pos = $tagBeginPos + 1;
 378          }
 379          //  Regular tag: get tag's name and attributes.
 380          else
 381          {
 382              $tagEndPos = strpos( $data, '>', $tagBeginPos );
 383              if ( $tagEndPos === false )
 384              {
 385                  $pos = $tagBeginPos + 1;
 386  
 387                  $this->isInputValid = false;
 388                  if ( $this->errorLevel >= 2 )
 389                      $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', 'Wrong opening tag' );
 390                  return false;
 391              }
 392  
 393              $pos = $tagEndPos + 1;
 394              $tagString = substr( $data, $tagBeginPos + 1, $tagEndPos - $tagBeginPos - 1 );
 395              // Check for final backslash
 396              $noChildren = substr( $tagString, -1, 1 ) == '/' ? true : false;
 397              // Remove final backslash and spaces
 398              $tagString = preg_replace( "/\s*\/$/", "", $tagString );
 399  
 400              $firstSpacePos = strpos( $tagString, ' ' );
 401              if ( $firstSpacePos === false )
 402              {
 403                  $tagName = strtolower( trim( $tagString ) );
 404                  $attributeString = '';
 405              }
 406              else
 407              {
 408                  $tagName = strtolower( substr( $tagString, 0, $firstSpacePos ) );
 409                  $attributeString = substr( $tagString, $firstSpacePos + 1 );
 410                  $attributeString = trim( $attributeString );
 411                  // Parse attribute string
 412                  if ( $attributeString )
 413                      $attributes = $this->parseAttributes( $attributeString );
 414              }
 415  
 416              // Determine tag's name
 417              if ( isset( $this->InputTags[$tagName] ) )
 418              {
 419                  $thisInputTag =& $this->InputTags[$tagName];
 420  
 421                  if ( isset( $thisInputTag['name'] ) )
 422                      $newTagName = $thisInputTag['name'];
 423                  else
 424                      $newTagName =& $this->callInputHandler( 'nameHandler', $tagName, $attributes );
 425              }
 426              else
 427              {
 428                  if ( $this->XMLSchema->exists( $tagName ) )
 429                  {
 430                      $newTagName = $tagName;
 431                  }
 432                  else
 433                  {
 434                      $this->isInputValid = false;
 435                      if ( $this->errorLevel >= 2 )
 436                          $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', "Unknown tag: &lt;%1&gt;.", false, array( $tagName ) );
 437  
 438                      return false;
 439                  }
 440              }
 441  
 442              // Check 'noChildren' property
 443              if ( isset( $thisInputTag['noChildren'] ) )
 444                  $noChildren = true;
 445  
 446              $thisOutputTag =& $this->OutputTags[$newTagName];
 447  
 448              // Implementation of 'autoCloseOn' rule ( Handling of unclosed tags, ex.: <p>, <li> )
 449              if ( isset( $thisOutputTag['autoCloseOn'] ) &&
 450                   $parent &&
 451                   $parent->parentNode &&
 452                   in_array( $parent->nodeName, $thisOutputTag['autoCloseOn'] ) )
 453              {
 454                  // Wrong nesting: auto-close parent and try to re-parse this tag at higher level
 455                  array_pop( $this->ParentStack );
 456                  $pos = $tagBeginPos;
 457                  return true;
 458              }
 459  
 460              // Append to parent stack
 461              if ( !$noChildren && $newTagName !== false )
 462              {
 463                  $this->ParentStack[] = array( $tagName, $newTagName, $attributeString );
 464              }
 465  
 466              if ( !$newTagName )
 467              {
 468                  if ( $newTagName === false )
 469                  {
 470                      $this->isInputValid = false;
 471                      if ( $this->errorLevel >= 2 )
 472                          $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', "Can't convert tag's name: &lt;%1&gt;.", false, array( $tagName ) );
 473                  }
 474                  return false;
 475                  // TODO: return it before and don't append to ParentStack?
 476                  // (need to skip processing closing tag on empty tagname)
 477                  // LATER: no.. this is not very good for data consistance
 478              }
 479  
 480              // wordmatch.ini support
 481              if ( $attributeString )
 482              {
 483                  $this->wordMatchSupport( $newTagName, $attributes, $attributeString );
 484              }
 485          }
 486  
 487          // Create text or normal node.
 488          if ( $newTagName == '#text' )
 489              $element = $this->Document->createTextNode( $textContent );
 490          else
 491              $element =& $this->Document->createElement( $newTagName );
 492  
 493          if ( $attributes )
 494          {
 495              $this->setAttributes( $element, $attributes );
 496          }
 497  
 498          // Append element as a child or set it as root if there is no parent.
 499          if ( $parent )
 500              $parent->appendChild( $element );
 501          else
 502              $this->Document->appendChild( $element );
 503          // php5 TODO : $this->Document->documentElement->appendChild( $element );
 504  
 505          $params = array();
 506          $params[] =& $data;
 507          $params[] =& $pos;
 508          $params[] =& $tagBeginPos;
 509          $result =& $this->callOutputHandler( 'parsingHandler', $element, $params );
 510  
 511          if ( $result === false )
 512          {
 513              // This tag is already parsed in handler
 514              if ( !$noChildren )
 515                  array_pop( $this->ParentStack );
 516              return false;
 517          }
 518  
 519          if ( $this->quitIfInvalid && !$this->isInputValid )
 520              return false;
 521  
 522          // Process children
 523          if ( !$noChildren )
 524          {
 525              do
 526              {
 527                  $parseResult = $this->parseTag( $data, $pos, $element );
 528  
 529                  if ( $this->quitIfInvalid && !$this->isInputValid )
 530                      return false;
 531              }
 532              while( $parseResult !== true );
 533          }
 534  
 535          return false;
 536      }
 537  
 538      /*
 539          Helper functions for pass 1
 540      */
 541  
 542      function parseAttributes( $attributeString )
 543      {
 544          // Convert single quotes to double quotes
 545          $attributeString = preg_replace( "/ +([a-zA-Z0-9:-_#\-]+) *\='(.*?)'/e", "' \\1'.'=\"'.'\\2'.'\"'", ' ' . $attributeString );
 546      
 547          // Convert no quotes to double quotes and remove extra spaces
 548          $attributeString = preg_replace( "/ +([a-zA-Z0-9:-_#\-]+) *\= *([^\s'\"]+)/e", "' \\1'.'=\"'.'\\2'.'\" '", $attributeString );
 549      
 550          // Split by quotes followed by spaces
 551          $attributeArray = preg_split( "#(?<=\") +#", $attributeString );
 552      
 553          $attributes = array();
 554          foreach( $attributeArray as $attrStr )
 555          {
 556              if ( !$attrStr || strlen( $attrStr ) < 4 )
 557                  continue;
 558      
 559              list( $attrName, $attrValue ) = split( '="', $attrStr );
 560      
 561              $attrName = strtolower( trim( $attrName ) );
 562              if ( !$attrName )
 563                  continue;
 564      
 565              $attrValue = substr( $attrValue, 0, -1 );
 566              if ( $attrValue === '' || $attrValue === false )
 567                  continue;
 568      
 569              $attributes[$attrName] = $attrValue;
 570          }
 571      
 572          return $attributes;
 573      }
 574  
 575      function setAttributes( &$element, $attributes )
 576      {
 577          $thisOutputTag =& $this->OutputTags[$element->nodeName];
 578  
 579          foreach( $attributes as $key => $value )
 580          {
 581              // Convert attribute names
 582              if ( isset( $thisOutputTag['attributes'] ) &&
 583                   isset( $thisOutputTag['attributes'][$key] ) )
 584              {
 585                  $qualifiedName = $thisOutputTag['attributes'][$key];
 586              }
 587              else
 588                  $qualifiedName = $key;
 589  
 590              // Filter classes
 591              if ( $qualifiedName == 'class' )
 592              {
 593                  $classesList = $this->XMLSchema->getClassesList( $element->nodeName );
 594                  if ( !in_array( $value, $classesList ) )
 595                  {
 596                      $this->isInputValid = false;
 597                      if ( $this->errorLevel >= 2 )
 598                          $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', "Class '%1' is not allowed for element &lt;%2&gt; (check content.ini).", false, array( $value, $element->nodeName ) );
 599                      continue;
 600                  }
 601              }
 602  
 603              // Create attribute nodes
 604              if ( $qualifiedName )
 605              {
 606                  if ( strpos( $qualifiedName, ':' ) )
 607                  {
 608                      list( $prefix, $name ) = explode( ':', $qualifiedName );
 609                      if ( isset( $this->Namespaces[$prefix] ) )
 610                      {
 611                          $URI = $this->Namespaces[$prefix];
 612                          $element->setAttributeNS( $URI, $qualifiedName, $value );
 613                      }
 614                      else
 615                          eZDebug::writeWarning( "No namespace defined for prefix '$prefix'.", 'eZXML input parser' );
 616                  }
 617                  else
 618                  {
 619                      $element->setAttribute( $qualifiedName, $value );
 620                  }
 621              }
 622          }
 623  
 624          // Check for required attrs are present
 625          if ( isset( $this->OutputTags[$element->nodeName]['requiredInputAttributes'] ) )
 626          {
 627              foreach( $this->OutputTags[$element->nodeName]['requiredInputAttributes'] as $reqAttrName )
 628              {
 629                  $presented = false;
 630                  foreach( $attributes as $key => $value )
 631                  {
 632                      if ( $key == $reqAttrName )
 633                      {
 634                          $presented = true;
 635                          break;
 636                      }
 637                  }
 638                  if ( !$presented )
 639                  {
 640                      $this->isInputValid = false;
 641                      if ( $this->errorLevel >= 2 )
 642                          $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', "Required attribute '%1' is not presented in tag &lt;%2&gt;.",
 643                                                      false, array( $reqAttrName, $element->nodeName ) );
 644                  }
 645              }
 646          }
 647      }
 648  
 649      function washText( $textContent )
 650      {
 651          $textContent = $this->entitiesDecode( $textContent );
 652  
 653          if ( !$this->AllowNumericEntities )
 654              $textContent = $this->convertNumericEntities( $textContent );
 655  
 656          if ( !$this->AllowMultipleSpaces )
 657              $textContent = preg_replace( "/ {2,}/", " ", $textContent );
 658  
 659          return $textContent;
 660      }
 661  
 662      function entitiesDecode( $text )
 663      {
 664          //$text = str_replace( "&amp;", "&", $text );
 665          $text = str_replace( "&#039;", "'", $text );
 666  
 667          $text = str_replace( "&gt;", ">", $text );
 668          $text = str_replace( "&lt;", "<", $text );
 669          $text = str_replace( "&apos;", "'", $text );
 670          $text = str_replace( "&quot;", '"', $text );
 671          $text = str_replace( "&amp;", "&", $text );
 672          $text = str_replace( "&nbsp;", " ", $text );
 673          return $text;
 674      }
 675  
 676      function convertNumericEntities( $text )
 677      {
 678          if ( strlen( $text ) < 4 )
 679          {
 680              return $text;
 681          }
 682          // Convert other HTML entities to the current charset characters.
 683          include_once ( 'lib/ezi18n/classes/eztextcodec.php' );
 684          $codec = eZTextCodec::instance( 'unicode', false );
 685          $pos = 0;
 686          $domString = "";
 687          while ( $pos < strlen( $text ) - 1 )
 688          {
 689              $startPos = $pos;
 690              while( !( $text[$pos] == '&' && $text[$pos + 1] == '#' ) && $pos < strlen( $text ) - 1 )
 691                  $pos++;
 692      
 693              $domString .= substr( $text, $startPos, $pos - $startPos );
 694      
 695              if ( $pos < strlen( $text ) - 1 )
 696              {
 697                  $endPos = strpos( $text, ";", $pos + 2 );
 698                  if ( $endPos === false )
 699                  {
 700                      $convertedText .= '&#';
 701                      $pos += 2;
 702                      continue;
 703                  }
 704      
 705                  $code = substr( $text, $pos + 2, $endPos - ( $pos + 2 ) );
 706                  $char = $codec->convertString( array( $code ) );
 707      
 708                  $pos = $endPos + 1;
 709                  $domString .= $char;
 710              }
 711              else
 712              {
 713                  $domString .= substr( $text, $pos, 2 );
 714              }
 715          }
 716          return $domString;
 717      }
 718  
 719      /*function getClassesList()
 720      {
 721          $ini =& eZINI::instance( 'content.ini' );
 722          foreach( array_keys( $this->OutputTags ) as $tagName )
 723          {
 724              if ( $ini->hasVariable( $tagName, 'AvailableClasses' ) )
 725              {
 726                  $avail = $ini->variable( $tagName, 'AvailableClasses' );
 727                  if ( is_array( $avail ) && count( $avail ) )
 728                      $this->OutputTags[$tagName]['classesList'] = $avail;
 729                  else
 730                      $this->OutputTags[$tagName]['classesList'] = array();
 731              }
 732              else
 733                  $this->OutputTags[$tagName]['classesList'] = array();
 734          }
 735      }*/
 736  
 737      function wordMatchSupport( $newTagName, &$attributes, $attributeString )
 738      {
 739          $ini =& eZINI::instance( 'wordmatch.ini' );
 740          if ( $ini->hasVariable( $newTagName, 'MatchString' ) )
 741          {
 742              $matchArray = $ini->variable( $newTagName, 'MatchString' );
 743              if ( $matchArray )
 744              {
 745                  foreach ( array_keys( $matchArray ) as $key )
 746                  {
 747                      $matchString = $matchArray[$key];
 748                      if (  preg_match( "/$matchString/i", $attributeString ) )
 749                      {
 750                          $attributes['class'] = $key;
 751                          unset( $attributes['style'] );
 752                      }
 753                  }
 754              }
 755          }
 756      }
 757  
 758  
 759      /*
 760          Pass 2: Process the tree, run handlers, rebuild and validate.
 761      */
 762  
 763      function performPass2()
 764      {
 765          $tmp = null;
 766  
 767          //php5 TODO: $this->Document->documentElement;
 768          $this->processSubtree( $this->Document->Root, $tmp );
 769      }
 770  
 771      // main recursive function for pass 2
 772  
 773      function &processSubtree( &$element, &$lastHandlerResult )
 774      {
 775          $ret = null;
 776          $tmp = null;
 777  
 778          //eZDOMNode::writeDebugStr( $element, '$element' );
 779          //eZDOMNode::writeDebugStr( $this->Document->Root, 'root' );
 780  
 781          // Call "Init handler"
 782          $this->callOutputHandler( 'initHandler', $element, $tmp );
 783  
 784          // Process children
 785          if ( $element->hasChildNodes() )
 786          {
 787              // Make another copy of children to save primary structure
 788              // php5 TODO: childNodes->item(), childNodes->length()
 789              $childrenCount = count( $element->Children );
 790              $children = array();
 791              foreach( array_keys( $element->Children ) as $child_key )
 792              {
 793                  $children[] =& $element->Children[$child_key];
 794              }
 795              $lastResult = null;
 796              $newElements = array();
 797              for( $i = 0; $i < $childrenCount; $i++ )
 798              {
 799                  $childReturn =& $this->processSubtree( $children[$i], $lastResult );
 800          if ( isset( $childReturn['result'] ) )
 801                  $lastResult =& $childReturn['result'];
 802                  else
 803                  unset( $lastResult );
 804  
 805              if ( isset( $childReturn['new_elements'] ) )
 806                      $newElements = array_merge( $newElements, $childReturn['new_elements'] );
 807  
 808                  unset( $childReturn );
 809                  if ( $this->quitIfInvalid && !$this->isInputValid )
 810                  {
 811                      return $ret;
 812                  }
 813              }
 814  
 815          // process elements created in children handlers
 816              $this->processNewElements( $newElements );
 817          }
 818  
 819          // Call "Structure handler"
 820          $ret =& $this->callOutputHandler( 'structHandler', $element, $lastHandlerResult );
 821  
 822          // Process by schema and fix tree
 823          if ( !$this->processElementBySchema( $element ) )
 824          {
 825              return $ret;
 826          }
 827  
 828          $tmp = null;
 829          // Call "Publish handler"
 830          $this->callOutputHandler( 'publishHandler', $element, $tmp );
 831  
 832          // Process attributes according to the schema
 833          if( $element->hasAttributes() )
 834          {
 835              if ( !$this->XMLSchema->hasAttributes( $element ) )
 836              {
 837                  $element->removeAttributes();
 838              }
 839              else
 840              {
 841                  $this->processAttributesBySchema( $element );
 842              }
 843          }
 844          return $ret;
 845      }
 846      /*
 847          Helper functions for pass 2
 848      */
 849  
 850      // Check element's schema and fix subtree if needed
 851      function processElementBySchema( &$element, $verbose = true )
 852      {
 853          $parent =& $element->parentNode;
 854          if ( $parent )
 855          {
 856              // If this is a foreign element, remove it
 857              if ( !$this->XMLSchema->exists( $element ) )
 858              {
 859                  if ( $element->nodeName == 'custom' )
 860                  {
 861                      $this->isInputValid = false;
 862                      $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', "Custom tag '%1' is not allowed.",
 863                                                  false, array( $element->getAttribute( 'name' ) ) );
 864                  }
 865                  $parent->removeChild( $element );
 866                  return false;
 867              }
 868  
 869              // Delete if children required and no children
 870              if ( ( $this->XMLSchema->childrenRequired( $element ) || $element->getAttribute( 'children_required' ) )
 871                   && !$element->hasChildNodes() )
 872              {
 873                  $parent->removeChild( $element );
 874                  return false;
 875              }
 876  
 877              // Check schema and remove wrong elements
 878              $schemaCheckResult = $this->XMLSchema->check( $parent, $element );
 879              if ( !$schemaCheckResult )
 880              {
 881                  if ( $schemaCheckResult === false )
 882                  {
 883                      // Remove indenting spaces
 884                      if ( $element->Type == EZ_XML_NODE_TEXT && !trim( $element->content() ) )
 885                      {
 886                          $parent->removeChild( $element );
 887                          return false;
 888                      }
 889  
 890                      $this->isInputValid = false;
 891                      if ( $verbose && $this->errorLevel >= 1 )
 892                      {
 893                          $elementName = $element->nodeName == '#text' ? $element->nodeName : '&lt;' . $element->nodeName . '&gt;';
 894                          $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', "%1 is not allowed to be a child of &lt;%2&gt;.",
 895                                                      false, array( $elementName, $parent->nodeName ) );
 896                      }
 897                  }
 898                  $this->fixSubtree( $element, $element );
 899                  return false;
 900              }
 901          }
 902          // TODO: break processing of any node that doesn't have parent
 903          //       and is not a root node.
 904          elseif ( $element->nodeName != 'section' )
 905          {
 906              return false;
 907          }
 908          return true;
 909      }
 910  
 911      // Remove only nodes that don't match schema (recursively)
 912      function fixSubtree( &$element, &$mainChild )
 913      {
 914          $parent =& $element->parentNode;
 915          $mainParent =& $mainChild->parentNode;
 916          if ( $element->hasChildNodes() )
 917          {
 918              foreach( array_keys( $element->Children ) as $child_key )
 919              {
 920                  $child =& $element->Children[$child_key];
 921  
 922                  $element->removeChild( $child );
 923                  // php5 TODO: use child returned by insertBefore (php dom manual)
 924                  $mainParent->insertBefore( $child, $mainChild );
 925  
 926                  if ( !$this->XMLSchema->check( $mainParent, $child ) )
 927                      $this->fixSubtree( $child, $mainChild );
 928              }
 929          }
 930          $parent->removeChild( $element );
 931      }
 932  
 933      function processAttributesBySchema( &$element, $verbose = true )
 934      {
 935          // Remove attributes that don't match schema
 936          $schemaAttributes = $this->XMLSchema->attributes( $element );
 937          if ( $this->eZPublishVersion >= 3.9 )
 938          {
 939              $schemaCustomAttributes = $this->XMLSchema->customAttributes( $element );
 940          }
 941  
 942          $attributes = $element->attributes();
 943          foreach( $attributes as $attr )
 944          {
 945              $allowed = false;
 946              $removeAttr = false;
 947  
 948              // php5 TODO: small letters
 949              if ( $attr->Prefix )
 950                  $fullName = $attr->Prefix . ':' . $attr->LocalName;
 951              else
 952                  $fullName = $attr->LocalName;
 953  
 954              if ( $this->eZPublishVersion >= 3.9 )
 955              {
 956                  // check for allowed custom attributes (3.9)
 957                  if ( $attr->Prefix == 'custom' && in_array( $attr->LocalName, $schemaCustomAttributes ) )
 958                  {
 959                      $allowed = true;
 960                  }
 961                  else
 962                  {
 963                      if ( in_array( $fullName, $schemaAttributes ) )
 964                      {
 965                         $allowed = true;
 966                      }
 967                      elseif ( in_array( $fullName, $schemaCustomAttributes ) )
 968                      {
 969                          // add 'custom' prefix if it is not given
 970                          $allowed = true;
 971                          $removeAttr = true;
 972                          $element->setAttributeNS( $this->Namespaces['custom'], 'custom:' . $fullName, $attr->content() );
 973                      }
 974                  }
 975              }
 976              else
 977              {
 978                  if ( $attr->Prefix == 'custom' ||
 979                       in_array( $fullName, $schemaAttributes ) )
 980                  {
 981                      $allowed = true;
 982                  }
 983              }
 984  
 985              if ( !$allowed )
 986              {
 987                  $removeAttr = true;
 988                  $this->isInputValid = false;
 989                  if ( $verbose && $this->errorLevel >= 1 )
 990                  {
 991                      $this->Messages[] = ezi18n( 'kernel/classes/datatypes/ezxmltext', "Attribute '%1' is not allowed in &lt;%2&gt; element.",
 992                                                          false, array( $fullName, $element->nodeName ) );
 993                  }
 994              }
 995              elseif ( $this->removeDefaultAttrs ) 
 996              {
 997                  // Remove attributes having default values
 998                  $default = $this->XMLSchema->attrDefaultValue( $element->nodeName, $fullName );
 999                  if ( $attr->Content == $default )
1000                  {
1001                      $removeAttr = true;
1002                  }
1003              }
1004  
1005              if ( $removeAttr )
1006              {
1007                  if ( $attr->Prefix )
1008                      $element->removeAttributeNS( $attr->NamespaceURI, $attr->LocalName );
1009                  else
1010                      $element->removeAttribute( $attr->nodeName );
1011              }
1012          }
1013      }
1014  
1015      function &callInputHandler( $handlerName, $tagName, &$attributes )
1016      {
1017          $result = null;
1018          $thisInputTag =& $this->InputTags[$tagName];
1019          if ( isset( $thisInputTag[$handlerName] ) )
1020          {
1021              if ( is_callable( array( $this, $thisInputTag[$handlerName] ) ) )
1022                  eval( '$result =& $this->' . $thisInputTag[$handlerName] . '( $tagName, $attributes );' );
1023              else
1024                  eZDebug::writeWarning( "'$handlerName' input handler for tag <$tagName> doesn't exist: '" . $thisInputTag[$handlerName] . "'.", 'eZXML input parser' );
1025          }
1026          return $result;
1027      }
1028  
1029      function &callOutputHandler( $handlerName, &$element, &$params )
1030      {
1031          $result = null;
1032          $thisOutputTag =& $this->OutputTags[$element->nodeName];
1033          if ( isset( $thisOutputTag[$handlerName] ) )
1034          {
1035              if ( is_callable( array( $this, $thisOutputTag[$handlerName] ) ) )
1036                  eval( '$result =& $this->' . $thisOutputTag[$handlerName] . '( $element, $params );' );
1037              else
1038                  eZDebug::writeWarning( "'$handlerName' output handler for tag <$element->nodeName> doesn't exist: '" . $thisOutputTag[$handlerName] . "'.", 'eZXML input parser' );
1039          }
1040          return $result;
1041      }
1042  
1043      // Creates new element and adds it to array for further post-processing.
1044      // Use this function if you need to process newly created element (check it by schema
1045      // and call 'structure' and 'publish' handlers)
1046      function &createAndPublishElement( $elementName, &$ret )
1047      {
1048          $element =& $this->Document->createElement( $elementName );
1049          //$element->setAttribute( 'ezparser-new-element', 'true' );
1050  
1051      if ( !isset( $ret['new_elements'] ) )
1052               $ret['new_elements'] = array();
1053  
1054      $ret['new_elements'][] =& $element;
1055          return $element;
1056      }
1057  
1058      function processNewElements( &$createdElements )
1059      {
1060          // Call publish handlers for newly created elements
1061          foreach( array_keys( $createdElements ) as $key )
1062          {
1063              $element =& $createdElements[$key];
1064  
1065              $tmp = null;
1066              // Call "Structure handler"
1067              $this->callOutputHandler( 'structHandler', $element, $tmp );
1068  
1069              if ( !$this->processElementBySchema( $element ) )
1070                  continue;
1071  
1072              $tmp2 = null;
1073              // Call "Publish handler"
1074              $this->callOutputHandler( 'publishHandler', $element, $tmp2 );
1075              //$element->removeAttribute( 'ezparser-new-element' );
1076          }
1077      }
1078  
1079      function getMessages()
1080      {
1081          return $this->Messages;
1082      }
1083  
1084      function isValid()
1085      {
1086          return $this->isInputValid;
1087      }
1088  
    // Name of the DOM document class (presumably the class instantiated for
    // $Document in pass 1 - confirm against the code outside this section).
    var $DOMDocumentClass = 'eZDOMDocument';

    // Schema object queried in pass 2 (exists(), check(), childrenRequired(),
    // attributes(), customAttributes(), attrDefaultValue(), ...).
    var $XMLSchema;
    // Document being processed; its Root is the starting point of pass 2.
    var $Document;
    // Messages recorded during processing, returned by getMessages().
    var $Messages = array();
    // Compared against 3.9 to decide whether the schema's custom attributes are checked.
    var $eZPublishVersion;

    // NOTE(review): not used in the pass 2 code; presumably the stack of open
    // elements maintained during pass 1 - confirm.
    var $ParentStack = array();

    // Schema violation messages are only recorded when this is >= 1.
    var $errorLevel = 0;

    // Cleared as soon as invalid input is found; returned by isValid().
    var $isInputValid = true;
    // When true, processSubtree() stops as soon as the input is known to be invalid.
    var $quitIfInvalid = false;

    // options that depend on settings
    var $TrimSpaces = true;
    var $AllowMultipleSpaces = false;
    var $AllowNumericEntities = false;

    // options that depend on parameters passed
    var $parseLineBreaks = false;
    // When true, attributes whose content equals the schema default are removed.
    var $removeDefaultAttrs = false;

    // NOTE(review): not referenced in the pass 2 code shown here - confirm whether it is still used.
    var $createdElements = array();
1113  }
1114  ?>


Généré le : Sat Feb 24 10:30:04 2007 par Balluche grâce à PHPXref 0.7