[ Index ]
 

Code source de b2evolution 2.1.0-beta

Accédez au Source d'autres logiciels libres

Classes | Fonctions | Variables | Constantes | Tables

title

Body

[fermer]

/blogs/inc/xhtml_validator/ -> _xhtml_validator.class.php (source)

   1  <?php
   2  /**

   3   * This file implements the SafeHtmlChecker class.

   4   *

   5   * Checks HTML against a subset of elements to ensure safety and XHTML validation.

   6   *

   7   * This file is part of the b2evolution/evocms project - {@link http://b2evolution.net/}.

   8   * See also {@link http://sourceforge.net/projects/evocms/}.

   9   *

  10   * @copyright (c)2003-2007 by Francois PLANQUE - {@link http://fplanque.net/}.

  11   * Parts of this file are copyright (c)2003 by Nobuo SAKIYAMA - {@link http://www.sakichan.org/}

  12   * Parts of this file are copyright (c)2004-2005 by Daniel HAHLER - {@link http://thequod.de/contact}.

  13   *

  14   * @license http://b2evolution.net/about/license.html GNU General Public License (GPL)

  15   *

  16   * {@internal Open Source relicensing agreement:

  17   * Daniel HAHLER grants Francois PLANQUE the right to license

  18   * Daniel HAHLER's contributions to this file and the b2evolution project

  19   * under any OSI approved OSS license (http://www.opensource.org/licenses/).

  20   * }}

  21   *

  22   * {@internal Origin:

  23   * This file was inspired by Simon Willison's SafeHtmlChecker released in

  24   * the public domain on 23rd Feb 2003.

  25   * {@link http://simon.incutio.com/code/php/SafeHtmlChecker.class.php.txt}

  26   * }}

  27   *

  28   * @package evocore

  29   *

  30   * {@internal Below is a list of authors who have contributed to design/coding of this file: }}

  31   * @author blueyed: Daniel HAHLER.

  32   * @author fplanque: Francois PLANQUE.

  33   * @author sakichan: Nobuo SAKIYAMA.

  34   * @author Simon Willison.

  35   *

  36   * @version $Id: _xhtml_validator.class.php,v 1.2 2007/09/13 02:37:22 fplanque Exp $

  37   */
  38  if( !defined('EVO_MAIN_INIT') ) die( 'Please, do not access this page directly.' ); // Stop immediately when the EVO_MAIN_INIT constant is not defined, i.e. when this file is requested on its own.
  39  
  40  /**

  41   * SafeHtmlChecker

  42   *

  43   * checks HTML against a subset of elements to ensure safety and XHTML validation.

  44   *

  45   * @package evocore

  46   */
  47  class SafeHtmlChecker
  48  {
  49      var $tags;      // Array showing allowed attributes for tags

  50      var $tagattrs;  // Array showing URI attributes

  51      var $uri_attrs;
  52      var $allowed_uri_scheme;
  53  
  54      // Internal variables

  55      var $parser;
  56      var $stack = array();
  57      var $last_checked_pos;
  58      var $error;
  59  
  60      /**

  61       * Constructor

  62       *

  63       * {@internal This gets tested in _libs.misc.simpletest.php}}

  64       *

  65       * @param array

  66       * @param array

  67       * @param array

  68       * @param array

  69       * @param string Input encoding to use ('ISO-8859-1', 'UTF-8', 'US-ASCII' or '' for auto-detect)

  70       */
  71  	function SafeHtmlChecker( & $allowed_tags, & $allowed_attributes, & $uri_attrs, & $allowed_uri_scheme, $encoding = '' )
  72      {
  73          $this->tags = & $allowed_tags;
  74          $this->tagattrs = & $allowed_attributes;
  75          $this->uri_attrs = & $uri_attrs;
  76          $this->allowed_uri_scheme = & $allowed_uri_scheme;
  77  
  78          $encoding = strtoupper($encoding); // we might get 'iso-8859-1' for example

  79          $this->encoding = $encoding;
  80          if( ! in_array( $encoding, array( 'ISO-8859-1', 'UTF-8', 'US-ASCII' ) ) )
  81          { // passed encoding not supported by xml_parser_create()
  82              $this->xml_parser_encoding = ''; // auto-detect (in PHP4, in PHP5 anyway)

  83          }
  84          else
  85          {
  86              $this->xml_parser_encoding = $this->encoding;
  87          }
  88          $this->parser = xml_parser_create( $this->xml_parser_encoding );
  89  
  90          $this->last_checked_pos = 0;
  91          $this->error = false;
  92  
  93          // Creates the parser

  94          xml_set_object( $this->parser, $this);
  95  
  96          // set functions to call when a start or end tag is encountered

  97          xml_set_element_handler($this->parser, 'tag_open', 'tag_close');
  98          // set function to call for the actual data

  99          xml_set_character_data_handler($this->parser, 'cdata');
 100  
 101          xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, false);
 102      }
 103  
 104  
 105      /**

 106       * check(-)

 107       */
 108  	function check($xhtml)
 109      {
 110          // Convert encoding:

 111          // TODO: use convert_encoding()

 112          if( empty($this->xml_parser_encoding) || $this->encoding != $this->xml_parser_encoding )
 113          { // we need to convert encoding:
 114              if( function_exists( 'mb_convert_encoding' ) )
 115              { // we can convert encoding to UTF-8
 116                  $this->encoding = 'UTF-8';
 117  
 118                  // Convert XHTML:

 119                  $xhtml = mb_convert_encoding( $xhtml, 'UTF-8' );
 120              }
 121              elseif( ($this->encoding == 'ISO-8859-1' || empty($this->encoding)) && function_exists('utf8_encode') )
 122              {
 123                  $this->encoding = 'UTF-8';
 124  
 125                  $xhtml = utf8_encode( $xhtml );
 126              }
 127          }
 128  
 129          // Open comments or '<![CDATA[' are dangerous

 130          $xhtml = str_replace('<!', '', $xhtml);
 131  
 132          // Convert isolated & chars

 133          $xhtml = preg_replace( '#(\s)&(\s)#', '\\1&amp;\\2', $xhtml );
 134  
 135          $xhtml_head = '<?xml version="1.0"';
 136          if( ! empty($this->encoding) )
 137          {
 138              $xhtml_head .= ' encoding="'.$this->encoding.'"';
 139          }
 140  
 141          $xhtml_head .= '?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"';
 142  
 143          // Include entities:

 144          $xhtml_head .= '[';
 145          // Include latin1 entities (http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent):

 146          $xhtml_head .= file_get_contents( dirname(__FILE__).'/_xhtml-lat1.ent' );
 147          // Include symbol entities (http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent):

 148          $xhtml_head .= file_get_contents( dirname(__FILE__).'/_xhtml-symbol.ent' );
 149          // Include special entities (http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent):

 150          $xhtml_head .= file_get_contents( dirname(__FILE__).'/_xhtml-special.ent' );
 151          $xhtml_head .= ']>';
 152  
 153          $xhtml = $xhtml_head.'<body>'.$xhtml.'</body>';
 154          unset($xhtml_head);
 155  
 156          if( !xml_parse($this->parser, $xhtml) )
 157          {
 158              $xml_error_code = xml_get_error_code( $this->parser );
 159              $xml_error_string = xml_error_string( $xml_error_code );
 160              switch( $xml_error_code )
 161              {
 162                  case XML_ERROR_TAG_MISMATCH:
 163                      $xml_error_string .= ': <code>'.$this->stack[count($this->stack)-1].'</code>';
 164                      break;
 165              }
 166              $pos = xml_get_current_byte_index($this->parser);
 167              $xml_error_string .= ' near <code>'.htmlspecialchars( substr( $xhtml, $this->last_checked_pos, $pos-$this->last_checked_pos+20 ) ).'</code>';
 168  
 169              $this->html_error( T_('Parser error: ').$xml_error_string );
 170          }
 171      }
 172  
 173      /**

 174       * tag_open(-)

 175       *

 176       * Called when the parser finds an opening tag

 177       */
 178  	function tag_open($parser, $tag, $attrs)
 179      {
 180          global $debug;
 181      
 182          // echo "processing tag: $tag <br />\n";

 183          $this->last_checked_pos = xml_get_current_byte_index($this->parser);
 184  
 185          if ($tag == 'body')
 186          {
 187              if( count($this->stack) > 0 )
 188                  $this->html_error( T_('Tag <code>body</code> can only be used once!') );
 189              $this->stack[] = $tag;
 190              return;
 191          }
 192          $previous = $this->stack[count($this->stack)-1];
 193  
 194          // If previous tag is illegal, no point in running tests

 195          if (!in_array($previous, array_keys($this->tags))) {
 196              $this->stack[] = $tag;
 197              return;
 198          }
 199          // Is tag a legal tag?

 200          if (!in_array($tag, array_keys($this->tags))) {
 201              $this->html_error( T_('Illegal tag'). ": <code>$tag</code>" );
 202              $this->stack[] = $tag;
 203              return;
 204          }
 205          // Is tag allowed in the current context?

 206          if (!in_array($tag, explode(' ', $this->tags[$previous]))) {
 207              if ($previous == 'body') {
 208                  $this->html_error(    sprintf( T_('Tag &lt;%s&gt; must occur inside another tag'), '<code>'.$tag.'</code>' ) );
 209              } else {
 210                  $this->html_error(    sprintf( T_('Tag &lt;%s&gt; is not allowed within tag &lt;%s&gt;'), '<code>'.$tag.'</code>', '<code>'.$previous.'</code>') );
 211              }
 212          }
 213          // Are tag attributes valid?

 214          foreach( $attrs as $attr => $value )
 215          {
 216              if (!isset($this->tagattrs[$tag]) || !in_array($attr, explode(' ', $this->tagattrs[$tag])))
 217              {
 218                  $this->html_error( sprintf( T_('Tag &lt;%s&gt; may not have attribute %s'), '<code>'.$tag.'</code>', '<code>'.$attr.'</code>' ) );
 219              }
 220              if (in_array($attr, $this->uri_attrs))
 221              { // Must this attribute be checked for URIs
 222                  $matches = array();
 223                  $value = trim($value);
 224                  if( $error = validate_url( $value, $this->allowed_uri_scheme, false, $debug ) )
 225                  {
 226                      $this->html_error( T_('Found invalid URL: ').$error );
 227                  }
 228              }
 229          }
 230          // Set previous, used for checking nesting context rules

 231          $this->stack[] = $tag;
 232      }
 233  
 234      /**

 235       * cdata(-)

 236       */
 237  	function cdata($parser, $cdata)
 238      {
 239          $this->last_checked_pos = xml_get_current_byte_index($this->parser);
 240  
 241          // Simply check that the 'previous' tag allows CDATA

 242          $previous = $this->stack[count($this->stack)-1];
 243          // If previous tag is illegal, no point in running test

 244          if (!in_array($previous, array_keys($this->tags))) {
 245              return;
 246          }
 247          if (trim($cdata) != '') {
 248              if (!in_array('#PCDATA', explode(' ', $this->tags[$previous]))) {
 249                  $this->html_error(    sprintf( T_('Tag &lt;%s&gt; may not contain raw character data'), '<code>'.$previous.'</code>' ) );
 250              }
 251          }
 252      }
 253  
 254      /**

 255       * tag_close(-)

 256       */
 257  	function tag_close($parser, $tag)
 258      {
 259          $this->last_checked_pos = xml_get_current_byte_index($this->parser);
 260  
 261          // Move back one up the stack

 262          array_pop($this->stack);
 263      }
 264  
 265  	function html_error( $string )
 266      {
 267          global $Messages;
 268          $this->error = true;
 269          $Messages->add( $string, 'error' );
 270      }
 271  
 272      /**

 273       * isOK(-)

 274       */
 275  	function isOK()
 276      {
 277          return ! $this->error;
 278      }
 279  
 280  }
 281  
 282  
 283  /*

 284   * $Log: _xhtml_validator.class.php,v $

 285   * Revision 1.2  2007/09/13 02:37:22  fplanque

 286   * special cases

 287   *

 288   * Revision 1.1  2007/06/25 11:02:27  fplanque

 289   * MODULES (refactored MVC)

 290   *

 291   * Revision 1.13  2007/04/26 00:11:07  fplanque

 292   * (c) 2007

 293   *

 294   * Revision 1.12  2006/11/27 02:29:53  blueyed

 295   * Committed test changes by accident. Test added for it as an exercise.

 296   *

 297   * Revision 1.11  2006/11/26 02:30:39  fplanque

 298   * doc / todo

 299   *

 300   * Revision 1.10  2006/11/06 22:56:53  blueyed

 301   * Added full(?) XHTML entities support to the html checker

 302   *

 303   * Revision 1.9  2006/11/04 21:44:59  blueyed

 304   * Include latin1 entities to let xml_parse() not choke on those

 305   */
 306  ?>


Généré le : Thu Nov 29 23:58:50 2007 par Balluche grâce à PHPXref 0.7
  Clicky Web Analytics