[ Index ] |
|
Code source de b2evolution 2.1.0-beta |
<?php
/**
 * This file implements the SafeHtmlChecker class.
 *
 * Checks HTML against a subset of elements to ensure safety and XHTML validation.
 *
 * This file is part of the b2evolution/evocms project - {@link http://b2evolution.net/}.
 * See also {@link http://sourceforge.net/projects/evocms/}.
 *
 * @copyright (c)2003-2007 by Francois PLANQUE - {@link http://fplanque.net/}.
 * Parts of this file are copyright (c)2003 by Nobuo SAKIYAMA - {@link http://www.sakichan.org/}
 * Parts of this file are copyright (c)2004-2005 by Daniel HAHLER - {@link http://thequod.de/contact}.
 *
 * @license http://b2evolution.net/about/license.html GNU General Public License (GPL)
 *
 * {@internal Open Source relicensing agreement:
 * Daniel HAHLER grants Francois PLANQUE the right to license
 * Daniel HAHLER's contributions to this file and the b2evolution project
 * under any OSI approved OSS license (http://www.opensource.org/licenses/).
 * }}
 *
 * {@internal Origin:
 * This file was inspired by Simon Willison's SafeHtmlChecker released in
 * the public domain on 23rd Feb 2003.
 * {@link http://simon.incutio.com/code/php/SafeHtmlChecker.class.php.txt}
 * }}
 *
 * @package evocore
 *
 * {@internal Below is a list of authors who have contributed to design/coding of this file: }}
 * @author blueyed: Daniel HAHLER.
 * @author fplanque: Francois PLANQUE.
 * @author sakichan: Nobuo SAKIYAMA.
 * @author Simon Willison.
 *
 * @version $Id: _xhtml_validator.class.php,v 1.2 2007/09/13 02:37:22 fplanque Exp $
 */
if( !defined('EVO_MAIN_INIT') ) die( 'Please, do not access this page directly.' );

/**
 * SafeHtmlChecker
 *
 * checks HTML against a subset of elements to ensure safety and XHTML validation.
 *
 * Usage: construct with the whitelists, call {@link check()} once with the
 * fragment to validate, then ask {@link isOK()}. Every problem found is also
 * reported through the global $Messages object.
 *
 * @package evocore
 */
class SafeHtmlChecker
{
	/**
	 * Allowed tags: tag name => space separated list of what the tag may
	 * contain (child tag names and/or '#PCDATA').
	 */
	var $tags;
	/**
	 * Allowed attributes: tag name => space separated list of attribute names.
	 */
	var $tagattrs;
	/**
	 * Names of the attributes whose value must be validated as a URL.
	 */
	var $uri_attrs;
	/**
	 * Allowed URI schemes, handed over to validate_url().
	 */
	var $allowed_uri_scheme;

	/**
	 * Encoding of the content to check (upper case); set to 'UTF-8' once
	 * check() had to convert the content.
	 */
	var $encoding;
	/**
	 * Encoding given to xml_parser_create(); '' means auto-detect.
	 */
	var $xml_parser_encoding;

	// Internal variables
	var $parser;
	/**
	 * Names of the currently open tags, innermost last.
	 */
	var $stack = array();
	/**
	 * Byte index of the parser the last time one of the handlers was called.
	 */
	var $last_checked_pos;
	/**
	 * Has at least one error been reported?
	 */
	var $error;

	/**
	 * Legacy (PHP4 style) constructor.
	 *
	 * Kept so that existing callers of this method keep working; it simply
	 * delegates to {@link __construct()}.
	 *
	 * {@internal This gets tested in _libs.misc.simpletest.php}}
	 *
	 * @param array allowed tags (see {@link $tags})
	 * @param array allowed attributes (see {@link $tagattrs})
	 * @param array attributes to check as URLs (see {@link $uri_attrs})
	 * @param array allowed URI schemes (see {@link $allowed_uri_scheme})
	 * @param string Input encoding to use ('ISO-8859-1', 'UTF-8', 'US-ASCII' or '' for auto-detect)
	 */
	function SafeHtmlChecker( & $allowed_tags, & $allowed_attributes, & $uri_attrs, & $allowed_uri_scheme, $encoding = '' )
	{
		$this->__construct( $allowed_tags, $allowed_attributes, $uri_attrs, $allowed_uri_scheme, $encoding );
	}


	/**
	 * Constructor
	 *
	 * Stores references to the whitelists and sets up the XML parser with
	 * this object's tag_open() / tag_close() / cdata() methods as handlers.
	 *
	 * @param array allowed tags (see {@link $tags})
	 * @param array allowed attributes (see {@link $tagattrs})
	 * @param array attributes to check as URLs (see {@link $uri_attrs})
	 * @param array allowed URI schemes (see {@link $allowed_uri_scheme})
	 * @param string Input encoding to use ('ISO-8859-1', 'UTF-8', 'US-ASCII' or '' for auto-detect)
	 */
	function __construct( & $allowed_tags, & $allowed_attributes, & $uri_attrs, & $allowed_uri_scheme, $encoding = '' )
	{
		$this->tags = & $allowed_tags;
		$this->tagattrs = & $allowed_attributes;
		$this->uri_attrs = & $uri_attrs;
		$this->allowed_uri_scheme = & $allowed_uri_scheme;

		$encoding = strtoupper($encoding); // we might get 'iso-8859-1' for example
		$this->encoding = $encoding;
		if( ! in_array( $encoding, array( 'ISO-8859-1', 'UTF-8', 'US-ASCII' ) ) )
		{ // passed encoding not supported by xml_parser_create()
			$this->xml_parser_encoding = ''; // auto-detect (in PHP4, in PHP5 anyway)
		}
		else
		{
			$this->xml_parser_encoding = $this->encoding;
		}

		// Creates the parser
		$this->parser = xml_parser_create( $this->xml_parser_encoding );

		$this->last_checked_pos = 0;
		$this->error = false;

		// The handler names below are methods of this object:
		xml_set_object( $this->parser, $this);

		// set functions to call when a start or end tag is encountered
		xml_set_element_handler($this->parser, 'tag_open', 'tag_close');
		// set function to call for the actual data
		xml_set_character_data_handler($this->parser, 'cdata');

		// Keep tag names as written (the whitelists are case sensitive lookups):
		xml_parser_set_option($this->parser, XML_OPTION_CASE_FOLDING, false);
	}


	/**
	 * check(-)
	 *
	 * Wraps the fragment into an XML document (XML declaration, XHTML doctype
	 * with the XHTML entity definitions, a <body> root) and parses it. The
	 * content is only modified locally for parsing; nothing is returned.
	 * Use {@link isOK()} afterwards to get the result.
	 *
	 * @param string XHTML fragment to check
	 */
	function check($xhtml)
	{
		// Convert encoding:
		// TODO: use convert_encoding()
		if( empty($this->xml_parser_encoding) || $this->encoding != $this->xml_parser_encoding )
		{ // we need to convert encoding:
			if( function_exists( 'mb_convert_encoding' ) )
			{ // we can convert encoding to UTF-8
				$this->encoding = 'UTF-8';

				// Convert XHTML:
				$xhtml = mb_convert_encoding( $xhtml, 'UTF-8' );
			}
			elseif( ($this->encoding == 'ISO-8859-1' || empty($this->encoding)) && function_exists('utf8_encode') )
			{
				$this->encoding = 'UTF-8';

				$xhtml = utf8_encode( $xhtml );
			}
		}

		// Open comments or '<![CDATA[' are dangerous
		$xhtml = str_replace('<!', '', $xhtml);

		// Convert isolated & chars: a bare ampersand is not well-formed XML and
		// would make the parser fail, so it has to become an entity.
		$xhtml = preg_replace( '#(\s)&(\s)#', '\\1&amp;\\2', $xhtml );

		$xhtml_head = '<?xml version="1.0"';
		if( ! empty($this->encoding) )
		{
			$xhtml_head .= ' encoding="'.$this->encoding.'"';
		}

		$xhtml_head .= '?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"';

		// Include entities:
		$xhtml_head .= '[';
		// Include latin1 entities (http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent):
		$xhtml_head .= file_get_contents( dirname(__FILE__).'/_xhtml-lat1.ent' );
		// Include symbol entities (http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent):
		$xhtml_head .= file_get_contents( dirname(__FILE__).'/_xhtml-symbol.ent' );
		// Include special entities (http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent):
		$xhtml_head .= file_get_contents( dirname(__FILE__).'/_xhtml-special.ent' );
		$xhtml_head .= ']>';

		$xhtml = $xhtml_head.'<body>'.$xhtml.'</body>';
		unset($xhtml_head);

		// The whole document is passed at once, so this is the final chunk.
		// Without the flag the parser keeps waiting for more data and errors
		// that can only be detected at the end of input go unreported.
		if( !xml_parse($this->parser, $xhtml, true) )
		{
			$xml_error_code = xml_get_error_code( $this->parser );
			$xml_error_string = xml_error_string( $xml_error_code );
			if( $xml_error_code == XML_ERROR_TAG_MISMATCH && count($this->stack) > 0 )
			{ // Name the innermost tag that is still open:
				$xml_error_string .= ': <code>'.$this->stack[count($this->stack)-1].'</code>';
			}

			// Quote the source from the last handled position up to a few bytes
			// past the point where the parser stopped:
			$pos = xml_get_current_byte_index($this->parser);
			$xml_error_string .= ' near <code>'.htmlspecialchars( substr( $xhtml, $this->last_checked_pos, $pos-$this->last_checked_pos+20 ) ).'</code>';

			$this->html_error( T_('Parser error: ').$xml_error_string );
		}
	}

	/**
	 * tag_open(-)
	 *
	 * Called when the parser finds an opening tag.
	 *
	 * Checks that the tag is known, that it is allowed inside the currently
	 * open tag, that each of its attributes is allowed and that URL
	 * attributes hold a valid URL. The tag is always pushed on the stack,
	 * even when it is illegal, so that tag_close() stays balanced.
	 *
	 * @param resource the XML parser (unused, $this->parser is used instead)
	 * @param string tag name
	 * @param array attribute name => value
	 */
	function tag_open($parser, $tag, $attrs)
	{
		global $debug;

		// echo "processing tag: $tag <br />\n";
		$this->last_checked_pos = xml_get_current_byte_index($this->parser);

		if ($tag == 'body')
		{ // 'body' is the wrapper added by check(); it must be the outermost tag
			if( count($this->stack) > 0 )
				$this->html_error( T_('Tag <code>body</code> can only be used once!') );
			$this->stack[] = $tag;
			return;
		}
		$previous = $this->stack[count($this->stack)-1];

		// If previous tag is illegal, no point in running tests
		if (!array_key_exists($previous, $this->tags)) {
			$this->stack[] = $tag;
			return;
		}
		// Is tag a legal tag?
		if (!array_key_exists($tag, $this->tags)) {
			$this->html_error( T_('Illegal tag'). ": <code>$tag</code>" );
			$this->stack[] = $tag;
			return;
		}
		// Is tag allowed in the current context?
		// NOTE(review): the '<%s>' in the messages below may originally have been
		// written with &lt; / &gt; entities - confirm against the translation files.
		if (!in_array($tag, explode(' ', $this->tags[$previous]))) {
			if ($previous == 'body') {
				$this->html_error( sprintf( T_('Tag <%s> must occur inside another tag'), '<code>'.$tag.'</code>' ) );
			} else {
				$this->html_error( sprintf( T_('Tag <%s> is not allowed within tag <%s>'), '<code>'.$tag.'</code>', '<code>'.$previous.'</code>') );
			}
		}
		// Are tag attributes valid?
		foreach( $attrs as $attr => $value )
		{
			if (!isset($this->tagattrs[$tag]) || !in_array($attr, explode(' ', $this->tagattrs[$tag])))
			{
				$this->html_error( sprintf( T_('Tag <%s> may not have attribute %s'), '<code>'.$tag.'</code>', '<code>'.$attr.'</code>' ) );
			}
			if (in_array($attr, $this->uri_attrs))
			{ // Must this attribute be checked for URIs
				$value = trim($value);
				// validate_url() returns an error message when the URL is rejected:
				if( $error = validate_url( $value, $this->allowed_uri_scheme, false, $debug ) )
				{
					$this->html_error( T_('Found invalid URL: ').$error );
				}
			}
		}
		// Set previous, used for checking nesting context rules
		$this->stack[] = $tag;
	}

	/**
	 * cdata(-)
	 *
	 * Called when the parser finds character data. Non-blank text is only
	 * accepted when the currently open tag lists '#PCDATA' as allowed content.
	 *
	 * @param resource the XML parser (unused, $this->parser is used instead)
	 * @param string the character data
	 */
	function cdata($parser, $cdata)
	{
		$this->last_checked_pos = xml_get_current_byte_index($this->parser);

		// Simply check that the 'previous' tag allows CDATA
		$previous = $this->stack[count($this->stack)-1];
		// If previous tag is illegal, no point in running test
		if (!array_key_exists($previous, $this->tags)) {
			return;
		}
		if (trim($cdata) != '') {
			if (!in_array('#PCDATA', explode(' ', $this->tags[$previous]))) {
				$this->html_error( sprintf( T_('Tag <%s> may not contain raw character data'), '<code>'.$previous.'</code>' ) );
			}
		}
	}

	/**
	 * tag_close(-)
	 *
	 * Called when the parser finds a closing tag.
	 *
	 * @param resource the XML parser (unused, $this->parser is used instead)
	 * @param string tag name (unused)
	 */
	function tag_close($parser, $tag)
	{
		$this->last_checked_pos = xml_get_current_byte_index($this->parser);

		// Move back one up the stack
		array_pop($this->stack);
	}

	/**
	 * Record an error: flags this checker as failed and adds the message
	 * to the global $Messages object in the 'error' category.
	 *
	 * @param string error message
	 */
	function html_error( $string )
	{
		global $Messages;
		$this->error = true;
		$Messages->add( $string, 'error' );
	}

	/**
	 * isOK(-)
	 *
	 * @return boolean true if no error has been reported so far
	 */
	function isOK()
	{
		return ! $this->error;
	}

}


/*
 * $Log: _xhtml_validator.class.php,v $
 * Revision 1.2 2007/09/13 02:37:22 fplanque
 * special cases
 *
 * Revision 1.1 2007/06/25 11:02:27 fplanque
 * MODULES (refactored MVC)
 *
 * Revision 1.13 2007/04/26 00:11:07 fplanque
 * (c) 2007
 *
 * Revision 1.12 2006/11/27 02:29:53 blueyed
 * Committed test changes by accident. Test added for it as an exercise.
 *
 * Revision 1.11 2006/11/26 02:30:39 fplanque
 * doc / todo
 *
 * Revision 1.10 2006/11/06 22:56:53 blueyed
 * Added full(?) XHTML entities support to the html checker
 *
 * Revision 1.9 2006/11/04 21:44:59 blueyed
 * Include latin1 entities to let xml_parse() not choke on those
 */
?>
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Thu Nov 29 23:58:50 2007 | par Balluche grâce à PHPXref 0.7 |
![]() |