[ Index ] |
|
Code source de PRADO 3.0.6 |
<?php
/* vim: set expandtab tabstop=4 shiftwidth=4: */
//
// +----------------------------------------------------------------------+
// | PHP Version 4                                                        |
// +----------------------------------------------------------------------+
// | Copyright (c) 1997-2002 The PHP Group                                |
// +----------------------------------------------------------------------+
// | This source file is subject to version 2.02 of the PHP license,      |
// | that is bundled with this package in the file LICENSE, and is        |
// | available at through the world-wide-web at                           |
// | http://www.php.net/license/3_0.txt.                                  |
// | If you did not receive a copy of the PHP license and are unable to   |
// | obtain it through the world-wide-web, please send a note to          |
// | license@php.net so we can mail you a copy immediately.               |
// +----------------------------------------------------------------------+
// | Authors: Alexander Zhukov <alex@veresk.ru> Original port from Python |
// | Authors: Harry Fuecks <hfuecks@phppatterns.com> Port to PEAR + more  |
// | Authors: Many @ Sitepointforums Advanced PHP Forums                  |
// +----------------------------------------------------------------------+
//
// $Id: HTMLSax3.php 1397 2006-09-07 07:55:53Z wei $
//
/**
 * Main parser components
 * @package System.Security.SafeHtml
 * @version $Id: HTMLSax3.php 1397 2006-09-07 07:55:53Z wei $
 */
/**
 * Required classes
 */

require_once(dirname(__FILE__).'/HTMLSax3/States.php');
require_once(dirname(__FILE__).'/HTMLSax3/Decorators.php');

/**
 * Base State Parser
 *
 * Holds the document being parsed, the cursor position, the user supplied
 * handlers and the table of state objects that drive the parse loop.
 * The constructor is protected, so only the concrete subclasses (which
 * expose a public constructor) can be instantiated.
 *
 * @package System.Security.SafeHtml
 * @access protected
 * @abstract
 */
class TSax3_StateParser {
    /**
     * Instance of user front end class to be passed to callbacks
     * @var TSax3
     * @access private
     */
    public $htmlsax;
    /**
     * User defined object for handling elements
     * @var object
     * @access private
     */
    public $handler_object_element;
    /**
     * User defined open tag handler method
     * @var string
     * @access private
     */
    public $handler_method_opening;
    /**
     * User defined close tag handler method
     * @var string
     * @access private
     */
    public $handler_method_closing;
    /**
     * User defined object for handling data in elements
     * @var object
     * @access private
     */
    public $handler_object_data;
    /**
     * User defined data handler method
     * @var string
     * @access private
     */
    public $handler_method_data;
    /**
     * User defined object for handling processing instructions
     * @var object
     * @access private
     */
    public $handler_object_pi;
    /**
     * User defined processing instruction handler method
     * @var string
     * @access private
     */
    public $handler_method_pi;
    /**
     * User defined object for handling JSP/ASP tags
     * @var object
     * @access private
     */
    public $handler_object_jasp;
    /**
     * User defined JSP/ASP handler method
     * @var string
     * @access private
     */
    public $handler_method_jasp;
    /**
     * User defined object for handling XML escapes
     * @var object
     * @access private
     */
    public $handler_object_escape;
    /**
     * User defined XML escape handler method
     * @var string
     * @access private
     */
    public $handler_method_escape;
    /**
     * User defined handler object or NullHandler
     * @var object
     * @access private
     */
    public $handler_default;
    /**
     * Parser options determining parsing behavior. The keys listed here
     * are the only option names accepted by setOption(); every option is
     * switched off (0) by default.
     * @var array
     * @access private
     */
    protected $parser_options = array(
        'XML_OPTION_TRIM_DATA_NODES'   => 0,
        'XML_OPTION_CASE_FOLDING'      => 0,
        'XML_OPTION_LINEFEED_BREAK'    => 0,
        'XML_OPTION_TAB_BREAK'         => 0,
        'XML_OPTION_ENTITIES_PARSED'   => 0,
        'XML_OPTION_ENTITIES_UNPARSED' => 0,
        'XML_OPTION_STRIP_ESCAPES'     => 0,
    );
    /**
     * XML document being parsed
     * @var string
     * @access private
     */
    protected $rawtext;
    /**
     * Position in XML document relative to start (0)
     * @var int
     * @access private
     */
    protected $position;
    /**
     * Length of the XML document as reported by strlen()
     * @var int
     * @access private
     */
    protected $length;
    /**
     * Array of state objects, indexed by the TSAX3_STATE_* constants
     * @var array
     * @access private
     */
    protected $State = array();

    const TSAX3_STATE_STOP = 0;
    const TSAX3_STATE_START = 1;
    const TSAX3_STATE_TAG = 2;
    const TSAX3_STATE_OPENING_TAG = 3;
    const TSAX3_STATE_CLOSING_TAG = 4;
    const TSAX3_STATE_ESCAPE = 6;
    const TSAX3_STATE_JASP = 7;
    const TSAX3_STATE_PI = 8;

    /**
     * Constructs TSax3_StateParser setting up states
     * @param TSax3 instance of user front end class
     * @access protected
     */
    protected function __construct($htmlsax) {
        $this->htmlsax = $htmlsax;
        $this->State[self::TSAX3_STATE_START] = new TSax3_StartingState();

        $this->State[self::TSAX3_STATE_CLOSING_TAG] = new TSax3_ClosingTagState();
        $this->State[self::TSAX3_STATE_TAG] = new TSax3_TagState();
        $this->State[self::TSAX3_STATE_OPENING_TAG] = new TSax3_OpeningTagState();

        $this->State[self::TSAX3_STATE_PI] = new TSax3_PiState();
        $this->State[self::TSAX3_STATE_JASP] = new TSax3_JaspState();
        $this->State[self::TSAX3_STATE_ESCAPE] = new TSax3_EscapeState();
    }

    /**
     * Returns the current cursor position inside the document.
     * Public accessor so the TSax3 front end (which is not a subclass)
     * does not have to touch the protected property.
     * @access public
     * @return int
     */
    public function getPosition() {
        return $this->position;
    }

    /**
     * Returns the length of the document being parsed.
     * @access public
     * @return int
     */
    public function getLength() {
        return $this->length;
    }

    /**
     * Tells whether the given name is a known parser option.
     * @param string name of parser option
     * @access public
     * @return boolean
     */
    public function hasOption($name) {
        return array_key_exists($name, $this->parser_options);
    }

    /**
     * Sets a known parser option; unknown names are rejected.
     * @param string name of parser option
     * @param int 1 to switch on, 0 for off
     * @access public
     * @return boolean true if the option exists and was stored
     */
    public function setOption($name, $value) {
        if (!$this->hasOption($name)) {
            return false;
        }
        $this->parser_options[$name] = $value;
        return true;
    }

    /**
     * Tells whether a parser option is switched on. An option counts as
     * enabled when its value loosely equals 1; a missing key is "off".
     * @param string name of parser option
     * @access protected
     * @return boolean
     */
    protected function isOptionEnabled($name) {
        return isset($this->parser_options[$name])
            && $this->parser_options[$name] == 1;
    }

    /**
     * Moves the position back one character
     * @access protected
     * @return void
     */
    public function unscanCharacter() {
        $this->position -= 1;
    }

    /**
     * Moves the position forward one character
     * @access protected
     * @return void
     */
    public function ignoreCharacter() {
        $this->position += 1;
    }

    /**
     * Returns the next character from the XML document and advances the
     * position, or returns null if the position is at the end
     * @access protected
     * @return mixed
     */
    public function scanCharacter() {
        if ($this->position < $this->length) {
            // Square brackets: the {} string offset syntax was removed in PHP 8.
            return $this->rawtext[$this->position++];
        }
        return null;
    }

    /**
     * Returns a string from the current position to the next occurrence
     * of the supplied string, leaving the position on that occurrence.
     * If the string is not found the rest of the document is returned and
     * the position is moved to the end.
     * @param string string to search until
     * @access protected
     * @return string
     */
    public function scanUntilString($string) {
        $start = $this->position;
        if ($start >= $this->length) {
            // Nothing left to scan; strpos() rejects offsets beyond the
            // end of the haystack, so do not call it at all.
            $this->position = $this->length;
            return '';
        }
        $found = strpos($this->rawtext, $string, $start);
        $this->position = ($found === false) ? $this->length : $found;
        return substr($this->rawtext, $start, $this->position - $start);
    }

    /**
     * Returns a string from the current position until the first instance of
     * one of the characters in the supplied string argument
     * @param string string to search until
     * @access protected
     * @return string
     * @abstract
     */
    public function scanUntilCharacters($string) {}

    /**
     * Moves the position forward past any whitespace characters
     * @access protected
     * @return void
     * @abstract
     */
    public function ignoreWhitespace() {}

    /**
     * Wraps the current data handler in a decorator and makes the
     * decorator the new data handler.
     *
     * The wrapped handler is copied into a fresh local first and the
     * property is then re-bound to a second fresh local. That way a
     * decorator which keeps its target by reference can never end up
     * aliased to the property that is being replaced.
     *
     * @param string decorator class name
     * @param string decorator method to install as the data handler method
     * @access protected
     * @return void
     */
    protected function decorateDataHandler($class, $method) {
        $inner = $this->handler_object_data;
        $decorator = new $class($inner, $this->handler_method_data);
        $this->handler_object_data =& $decorator;
        $this->handler_method_data = $method;
    }

    /**
     * Begins the parsing operation, setting up any decorators, depending on
     * parse options, then invoking _parse() to execute parsing.
     *
     * NOTE: decorators are installed on the handler properties themselves,
     * so calling parse() repeatedly on one instance stacks them.
     *
     * @param string XML document to parse
     * @access protected
     * @return void
     */
    public function parse($data) {
        if ($this->isOptionEnabled('XML_OPTION_TRIM_DATA_NODES')) {
            $this->decorateDataHandler('TSax3_Trim', 'trimData');
        }
        if ($this->isOptionEnabled('XML_OPTION_CASE_FOLDING')) {
            $element_handler = $this->handler_object_element;
            $folding = new TSax3_CaseFolding(
                $element_handler,
                $this->handler_method_opening,
                $this->handler_method_closing);
            $this->handler_object_element =& $folding;
            $this->handler_method_opening = 'foldOpen';
            $this->handler_method_closing = 'foldClose';
        }
        if ($this->isOptionEnabled('XML_OPTION_LINEFEED_BREAK')) {
            $this->decorateDataHandler('TSax3_Linefeed', 'breakData');
        }
        if ($this->isOptionEnabled('XML_OPTION_TAB_BREAK')) {
            $this->decorateDataHandler('TSax3_Tab', 'breakData');
        }
        if ($this->isOptionEnabled('XML_OPTION_ENTITIES_UNPARSED')) {
            $this->decorateDataHandler('TSax3_Entities_Unparsed', 'breakData');
        }
        if ($this->isOptionEnabled('XML_OPTION_ENTITIES_PARSED')) {
            $this->decorateDataHandler('TSax3_Entities_Parsed', 'breakData');
        }
        // Like every other option this one is off unless explicitly set.
        if ($this->isOptionEnabled('XML_OPTION_STRIP_ESCAPES')) {
            $escape_handler = $this->handler_object_escape;
            $stripper = new TSax3_Escape_Stripper(
                $escape_handler,
                $this->handler_method_escape);
            $this->handler_object_escape =& $stripper;
            $this->handler_method_escape = 'strip';
        }
        $this->rawtext = $data;
        $this->length = strlen($data);
        $this->position = 0;
        $this->_parse();
    }

    /**
     * Performs the parsing itself, delegating calls to a specific parser
     * state. Each state's parse() returns the key of the next state; the
     * loop ends on TSAX3_STATE_STOP or when the end of the document is
     * reached.
     * @param constant state object to parse with
     * @access protected
     * @return void
     */
    public function _parse($state = self::TSAX3_STATE_START) {
        do {
            $state = $this->State[$state]->parse($this);
        } while ($state != self::TSAX3_STATE_STOP &&
                    $this->position < $this->length);
    }
}

/**
 * Parser for PHP Versions below 4.3.0. Uses a slower parsing mechanism than
 * the equivalent PHP 4.3.0+ subclass of StateParser
 * @package System.Security.SafeHtml
 * @access protected
 * @see TSax3_StateParser_Gtet430
 */
class TSax3_StateParser_Lt430 extends TSax3_StateParser {
    /**
     * Constructs TSax3_StateParser_Lt430. Declared public because the
     * parent constructor is protected; the available parser options and
     * their defaults are defined by the parent class.
     * @param TSax3 instance of user front end class
     * @access protected
     */
    public function __construct($htmlsax) {
        parent::__construct($htmlsax);
    }

    /**
     * Returns a string from the current position until the first instance of
     * one of the characters in the supplied string argument
     * @param string string to search until
     * @access protected
     * @return string
     */
    public function scanUntilCharacters($string) {
        $startpos = $this->position;
        while ($this->position < $this->length
                && strpos($string, $this->rawtext[$this->position]) === false) {
            $this->position++;
        }
        return substr($this->rawtext, $startpos, $this->position - $startpos);
    }

    /**
     * Moves the position forward past any whitespace characters
     * (space, linefeed, carriage return, tab)
     * @access protected
     * @return void
     */
    public function ignoreWhitespace() {
        while ($this->position < $this->length
                && strpos(" \n\r\t", $this->rawtext[$this->position]) !== false) {
            $this->position++;
        }
    }

    /**
     * Begins the parsing operation by delegating to the parent
     * @param string XML document to parse
     * @access protected
     * @return void
     */
    public function parse($data) {
        parent::parse($data);
    }
}

/**
 * Parser for PHP Versions equal to or greater than 4.3.0. Uses a faster
 * parsing mechanism than the equivalent PHP < 4.3.0 subclass of StateParser
 * @package System.Security.SafeHtml
 * @access protected
 * @see TSax3_StateParser_Lt430
 */
class TSax3_StateParser_Gtet430 extends TSax3_StateParser {
    /**
     * Constructs TSax3_StateParser_Gtet430. Declared public because the
     * parent constructor is protected; the available parser options and
     * their defaults are defined by the parent class.
     * @param TSax3 instance of user front end class
     * @access protected
     */
    public function __construct($htmlsax) {
        parent::__construct($htmlsax);
    }

    /**
     * Returns a string from the current position until the first instance of
     * one of the characters in the supplied string argument.
     * @param string string to search until
     * @access protected
     * @return string
     */
    public function scanUntilCharacters($string) {
        $startpos = $this->position;
        $length = strcspn($this->rawtext, $string, $startpos);
        $this->position += $length;
        return substr($this->rawtext, $startpos, $length);
    }

    /**
     * Moves the position forward past any whitespace characters
     * (space, linefeed, carriage return, tab)
     * @access protected
     * @return void
     */
    public function ignoreWhitespace() {
        $this->position += strspn($this->rawtext, " \n\r\t", $this->position);
    }

    /**
     * Begins the parsing operation by delegating to the parent
     * @param string XML document to parse
     * @access protected
     * @return void
     */
    public function parse($data) {
        parent::parse($data);
    }
}

/**
 * Default NullHandler for methods which were not set by user
 * @package System.Security.SafeHtml
 * @access protected
 */
class TSax3_NullHandler {
    /**
     * Generic handler method which does nothing
     * @access protected
     * @return void
     */
    public function DoNothing() {
    }
}

/**
 * User interface class. All user calls should only be made to this class
 * @package System.Security.SafeHtml
 * @access public
 */
class TSax3 {
    /**
     * Instance of concrete subclass of TSax3_StateParser
     * @var TSax3_StateParser
     * @access private
     */
    private $state_parser;

    /**
     * Constructs TSax3 selecting concrete StateParser subclass
     * depending on PHP version being used as well as setting the default
     * NullHandler for all callbacks<br />
     * <b>Example:</b>
     * <pre>
     * $myHandler = new MyHandler();
     * $parser = new TSax3();
     * $parser->set_object($myHandler);
     * $parser->set_option('XML_OPTION_CASE_FOLDING');
     * $parser->set_element_handler('myOpenHandler','myCloseHandler');
     * $parser->set_data_handler('myDataHandler');
     * $parser->parse($xml);
     * </pre>
     * @access public
     */
    public function __construct() {
        // This file already needs PHP 5 syntax (visibility keywords, class
        // constants), so in practice the first branch is always taken.
        if (version_compare(phpversion(), '4.3', 'ge')) {
            $this->state_parser = new TSax3_StateParser_Gtet430($this);
        } else {
            $this->state_parser = new TSax3_StateParser_Lt430($this);
        }
        $nullhandler = new TSax3_NullHandler();
        $this->set_object($nullhandler);
        $this->set_element_handler('DoNothing', 'DoNothing');
        $this->set_data_handler('DoNothing');
        $this->set_pi_handler('DoNothing');
        $this->set_jasp_handler('DoNothing');
        $this->set_escape_handler('DoNothing');
    }

    /**
     * Sets the user defined handler object. Returns a PEAR Error
     * if supplied argument is not an object.<br />
     * The object only becomes the target of a given callback once the
     * matching set_*_handler() method is called afterwards.
     * @param object handler object containing SAX callback methods
     * @access public
     * @return mixed true on success, PEAR Error otherwise
     */
    public function set_object(&$object) {
        if ( is_object($object) ) {
            $this->state_parser->handler_default = $object;
            return true;
        }
        require_once('PEAR.php');
        return PEAR::raiseError('TSax3::set_object requires '.
            'an object instance');
    }

    /**
     * Sets a parser option. By default all options are switched off.
     * Returns a PEAR Error if option is invalid<br />
     * <b>Available options:</b>
     * <ul>
     * <li>XML_OPTION_TRIM_DATA_NODES: trim whitespace off the beginning
     * and end of data passed to the data handler</li>
     * <li>XML_OPTION_CASE_FOLDING: element names are passed through the
     * case folding decorator before reaching the element handlers</li>
     * <li>XML_OPTION_LINEFEED_BREAK: linefeeds result in additional data
     * handler calls</li>
     * <li>XML_OPTION_TAB_BREAK: tabs result in additional data handler
     * calls</li>
     * <li>XML_OPTION_ENTITIES_UNPARSED: XML entities are returned as
     * separate data handler calls in unparsed form</li>
     * <li>XML_OPTION_ENTITIES_PARSED: (PHP 4.3.0+ only) XML entities are
     * returned as separate data handler calls and are parsed with
     * PHP's html_entity_decode() function</li>
     * <li>XML_OPTION_STRIP_ESCAPES: strips out the -- -- comment markers
     * or CDATA markup inside an XML escape, if found.</li>
     * </ul>
     * To get HTMLSax to behave in the same way as the native PHP SAX parser,
     * using its default state, you need to switch on XML_OPTION_LINEFEED_BREAK,
     * XML_OPTION_ENTITIES_PARSED and XML_OPTION_CASE_FOLDING
     * @param string name of parser option
     * @param int (optional) 1 to switch on, 0 for off
     * @access public
     * @return mixed true on success, PEAR Error otherwise
     */
    public function set_option($name, $value=1) {
        if ( $this->state_parser->setOption($name, $value) ) {
            return true;
        }
        require_once('PEAR.php');
        return PEAR::raiseError('TSax3::set_option('.$name.') illegal');
    }

    /**
     * Sets the data handler method which deals with the contents of XML
     * elements.<br />
     * The handler method must accept two arguments, the first being an
     * instance of TSax3 and the second being the contents of an
     * XML element e.g.
     * <pre>
     * function myDataHander(& $parser,$data){}
     * </pre>
     * The method is looked up on the object most recently passed to
     * set_object().
     * @param string name of method
     * @access public
     * @return void
     * @see set_object
     */
    public function set_data_handler($data_method) {
        $this->state_parser->handler_object_data = $this->state_parser->handler_default;
        $this->state_parser->handler_method_data = $data_method;
    }

    /**
     * Sets the open and close tag handlers
     * <br />The open handler method must accept three arguments; the parser,
     * the tag name and an array of attributes e.g.
     * <pre>
     * function myOpenHander(& $parser,$tagname,$attrs=array()){}
     * </pre>
     * The close handler method must accept two arguments; the parser and
     * the tag name e.g.
     * <pre>
     * function myCloseHander(& $parser,$tagname){}
     * </pre>
     * @param string name of open method
     * @param string name of close method
     * @access public
     * @return void
     * @see set_object
     */
    public function set_element_handler($opening_method, $closing_method) {
        $this->state_parser->handler_object_element = $this->state_parser->handler_default;
        $this->state_parser->handler_method_opening = $opening_method;
        $this->state_parser->handler_method_closing = $closing_method;
    }

    /**
     * Sets the processing instruction handler method e.g. for PHP open
     * and close tags<br />
     * The handler method must accept three arguments; the parser, the
     * PI target and data inside the PI
     * <pre>
     * function myPIHander(& $parser,$target, $data){}
     * </pre>
     * @param string name of method
     * @access public
     * @return void
     * @see set_object
     */
    public function set_pi_handler($pi_method) {
        $this->state_parser->handler_object_pi = $this->state_parser->handler_default;
        $this->state_parser->handler_method_pi = $pi_method;
    }

    /**
     * Sets the XML escape handler method e.g. for comments and doctype
     * declarations<br />
     * The handler method must accept two arguments; the parser and the
     * contents of the escaped section
     * <pre>
     * function myEscapeHander(& $parser, $data){}
     * </pre>
     * @param string name of method
     * @access public
     * @return void
     * @see set_object
     */
    public function set_escape_handler($escape_method) {
        $this->state_parser->handler_object_escape = $this->state_parser->handler_default;
        $this->state_parser->handler_method_escape = $escape_method;
    }

    /**
     * Sets the JSP/ASP markup handler<br />
     * The handler method must accept two arguments; the parser and
     * body of the JASP tag
     * <pre>
     * function myJaspHander(& $parser, $data){}
     * </pre>
     * @param string name of method
     * @access public
     * @return void
     * @see set_object
     */
    public function set_jasp_handler ($jasp_method) {
        $this->state_parser->handler_object_jasp = $this->state_parser->handler_default;
        $this->state_parser->handler_method_jasp = $jasp_method;
    }

    /**
     * Returns the current string position of the "cursor" inside the XML
     * document
     * <br />Intended for use from within a user defined handler called
     * via the $parser reference e.g.
     * <pre>
     * function myDataHandler(& $parser,$data) {
     *     echo( 'Current position: '.$parser->get_current_position() );
     * }
     * </pre>
     * @access public
     * @return int
     * @see get_length
     */
    public function get_current_position() {
        return $this->state_parser->getPosition();
    }

    /**
     * Returns the string length of the XML document being parsed
     * @access public
     * @return int
     */
    public function get_length() {
        return $this->state_parser->getLength();
    }

    /**
     * Start parsing some XML
     * @param string XML document
     * @access public
     * @return void
     */
    public function parse($data) {
        $this->state_parser->parse($data);
    }
}
?>
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Sun Feb 25 21:07:04 2007 | par Balluche grâce à PHPXref 0.7 |