| [ Index ] |
|
Code source de GeekLog 1.4.1 |
<?php
/*
 * ==========================================================================================
 *
 * This program is free software and open source software; you can redistribute
 * it and/or modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the License,
 * or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA or visit
 * http://www.gnu.org/licenses/gpl.html
 *
 * ==========================================================================================
 */

/**
 * Class file for PHP4 OOP version of kses
 *
 * This is an updated version of kses to work with PHP4 that works under E_STRICT.
 *
 * This upgrade provides the following:
 * + Version number synced to procedural version number
 * + PHPdoc style documentation has been added to the class. See http://www.phpdoc.org/ for more info.
 * + Some methods are now deprecated due to nomenclature style change. See method documentation for specifics.
 * + Kses4 now works in E_STRICT
 * + Addition of methods AddProtocols(), filterKsesTextHook(), RemoveProtocol() and RemoveProtocols()
 * + Deprecated _hook(), Protocols()
 * + Integrated code from kses 0.2.2 into class.
 * + Added methods DumpProtocols(), DumpElements()
 *
 * The syntax is kept PHP 4 compatible (var, array(), no type hints), but
 * nothing in here relies on features that newer PHP versions removed:
 * regex callbacks replace the /e modifier and get_magic_quotes_gpc() is only
 * called where magic quotes can exist.
 *
 * @package kses
 * @subpackage kses4
 */

// Compare the whole major version; looking at the first character only would
// reject PHP 10 and later.
if ((int) phpversion() < 4)
{
    die("Class kses requires PHP 4 or higher.");
}

/**
 * Only install KSES4 once
 */
if (!defined('KSES_CLASS_PHP4'))
{
    define('KSES_CLASS_PHP4', true);

    /**
     * Kses strips evil scripts!
     *
     * This class provides the capability for removing unwanted HTML/XHTML, attributes from
     * tags, and protocols contained in links. The net result is a much more powerful tool
     * than the PHP internal strip_tags()
     *
     * This is a fork of a slick piece of procedural code called 'kses' written by Ulf Harnhammar
     * The entire set of functions was wrapped in a PHP object with some internal modifications
     * by Richard Vasquez (http://www.chaos.org/) 7/25/2003
     *
     * This upgrade provides the following:
     * + Version number synced to procedural version number
     * + PHPdoc style documentation has been added to the class. See http://www.phpdoc.org/ for more info.
     * + Some methods are now deprecated due to nomenclature style change. See method documentation for specifics.
     * + Kses4 now works in E_STRICT
     * + Addition of methods AddProtocols(), filterKsesTextHook(), RemoveProtocol(), RemoveProtocols() and SetProtocols()
     * + Deprecated _hook(), Protocols()
     * + Integrated code from kses 0.2.2 into class.
     *
     * @author     Richard R. Vásquez, Jr. (Original procedural code by Ulf Härnhammar)
     * @link       http://sourceforge.net/projects/kses/ Home Page for Kses
     * @link       http://chaos.org/contact/ Contact page with current email address for Richard Vasquez
     * @copyright  Richard R. Vásquez, Jr. 2003-2005
     * @version    PHP4 OOP 0.2.2
     * @license    http://www.gnu.org/licenses/gpl.html GNU Public License
     * @package    kses
     */
    class kses4
    {
        /**#@+
         * @access private
         * @var array
         */
        var $allowed_protocols = array();
        var $allowed_html      = array();
        /**#@-*/

        /**
         * PHP 4 style constructor.
         *
         * Kept so that PHP 4 (which only knows this form) and existing code
         * calling kses4() explicitly keep working. It delegates to
         * __construct(), which is the constructor PHP 5+ picks up; PHP 8 no
         * longer treats a class-named method as a constructor at all.
         *
         * @since PHP4 OOP 0.0.1
         */
        function kses4()
        {
            $this->__construct();
        }

        /**
         * Constructor for kses.
         *
         * This sets a default collection of protocols allowed in links, and creates an
         * empty set of allowed HTML tags.
         *
         * You could add protocols such as ftp, news, gopher, mailto, irc, etc.
         * The base values the original kses provided were:
         * 'http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'mailto'
         */
        function __construct()
        {
            $this->allowed_protocols = array('http', 'ftp', 'mailto');
            $this->allowed_html      = array();
        }

        /**
         * Basic task of kses - parses $string and strips it as required.
         *
         * This method strips all the disallowed (X)HTML tags, attributes
         * and protocols from the input $string.
         *
         * @access public
         * @param  string $string String to be stripped of 'evil scripts'
         * @return string The stripped string
         * @since  PHP4 OOP 0.2.1
         */
        function Parse($string = "")
        {
            // Magic quotes only exist before PHP 5.4. get_magic_quotes_gpc()
            // is deprecated in 7.4 and removed in 8.0, so never call it there.
            if (version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc())
            {
                $string = stripslashes($string);
            }

            $string = $this->_no_null($string);
            $string = $this->_js_entities($string);
            $string = $this->_normalize_entities($string);
            $string = $this->filterKsesTextHook($string);

            return $this->_split($string);
        }

        /**
         * Allows for single/batch addition of protocols
         *
         * This method accepts one argument that can be either a string
         * or a non-empty array of strings. Each string is added via
         * AddProtocol(); entries AddProtocol() rejects are skipped.
         *
         * @access public
         * @param  mixed , A string or array of protocols that will be added to the internal list of allowed protocols.
         * @return bool Status of adding valid protocols.
         * @see    AddProtocol()
         * @since  PHP4 OOP 0.2.1
         */
        function AddProtocols()
        {
            if (func_num_args() != 1)
            {
                trigger_error("kses4::AddProtocols() did not receive an argument.", E_USER_WARNING);
                return false;
            }

            $protocol_data = func_get_arg(0);

            if (!$this->_is_protocol_batch($protocol_data))
            {
                trigger_error("kses4::AddProtocols() did not receive a string or an array.", E_USER_WARNING);
                return false;
            }

            // (array) wraps a lone string so both forms share one loop.
            foreach ((array) $protocol_data as $protocol)
            {
                $this->AddProtocol($protocol);
            }

            return true;
        }

        /**
         * Allows for single/batch addition of protocols
         *
         * @deprecated Use AddProtocols()
         * @see    AddProtocols()
         * @return bool
         * @since  PHP4 OOP 0.0.1
         */
        function Protocols()
        {
            if (func_num_args() != 1)
            {
                trigger_error("kses4::Protocols() did not receive an argument.", E_USER_WARNING);
                return false;
            }

            // Before PHP 5.3 func_get_arg() cannot be used directly as a call
            // argument, so the value goes through a variable first.
            $protocol_data = func_get_arg(0);

            return $this->AddProtocols($protocol_data);
        }

        /**
         * Adds a single protocol to $this->allowed_protocols.
         *
         * The name is trimmed, lower-cased and stripped of one trailing ':'
         * before it is stored. The list is kept sorted and free of duplicates.
         *
         * @access public
         * @param  string $protocol The name of the protocol to be added.
         * @return bool Status of adding valid protocol.
         * @since  PHP4 OOP 0.0.1
         */
        function AddProtocol($protocol = "")
        {
            if (!is_string($protocol))
            {
                trigger_error("kses4::AddProtocol() requires a string.", E_USER_WARNING);
                return false;
            }

            // Normalise first, then test for emptiness, so that ":" alone is
            // rejected instead of registering an empty protocol name.
            $protocol = $this->_clean_protocol($protocol);
            if ($protocol === '')
            {
                trigger_error("kses4::AddProtocol() tried to add an empty/NULL protocol.", E_USER_WARNING);
                return false;
            }

            if (!in_array($protocol, $this->allowed_protocols, true))
            {
                $this->allowed_protocols[] = $protocol;
                sort($this->allowed_protocols);
            }

            return true;
        }

        /**
         * Allows for single/batch replacement of protocols
         *
         * This method accepts one argument that can be either a string
         * or a non-empty array of strings.
         *
         * Existing protocols will be removed, then the argument will be
         * processed, and each string will be added via AddProtocol().
         * Nothing is removed when the argument is rejected.
         *
         * @access public
         * @param  mixed , A string or array of protocols that will be the new internal list of allowed protocols.
         * @return bool Status of replacing valid protocols.
         * @since  PHP4 OOP 0.2.2
         * @see    AddProtocol()
         */
        function SetProtocols()
        {
            if (func_num_args() != 1)
            {
                trigger_error("kses4::SetProtocols() did not receive an argument.", E_USER_WARNING);
                return false;
            }

            $protocol_data = func_get_arg(0);

            if (!$this->_is_protocol_batch($protocol_data))
            {
                trigger_error("kses4::SetProtocols() did not receive a string or an array.", E_USER_WARNING);
                return false;
            }

            $this->allowed_protocols = array();
            foreach ((array) $protocol_data as $protocol)
            {
                $this->AddProtocol($protocol);
            }

            return true;
        }

        /**
         * Raw dump of allowed protocols
         *
         * @access public
         * @return array The list of allowed protocols for this instance.
         * @since  PHP4 OOP 0.2.2
         */
        function DumpProtocols()
        {
            return $this->allowed_protocols;
        }

        /**
         * Raw dump of allowed (X)HTML elements
         *
         * @access public
         * @return array Allowed elements for this instance, keyed by tag name;
         *               each value is that tag's attribute array as stored by AddHTML().
         * @since  PHP4 OOP 0.2.2
         */
        function DumpElements()
        {
            return $this->allowed_html;
        }

        /**
         * Adds valid (X)HTML with corresponding attributes that will be kept when stripping 'evil scripts'.
         *
         * Tag name, attribute names and check names are stored lower-cased.
         * Calling this again for the same tag replaces its attribute list.
         *
         * @access public
         * @param  string $tag (X)HTML tag that will be allowed after stripping text.
         * @param  array $attribs Associative array of allowed attributes - key => attribute name - value => attribute parameter
         * @return bool Status of Adding (X)HTML and attributes.
         * @since  PHP4 OOP 0.0.1
         */
        function AddHTML($tag = "", $attribs = array())
        {
            if (!is_string($tag))
            {
                trigger_error("kses4::AddHTML() requires the tag to be a string", E_USER_WARNING);
                return false;
            }

            $tag = strtolower(trim($tag));
            if ($tag == "")
            {
                trigger_error("kses4::AddHTML() tried to add an empty/NULL tag", E_USER_WARNING);
                return false;
            }

            if (!is_array($attribs))
            {
                trigger_error("kses4::AddHTML() requires an array (even an empty one) of attributes for '$tag'", E_USER_WARNING);
                return false;
            }

            $new_attribs = array();
            foreach ($attribs as $attr_name => $attr_param)
            {
                // A non-empty array is a list of checks (maxlen, minval, ...);
                // its keys are lower-cased as well.
                if (is_array($attr_param) && count($attr_param) > 0)
                {
                    $checks = array();
                    foreach ($attr_param as $check_name => $check_value)
                    {
                        $checks[strtolower($check_name)] = $check_value;
                    }
                    $attr_param = $checks;
                }

                $new_attribs[strtolower($attr_name)] = $attr_param;
            }

            $this->allowed_html[$tag] = $new_attribs;
            return true;
        }

        /**
         * Removes a single protocol from $this->allowed_protocols.
         *
         * The name is normalised exactly like in AddProtocol(). Removing a
         * protocol that is not in the list is not an error.
         *
         * @access public
         * @param  string $protocol The name of the protocol to be removed.
         * @return bool Status of removing valid protocol.
         * @since  PHP4 OOP 0.2.1
         */
        function RemoveProtocol($protocol = "")
        {
            if (!is_string($protocol))
            {
                trigger_error("kses4::RemoveProtocol() requires a string.", E_USER_WARNING);
                return false;
            }

            $protocol = $this->_clean_protocol($protocol);
            if ($protocol === '')
            {
                trigger_error("kses4::RemoveProtocol() tried to remove an empty/NULL protocol.", E_USER_WARNING);
                return false;
            }

            if (in_array($protocol, $this->allowed_protocols, true))
            {
                $this->allowed_protocols = array_values(array_diff($this->allowed_protocols, array($protocol)));
                sort($this->allowed_protocols);
            }

            return true;
        }

        /**
         * Allows for single/batch removal of protocols
         *
         * This method accepts one argument that can be either a string
         * or a non-empty array of strings. Each string is removed via
         * RemoveProtocol().
         *
         * @access public
         * @param  mixed , A string or array of protocols that will be removed from the internal list of allowed protocols.
         * @return bool Status of removing valid protocols.
         * @see    RemoveProtocol()
         * @since  PHP4 OOP 0.2.1
         */
        function RemoveProtocols()
        {
            if (func_num_args() != 1)
            {
                trigger_error("kses4::RemoveProtocols() did not receive an argument.", E_USER_WARNING);
                return false;
            }

            $protocol_data = func_get_arg(0);

            if (!$this->_is_protocol_batch($protocol_data))
            {
                trigger_error("kses4::RemoveProtocols() did not receive a string or an array.", E_USER_WARNING);
                return false;
            }

            foreach ((array) $protocol_data as $protocol)
            {
                $this->RemoveProtocol($protocol);
            }

            // The array form used to fall off the end here and return NULL.
            return true;
        }

        /**
         * Tells whether a value is acceptable input for the batch protocol methods.
         *
         * @access private
         * @param  mixed $protocol_data Candidate argument
         * @return bool True for a string or a non-empty array
         */
        function _is_protocol_batch($protocol_data)
        {
            return is_string($protocol_data)
                || (is_array($protocol_data) && count($protocol_data) > 0);
        }

        /**
         * Normalises a protocol name for storage and lookup.
         *
         * Trims whitespace, lower-cases, and removes one inadvertent ':' at
         * the end ("HTTP:" becomes "http").
         *
         * @access private
         * @param  string $protocol Raw protocol name
         * @return string Normalised name, possibly empty
         */
        function _clean_protocol($protocol)
        {
            $protocol = strtolower(trim($protocol));

            if (substr($protocol, -1) == ':')
            {
                // Cast: before PHP 8 substr() yields false for an empty result.
                $protocol = trim((string) substr($protocol, 0, -1));
            }

            return $protocol;
        }

        /**
         * This method removes any NULL characters in $string.
         *
         * Both real NUL bytes and the two-character sequence backslash-zero
         * are removed.
         *
         * @access private
         * @param  string $string
         * @return string String without any NULL characters
         * @since  PHP4 OOP 0.0.1
         */
        function _no_null($string)
        {
            $string = preg_replace('/\0+/', '', $string);
            $string = preg_replace('/(\\\\0)+/', '', $string);
            return $string;
        }

        /**
         * This function removes the HTML JavaScript entities found in early versions of
         * Netscape 4.
         *
         * @access private
         * @param  string $string
         * @return string String without any &{...}; JavaScript entities
         * @since  PHP4 OOP 0.0.1
         */
        function _js_entities($string)
        {
            return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
        }

        /**
         * Normalizes HTML entities
         *
         * This function normalizes HTML entities. It will convert "AT&T" to the correct
         * "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
         *
         * @access private
         * @param  string $string
         * @return string String with normalized entities
         * @since  PHP4 OOP 0.0.1
         */
        function _normalize_entities($string)
        {
            // Disarm all entities by converting & to &amp;
            $string = str_replace('&', '&amp;', $string);

            // Change back the allowed entities in our entity white list:
            // named entities, decimal entities (range-checked in the
            // callback) and 2 or 4 digit hexadecimal entities.
            $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string);
            $string = preg_replace_callback('/&amp;#0*([0-9]{1,5});/', array($this, '_normalize_entities_cb'), $string);
            $string = preg_replace('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', '&#\\1\\2;', $string);

            return $string;
        }

        /**
         * preg_replace_callback() adapter for _normalize_entities2().
         *
         * @access private
         * @param  array $matches Regex match; index 1 holds the digits without leading zeroes
         * @return string Replacement text
         */
        function _normalize_entities_cb($matches)
        {
            return $this->_normalize_entities2($matches[1]);
        }

        /**
         * Helper method used by _normalize_entities()
         *
         * This method helps _normalize_entities() to only accept 16 bit values
         * and nothing more for &#number; entities: numbers up to 65535 come
         * back as a live numeric entity, anything larger stays disarmed
         * (with its ampersand escaped).
         *
         * @access private
         * @param  string $i Decimal digits of the entity
         * @return string Normalized numeric entity
         * @see    _normalize_entities()
         * @since  PHP4 OOP 0.0.1
         */
        function _normalize_entities2($i)
        {
            return (((int) $i > 65535) ? "&amp;#$i;" : "&#$i;");
        }

        /**
         * Allows for additional user defined modifications to text.
         *
         * @deprecated use filterKsesTextHook()
         * @param  string $string
         * @see    filterKsesTextHook()
         * @return string
         * @since  PHP4 OOP 0.0.1
         */
        function _hook($string)
        {
            return $this->filterKsesTextHook($string);
        }

        /**
         * Allows for additional user defined modifications to text.
         *
         * Parse() runs every string through this method after entity
         * normalisation and before tags are split. Here it returns the input
         * string 'as is'; it is provided for users to extend the kses class
         * for their own requirements.
         *
         * @access public
         * @param  string $string String to perform additional modifications on.
         * @return string User modified string.
         * @see    Parse()
         */
        function filterKsesTextHook($string)
        {
            return $string;
        }

        /**
         * This method goes through an array, and changes the keys to all lower case.
         *
         * Two levels are handled: the outer keys and the keys of each inner
         * array. Outer values that are not arrays become empty arrays.
         *
         * @access private
         * @param  array $inarray Associative array
         * @return array Modified array
         * @since  PHP4 OOP 0.0.1
         */
        function _array_lc($inarray)
        {
            $outarray = array();

            if (!is_array($inarray))
            {
                return $outarray;
            }

            foreach ($inarray as $inkey => $inval)
            {
                $outkey = strtolower($inkey);
                $outarray[$outkey] = array();

                if (is_array($inval))
                {
                    foreach ($inval as $inkey2 => $inval2)
                    {
                        $outarray[$outkey][strtolower($inkey2)] = $inval2;
                    }
                }
            }

            return $outarray;
        }

        /**
         * This method searches for HTML tags, no matter how malformed. It also
         * matches stray ">" characters.
         *
         * Every match is handed to _split2() and replaced by its result.
         *
         * @access private
         * @param  string $string
         * @return string Text with every tag filtered
         * @since  PHP4 OOP 0.0.1
         */
        function _split($string)
        {
            return preg_replace_callback(
                '%(<'.      # EITHER: <
                '[^>]*'.    # things that aren't >
                '(>|$)'.    # > or end of string
                '|>)%',     # OR: just a >
                array($this, '_split_cb'),
                $string
            );
        }

        /**
         * preg_replace_callback() adapter for _split2().
         *
         * @access private
         * @param  array $matches Regex match; index 1 holds the tag or the stray ">"
         * @return string Replacement text
         */
        function _split_cb($matches)
        {
            return $this->_split2($matches[1]);
        }

        /**
         * This method strips out disallowed and/or mangled (X)HTML tags along with assigned attributes.
         *
         * This method does a lot of work. It rejects some very malformed things
         * like <:::>. It returns an empty string if the element isn't allowed (look
         * ma, no strip_tags()!). Otherwise it splits the tag into an element and an
         * allowed attribute list.
         *
         * @access private
         * @param  string $string One tag, or a single ">"
         * @return string Modified string minus disallowed/mangled (X)HTML and attributes
         * @since  PHP4 OOP 0.0.1
         */
        function _split2($string)
        {
            // No _stripslashes() here any more: it only undid the quoting that
            // preg_replace(//e) applied, and the callback gets the raw match.

            if (substr($string, 0, 1) != '<')
            {
                // It matched a ">" character
                return '&gt;';
            }

            if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches))
            {
                // It's seriously malformed
                return '';
            }

            $slash    = trim($matches[1]);
            $elem     = $matches[2];
            $attrlist = $matches[3];
            $tag      = strtolower($elem);

            if (!isset($this->allowed_html[$tag]) || !is_array($this->allowed_html[$tag]))
            {
                // They are using a not allowed HTML element
                return '';
            }

            if ($slash != '')
            {
                // No attributes are allowed for closing elements
                return "<$slash$elem>";
            }

            return $this->_attr("$slash$elem", $attrlist);
        }

        /**
         * This method strips out disallowed attributes for (X)HTML tags.
         *
         * This method removes all attributes if none are allowed for this element.
         * If some are allowed it calls $this->_hair() to split them further, and then it
         * builds up new HTML code from the data that $this->_hair() returns. It also
         * removes "<" and ">" characters, if there are any left. One more thing it
         * does is to check if the tag has a closing XHTML slash, and if it does,
         * it puts one in the returned code as well.
         *
         * @access private
         * @param  string $element (X)HTML tag to check
         * @param  string $attr Text containing attributes to check for validity.
         * @return string Resulting valid (X)HTML tag
         * @see    _hair()
         * @since  PHP4 OOP 0.0.1
         */
        function _attr($element, $attr)
        {
            // Is there a closing XHTML slash at the end of the attributes?
            $xhtml_slash = preg_match('%\s/\s*$%', $attr) ? ' /' : '';
            $tag         = strtolower($element);

            // Are any attributes allowed at all for this element?
            if (
                !isset($this->allowed_html[$tag]) ||
                !is_array($this->allowed_html[$tag]) ||
                count($this->allowed_html[$tag]) == 0
            )
            {
                return "<$element$xhtml_slash>";
            }

            $allowed = $this->allowed_html[$tag];

            // Go through the parsed attributes and collect the allowed ones.
            $attr2 = '';
            foreach ($this->_hair($attr) as $arreach)
            {
                $name = strtolower($arreach['name']);
                if (!isset($allowed[$name]))
                {
                    continue;
                }

                $current = $allowed[$name];

                // '', 0 and false mark the attribute as not allowed. Spelled
                // out because PHP 8 no longer treats 0 == '' as true.
                if ($current === '' || $current === 0 || $current === 0.0 || $current === false)
                {
                    continue;
                }

                if (!is_array($current))
                {
                    // there are no checks
                    $attr2 .= ' '.$arreach['whole'];
                    continue;
                }

                // NOTE(review): an EMPTY check list drops the attribute. That is
                // how this class has always behaved; it may not be intended.
                if (count($current) == 0)
                {
                    continue;
                }

                // there are some checks; every one of them has to pass
                $ok = true;
                foreach ($current as $currkey => $currval)
                {
                    if (!$this->_check_attr_val($arreach['value'], $arreach['vless'], $currkey, $currval))
                    {
                        $ok = false;
                        break;
                    }
                }

                if ($ok)
                {
                    $attr2 .= ' '.$arreach['whole'];
                }
            }

            // Remove any "<" or ">" characters
            $attr2 = preg_replace('/[<>]/', '', $attr2);
            return "<$element$attr2$xhtml_slash>";
        }

        /**
         * This method combs through an attribute list string and returns an array of attributes and values.
         *
         * This method does a lot of work. It parses an attribute list into an array
         * with attribute data, and tries to do the right thing even if it gets weird
         * input. It will add quotes around attribute values that don't have any quotes
         * or apostrophes around them, to make it easier to produce HTML code that will
         * conform to W3C's HTML specification. It will also remove bad URL protocols
         * from attribute values.
         *
         * Each returned entry has the keys 'name', 'value', 'whole' (the
         * attribute as it should be re-emitted) and 'vless' ('y' when the
         * attribute had no value, 'n' otherwise).
         *
         * @access private
         * @param  string $attr Text containing tag attributes for parsing
         * @return array List of attribute entries
         * @since  PHP4 OOP 0.0.1
         */
        function _hair($attr)
        {
            $attrarr  = array();
            $mode     = 0;      // 0 = expect a name, 1 = expect '=' or nothing, 2 = expect a value
            $attrname = '';

            // Accepted value notations with the quote used when re-emitting.
            // Unquoted values get double quotes to conform to W3C's HTML spec.
            $value_forms = array(
                array('/^"([^"]*)"(\s+|$)/', '"'),
                array("/^'([^']*)'(\s+|$)/", "'"),
                array("%^([^\s\"']+)(\s+|$)%", '"')
            );

            // Loop through the whole attribute list
            while (strlen($attr) != 0)
            {
                // Was the last operation successful?
                $working = 0;

                switch ($mode)
                {
                    case 0: // attribute name, href for instance
                        if (preg_match('/^([-a-zA-Z]+)/', $attr, $match))
                        {
                            $attrname = $match[1];
                            $working  = $mode = 1;
                            $attr     = preg_replace('/^[-a-zA-Z]+/', '', $attr);
                        }
                        break;

                    case 1: // equals sign or valueless ("selected")
                        if (preg_match('/^\s*=\s*/', $attr))
                        {
                            $working = 1;
                            $mode    = 2;
                            $attr    = preg_replace('/^\s*=\s*/', '', $attr);
                        }
                        elseif (preg_match('/^\s+/', $attr))
                        {
                            $working   = 1;
                            $mode      = 0;
                            $attrarr[] = array(
                                'name'  => $attrname,
                                'value' => '',
                                'whole' => $attrname,
                                'vless' => 'y'
                            );
                            $attr = preg_replace('/^\s+/', '', $attr);
                        }
                        break;

                    case 2: // attribute value, a URL after href= for instance
                        foreach ($value_forms as $form)
                        {
                            if (!preg_match($form[0], $attr, $match))
                            {
                                continue;
                            }

                            $thisval   = $this->_bad_protocol($match[1]);
                            $attrarr[] = array(
                                'name'  => $attrname,
                                'value' => $thisval,
                                'whole' => $attrname.'='.$form[1].$thisval.$form[1],
                                'vless' => 'n'
                            );
                            $working = 1;
                            $mode    = 0;
                            $attr    = preg_replace($form[0], '', $attr);
                            break; // first matching notation wins
                        }
                        break;
                }

                if ($working == 0) // not well formed, remove and try again
                {
                    $attr = $this->_html_error($attr);
                    $mode = 0;
                }
            }

            // special case, for when the attribute list ends with a valueless
            // attribute like "selected"
            if ($mode == 1)
            {
                $attrarr[] = array(
                    'name'  => $attrname,
                    'value' => '',
                    'whole' => $attrname,
                    'vless' => 'y'
                );
            }

            return $attrarr;
        }

        /**
         * This method removes disallowed protocols.
         *
         * This method removes all non-allowed protocols from the beginning of
         * $string. It ignores whitespace and the case of the letters, and it does
         * understand HTML entities. It does its work in a while loop, so it won't be
         * fooled by a string like "javascript:javascript:alert(57)".
         *
         * @access private
         * @param  string $string String to check for protocols
         * @return string String with removed protocols
         * @since  PHP4 OOP 0.0.1
         */
        function _bad_protocol($string)
        {
            $string = $this->_no_null($string);
            $string = preg_replace('/\xad+/', '', $string); // deals with Opera "feature"

            // Repeat until a pass changes nothing any more.
            do
            {
                $previous = $string;
                $string   = $this->_bad_protocol_once($string);
            }
            while ($string != $previous);

            return $string;
        }

        /**
         * Helper method used by _bad_protocol()
         *
         * This function searches for URL protocols at the beginning of $string, while
         * handling whitespace and HTML entities. The colon may be written
         * literally or as the entity &#58; / &#x3a;.
         *
         * @access private
         * @param  string $string String to check for protocols
         * @return string String with removed protocols
         * @see    _bad_protocol()
         * @since  PHP4 OOP 0.0.1
         */
        function _bad_protocol_once($string)
        {
            return preg_replace_callback(
                '/^((&[^;]*;|[\sA-Za-z0-9])*)'.
                '(:|&#58;|&#[Xx]3[Aa];)\s*/',
                array($this, '_bad_protocol_once_cb'),
                $string
            );
        }

        /**
         * preg_replace_callback() adapter for _bad_protocol_once2().
         *
         * @access private
         * @param  array $matches Regex match; index 1 holds the text before the colon
         * @return string Replacement text
         */
        function _bad_protocol_once_cb($matches)
        {
            return $this->_bad_protocol_once2($matches[1]);
        }

        /**
         * Helper method used by _bad_protocol_once()
         *
         * This function processes URL protocols, checks to see if they're in the white-
         * list or not, and returns different data depending on the answer.
         *
         * @access private
         * @param  string $string Text found in front of the colon
         * @return string "protocol:" when allowed, '' otherwise
         * @see    _bad_protocol()
         * @see    _bad_protocol_once()
         * @since  PHP4 OOP 0.0.1
         */
        function _bad_protocol_once2($string)
        {
            $string = $this->_decode_entities($string);
            $string = preg_replace('/\s/', '', $string);
            $string = $this->_no_null($string);
            $string = preg_replace('/\xad+/', '', $string); // deals with Opera "feature"
            $string = strtolower($string);

            if (is_array($this->allowed_protocols))
            {
                foreach ($this->allowed_protocols as $one_protocol)
                {
                    if (strtolower($one_protocol) == $string)
                    {
                        return "$string:";
                    }
                }
            }

            return '';
        }

        /**
         * This function performs different checks for attribute values.
         *
         * The currently implemented checks are "maxlen", "minlen", "maxval",
         * "minval" and "valueless". An unknown check name always passes.
         *
         * @access private
         * @param  string $value The value of the attribute to be checked.
         * @param  string $vless 'y' when the attribute came without a value, 'n' otherwise
         * @param  string $checkname The check to be performed
         * @param  string $checkvalue The value that is to be checked against
         * @return bool Indicates whether the check passed or not
         * @since  PHP4 OOP 0.0.1
         */
        function _check_attr_val($value, $vless, $checkname, $checkvalue)
        {
            // Shape required by maxval/minval: an integer from 0 and up without
            // an excessive amount of digits or whitespace.
            $small_int = '/^\s{0,6}[0-9]{1,6}\s{0,6}$/';

            switch (strtolower($checkname))
            {
                // The attribute value must not be longer than the given
                // length. This can be used to avoid Buffer Overflows in WWW
                // clients and various Internet servers.
                case 'maxlen':
                    return !(strlen($value) > $checkvalue);

                // The attribute value must not be shorter than the given length.
                case 'minlen':
                    return !(strlen($value) < $checkvalue);

                // The value must be a small integer not greater than the given
                // value. This check can be used to avoid Denial of Service attacks.
                case 'maxval':
                    return preg_match($small_int, $value) && !((int) trim($value) > $checkvalue);

                // The value must be a small integer not smaller than the given value.
                case 'minval':
                    return preg_match($small_int, $value) && !((int) trim($value) < $checkvalue);

                // Checks if the attribute has a value (like <a href="blah">)
                // or not (<option selected>). If the given value is a "y" or a
                // "Y", the attribute must not have a value. If it is an "n" or
                // an "N", the attribute must have one.
                case 'valueless':
                    return strtolower($checkvalue) == $vless;
            }

            return true;
        }

        /**
         * Changes \" to "
         *
         * This function changes the character sequence \" to just "
         * It leaves all other slashes alone. It was needed to undo the quoting
         * that preg_replace(//e) applied; the class no longer calls it, but it
         * is kept for subclasses that may.
         *
         * @access private
         * @param  string $string The string to be stripped.
         * @return string string stripped of \"
         * @since  PHP4 OOP 0.0.1
         */
        function _stripslashes($string)
        {
            return preg_replace('%\\\\"%', '"', $string);
        }

        /**
         * helper method for _hair()
         *
         * This function deals with parsing errors in _hair(). The general plan is
         * to remove everything to and including some whitespace, but it deals with
         * quotes and apostrophes as well.
         *
         * @access private
         * @param  string $string The string to be stripped.
         * @return string Remainder after the unparsable leading part
         * @see    _hair()
         * @since  PHP4 OOP 0.0.1
         */
        function _html_error($string)
        {
            return preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string);
        }

        /**
         * Decodes numeric HTML entities
         *
         * This method decodes numeric HTML entities (&#65; and &#x41;). It doesn't
         * do anything with other entities like &auml;, but we don't need them in the
         * URL protocol white listing system anyway.
         *
         * @access private
         * @param  string $string Text that may contain numeric entities
         * @return string Text with the numeric entities decoded
         * @since  PHP4 OOP 0.0.1
         */
        function _decode_entities($string)
        {
            $string = preg_replace_callback('/&#([0-9]+);/', array($this, '_decode_dec_entity_cb'), $string);
            $string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/', array($this, '_decode_hex_entity_cb'), $string);
            return $string;
        }

        /**
         * preg_replace_callback() helper: decimal entity to character.
         *
         * @access private
         * @param  array $matches Regex match; index 1 holds the decimal digits
         * @return string Single character from chr()
         */
        function _decode_dec_entity_cb($matches)
        {
            return chr((int) $matches[1]);
        }

        /**
         * preg_replace_callback() helper: hexadecimal entity to character.
         *
         * @access private
         * @param  array $matches Regex match; index 1 holds the hex digits
         * @return string Single character from chr()
         */
        function _decode_hex_entity_cb($matches)
        {
            return chr((int) hexdec($matches[1]));
        }

        /**
         * Returns PHP4 OOP version # of kses.
         *
         * The version number is synced to procedural kses.
         *
         * @access public
         * @return string Version number
         * @since  PHP4 OOP 0.0.1
         */
        function _version()
        {
            return 'PHP4 0.2.2 (OOP fork of procedural kses 0.2.2)';
        }
    }
}
?>
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
| Généré le : Wed Nov 21 12:27:40 2007 | par Balluche grâce à PHPXref 0.7 |
|