| [ Index ] |
|
Code source de PHP NUKE 7.9 |
<?php

# kses 0.2.1 - HTML/XHTML filter that only allows some elements and attributes
# Copyright (C) 2002, 2003  Ulf Harnhammar
#
# This program is free software and open source software; you can redistribute
# it and/or modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the License,
# or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  or visit
# http://www.gnu.org/licenses/gpl.html
#
# *** CONTACT INFORMATION ***
#
# E-mail:      metaur at users dot sourceforge dot net
# Web page:    http://sourceforge.net/projects/kses
# Paper mail:  (not at the moment)
#
# [kses strips evil scripts!]
/* Journal 2.0 Enhanced and Debugged 2004                               */
/* by sixonetonoffun -- http://www.netflake.com --                      */
/* Images Created by GanjaUK -- http://www.GanjaUK.com                  */
/************************************************************************/
#
# Maintenance notes (changes relative to the stock 0.2.1 code):
#  - Every preg_replace() call that relied on the /e (eval) modifier now uses
#    preg_replace_callback() with a closure, so PHP 5.3 or later is required.
#    The /e modifier no longer exists in PHP 7+, where those calls returned
#    NULL and the filter produced no output at all.
#  - Closing tags are emitted directly by kses_split2() instead of being
#    looked up as an element called "/name" in the whitelist.
#  - Whitelist lookups no longer read undefined array indexes.

if ( !defined('MODULE_FILE') )
{
    die("You can't access this file directly...");
}

/**
 * Filters $string so that only whitelisted HTML elements, attribute names and
 * attribute values, plus sane HTML entities, remain.
 *
 * Slashes added by PHP's magic quotes must be removed by the caller before
 * calling this function.
 *
 * @param string $string            Untrusted HTML.
 * @param array  $allowed_html      Whitelist: element name => array of
 *                                  attribute name => checks. Keys are matched
 *                                  case-insensitively.
 * @param array  $allowed_protocols URL protocols that may start an attribute
 *                                  value.
 * @return string Filtered HTML.
 */
function kses($string, $allowed_html, $allowed_protocols =
               array('http', 'https', 'ftp', 'news', 'nntp', 'telnet',
                     'gopher', 'mailto'))
{
    $string = kses_no_null($string);
    $string = kses_js_entities($string);
    $string = kses_normalize_entities($string);
    $string = kses_hook($string);
    $allowed_html_fixed = kses_array_lc($allowed_html);

    return kses_split($string, $allowed_html_fixed, $allowed_protocols);
}


/**
 * Extension point: add any kses hooks here. By default the string is returned
 * untouched.
 *
 * @param string $string Text that has already had NULs removed and entities
 *                       normalized by kses().
 * @return string
 */
function kses_hook($string)
{
    return $string;
}


/**
 * Returns the kses version number.
 *
 * @return string
 */
function kses_version()
{
    return '0.2.1';
}


/**
 * Finds every HTML tag, no matter how malformed, as well as every stray ">"
 * character, and replaces each one with the result of kses_split2().
 *
 * @param string $string            Text to scan.
 * @param array  $allowed_html      Whitelist with lower-case keys.
 * @param array  $allowed_protocols Allowed URL protocols.
 * @return string Filtered text; an empty string if the regex engine fails, so
 *                that unfiltered input can never leak through.
 */
function kses_split($string, $allowed_html, $allowed_protocols)
{
    $filtered = preg_replace_callback(
        '%(<'.       # EITHER: <
        '[^>]*'.     # things that aren't >
        '(>|$)'.     # > or end of string
        '|>)%',      # OR: just a >
        function ($match) use ($allowed_html, $allowed_protocols) {
            return kses_split2($match[1], $allowed_html, $allowed_protocols);
        },
        $string
    );

    return ($filtered === null) ? '' : $filtered;
}


/**
 * Handles a single tag candidate found by kses_split().
 *
 * Rejects very malformed things like <:::>, returns an empty string when the
 * element is not whitelisted, and otherwise splits the tag into an element
 * name and an attribute list which is handed to kses_attr().
 *
 * The matched text is used exactly as it appeared in the input: the old
 * kses_stripslashes() call only existed to undo the quoting performed by
 * preg_replace(//e) and would now corrupt a literal \" sequence.
 *
 * @param string $string            A "<...>" fragment or a lone ">".
 * @param array  $allowed_html      Whitelist with lower-case keys.
 * @param array  $allowed_protocols Allowed URL protocols.
 * @return string Sanitized tag, "&gt;" for a stray ">", or "".
 */
function kses_split2($string, $allowed_html, $allowed_protocols)
{
    if (substr($string, 0, 1) != '<') {
        return '&gt;';  # It matched a ">" character
    }

    if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches)) {
        return '';      # It's seriously malformed
    }

    $slash = trim($matches[1]);
    $elem = $matches[2];
    $attrlist = $matches[3];

    $key = strtolower($elem);
    if (!isset($allowed_html[$key]) || !is_array($allowed_html[$key])) {
        return '';      # They are using a not allowed HTML element
    }

    if ($slash != '') {
        return "<$slash$elem>";  # No attributes are allowed for closing elements
    }

    return kses_attr($elem, $attrlist, $allowed_html, $allowed_protocols);
}


/**
 * Rebuilds a tag from its element name and raw attribute list.
 *
 * All attributes are dropped when none are allowed for the element. Otherwise
 * kses_hair() parses the list and only attributes that are whitelisted and
 * pass their checks are kept. Any "<" or ">" left in the attributes is
 * removed, and a closing XHTML slash is preserved.
 *
 * @param string $element           Element name as written in the input.
 * @param string $attr              Raw attribute list.
 * @param array  $allowed_html      Whitelist with lower-case keys.
 * @param array  $allowed_protocols Allowed URL protocols.
 * @return string The rebuilt tag.
 */
function kses_attr($element, $attr, $allowed_html, $allowed_protocols)
{
    # Is there a closing XHTML slash at the end of the attributes?
    $xhtml_slash = '';
    if (preg_match('%\s/\s*$%', $attr)) {
        $xhtml_slash = ' /';
    }

    # Are any attributes allowed at all for this element?
    $key = strtolower($element);
    $allowed_attrs = array();
    if (isset($allowed_html[$key]) && is_array($allowed_html[$key])) {
        $allowed_attrs = $allowed_html[$key];
    }

    if (count($allowed_attrs) == 0) {
        return "<$element$xhtml_slash>";
    }

    # Split it
    $attrarr = kses_hair($attr, $allowed_protocols);

    # Go through $attrarr, and save the allowed attributes for this element
    # in $attr2
    $attr2 = '';

    foreach ($attrarr as $arreach) {
        $name = strtolower($arreach['name']);

        if (empty($allowed_attrs[$name])) {
            continue;  # the attribute is not allowed
        }

        $current = $allowed_attrs[$name];

        if (!is_array($current)) {
            $attr2 .= ' '.$arreach['whole'];  # there are no checks
            continue;
        }

        # there are some checks; every one of them has to pass
        $ok = true;
        foreach ($current as $currkey => $currval) {
            if (!kses_check_attr_val($arreach['value'], $arreach['vless'],
                                     $currkey, $currval)) {
                $ok = false;
                break;
            }
        }

        if ($ok) {
            $attr2 .= ' '.$arreach['whole'];  # it passed them
        }
    }

    # Remove any "<" or ">" characters
    $attr2 = str_replace(array('<', '>'), '', $attr2);

    return "<$element$attr2$xhtml_slash>";
}


/**
 * Parses an attribute list into an array of attribute records.
 *
 * Each record has the keys 'name', 'value', 'whole' (the attribute as it
 * should be written out) and 'vless' ('y' for a valueless attribute, 'n'
 * otherwise). Unquoted values get double quotes added, and bad URL protocols
 * are removed from every value via kses_bad_protocol(). Text that cannot be
 * parsed is skipped with kses_html_error().
 *
 * @param string $attr              Raw attribute list.
 * @param array  $allowed_protocols Allowed URL protocols.
 * @return array List of attribute records.
 */
function kses_hair($attr, $allowed_protocols)
{
    $attrarr = array();
    $mode = 0;
    $attrname = '';

    # Value syntaxes tried in this order, mapped to the quote character used
    # when the attribute is written out again. Unquoted values get double
    # quotes to conform to W3C's HTML spec.
    $value_patterns = array(
        '/^"([^"]*)"(\s+|$)/'    => '"',   # "value"
        '/^\'([^\']*)\'(\s+|$)/' => "'",   # 'value'
        '%^([^\s"\']+)(\s+|$)%'  => '"',   # value
    );

    # Loop through the whole attribute list

    while (strlen($attr) != 0) {
        $working = 0;  # Was the last operation successful?

        switch ($mode) {
            case 0:  # attribute name, href for instance
                if (preg_match('/^([-a-zA-Z]+)/', $attr, $match)) {
                    $attrname = $match[1];
                    $working = $mode = 1;
                    $attr = (string) substr($attr, strlen($match[0]));
                }
                break;

            case 1:  # equals sign or valueless ("selected")
                if (preg_match('/^\s*=\s*/', $attr, $match)) {  # equals sign
                    $working = 1;
                    $mode = 2;
                    $attr = (string) substr($attr, strlen($match[0]));
                    break;
                }

                if (preg_match('/^\s+/', $attr, $match)) {  # valueless
                    $working = 1;
                    $mode = 0;
                    $attrarr[] = array(
                        'name'  => $attrname,
                        'value' => '',
                        'whole' => $attrname,
                        'vless' => 'y',
                    );
                    $attr = (string) substr($attr, strlen($match[0]));
                }
                break;

            case 2:  # attribute value, a URL after href= for instance
                foreach ($value_patterns as $pattern => $quote) {
                    if (preg_match($pattern, $attr, $match)) {
                        $thisval = kses_bad_protocol($match[1], $allowed_protocols);

                        $attrarr[] = array(
                            'name'  => $attrname,
                            'value' => $thisval,
                            'whole' => $attrname.'='.$quote.$thisval.$quote,
                            'vless' => 'n',
                        );
                        $working = 1;
                        $mode = 0;
                        $attr = (string) substr($attr, strlen($match[0]));
                        break;  # leaves the foreach only
                    }
                }
                break;
        }

        if ($working == 0) {  # not well formed, remove and try again
            $attr = kses_html_error($attr);
            $mode = 0;
        }
    }

    if ($mode == 1) {
        # special case, for when the attribute list ends with a valueless
        # attribute like "selected"
        $attrarr[] = array(
            'name'  => $attrname,
            'value' => '',
            'whole' => $attrname,
            'vless' => 'y',
        );
    }

    return $attrarr;
}


/**
 * Performs one named check on an attribute value.
 *
 * Implemented checks: "maxlen", "minlen", "maxval", "minval" and "valueless".
 * Unknown check names always pass.
 *
 * @param string $value      Attribute value.
 * @param string $vless      'y' if the attribute had no value, 'n' otherwise.
 * @param string $checkname  Name of the check (case-insensitive).
 * @param mixed  $checkvalue Limit or flag the check compares against.
 * @return bool True when the value passes the check.
 */
function kses_check_attr_val($value, $vless, $checkname, $checkvalue)
{
    # Non-negative integers of at most six digits, with at most six whitespace
    # characters on each side (keeps absurdly long numbers out).
    $int_pattern = '/^\s{0,6}[0-9]{1,6}\s{0,6}$/';

    switch (strtolower($checkname)) {
        case 'maxlen':
            # The value must not be longer than the given length.
            return !(strlen($value) > $checkvalue);

        case 'minlen':
            # The value must not be shorter than the given length.
            return !(strlen($value) < $checkvalue);

        case 'maxval':
            # The value must be a sane integer from 0 and up and must not be
            # greater than the given value.
            if (!preg_match($int_pattern, $value)) {
                return false;
            }
            return !((int) trim($value) > $checkvalue);

        case 'minval':
            # The value must be a sane integer from 0 and up and must not be
            # smaller than the given value.
            if (!preg_match($int_pattern, $value)) {
                return false;
            }
            return !((int) trim($value) < $checkvalue);

        case 'valueless':
            # "y"/"Y": the attribute must not have a value (<option selected>).
            # "n"/"N": the attribute must have one (<a href="blah">).
            return strtolower($checkvalue) == $vless;
    }

    return true;
}


/**
 * Removes all non-allowed protocols from the beginning of $string.
 *
 * Whitespace, letter case and HTML entities are taken into account. The work
 * is repeated until the string stops changing, so nested attempts such as
 * "javascript:javascript:alert(57)" are stripped completely.
 *
 * @param string $string            Attribute value.
 * @param array  $allowed_protocols Allowed URL protocols.
 * @return string
 */
function kses_bad_protocol($string, $allowed_protocols)
{
    $string = kses_no_null($string);

    do {
        $previous = $string;
        $string = kses_bad_protocol_once($string, $allowed_protocols);
    } while ($string !== $previous);

    return $string;
}


/**
 * Removes NUL bytes, literal "\0" sequences and chr(173) bytes from $string.
 *
 * NOTE(review): chr(173) (0xAD) is stripped byte-wise because of an old Opera
 * quirk. In UTF-8 text that byte is also part of multi-byte characters, so
 * such characters get damaged. Kept as is because this is a security filter;
 * confirm the input encoding before relaxing it.
 *
 * @param string $string
 * @return string
 */
function kses_no_null($string)
{
    $string = preg_replace('/\0+/', '', $string);
    $string = preg_replace('/(\\\\0)+/', '', $string);

    $string = preg_replace('/\xad+/', '', $string);  # deals with Opera "feature"

    return $string;
}


/**
 * Changes the character sequence \" to just ". All other slashes are left
 * alone.
 *
 * This was needed to undo the quoting done by preg_replace(//e). kses itself
 * no longer calls it; it is kept so that external callers keep working.
 *
 * @param string $string
 * @return string
 */
function kses_stripslashes($string)
{
    return preg_replace('%\\\\"%', '"', $string);
}


/**
 * Returns a copy of the whitelist with element and attribute names changed to
 * lower case.
 *
 * @param array $inarray Element name => array of attribute name => checks.
 * @return array
 */
function kses_array_lc($inarray)
{
    $outarray = array();

    foreach ($inarray as $inkey => $inval) {
        $outkey = strtolower($inkey);
        $outarray[$outkey] = array();

        if (!is_array($inval)) {
            continue;  # element without an attribute list: no attributes allowed
        }

        foreach ($inval as $inkey2 => $inval2) {
            $outarray[$outkey][strtolower($inkey2)] = $inval2;
        }
    }

    return $outarray;
}


/**
 * Removes the HTML JavaScript entities ("&{...};") found in early versions of
 * Netscape 4.
 *
 * @param string $string
 * @return string
 */
function kses_js_entities($string)
{
    return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
}


/**
 * Deals with parsing errors in kses_hair(): removes everything up to and
 * including the next run of whitespace, treating quoted and apostrophed
 * sections as units.
 *
 * @param string $string Remaining attribute list.
 * @return string The rest of the list; "" if the regex engine fails, which
 *                also guarantees that the loop in kses_hair() terminates.
 */
function kses_html_error($string)
{
    return (string) preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string);
}


/**
 * Looks for one URL protocol at the beginning of $string, handling whitespace
 * and HTML entities, and replaces it with what kses_bad_protocol_once2()
 * returns for it.
 *
 * @param string $string            Attribute value.
 * @param array  $allowed_protocols Allowed URL protocols.
 * @return string
 */
function kses_bad_protocol_once($string, $allowed_protocols)
{
    $result = preg_replace_callback(
        '/^((&[^;]*;|[\sA-Za-z0-9])*)'.
        '(:|&#58;|&#[Xx]3[Aa];)\s*/',
        function ($match) use ($allowed_protocols) {
            return kses_bad_protocol_once2($match[1], $allowed_protocols);
        },
        $string
    );

    return ($result === null) ? '' : $result;
}


/**
 * Checks a protocol candidate against the whitelist.
 *
 * @param string $string            Text found in front of the colon.
 * @param array  $allowed_protocols Allowed URL protocols.
 * @return string The normalized protocol followed by ":" when it is allowed,
 *                otherwise "".
 */
function kses_bad_protocol_once2($string, $allowed_protocols)
{
    $string2 = kses_decode_entities($string);
    $string2 = preg_replace('/\s/', '', $string2);
    $string2 = kses_no_null($string2);
    $string2 = strtolower($string2);

    foreach ($allowed_protocols as $one_protocol) {
        if (strtolower($one_protocol) === $string2) {
            return "$string2:";
        }
    }

    return '';
}


/**
 * Normalizes HTML entities.
 *
 * Converts "AT&T" to the correct "AT&amp;T", "&#00058;" to "&#58;",
 * "&#XYZZY;" to "&amp;#XYZZY;" and so on.
 *
 * @param string $string
 * @return string
 */
function kses_normalize_entities($string)
{
    # Disarm all entities by converting & to &amp;

    $string = str_replace('&', '&amp;', $string);

    # Change back the allowed entities in our entity whitelist

    $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/',
                           '&\\1;', $string);
    $string = preg_replace_callback(
        '/&amp;#0*([0-9]{1,5});/',
        function ($match) {
            return kses_normalize_entities2($match[1]);
        },
        $string
    );
    $string = preg_replace('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/',
                           '&#\\1\\2;', $string);

    return $string;
}


/**
 * Helps kses_normalize_entities() to only accept 16 bit values and nothing
 * more for &#number; entities.
 *
 * @param string $i Decimal digits of the entity, without leading zeroes.
 * @return string "&#N;" when N fits in 16 bits, otherwise the disarmed form
 *                "&amp;#N;".
 */
function kses_normalize_entities2($i)
{
    return (($i > 65535) ? "&amp;#$i;" : "&#$i;");
}


/**
 * Decodes numeric HTML entities (&#65; and &#x41;).
 *
 * Other entities like &auml; are left alone; they are not needed by the URL
 * protocol whitelisting system. Code points are reduced modulo 256, exactly
 * as chr() itself does.
 *
 * @param string $string
 * @return string
 */
function kses_decode_entities($string)
{
    $string = preg_replace_callback(
        '/&#([0-9]+);/',
        function ($match) {
            # intval() saturates on overflow instead of yielding a float
            return chr(intval($match[1], 10) & 0xFF);
        },
        $string
    );
    $string = preg_replace_callback(
        '/&#[Xx]([0-9A-Fa-f]+);/',
        function ($match) {
            return chr(intval($match[1], 16) & 0xFF);
        },
        $string
    );

    return $string;
}

?>
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
| Généré le : Sun Apr 1 11:11:59 2007 | par Balluche grâce à PHPXref 0.7 |