[ Index ]
 

Code source de PHP NUKE 7.9

Accédez au Source d'autres logiciels libres | Soutenez Angelica Josefina !

title

Body

[fermer]

/modules/Journal/ -> kses.php (source)

   1  <?php
   2  
   3  # kses 0.2.1 - HTML/XHTML filter that only allows some elements and attributes

   4  # Copyright (C) 2002, 2003  Ulf Harnhammar

   5  #

   6  # This program is free software and open source software; you can redistribute

   7  # it and/or modify it under the terms of the GNU General Public License as

   8  # published by the Free Software Foundation; either version 2 of the License,

   9  # or (at your option) any later version.

  10  #

  11  # This program is distributed in the hope that it will be useful, but WITHOUT

  12  # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or

  13  # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for

  14  # more details.

  15  #

  16  # You should have received a copy of the GNU General Public License along

  17  # with this program; if not, write to the Free Software Foundation, Inc.,

  18  # 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  or visit

  19  # http://www.gnu.org/licenses/gpl.html

  20  #

  21  # *** CONTACT INFORMATION ***

  22  #

  23  # E-mail:      metaur at users dot sourceforge dot net

  24  # Web page:    http://sourceforge.net/projects/kses

  25  # Paper mail:  (not at the moment)

  26  #

  27  # [kses strips evil scripts!]

  28  /* Journal 2.0 Enhanced and Debugged 2004                               */

  29  /* by sixonetonoffun -- http://www.netflake.com --                      */

  30  /* Images Created by GanjaUK -- http://www.GanjaUK.com                  */

  31  /************************************************************************/

  32  
  33  if ( !defined('MODULE_FILE') )
  34  {
  35      die("You can't access this file directly...");
  36  }
  37  
  38  function kses($string, $allowed_html, $allowed_protocols =
  39                 array('http', 'https', 'ftp', 'news', 'nntp', 'telnet',
  40                       'gopher', 'mailto'))
  41  ###############################################################################

  42  # This function makes sure that only the allowed HTML element names, attribute

  43  # names and attribute values plus only sane HTML entities will occur in

  44  # $string. You have to remove any slashes from PHP's magic quotes before you

  45  # call this function.

  46  ###############################################################################

  47  {
  48    $string = kses_no_null($string);
  49    $string = kses_js_entities($string);
  50    $string = kses_normalize_entities($string);
  51    $string = kses_hook($string);
  52    $allowed_html_fixed = kses_array_lc($allowed_html);
  53    return kses_split($string, $allowed_html_fixed, $allowed_protocols);
  54  } # function kses

  55  
  56  
  57  function kses_hook($string)
  58  ###############################################################################

  59  # You add any kses hooks here.

  60  ###############################################################################

  61  {
  62    return $string;
  63  } # function kses_hook

  64  
  65  
  66  function kses_version()
  67  ###############################################################################

  68  # This function returns kses' version number.

  69  ###############################################################################

  70  {
  71    return '0.2.1';
  72  } # function kses_version

  73  
  74  
  75  function kses_split($string, $allowed_html, $allowed_protocols)
  76  ###############################################################################

  77  # This function searches for HTML tags, no matter how malformed. It also

  78  # matches stray ">" characters.

  79  ###############################################################################

  80  {
  81    return @preg_replace('%(<'.   # EITHER: <
  82                        '[^>]*'. # things that aren't >
  83                        '(>|$)'. # > or end of string
  84                        '|>)%e', # OR: just a >
  85                        "kses_split2('\\1', \$allowed_html, ".
  86                        '$allowed_protocols)',
  87                        $string);
  88  } # function kses_split

  89  
  90  
  91  function kses_split2($string, $allowed_html, $allowed_protocols)
  92  ###############################################################################

  93  # This function does a lot of work. It rejects some very malformed things

  94  # like <:::>. It returns an empty string, if the element isn't allowed (look

  95  # ma, no strip_tags()!). Otherwise it splits the tag into an element and an

  96  # attribute list.

  97  ###############################################################################

  98  {
  99    $string = kses_stripslashes($string);
 100  
 101    if (substr($string, 0, 1) != '<')
 102      return '&gt;';
 103      # It matched a ">" character

 104  
 105    if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches))
 106      return '';
 107      # It's seriously malformed

 108  
 109    $slash = trim($matches[1]);
 110    $elem = $matches[2];
 111    $attrlist = $matches[3];
 112  
 113    if (!is_array($allowed_html[strtolower($elem)]))
 114      return '';
 115      # They are using a not allowed HTML element

 116  
 117    return kses_attr("$slash$elem", $attrlist, $allowed_html,
 118                     $allowed_protocols);
 119  } # function kses_split2

 120  
 121  
 122  function kses_attr($element, $attr, $allowed_html, $allowed_protocols)
 123  ###############################################################################

 124  # This function removes all attributes, if none are allowed for this element.

 125  # If some are allowed it calls kses_hair() to split them further, and then it

 126  # builds up new HTML code from the data that kses_hair() returns. It also

 127  # removes "<" and ">" characters, if there are any left. One more thing it

 128  # does is to check if the tag has a closing XHTML slash, and if it does,

 129  # it puts one in the returned code as well.

 130  ###############################################################################

 131  {
 132  # Is there a closing XHTML slash at the end of the attributes?

 133  
 134    $xhtml_slash = '';
 135    if (preg_match('%\s/\s*$%', $attr))
 136      $xhtml_slash = ' /';
 137  
 138  # Are any attributes allowed at all for this element?

 139  
 140    if (count($allowed_html[strtolower($element)]) == 0)
 141      return "<$element$xhtml_slash>";
 142  
 143  # Split it

 144  
 145    $attrarr = kses_hair($attr, $allowed_protocols);
 146  
 147  # Go through $attrarr, and save the allowed attributes for this element

 148  # in $attr2

 149  
 150    $attr2 = '';
 151  
 152    foreach ($attrarr as $arreach)
 153    {
 154      $current = $allowed_html[strtolower($element)]
 155                              [strtolower($arreach['name'])];
 156      if (empty($current))
 157        continue; # the attribute is not allowed

 158  
 159      if (!is_array($current))
 160        $attr2 .= ' '.$arreach['whole'];
 161      # there are no checks

 162  
 163      else
 164      {
 165      # there are some checks

 166        $ok = true;
 167        foreach ($current as $currkey => $currval)
 168          if (!kses_check_attr_val($arreach['value'], $arreach['vless'],
 169                                   $currkey, $currval))
 170          { $ok = false; break; }
 171  
 172        if ($ok)
 173          $attr2 .= ' '.$arreach['whole']; # it passed them

 174      } # if !is_array($current)

 175    } # foreach

 176  
 177  # Remove any "<" or ">" characters

 178  
 179    $attr2 = @preg_replace('/[<>]/', '', $attr2);
 180  
 181    return "<$element$attr2$xhtml_slash>";
 182  } # function kses_attr

 183  
 184  
 185  function kses_hair($attr, $allowed_protocols)
 186  ###############################################################################

 187  # This function does a lot of work. It parses an attribute list into an array

 188  # with attribute data, and tries to do the right thing even if it gets weird

 189  # input. It will add quotes around attribute values that don't have any quotes

 190  # or apostrophes around them, to make it easier to produce HTML code that will

 191  # conform to W3C's HTML specification. It will also remove bad URL protocols

 192  # from attribute values.

 193  ###############################################################################

 194  {
 195    $attrarr = array();
 196    $mode = 0;
 197    $attrname = '';
 198  
 199  # Loop through the whole attribute list

 200  
 201    while (strlen($attr) != 0)
 202    {
 203      $working = 0; # Was the last operation successful?

 204  
 205      switch ($mode)
 206      {
 207        case 0: # attribute name, href for instance
 208  
 209          if (preg_match('/^([-a-zA-Z]+)/', $attr, $match))
 210          {
 211            $attrname = $match[1];
 212            $working = $mode = 1;
 213            $attr = @preg_replace('/^[-a-zA-Z]+/', '', $attr);
 214          }
 215  
 216          break;
 217  
 218        case 1: # equals sign or valueless ("selected")
 219  
 220          if (preg_match('/^\s*=\s*/', $attr)) # equals sign
 221          {
 222            $working = 1; $mode = 2;
 223            $attr = @preg_replace('/^\s*=\s*/', '', $attr);
 224            break;
 225          }
 226  
 227          if (preg_match('/^\s+/', $attr)) # valueless
 228          {
 229            $working = 1; $mode = 0;
 230            $attrarr[] = array
 231                          ('name'  => $attrname,
 232                           'value' => '',
 233                           'whole' => $attrname,
 234                           'vless' => 'y');
 235            $attr = @preg_replace('/^\s+/', '', $attr);
 236          }
 237  
 238          break;
 239  
 240        case 2: # attribute value, a URL after href= for instance
 241  
 242          if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match))
 243           # "value"

 244          {
 245            $thisval = kses_bad_protocol($match[1], $allowed_protocols);
 246  
 247            $attrarr[] = array
 248                          ('name'  => $attrname,
 249                           'value' => $thisval,
 250                           'whole' => "$attrname=\"$thisval\"",
 251                           'vless' => 'n');
 252            $working = 1; $mode = 0;
 253            $attr = @preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
 254            break;
 255          }
 256  
 257          if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match))
 258           # 'value'

 259          {
 260            $thisval = kses_bad_protocol($match[1], $allowed_protocols);
 261  
 262            $attrarr[] = array
 263                          ('name'  => $attrname,
 264                           'value' => $thisval,
 265                           'whole' => "$attrname='$thisval'",
 266                           'vless' => 'n');
 267            $working = 1; $mode = 0;
 268            $attr = @preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
 269            break;
 270          }
 271  
 272          if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match))
 273           # value

 274          {
 275            $thisval = kses_bad_protocol($match[1], $allowed_protocols);
 276  
 277            $attrarr[] = array
 278                          ('name'  => $attrname,
 279                           'value' => $thisval,
 280                           'whole' => "$attrname=\"$thisval\"",
 281                           'vless' => 'n');
 282                           # We add quotes to conform to W3C's HTML spec.

 283            $working = 1; $mode = 0;
 284            $attr = @preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
 285          }
 286  
 287          break;
 288      } # switch

 289  
 290      if ($working == 0) # not well formed, remove and try again
 291      {
 292        $attr = kses_html_error($attr);
 293        $mode = 0;
 294      }
 295    } # while

 296  
 297    if ($mode == 1)
 298    # special case, for when the attribute list ends with a valueless

 299    # attribute like "selected"

 300      $attrarr[] = array
 301                    ('name'  => $attrname,
 302                     'value' => '',
 303                     'whole' => $attrname,
 304                     'vless' => 'y');
 305  
 306    return $attrarr;
 307  } # function kses_hair

 308  
 309  
 310  function kses_check_attr_val($value, $vless, $checkname, $checkvalue)
 311  ###############################################################################

 312  # This function performs different checks for attribute values. The currently

 313  # implemented checks are "maxlen", "minlen", "maxval", "minval" and "valueless"

 314  # with even more checks to come soon.

 315  ###############################################################################

 316  {
 317    $ok = true;
 318  
 319    switch (strtolower($checkname))
 320    {
 321      case 'maxlen':
 322      # The maxlen check makes sure that the attribute value has a length not

 323      # greater than the given value. This can be used to avoid Buffer Overflows

 324      # in WWW clients and various Internet servers.

 325  
 326        if (strlen($value) > $checkvalue)
 327          $ok = false;
 328        break;
 329  
 330      case 'minlen':
 331      # The minlen check makes sure that the attribute value has a length not

 332      # smaller than the given value.

 333  
 334        if (strlen($value) < $checkvalue)
 335          $ok = false;
 336        break;
 337  
 338      case 'maxval':
 339      # The maxval check does two things: it checks that the attribute value is

 340      # an integer from 0 and up, without an excessive amount of zeroes or

 341      # whitespace (to avoid Buffer Overflows). It also checks that the attribute

 342      # value is not greater than the given value.

 343      # This check can be used to avoid Denial of Service attacks.

 344  
 345        if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
 346          $ok = false;
 347        if ($value > $checkvalue)
 348          $ok = false;
 349        break;
 350  
 351      case 'minval':
 352      # The minval check checks that the attribute value is a positive integer,

 353      # and that it is not smaller than the given value.

 354  
 355        if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
 356          $ok = false;
 357        if ($value < $checkvalue)
 358          $ok = false;
 359        break;
 360  
 361      case 'valueless':
 362      # The valueless check checks if the attribute has a value

 363      # (like <a href="blah">) or not (<option selected>). If the given value

 364      # is a "y" or a "Y", the attribute must not have a value.

 365      # If the given value is an "n" or an "N", the attribute must have one.

 366  
 367        if (strtolower($checkvalue) != $vless)
 368          $ok = false;
 369        break;
 370    } # switch

 371  
 372    return $ok;
 373  } # function kses_check_attr_val

 374  
 375  
 376  function kses_bad_protocol($string, $allowed_protocols)
 377  ###############################################################################

 378  # This function removes all non-allowed protocols from the beginning of

 379  # $string. It ignores whitespace and the case of the letters, and it does

 380  # understand HTML entities. It does its work in a while loop, so it won't be

 381  # fooled by a string like "javascript:javascript:alert(57)".

 382  ###############################################################################

 383  {
 384    $string = kses_no_null($string);
 385    $string2 = $string.'a';
 386  
 387    while ($string != $string2)
 388    {
 389      $string2 = $string;
 390      $string = kses_bad_protocol_once($string, $allowed_protocols);
 391    } # while

 392  
 393    return $string;
 394  } # function kses_bad_protocol

 395  
 396  
 397  function kses_no_null($string)
 398  ###############################################################################

 399  # This function removes any NULL or chr(173) characters in $string.

 400  ###############################################################################

 401  {
 402    $string = @preg_replace('/\0+/', '', $string);
 403    $string = @preg_replace('/(\\\\0)+/', '', $string);
 404  
 405    $string = @preg_replace('/\xad+/', '', $string); # deals with Opera "feature"

 406  
 407    return $string;
 408  } # function kses_no_null

 409  
 410  
 411  function kses_stripslashes($string)
 412  ###############################################################################

 413  # This function changes the character sequence  \"  to just  "

 414  # It leaves all other slashes alone. It's really weird, but the quoting from

 415  # preg_replace(//e) seems to require this.

 416  ###############################################################################

 417  {
 418    return @preg_replace('%\\\\"%', '"', $string);
 419  } # function kses_stripslashes

 420  
 421  
 422  function kses_array_lc($inarray)
 423  ###############################################################################

 424  # This function goes through an array, and changes the keys to all lower case.

 425  ###############################################################################

 426  {
 427    $outarray = array();
 428  
 429    foreach ($inarray as $inkey => $inval)
 430    {
 431      $outkey = strtolower($inkey);
 432      $outarray[$outkey] = array();
 433  
 434      foreach ($inval as $inkey2 => $inval2)
 435      {
 436        $outkey2 = strtolower($inkey2);
 437        $outarray[$outkey][$outkey2] = $inval2;
 438      } # foreach $inval

 439    } # foreach $inarray

 440  
 441    return $outarray;
 442  } # function kses_array_lc

 443  
 444  
 445  function kses_js_entities($string)
 446  ###############################################################################

 447  # This function removes the HTML JavaScript entities found in early versions of

 448  # Netscape 4.

 449  ###############################################################################

 450  {
 451    return @preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
 452  } # function kses_js_entities

 453  
 454  
 455  function kses_html_error($string)
 456  ###############################################################################

 457  # This function deals with parsing errors in kses_hair(). The general plan is

 458  # to remove everything to and including some whitespace, but it deals with

 459  # quotes and apostrophes as well.

 460  ###############################################################################

 461  {
 462    return @preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string);
 463  } # function kses_html_error

 464  
 465  
 466  function kses_bad_protocol_once($string, $allowed_protocols)
 467  ###############################################################################

 468  # This function searches for URL protocols at the beginning of $string, while

 469  # handling whitespace and HTML entities.

 470  ###############################################################################

 471  {
 472    return @preg_replace('/^((&[^;]*;|[\sA-Za-z0-9])*)'.
 473                        '(:|&#58;|&#[Xx]3[Aa];)\s*/e',
 474                        'kses_bad_protocol_once2("\\1", $allowed_protocols)',
 475                        $string);
 476  } # function kses_bad_protocol_once

 477  
 478  
 479  function kses_bad_protocol_once2($string, $allowed_protocols)
 480  ###############################################################################

 481  # This function processes URL protocols, checks to see if they're in the white-

 482  # list or not, and returns different data depending on the answer.

 483  ###############################################################################

 484  {
 485    $string2 = kses_decode_entities($string);
 486    $string2 = @preg_replace('/\s/', '', $string2);
 487    $string2 = kses_no_null($string2);
 488    $string2 = strtolower($string2);
 489  
 490    $allowed = false;
 491    foreach ($allowed_protocols as $one_protocol)
 492      if (strtolower($one_protocol) == $string2)
 493      {
 494        $allowed = true;
 495        break;
 496      }
 497  
 498    if ($allowed)
 499      return "$string2:";
 500    else
 501      return '';
 502  } # function kses_bad_protocol_once2

 503  
 504  
 505  function kses_normalize_entities($string)
 506  ###############################################################################

 507  # This function normalizes HTML entities. It will convert "AT&T" to the correct

 508  # "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.

 509  ###############################################################################

 510  {
 511  # Disarm all entities by converting & to &amp;

 512  
 513    $string = str_replace('&', '&amp;', $string);
 514  
 515  # Change back the allowed entities in our entity whitelist

 516  
 517    $string = @preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/',
 518                           '&\\1;', $string);
 519    $string = @preg_replace('/&amp;#0*([0-9]{1,5});/e',

 520                           'kses_normalize_entities2("\\1")', $string);
 521    $string = @preg_replace('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/',

 522                           '&#\\1\\2;', $string);
 523  
 524    return $string;
 525  } # function kses_normalize_entities

 526  
 527  
 528  function kses_normalize_entities2($i)
 529  ###############################################################################

 530  # This function helps kses_normalize_entities() to only accept 16 bit values

 531  # and nothing more for &#number; entities.

 532  ###############################################################################

 533  {
 534    return (($i > 65535) ? "&amp;#$i;" : "&#$i;");

 535  } # function kses_normalize_entities2

 536  
 537  
 538  function kses_decode_entities($string)
 539  ###############################################################################

 540  # This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't

 541  # do anything with other entities like &auml;, but we don't need them in the

 542  # URL protocol whitelisting system anyway.

 543  ###############################################################################

 544  {
 545    $string = @preg_replace('/&#([0-9]+);/e', 'chr("\\1")', $string);
 546    $string = @preg_replace('/&#[Xx]([0-9A-Fa-f]+);/e', 'chr(hexdec("\\1"))',
 547                           $string);
 548  
 549    return $string;
 550  } # function kses_decode_entities

 551  
 552  ?>


Généré le : Sun Apr 1 11:11:59 2007 par Balluche grâce à PHPXref 0.7