[ Index ]
 

Code source de eGroupWare 1.2.106-2

Accédez au Source d'autres logiciels libres | Soutenez Angelica Josefina !

title

Body

[fermer]

/phpgwapi/inc/ -> class.kses.inc.php (source)

   1  <?php
   2      /*
   3       *    This is a fork of a slick piece of procedural code called 'kses' written by Ulf Harnhammar
   4       * The entire set of functions was wrapped in a PHP object with some internal modifications
   5       * by Richard Vasquez (http://www.chaos.org/) 7/25/2003
   6       *
   7       *    The original (procedural) version of the code can be found at:
   8       * http://sourceforge.net/projects/kses/
   9       *
  10       *    [kses strips evil scripts!]
  11       *
  12       * ==========================================================================================
  13       *
  14       * class.kses.php 0.0.2 - PHP class that filters HTML/XHTML only allowing some elements and
  15       *                           attributes to be passed through.
  16       *
  17       * Copyright (C) 2003 Richard R. Vasquez, Jr.
  18       *
  19       * Derived from kses 0.2.1 - HTML/XHTML filter that only allows some elements and attributes
  20       * Copyright (C) 2002, 2003  Ulf Harnhammar
  21       *
  22       * ==========================================================================================
  23       *
  24       * This program is free software and open source software; you can redistribute
  25       * it and/or modify it under the terms of the GNU General Public License as
  26       * published by the Free Software Foundation; either version 2 of the License,
  27       * or (at your option) any later version.
  28       *
  29       * This program is distributed in the hope that it will be useful, but WITHOUT
  30       * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  31       * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  32       * more details.
  33       *
  34       * You should have received a copy of the GNU General Public License along
  35       * with this program; if not, write to the Free Software Foundation, Inc.,
  36       * 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  or visit
  37       * http://www.gnu.org/licenses/gpl.html
  38       *
  39       * ==========================================================================================
  40       * CONTACT INFORMATION:
  41       *
  42       * Email:    View current valid email address at http://www.chaos.org/contact/
  43       */
  44  
  45      class kses
  46      {
  47          var $allowed_protocols = array('http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'mailto');
  48          var $allowed_html      = array();
  49  
  50  		function kses()
  51          {
  52          }
  53  
  54  		function Parse($string = "")
  55          {
  56              if (get_magic_quotes_gpc())
  57              {
  58                    $string = stripslashes($string);
  59              }
  60              $string = $this->_no_null($string);
  61              $string = $this->_js_entities($string);
  62              $string = $this->_normalize_entities($string);
  63              $string = $this->_hook($string);
  64              return    $this->_split($string);
  65          }
  66  
  67  		function Protocols()
  68          {
  69              $c_args = func_num_args();
  70              if($c_args != 1)
  71              {
  72                  return false;
  73              }
  74  
  75              $protocol_data = func_get_arg(0);
  76  
  77              if(is_array($protocol_data))
  78              {
  79                  foreach($protocol_data as $protocol)
  80                  {
  81                      $this->AddProtocol($protocol);
  82                  }
  83              }
  84              elseif(is_string($protocol_data))
  85              {
  86                  $this->AddProtocol($protocol_data);
  87                  return true;
  88              }
  89              else
  90              {
  91                  trigger_error("kses::Protocols() did not receive a string or an array.", E_USER_WARNING);
  92                  return false;
  93              }
  94          }
  95  
  96  		function AddProtocol($protocol = "")
  97          {
  98              if(!is_string($protocol))
  99              {
 100                  trigger_error("kses::AddProtocol() requires a string.", E_USER_WARNING);
 101                  return false;
 102              }
 103  
 104              $protocol = strtolower(trim($protocol));
 105              if($protocol == "")
 106              {
 107                  trigger_error("kses::AddProtocol() tried to add an empty/NULL protocol.", E_USER_WARNING);
 108                  return false;
 109              }
 110  
 111              // Remove any inadvertent ':' at the end of the protocol.
 112              if(substr($protocol, strlen($protocol) - 1, 1) == ":")
 113              {
 114                  $protocol = substr($protocol, 0, strlen($protocol) - 1);
 115              }
 116  
 117              if(!in_array($protocol, $this->allowed_protocols))
 118              {
 119                  array_push($this->allowed_protocols, $protocol);
 120                  sort($this->allowed_protocols);
 121              }
 122              return true;
 123          }
 124  
 125  		function AddHTML($tag = "", $attribs = array())
 126          {
 127              if(!is_string($tag))
 128              {
 129                  trigger_error("kses::AddHTML() requires the tag to be a string", E_USER_WARNING);
 130                  return false;
 131              }
 132  
 133              $tag = strtolower(trim($tag));
 134              if($tag == "")
 135              {
 136                  trigger_error("kses::AddHTML() tried to add an empty/NULL tag", E_USER_WARNING);
 137                  return false;
 138              }
 139  
 140              if(!is_array($attribs))
 141              {
 142                  trigger_error("kses::AddHTML() requires an array (even an empty one) of attributes for '$tag'", E_USER_WARNING);
 143                  return false;
 144              }
 145  
 146              $new_attribs = array();
 147              foreach($attribs as $idx1 => $val1)
 148              {
 149                  $new_idx1 = strtolower($idx1);
 150                  $new_val1 = $attribs[$idx1];
 151  
 152                  if(is_array($new_val1))
 153                  {
 154                      $tmp_val = array();
 155                      foreach($new_val1 as $idx2 => $val2)
 156                      {
 157                          $new_idx2 = strtolower($idx2);
 158                          $tmp_val[$new_idx2] = $val2;
 159                      }
 160                      $new_val1 = $tmp_val;
 161                  }
 162  
 163                  $new_attribs[$new_idx1] = $new_val1;
 164              }
 165  
 166              $this->allowed_html[$tag] = $new_attribs;
 167              return true;
 168          }
 169  
 170          ###############################################################################
 171          # This function removes any NULL or chr(173) characters in $string.
 172          ###############################################################################
 173  		function _no_null($string)
 174          {
 175              $string = preg_replace('/\0+/', '', $string);
 176              $string = preg_replace('/(\\\\0)+/', '', $string);
 177              # commented out, because it breaks chinese chars
 178              #$string = preg_replace('/\xad+/', '', $string); # deals with Opera "feature"
 179              return $string;
 180          } # function _no_null
 181  
 182          ###############################################################################
 183          # This function removes the HTML JavaScript entities found in early versions of
 184          # Netscape 4.
 185          ###############################################################################
 186  		function _js_entities($string)
 187          {
 188            return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
 189          } # function _js_entities
 190  
 191  
 192          ###############################################################################
 193          # This function normalizes HTML entities. It will convert "AT&T" to the correct
 194          # "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
 195          ###############################################################################
 196  		function _normalize_entities($string)
 197          {
 198              # Disarm all entities by converting & to &amp;
 199            $string = str_replace('&', '&amp;', $string);
 200  
 201              # Change back the allowed entities in our entity white list
 202  
 203            $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string);
 204            $string = preg_replace('/&amp;#0*([0-9]{1,5});/e', '\$this->_normalize_entities2("\\1")', $string);
 205            $string = preg_replace('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', '&#\\1\\2;', $string);
 206  
 207            return $string;
 208          } # function _normalize_entities
 209  
 210  
 211          ###############################################################################
 212          # This function helps _normalize_entities() to only accept 16 bit values
 213          # and nothing more for &#number; entities.
 214          ###############################################################################
 215  		function _normalize_entities2($i)
 216          {
 217            return (($i > 65535) ? "&amp;#$i;" : "&#$i;");
 218          } # function _normalize_entities2
 219  
 220          ###############################################################################
 221          # You add any kses hooks here.
 222          ###############################################################################
 223  		function _hook($string)
 224          {
 225            return $string;
 226          } # function _hook
 227  
 228          ###############################################################################
 229          # This function goes through an array, and changes the keys to all lower case.
 230          ###############################################################################
 231  		function _array_lc($inarray)
 232          {
 233            $outarray = array();
 234  
 235            foreach ($inarray as $inkey => $inval)
 236            {
 237               $outkey = strtolower($inkey);
 238               $outarray[$outkey] = array();
 239  
 240               foreach ($inval as $inkey2 => $inval2)
 241               {
 242                  $outkey2 = strtolower($inkey2);
 243                  $outarray[$outkey][$outkey2] = $inval2;
 244               } # foreach $inval
 245            } # foreach $inarray
 246  
 247            return $outarray;
 248          } # function _array_lc
 249  
 250          ###############################################################################
 251          # This function searches for HTML tags, no matter how malformed. It also
 252          # matches stray ">" characters.
 253          ###############################################################################
 254  		function _split($string)
 255          {
 256              return preg_replace(
 257                  '%(<'.   # EITHER: <
 258                  '[^>]*'. # things that aren't >
 259                  '(>|$)'. # > or end of string
 260                  '|>)%e', # OR: just a >
 261                  "\$this->_split2('\\1')",
 262                  $string);
 263          } # function _split
 264  
 265  		function _split2($string)
 266          ###############################################################################
 267          # This function does a lot of work. It rejects some very malformed things
 268          # like <:::>. It returns an empty string, if the element isn't allowed (look
 269          # ma, no strip_tags()!). Otherwise it splits the tag into an element and an
 270          # attribute list.
 271          ###############################################################################
 272          {
 273              $string = $this->_stripslashes($string);
 274  
 275              if (substr($string, 0, 1) != '<')
 276              {
 277                  # It matched a ">" character
 278                  return '&gt;';
 279              }
 280  
 281              if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches))
 282              {
 283                  # It's seriously malformed
 284                  return '';
 285              }
 286  
 287              $slash    = trim($matches[1]);
 288              $elem     = $matches[2];
 289              $attrlist = $matches[3];
 290  
 291              if (!is_array($this->allowed_html[strtolower($elem)]))
 292              {
 293                  # They are using a not allowed HTML element
 294                  return '';
 295              }
 296  
 297              return $this->_attr("$slash$elem", $attrlist);
 298          } # function _split2
 299  
 300          ###############################################################################
 301          # This function removes all attributes, if none are allowed for this element.
 302          # If some are allowed it calls s_hair() to split them further, and then it
 303          # builds up new HTML code from the data that _hair() returns. It also
 304          # removes "<" and ">" characters, if there are any left. One more thing it
 305          # does is to check if the tag has a closing XHTML slash, and if it does,
 306          # it puts one in the returned code as well.
 307          ###############################################################################
 308  		function _attr($element, $attr)
 309          {
 310              # Is there a closing XHTML slash at the end of the attributes?
 311              $xhtml_slash = '';
 312              if (preg_match('%\s/\s*$%', $attr))
 313              {
 314                  $xhtml_slash = ' /';
 315              }
 316  
 317              # Are any attributes allowed at all for this element?
 318              if (count($this->allowed_html[strtolower($element)]) == 0)
 319              {
 320                  return "<$element$xhtml_slash>";
 321              }
 322  
 323              # Split it
 324              $attrarr = $this->_hair($attr);
 325  
 326              # Go through $attrarr, and save the allowed attributes for this element
 327              # in $attr2
 328              $attr2 = '';
 329              foreach ($attrarr as $arreach)
 330              {
 331                  $current = $this->allowed_html[strtolower($element)][strtolower($arreach['name'])];
 332                  if ($current == '')
 333                  {
 334                      # the attribute is not allowed
 335                      continue;
 336                  }
 337  
 338                  if (!is_array($current))
 339                  {
 340                      # there are no checks
 341                      $attr2 .= ' '.$arreach['whole'];
 342                  }
 343                  else
 344                  {
 345                      # there are some checks
 346                      $ok = true;
 347                      foreach ($current as $currkey => $currval)
 348                      {
 349                          if (!$this->_check_attr_val($arreach['value'], $arreach['vless'], $currkey, $currval))
 350                          {
 351                              $ok = false;
 352                              break;
 353                          }
 354                      }
 355  
 356                      if ($ok)
 357                      {
 358                          # it passed them
 359                          $attr2 .= ' '.$arreach['whole'];
 360                      }
 361                  } # if !is_array($current)
 362              } # foreach
 363  
 364              # Remove any "<" or ">" characters
 365              $attr2 = preg_replace('/[<>]/', '', $attr2);
 366              return "<$element$attr2$xhtml_slash>";
 367          } # function _attr
 368  
 369          ###############################################################################
 370          # This function does a lot of work. It parses an attribute list into an array
 371          # with attribute data, and tries to do the right thing even if it gets weird
 372          # input. It will add quotes around attribute values that don't have any quotes
 373          # or apostrophes around them, to make it easier to produce HTML code that will
 374          # conform to W3C's HTML specification. It will also remove bad URL protocols
 375          # from attribute values.
 376          ###############################################################################
 377  		function _hair($attr)
 378          {
 379              $attrarr  = array();
 380              $mode     = 0;
 381              $attrname = '';
 382  
 383              # Loop through the whole attribute list
 384  
 385              while (strlen($attr) != 0)
 386              {
 387                  # Was the last operation successful?
 388                  $working = 0;
 389  
 390                  switch ($mode)
 391                  {
 392                      case 0:    # attribute name, href for instance
 393                          if (preg_match('/^([-a-zA-Z]+)/', $attr, $match))
 394                          {
 395                              $attrname = $match[1];
 396                              $working = $mode = 1;
 397                              $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
 398                          }
 399                          break;
 400                      case 1:    # equals sign or valueless ("selected")
 401                          if (preg_match('/^\s*=\s*/', $attr)) # equals sign
 402                          {
 403                              $working = 1;
 404                              $mode    = 2;
 405                              $attr    = preg_replace('/^\s*=\s*/', '', $attr);
 406                              break;
 407                          }
 408                          if (preg_match('/^\s+/', $attr)) # valueless
 409                          {
 410                              $working   = 1;
 411                              $mode      = 0;
 412                              $attrarr[] = array(
 413                                  'name'  => $attrname,
 414                                  'value' => '',
 415                                  'whole' => $attrname,
 416                                  'vless' => 'y'
 417                              );
 418                              $attr      = preg_replace('/^\s+/', '', $attr);
 419                          }
 420                          break;
 421                      case 2: # attribute value, a URL after href= for instance
 422                          if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) # "value"
 423                          {
 424                              $thisval   = $this->_bad_protocol($match[1]);
 425                              $attrarr[] = array(
 426                                  'name'  => $attrname,
 427                                  'value' => $thisval,
 428                                  'whole' => "$attrname=\"$thisval\"",
 429                                  'vless' => 'n'
 430                              );
 431                              $working   = 1;
 432                              $mode      = 0;
 433                              $attr      = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
 434                              break;
 435                          }
 436                          if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) # 'value'
 437                          {
 438                              $thisval   = $this->_bad_protocol($match[1]);
 439                              $attrarr[] = array(
 440                                  'name'  => $attrname,
 441                                  'value' => $thisval,
 442                                  'whole' => "$attrname='$thisval'",
 443                                  'vless' => 'n'
 444                              );
 445                              $working   = 1;
 446                              $mode      = 0;
 447                              $attr      = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
 448                              break;
 449                          }
 450                          if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) # value
 451                          {
 452                              $thisval   = $this->_bad_protocol($match[1]);
 453                              $attrarr[] = array(
 454                                  'name'  => $attrname,
 455                                  'value' => $thisval,
 456                                  'whole' => "$attrname=\"$thisval\"",
 457                                  'vless' => 'n'
 458                              );
 459                              # We add quotes to conform to W3C's HTML spec.
 460                              $working   = 1;
 461                              $mode      = 0;
 462                              $attr      = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
 463                          }
 464                          break;
 465                  } # switch
 466  
 467                  if ($working == 0) # not well formed, remove and try again
 468                  {
 469                      $attr = $this->_html_error($attr);
 470                      $mode = 0;
 471                  }
 472              } # while
 473  
 474              # special case, for when the attribute list ends with a valueless
 475              # attribute like "selected"
 476              if ($mode == 1)
 477              {
 478                  $attrarr[] = array(
 479                      'name'  => $attrname,
 480                      'value' => '',
 481                      'whole' => $attrname,
 482                      'vless' => 'y'
 483                  );
 484              }
 485  
 486              return $attrarr;
 487          } # function _hair
 488  
 489          ###############################################################################
 490          # This function removes all non-allowed protocols from the beginning of
 491          # $string. It ignores whitespace and the case of the letters, and it does
 492          # understand HTML entities. It does its work in a while loop, so it won't be
 493          # fooled by a string like "javascript:javascript:alert(57)".
 494          ###############################################################################
 495  		function _bad_protocol($string)
 496          {
 497              $string  = $this->_no_null($string);
 498              $string2 = $string.'a';
 499  
 500              while ($string != $string2)
 501              {
 502                  $string2 = $string;
 503                  $string  = $this->_bad_protocol_once($string);
 504              } # while
 505  
 506              return $string;
 507          } # function _bad_protocol
 508  
 509          ###############################################################################
 510          # This function searches for URL protocols at the beginning of $string, while
 511          # handling whitespace and HTML entities.
 512          ###############################################################################
 513  		function _bad_protocol_once($string)
 514          {
 515              return preg_replace(
 516                  '/^((&[^;]*;|[\sA-Za-z0-9])*)'.
 517                  '(:|&#58;|&#[Xx]3[Aa];)\s*/e',
 518                  '\$this->_bad_protocol_once2("\\1")',
 519                  $string
 520              );
 521              return $string;
 522          } # function _bad_protocol_once
 523  
 524  
 525          ###############################################################################
 526          # This function processes URL protocols, checks to see if they're in the white-
 527          # list or not, and returns different data depending on the answer.
 528          ###############################################################################
 529  		function _bad_protocol_once2($string)
 530          {
 531              $string2 = $this->_decode_entities($string2);
 532              $string2 = preg_replace('/\s/', '', $string);
 533              $string2 = $this->_no_null($string2);
 534              $string2 = strtolower($string2);
 535  
 536              $allowed = false;
 537              foreach ($this->allowed_protocols as $one_protocol)
 538              {
 539                  if (strtolower($one_protocol) == $string2)
 540                  {
 541                      $allowed = true;
 542                      break;
 543                  }
 544              }
 545  
 546              if ($allowed)
 547              {
 548                  return "$string2:";
 549              }
 550              else
 551              {
 552                  return '';
 553              }
 554          } # function _bad_protocol_once2
 555  
 556          ###############################################################################
 557          # This function performs different checks for attribute values. The currently
 558          # implemented checks are "maxlen", "minlen", "maxval", "minval" and "valueless"
 559          # with even more checks to come soon.
 560          ###############################################################################
 561  		function _check_attr_val($value, $vless, $checkname, $checkvalue)
 562          {
 563              $ok = true;
 564  
 565              switch (strtolower($checkname))
 566              {
 567                  # The maxlen check makes sure that the attribute value has a length not
 568                  # greater than the given value. This can be used to avoid Buffer Overflows
 569                  # in WWW clients and various Internet servers.
 570                  case 'maxlen':
 571                      if (strlen($value) > $checkvalue)
 572                      {
 573                          $ok = false;
 574                      }
 575                      break;
 576  
 577                  # The minlen check makes sure that the attribute value has a length not
 578                  # smaller than the given value.
 579                  case 'minlen':
 580                      if (strlen($value) < $checkvalue)
 581                      {
 582                          $ok = false;
 583                      }
 584                      break;
 585  
 586                  # The maxval check does two things: it checks that the attribute value is
 587                  # an integer from 0 and up, without an excessive amount of zeroes or
 588                  # whitespace (to avoid Buffer Overflows). It also checks that the attribute
 589                  # value is not greater than the given value.
 590                  # This check can be used to avoid Denial of Service attacks.
 591                  case 'maxval':
 592                      if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
 593                      {
 594                          $ok = false;
 595                      }
 596                      if ($value > $checkvalue)
 597                      {
 598                          $ok = false;
 599                      }
 600                      break;
 601  
 602                  # The minval check checks that the attribute value is a positive integer,
 603                  # and that it is not smaller than the given value.
 604                  case 'minval':
 605                      if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
 606                      {
 607                          $ok = false;
 608                      }
 609                      if ($value < $checkvalue)
 610                      {
 611                          $ok = false;
 612                      }
 613                      break;
 614  
 615                  # The valueless check checks if the attribute has a value
 616                  # (like <a href="blah">) or not (<option selected>). If the given value
 617                  # is a "y" or a "Y", the attribute must not have a value.
 618                  # If the given value is an "n" or an "N", the attribute must have one.
 619                  case 'valueless':
 620                  if (strtolower($checkvalue) != $vless)
 621                  {
 622                      $ok = false;
 623                  }
 624                  break;
 625  
 626              } # switch
 627  
 628              return $ok;
 629          } # function _check_attr_val
 630  
 631          ###############################################################################
 632          # This function changes the character sequence  \"  to just  "
 633          # It leaves all other slashes alone. It's really weird, but the quoting from
 634          # preg_replace(//e) seems to require this.
 635          ###############################################################################
 636  		function _stripslashes($string)
 637          {
 638              return preg_replace('%\\\\"%', '"', $string);
 639          } # function _stripslashes
 640  
 641          ###############################################################################
 642          # This function deals with parsing errors in _hair(). The general plan is
 643          # to remove everything to and including some whitespace, but it deals with
 644          # quotes and apostrophes as well.
 645          ###############################################################################
 646  		function _html_error($string)
 647          {
 648              return preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string);
 649          } # function _html_error
 650  
 651          ###############################################################################
 652          # This function decodes numeric HTML entities (&#65; and &#x41;). It doesn't
 653          # do anything with other entities like &auml;, but we don't need them in the
 654          # URL protocol white listing system anyway.
 655          ###############################################################################
 656  		function _decode_entities($string)
 657          {
 658              $string = preg_replace('/&#([0-9]+);/e', 'chr("\\1")', $string);
 659              $string = preg_replace('/&#[Xx]([0-9A-Fa-f]+);/e', 'chr(hexdec("\\1"))', $string);
 660              return $string;
 661          } # function _decode_entities
 662  
 663          ###############################################################################
 664          # This function returns kses' version number.
 665          ###############################################################################
 666  		function _version()
 667          {
 668              return '0.0.2 (OOP fork of kses 0.2.1)';
 669          } # function _version
 670      }
 671  ?>


Généré le : Sun Feb 25 17:20:01 2007 par Balluche grâce à PHPXref 0.7