[ Index ]
 

Code source de GeekLog 1.4.1

Accédez au Source d'autres logiciels libres

title

Body

[fermer]

/system/classes/ -> kses.class.php (source)

   1  <?php
   2      /*
   3       * ==========================================================================================
   4       *
   5       * This program is free software and open source software; you can redistribute
   6       * it and/or modify it under the terms of the GNU General Public License as
   7       * published by the Free Software Foundation; either version 2 of the License,
   8       * or (at your option) any later version.
   9       *
  10       * This program is distributed in the hope that it will be useful, but WITHOUT
  11       * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12       * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  13       * more details.
  14       *
  15       * You should have received a copy of the GNU General Public License along
  16       * with this program; if not, write to the Free Software Foundation, Inc.,
  17       * 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  or visit
  18       * http://www.gnu.org/licenses/gpl.html
  19       *
  20       * ==========================================================================================
  21       */
  22  
  23      /**
  24      *    Class file for PHP4 OOP version of kses
  25      *
  26      *    This is an updated version of kses to work with PHP4 that works under E_STRICT.
  27      *
  28      *    This upgrade provides the following:
  29      *    + Version number synced to procedural version number
  30      *    + PHPdoc style documentation has been added to the class.  See http://www.phpdoc.org/ for more info.
  31      *    + Some methods are now deprecated due to nomenclature style change.  See method documentation for specifics.
  32      *    + Kses4 now works in E_STRICT
  33      *    + Addition of methods AddProtocols(), filterKsesTextHook(), RemoveProtocol() and RemoveProtocols()
  34      *    + Deprecated _hook(), Protocols()
  35      *    + Integrated code from kses 0.2.2 into class.
  36      *    + Added methods DumpProtocols(), DumpElements()
  37      *
  38      *    @package    kses
  39      *    @subpackage kses4
  40      */
  41  
  42      if(substr(phpversion(), 0, 1) < 4)
  43      {
  44          die("Class kses requires PHP 4 or higher.");
  45      }
  46  
  47      /**
  48      *    Only install KSES4 once
  49      */
  50      if(!defined('KSES_CLASS_PHP4'))
  51      {
  52          define('KSES_CLASS_PHP4', true);
  53  
  54      /**
  55      *    Kses strips evil scripts!
  56      *
  57      *    This class provides the capability for removing unwanted HTML/XHTML, attributes from
  58      *    tags, and protocols contained in links.  The net result is a much more powerful tool
  59      *    than the PHP internal strip_tags()
  60      *
  61      *    This is a fork of a slick piece of procedural code called 'kses' written by Ulf Harnhammar
  62      *    The entire set of functions was wrapped in a PHP object with some internal modifications
  63      *    by Richard Vasquez (http://www.chaos.org/) 7/25/2003
  64      *
  65      *    This upgrade provides the following:
  66      *    + Version number synced to procedural version number
  67      *    + PHPdoc style documentation has been added to the class.  See http://www.phpdoc.org/ for more info.
  68      *    + Some methods are now deprecated due to nomenclature style change.  See method documentation for specifics.
  69      *    + Kses4 now works in E_STRICT
  70      *    + Addition of methods AddProtocols(), filterKsesTextHook(), RemoveProtocol(), RemoveProtocols() and SetProtocols()
  71      *    + Deprecated _hook(), Protocols()
  72      *    + Integrated code from kses 0.2.2 into class.
  73      *
  74      *    @author     Richard R. Vásquez, Jr. (Original procedural code by Ulf Härnhammar)
  75      *    @link       http://sourceforge.net/projects/kses/ Home Page for Kses
  76      *    @link       http://chaos.org/contact/ Contact page with current email address for Richard Vasquez
  77      *    @copyright  Richard R. Vásquez, Jr. 2003-2005
  78      *    @version    PHP4 OOP 0.2.2
  79      *    @license    http://www.gnu.org/licenses/gpl.html GNU Public License
  80      *    @package    kses
  81      */
  82          class kses4
  83          {
  84              /**#@+
  85               *    @access private
  86               *    @var array
  87               */
  88              var $allowed_protocols = array();
  89              var $allowed_html      = array();
  90              /**#@-*/
  91  
  92              /**
  93               *    Constructor for kses.
  94               *
  95               *    This sets a default collection of protocols allowed in links, and creates an
  96               *    empty set of allowed HTML tags.
  97               *    @since PHP4 OOP 0.0.1
  98               */
  99  			function kses4()
 100              {
 101                  /**
 102                   *    You could add protocols such as ftp, new, gopher, mailto, irc, etc.
 103                   *
 104                   *    The base values the original kses provided were:
 105                   *        'http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'mailto'
 106                   */
 107                  $this->allowed_protocols = array('http', 'ftp', 'mailto');
 108                  $this->allowed_html      = array();
 109              }
 110  
 111              /**
 112               *    Basic task of kses - parses $string and strips it as required.
 113               *
 114               *    This method strips all the disallowed (X)HTML tags, attributes
 115               *    and protocols from the input $string.
 116               *
 117               *    @access public
 118               *    @param string $string String to be stripped of 'evil scripts'
 119               *    @return string The stripped string
 120               *    @since PHP4 OOP 0.2.1
 121               */
 122  			function Parse($string = "")
 123              {
 124                  if (get_magic_quotes_gpc())
 125                  {
 126                      $string = stripslashes($string);
 127                  }
 128                  $string = $this->_no_null($string);
 129                  $string = $this->_js_entities($string);
 130                  $string = $this->_normalize_entities($string);
 131                  $string = $this->filterKsesTextHook($string);
 132                  return    $this->_split($string);
 133              }
 134  
 135              /**
 136               *    Allows for single/batch addition of protocols
 137               *
 138               *    This method accepts one argument that can be either a string
 139               *    or an array of strings.  Invalid data will be ignored.
 140               *
 141               *    The argument will be processed, and each string will be added
 142               *    via AddProtocol().
 143               *
 144               *    @access public
 145               *    @param mixed , A string or array of protocols that will be added to the internal list of allowed protocols.
 146               *    @return bool Status of adding valid protocols.
 147               *    @see AddProtocol()
 148               *    @since PHP4 OOP 0.2.1
 149               */
 150  			function AddProtocols()
 151              {
 152                  $c_args = func_num_args();
 153                  if($c_args != 1)
 154                  {
 155                      trigger_error("kses4::AddProtocols() did not receive an argument.", E_USER_WARNING);
 156                      return false;
 157                  }
 158  
 159                  $protocol_data = func_get_arg(0);
 160  
 161                  if(is_array($protocol_data) && count($protocol_data) > 0)
 162                  {
 163                      foreach($protocol_data as $protocol)
 164                      {
 165                          $this->AddProtocol($protocol);
 166                      }
 167                      return true;
 168                  }
 169                  elseif(is_string($protocol_data))
 170                  {
 171                      $this->AddProtocol($protocol_data);
 172                      return true;
 173                  }
 174                  else
 175                  {
 176                      trigger_error("kses4::AddProtocols() did not receive a string or an array.", E_USER_WARNING);
 177                      return false;
 178                  }
 179              }
 180  
 181              /**
 182               *    Allows for single/batch addition of protocols
 183               *
 184               *    @deprecated Use AddProtocols()
 185               *    @see AddProtocols()
 186               *    @return bool
 187               *    @since PHP4 OOP 0.0.1
 188               */
 189  			function Protocols()
 190              {
 191                  $c_args = func_num_args();
 192                  if($c_args != 1)
 193                  {
 194                      trigger_error("kses4::Protocols() did not receive an argument.", E_USER_WARNING);
 195                      return false;
 196                  }
 197  
 198                  return $this->AddProtocols(func_get_arg(0));
 199              }
 200  
 201              /**
 202               *    Adds a single protocol to $this->allowed_protocols.
 203               *
 204               *    This method accepts a string argument and adds it to
 205               *    the list of allowed protocols to keep when performing
 206               *    Parse().
 207               *
 208               *    @access public
 209               *    @param string $protocol The name of the protocol to be added.
 210               *    @return bool Status of adding valid protocol.
 211               *    @since PHP4 OOP 0.0.1
 212               */
 213  			function AddProtocol($protocol = "")
 214              {
 215                  if(!is_string($protocol))
 216                  {
 217                      trigger_error("kses4::AddProtocol() requires a string.", E_USER_WARNING);
 218                      return false;
 219                  }
 220  
 221                  $protocol = strtolower(trim($protocol));
 222                  if($protocol == "")
 223                  {
 224                      trigger_error("kses4::AddProtocol() tried to add an empty/NULL protocol.", E_USER_WARNING);
 225                      return false;
 226                  }
 227  
 228                  // Remove any inadvertent ':' at the end of the protocol.
 229                  if(substr($protocol, strlen($protocol) - 1, 1) == ":")
 230                  {
 231                      $protocol = substr($protocol, 0, strlen($protocol) - 1);
 232                  }
 233  
 234                  if(!in_array($protocol, $this->allowed_protocols))
 235                  {
 236                      array_push($this->allowed_protocols, $protocol);
 237                      sort($this->allowed_protocols);
 238                  }
 239                  return true;
 240              }
 241  
 242              /**
 243               *    Allows for single/batch replacement of protocols
 244               *
 245               *    This method accepts one argument that can be either a string
 246               *    or an array of strings.  Invalid data will be ignored.
 247               *
 248               *    Existing protocols will be removed, then the argument will be
 249               *    processed, and each string will be added via AddProtocol().
 250               *
 251               *    @access public
 252               *    @param mixed , A string or array of protocols that will be the new internal list of allowed protocols.
 253               *    @return bool Status of replacing valid protocols.
 254               *    @since PHP4 OOP 0.2.2
 255               *    @see AddProtocol()
 256               */
 257  			function SetProtocols()
 258              {
 259                  $c_args = func_num_args();
 260                  if($c_args != 1)
 261                  {
 262                      trigger_error("kses4::SetProtocols() did not receive an argument.", E_USER_WARNING);
 263                      return false;
 264                  }
 265  
 266                  $protocol_data = func_get_arg(0);
 267  
 268                  if(is_array($protocol_data) && count($protocol_data) > 0)
 269                  {
 270                      $this->allowed_protocols = array();
 271                      foreach($protocol_data as $protocol)
 272                      {
 273                          $this->AddProtocol($protocol);
 274                      }
 275                      return true;
 276                  }
 277                  elseif(is_string($protocol_data))
 278                  {
 279                      $this->allowed_protocols = array();
 280                      $this->AddProtocol($protocol_data);
 281                      return true;
 282                  }
 283                  else
 284                  {
 285                      trigger_error("kses4::SetProtocols() did not receive a string or an array.", E_USER_WARNING);
 286                      return false;
 287                  }
 288              }
 289  
 290              /**
 291               *    Raw dump of allowed protocols
 292               *
 293               *    This returns an indexed array of allowed protocols for a particular KSES
 294               *    instantiation.
 295               *
 296               *    @access public
 297               *    @return array The list of allowed protocols.
 298               *    @since PHP4 OOP 0.2.2
 299               */
 300  			function DumpProtocols()
 301              {
 302                  return $this->allowed_protocols;
 303              }
 304  
 305              /**
 306               *    Raw dump of allowed (X)HTML elements
 307               *
 308               *    This returns an indexed array of allowed (X)HTML elements and attributes
 309               *    for a particular KSES instantiation.
 310               *
 311               *    @access public
 312               *    @return array The list of allowed elements.
 313               *    @since PHP4 OOP 0.2.2
 314               */
 315  			function DumpElements()
 316              {
 317                  return $this->allowed_html;
 318              }
 319  
 320              /**
 321               *    Adds valid (X)HTML with corresponding attributes that will be kept when stripping 'evil scripts'.
 322               *
 323               *    This method accepts one argument that can be either a string
 324               *    or an array of strings.  Invalid data will be ignored.
 325               *
 326               *    @access public
 327               *    @param string $tag (X)HTML tag that will be allowed after stripping text.
 328               *    @param array $attribs Associative array of allowed attributes - key => attribute name - value => attribute parameter
 329               *    @return bool Status of Adding (X)HTML and attributes.
 330               *    @since PHP4 OOP 0.0.1
 331               */
 332  			function AddHTML($tag = "", $attribs = array())
 333              {
 334                  if(!is_string($tag))
 335                  {
 336                      trigger_error("kses4::AddHTML() requires the tag to be a string", E_USER_WARNING);
 337                      return false;
 338                  }
 339  
 340                  $tag = strtolower(trim($tag));
 341                  if($tag == "")
 342                  {
 343                      trigger_error("kses4::AddHTML() tried to add an empty/NULL tag", E_USER_WARNING);
 344                      return false;
 345                  }
 346  
 347                  if(!is_array($attribs))
 348                  {
 349                      trigger_error("kses4::AddHTML() requires an array (even an empty one) of attributes for '$tag'", E_USER_WARNING);
 350                      return false;
 351                  }
 352  
 353                  $new_attribs = array();
 354                  if(is_array($attribs) && count($attribs) > 0)
 355                  {
 356                      foreach($attribs as $idx1 => $val1)
 357                      {
 358                          $new_idx1 = strtolower($idx1);
 359                          $new_val1 = $attribs[$idx1];
 360  
 361                          if(is_array($new_val1) && count($new_val1) > 0)
 362                          {
 363                              $tmp_val = array();
 364                              foreach($new_val1 as $idx2 => $val2)
 365                              {
 366                                  $new_idx2 = strtolower($idx2);
 367                                  $tmp_val[$new_idx2] = $val2;
 368                              }
 369                              $new_val1 = $tmp_val;
 370                          }
 371  
 372                          $new_attribs[$new_idx1] = $new_val1;
 373                      }
 374                  }
 375  
 376                  $this->allowed_html[$tag] = $new_attribs;
 377                  return true;
 378              }
 379  
 380              /**
 381               *    Removes a single protocol from $this->allowed_protocols.
 382               *
 383               *    This method accepts a string argument and removes it from
 384               *    the list of allowed protocols to keep when performing
 385               *    Parse().
 386               *
 387               *    @access public
 388               *    @param string $protocol The name of the protocol to be removed.
 389               *    @return bool Status of removing valid protocol.
 390               *    @since PHP4 OOP 0.2.1
 391               */
 392  			function RemoveProtocol($protocol = "")
 393              {
 394                  if(!is_string($protocol))
 395                  {
 396                      trigger_error("kses4::RemoveProtocol() requires a string.", E_USER_WARNING);
 397                      return false;
 398                  }
 399  
 400                  // Remove any inadvertent ':' at the end of the protocol.
 401                  if(substr($protocol, strlen($protocol) - 1, 1) == ":")
 402                  {
 403                      $protocol = substr($protocol, 0, strlen($protocol) - 1);
 404                  }
 405  
 406                  $protocol = strtolower(trim($protocol));
 407                  if($protocol == "")
 408                  {
 409                      trigger_error("kses4::RemoveProtocol() tried to remove an empty/NULL protocol.", E_USER_WARNING);
 410                      return false;
 411                  }
 412  
 413                  //    Ensures that the protocol exists before removing it.
 414                  if(in_array($protocol, $this->allowed_protocols))
 415                  {
 416                      $this->allowed_protocols = array_diff($this->allowed_protocols, array($protocol));
 417                      sort($this->allowed_protocols);
 418                  }
 419  
 420                  return true;
 421              }
 422  
 423              /**
 424               *    Allows for single/batch removal of protocols
 425               *
 426               *    This method accepts one argument that can be either a string
 427               *    or an array of strings.  Invalid data will be ignored.
 428               *
 429               *    The argument will be processed, and each string will be removed
 430               *    via RemoveProtocol().
 431               *
 432               *    @access public
 433               *    @param mixed , A string or array of protocols that will be removed from the internal list of allowed protocols.
 434               *    @return bool Status of removing valid protocols.
 435               *    @see RemoveProtocol()
 436               *    @since PHP5 OOP 0.2.1
 437               */
 438  			function RemoveProtocols()
 439              {
 440                  $c_args = func_num_args();
 441                  if($c_args != 1)
 442                  {
 443                      return false;
 444                  }
 445  
 446                  $protocol_data = func_get_arg(0);
 447  
 448                  if(is_array($protocol_data) && count($protocol_data) > 0)
 449                  {
 450                      foreach($protocol_data as $protocol)
 451                      {
 452                          $this->RemoveProtocol($protocol);
 453                      }
 454                  }
 455                  elseif(is_string($protocol_data))
 456                  {
 457                      $this->RemoveProtocol($protocol_data);
 458                      return true;
 459                  }
 460                  else
 461                  {
 462                      trigger_error("kses4::RemoveProtocols() did not receive a string or an array.", E_USER_WARNING);
 463                      return false;
 464                  }
 465              }
 466  
 467              /**
 468               *    This method removes any NULL or characters in $string.
 469               *
 470               *    @access private
 471               *    @param string $string
 472               *    @return string String without any NULL/chr(173)
 473               *    @since PHP4 OOP 0.0.1
 474               */
 475  			function _no_null($string)
 476              {
 477                  $string = preg_replace('/\0+/', '', $string);
 478                  $string = preg_replace('/(\\\\0)+/', '', $string);
 479                  return $string;
 480              }
 481  
 482              /**
 483               *    This function removes the HTML JavaScript entities found in early versions of
 484               *    Netscape 4.
 485               *
 486               *    @access private
 487               *    @param string $string
 488               *    @return string String without any NULL/chr(173)
 489               *    @since PHP4 OOP 0.0.1
 490               */
 491  			function _js_entities($string)
 492              {
 493                return preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
 494              }
 495  
 496              /**
 497               *    Normalizes HTML entities
 498               *
 499               *    This function normalizes HTML entities. It will convert "AT&T" to the correct
 500               *    "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
 501               *
 502               *    @access private
 503               *    @param string $string
 504               *    @return string String with normalized entities
 505               *    @since PHP4 OOP 0.0.1
 506               */
 507  			function _normalize_entities($string)
 508              {
 509                  # Disarm all entities by converting & to &amp;
 510                $string = str_replace('&', '&amp;', $string);
 511  
 512                  # Change back the allowed entities in our entity white list
 513  
 514                $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string);
 515                $string = preg_replace('/&amp;#0*([0-9]{1,5});/e', '\$this->_normalize_entities2("\\1")', $string);
 516                $string = preg_replace('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', '&#\\1\\2;', $string);
 517  
 518                return $string;
 519              }
 520  
 521              /**
 522               *    Helper method used by normalizeEntites()
 523               *
 524               *    This method helps normalizeEntities() to only accept 16 bit values
 525               *    and nothing more for &#number; entities.
 526               *
 527               *    This method helps normalize_entities() during a preg_replace()
 528               *    where a &#(0)*XXXXX; occurs.  The '(0)*XXXXXX' value is converted to
 529               *    a number and the result is returned as a numeric entity if the number
 530               *    is less than 65536.  Otherwise, the value is returned 'as is'.
 531               *
 532               *    @access private
 533               *    @param string $i
 534               *    @return string Normalized numeric entity
 535               *    @see _normalize_entities()
 536               *    @since PHP4 OOP 0.0.1
 537               */
 538  			function _normalize_entities2($i)
 539              {
 540                return (($i > 65535) ? "&amp;#$i;" : "&#$i;");
 541              }
 542  
 543              /**
 544               *    Allows for additional user defined modifications to text.
 545               *
 546               *    @deprecated use filterKsesTextHook()
 547               *    @param string $string
 548               *    @see filterKsesTextHook()
 549               *    @return string
 550               *    @since PHP4 OOP 0.0.1
 551               */
 552  			function _hook($string)
 553              {
 554                return $this->filterKsesTextHook($string);
 555              }
 556  
 557              /**
 558               *    Allows for additional user defined modifications to text.
 559               *
 560               *    This method allows for additional modifications to be performed on
 561               *    a string that's being run through Parse().  Currently, it returns the
 562               *    input string 'as is'.
 563               *
 564               *    This method is provided for users to extend the kses class for their own
 565               *    requirements.
 566               *
 567               *    @access public
 568               *    @param string $string String to perfrom additional modifications on.
 569               *    @return string User modified string.
 570               *    @see Parse()
 571               *    @since PHP5 OOP 1.0.0
 572               */
 573  			function filterKsesTextHook($string)
 574              {
 575                return $string;
 576              }
 577  
 578              /**
 579               *    This method goes through an array, and changes the keys to all lower case.
 580               *
 581               *    @access private
 582               *    @param array $in_array Associative array
 583               *    @return array Modified array
 584               *    @since PHP4 OOP 0.0.1
 585               */
 586  			function _array_lc($inarray)
 587              {
 588                $outarray = array();
 589  
 590                  if(is_array($inarray) && count($inarray) > 0)
 591                  {
 592                      foreach ($inarray as $inkey => $inval)
 593                      {
 594                          $outkey = strtolower($inkey);
 595                          $outarray[$outkey] = array();
 596  
 597                          if(is_array($inval) && count($inval) > 0)
 598                          {
 599                              foreach ($inval as $inkey2 => $inval2)
 600                              {
 601                                  $outkey2 = strtolower($inkey2);
 602                                  $outarray[$outkey][$outkey2] = $inval2;
 603                              }
 604                          }
 605                      }
 606                  }
 607  
 608                return $outarray;
 609              }
 610  
 611              /**
 612               *    This method searched for HTML tags, no matter how malformed.  It also
 613               *    matches stray ">" characters.
 614               *
 615               *    @access private
 616               *    @param string $string
 617               *    @return string HTML tags
 618               *    @since PHP4 OOP 0.0.1
 619               */
 620  			function _split($string)
 621              {
 622                  return preg_replace(
 623                      '%(<'.   # EITHER: <
 624                      '[^>]*'. # things that aren't >
 625                      '(>|$)'. # > or end of string
 626                      '|>)%e', # OR: just a >
 627                      "\$this->_split2('\\1')",
 628                      $string);
 629              }
 630  
 631              /**
 632               *    This method strips out disallowed and/or mangled (X)HTML tags along with assigned attributes.
 633               *
 634               *    This method does a lot of work. It rejects some very malformed things
 635               *    like <:::>. It returns an empty string if the element isn't allowed (look
 636               *    ma, no strip_tags()!). Otherwise it splits the tag into an element and an
 637               *    allowed attribute list.
 638               *
 639               *    @access private
 640               *    @param string $string
 641               *    @return string Modified string minus disallowed/mangled (X)HTML and attributes
 642               *    @since PHP4 OOP 0.0.1
 643               */
 644  			function _split2($string)
 645              {
 646                  $string = $this->_stripslashes($string);
 647  
 648                  if (substr($string, 0, 1) != '<')
 649                  {
 650                      # It matched a ">" character
 651                      return '&gt;';
 652                  }
 653  
 654                  if (!preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $matches))
 655                  {
 656                      # It's seriously malformed
 657                      return '';
 658                  }
 659  
 660                  $slash    = trim($matches[1]);
 661                  $elem     = $matches[2];
 662                  $attrlist = $matches[3];
 663  
 664                  if (
 665                      !isset($this->allowed_html[strtolower($elem)]) ||
 666                      !is_array($this->allowed_html[strtolower($elem)])
 667                  )
 668                  {
 669                      # They are using a not allowed HTML element
 670                      return '';
 671                  }
 672  
 673                  if ($slash != '')
 674                  {
 675                      return "<$slash$elem>";
 676                  }
 677                  # No attributes are allowed for closing elements
 678  
 679                  return $this->_attr("$slash$elem", $attrlist);
 680              }
 681  
 682              /**
 683               *    This method strips out disallowed attributes for (X)HTML tags.
 684               *
 685               *    This method removes all attributes if none are allowed for this element.
 686               *    If some are allowed it calls $this->_hair() to split them further, and then it
 687               *    builds up new HTML code from the data that $this->_hair() returns. It also
 688               *    removes "<" and ">" characters, if there are any left. One more thing it
 689               *    does is to check if the tag has a closing XHTML slash, and if it does,
 690               *    it puts one in the returned code as well.
 691               *
 692               *    @access private
 693               *    @param string $element (X)HTML tag to check
 694               *    @param string $attr Text containing attributes to check for validity.
 695               *    @return string Resulting valid (X)HTML or ''
 696               *    @see _hair()
 697               *    @since PHP4 OOP 0.0.1
 698               */
 699  			function _attr($element, $attr)
 700              {
 701                  # Is there a closing XHTML slash at the end of the attributes?
 702                  $xhtml_slash = '';
 703                  if (preg_match('%\s/\s*$%', $attr))
 704                  {
 705                      $xhtml_slash = ' /';
 706                  }
 707  
 708                  # Are any attributes allowed at all for this element?
 709                  if (
 710                      !isset($this->allowed_html[strtolower($element)]) ||
 711                      count($this->allowed_html[strtolower($element)]) == 0
 712                  )
 713                  {
 714                      return "<$element$xhtml_slash>";
 715                  }
 716  
 717                  # Split it
 718                  $attrarr = $this->_hair($attr);
 719  
 720                  # Go through $attrarr, and save the allowed attributes for this element
 721                  # in $attr2
 722                  $attr2 = '';
 723                  if(is_array($attrarr) && count($attrarr) > 0)
 724                  {
 725                      foreach ($attrarr as $arreach)
 726                      {
 727                          if(!isset($this->allowed_html[strtolower($element)][strtolower($arreach['name'])]))
 728                          {
 729                              continue;
 730                          }
 731  
 732                          $current = $this->allowed_html[strtolower($element)][strtolower($arreach['name'])];
 733                          if ($current == '')
 734                          {
 735                              # the attribute is not allowed
 736                              continue;
 737                          }
 738  
 739                          if (!is_array($current))
 740                          {
 741                              # there are no checks
 742                              $attr2 .= ' '.$arreach['whole'];
 743  
 744                      }
 745                          else
 746                          {
 747                              # there are some checks
 748                              $ok = true;
 749                              if(is_array($current) && count($current) > 0)
 750                              {
 751                                  foreach ($current as $currkey => $currval)
 752                                  {
 753                                      if (!$this->_check_attr_val($arreach['value'], $arreach['vless'], $currkey, $currval))
 754                                      {
 755                                          $ok = false;
 756                                          break;
 757                                      }
 758                                  }
 759  
 760                                  if ($ok)
 761                                  {
 762                                      # it passed them
 763                                      $attr2 .= ' '.$arreach['whole'];
 764                                  }
 765                              }
 766                          }
 767                      }
 768                  }
 769  
 770                  # Remove any "<" or ">" characters
 771                  $attr2 = preg_replace('/[<>]/', '', $attr2);
 772                  return "<$element$attr2$xhtml_slash>";
 773              }
 774  
 775              /**
 776               *    This method combs through an attribute list string and returns an associative array of attributes and values.
 777               *
 778               *    This method does a lot of work. It parses an attribute list into an array
 779               *    with attribute data, and tries to do the right thing even if it gets weird
 780               *    input. It will add quotes around attribute values that don't have any quotes
 781               *    or apostrophes around them, to make it easier to produce HTML code that will
 782               *    conform to W3C's HTML specification. It will also remove bad URL protocols
 783               *    from attribute values.
 784               *
 785               *    @access private
 786               *    @param string $attr Text containing tag attributes for parsing
 787               *    @return array Associative array containing data on attribute and value
 788                 *    @since PHP4 OOP 0.0.1
 789               */
 790  			function _hair($attr)
 791              {
 792                  $attrarr  = array();
 793                  $mode     = 0;
 794                  $attrname = '';
 795  
 796                  # Loop through the whole attribute list
 797  
 798                  while (strlen($attr) != 0)
 799                  {
 800                      # Was the last operation successful?
 801                      $working = 0;
 802  
 803                      switch ($mode)
 804                      {
 805                          case 0:    # attribute name, href for instance
 806                              if (preg_match('/^([-a-zA-Z]+)/', $attr, $match))
 807                              {
 808                                  $attrname = $match[1];
 809                                  $working = $mode = 1;
 810                                  $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
 811                              }
 812                              break;
 813                          case 1:    # equals sign or valueless ("selected")
 814                              if (preg_match('/^\s*=\s*/', $attr)) # equals sign
 815                              {
 816                                  $working = 1;
 817                                  $mode    = 2;
 818                                  $attr    = preg_replace('/^\s*=\s*/', '', $attr);
 819                                  break;
 820                              }
 821                              if (preg_match('/^\s+/', $attr)) # valueless
 822                              {
 823                                  $working   = 1;
 824                                  $mode      = 0;
 825                                  $attrarr[] = array(
 826                                      'name'  => $attrname,
 827                                      'value' => '',
 828                                      'whole' => $attrname,
 829                                      'vless' => 'y'
 830                                  );
 831                                  $attr      = preg_replace('/^\s+/', '', $attr);
 832                              }
 833                              break;
 834                          case 2: # attribute value, a URL after href= for instance
 835                              if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match)) # "value"
 836                              {
 837                                  $thisval   = $this->_bad_protocol($match[1]);
 838                                  $attrarr[] = array(
 839                                      'name'  => $attrname,
 840                                      'value' => $thisval,
 841                                      'whole' => "$attrname=\"$thisval\"",
 842                                      'vless' => 'n'
 843                                  );
 844                                  $working   = 1;
 845                                  $mode      = 0;
 846                                  $attr      = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
 847                                  break;
 848                              }
 849                              if (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match)) # 'value'
 850                              {
 851                                  $thisval   = $this->_bad_protocol($match[1]);
 852                                  $attrarr[] = array(
 853                                      'name'  => $attrname,
 854                                      'value' => $thisval,
 855                                      'whole' => "$attrname='$thisval'",
 856                                      'vless' => 'n'
 857                                  );
 858                                  $working   = 1;
 859                                  $mode      = 0;
 860                                  $attr      = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
 861                                  break;
 862                              }
 863                              if (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match)) # value
 864                              {
 865                                  $thisval   = $this->_bad_protocol($match[1]);
 866                                  $attrarr[] = array(
 867                                      'name'  => $attrname,
 868                                      'value' => $thisval,
 869                                      'whole' => "$attrname=\"$thisval\"",
 870                                      'vless' => 'n'
 871                                  );
 872                                  # We add quotes to conform to W3C's HTML spec.
 873                                  $working   = 1;
 874                                  $mode      = 0;
 875                                  $attr      = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
 876                              }
 877                              break;
 878                      }
 879  
 880                      if ($working == 0) # not well formed, remove and try again
 881                      {
 882                          $attr = $this->_html_error($attr);
 883                          $mode = 0;
 884                      }
 885                  }
 886  
 887                  # special case, for when the attribute list ends with a valueless
 888                  # attribute like "selected"
 889                  if ($mode == 1)
 890                  {
 891                      $attrarr[] = array(
 892                          'name'  => $attrname,
 893                          'value' => '',
 894                          'whole' => $attrname,
 895                          'vless' => 'y'
 896                      );
 897                  }
 898  
 899                  return $attrarr;
 900              }
 901  
 902              /**
 903               *    This method removes disallowed protocols.
 904               *
 905               *    This method removes all non-allowed protocols from the beginning of
 906               *    $string. It ignores whitespace and the case of the letters, and it does
 907               *    understand HTML entities. It does its work in a while loop, so it won't be
 908               *    fooled by a string like "javascript:javascript:alert(57)".
 909               *
 910               *    @access private
 911               *    @param string $string String to check for protocols
 912               *    @return string String with removed protocols
 913               *    @since PHP4 OOP 0.0.1
 914               */
 915  			function _bad_protocol($string)
 916              {
 917                  $string  = $this->_no_null($string);
 918                  $string = preg_replace('/\xad+/', '', $string); # deals with Opera "feature"
 919                  $string2 = $string.'a';
 920  
 921                  while ($string != $string2)
 922                  {
 923                      $string2 = $string;
 924                      $string  = $this->_bad_protocol_once($string);
 925                  } # while
 926  
 927                  return $string;
 928              }
 929  
 930              /**
 931               *    Helper method used by _bad_protocol()
 932               *
 933               *    This function searches for URL protocols at the beginning of $string, while
 934               *    handling whitespace and HTML entities.
 935               *
 936               *    @access private
 937               *    @param string $string String to check for protocols
 938               *    @return string String with removed protocols
 939               *    @see _bad_protocol()
 940               *    @since PHP4 OOP 0.0.1
 941               */
 942  			function _bad_protocol_once($string)
 943              {
 944                  return preg_replace(
 945                      '/^((&[^;]*;|[\sA-Za-z0-9])*)'.
 946                      '(:|&#58;|&#[Xx]3[Aa];)\s*/e',
 947                      '\$this->_bad_protocol_once2("\\1")',
 948                      $string
 949                  );
 950              }
 951  
 952              /**
 953               *    Helper method used by _bad_protocol_once() regex
 954               *
 955               *    This function processes URL protocols, checks to see if they're in the white-
 956               *    list or not, and returns different data depending on the answer.
 957               *
 958               *    @access private
 959               *    @param string $string String to check for protocols
 960               *    @return string String with removed protocols
 961               *    @see _bad_protocol()
 962               *    @see _bad_protocol_once()
 963               *    @since PHP4 OOP 0.0.1
 964               */
 965  			function _bad_protocol_once2($string)
 966              {
 967                  $string = $this->_decode_entities($string);
 968                  $string = preg_replace('/\s/', '', $string);
 969                  $string = $this->_no_null($string);
 970                  $string = preg_replace('/\xad+/', '', $string); # deals with Opera "feature"
 971                  $string = strtolower($string);
 972  
 973                  $allowed = false;
 974                  if(is_array($this->allowed_protocols) && count($this->allowed_protocols) > 0)
 975                  {
 976                      foreach ($this->allowed_protocols as $one_protocol)
 977                      {
 978                          if (strtolower($one_protocol) == $string)
 979                          {
 980                              $allowed = true;
 981                              break;
 982                          }
 983                      }
 984                  }
 985  
 986                  if ($allowed)
 987                  {
 988                      return "$string:";
 989                  }
 990                  else
 991                  {
 992                      return '';
 993                  }
 994              }
 995  
 996              /**
 997               *    This function performs different checks for attribute values.
 998               *
 999               *    The currently implemented checks are "maxlen", "minlen", "maxval",
1000               *    "minval" and "valueless" with even more checks to come soon.
1001               *
1002               *    @access private
1003               *    @param string $value The value of the attribute to be checked.
1004               *    @param string $vless Indicates whether the attribute was given without a value ('y') or with one ('n')
1005               *    @param string $checkname The check to be performed
1006               *    @param string $checkvalue The value that is to be checked against
1007               *    @return bool Indicates whether the check passed or not
1008               *    @since PHP4 OOP 0.0.1
1009               */
1010  			function _check_attr_val($value, $vless, $checkname, $checkvalue)
1011              {
1012                  $ok = true;
1013  
1014                  switch (strtolower($checkname))
1015                  {
1016                      /**
1017                      *    The maxlen check makes sure that the attribute value has a length not
1018                      *    greater than the given value. This can be used to avoid Buffer Overflows
1019                      *    in WWW clients and various Internet servers.
1020                      */
1021                      case 'maxlen':
1022                          if (strlen($value) > $checkvalue)
1023                          {
1024                              $ok = false;
1025                          }
1026                          break;
1027  
1028                      /**
1029                      *    The minlen check makes sure that the attribute value has a length not
1030                      *    smaller than the given value.
1031                      */
1032                      case 'minlen':
1033                          if (strlen($value) < $checkvalue)
1034                          {
1035                              $ok = false;
1036                          }
1037                          break;
1038  
1039                      /**
1040                      *    The maxval check does two things: it checks that the attribute value is
1041                      *    an integer from 0 and up, without an excessive amount of zeroes or
1042                      *    whitespace (to avoid Buffer Overflows). It also checks that the attribute
1043                      *    value is not greater than the given value.
1044                      *    This check can be used to avoid Denial of Service attacks.
1045                      */
1046                      case 'maxval':
1047                          if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
1048                          {
1049                              $ok = false;
1050                          }
1051                          if ($value > $checkvalue)
1052                          {
1053                              $ok = false;
1054                          }
1055                          break;
1056  
1057                      /**
1058                      *    The minval check checks that the attribute value is a positive integer,
1059                      *    and that it is not smaller than the given value.
1060                      */
1061                      case 'minval':
1062                          if (!preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
1063                          {
1064                              $ok = false;
1065                          }
1066                          if ($value < $checkvalue)
1067                          {
1068                              $ok = false;
1069                          }
1070                          break;
1071  
1072                      /**
1073                      *    The valueless check checks if the attribute has a value
1074                      *    (like <a href="blah">) or not (<option selected>). If the given value
1075                      *    is a "y" or a "Y", the attribute must not have a value.
1076                      *    If the given value is an "n" or an "N", the attribute must have one.
1077                      */
1078                      case 'valueless':
1079                      if (strtolower($checkvalue) != $vless)
1080                      {
1081                          $ok = false;
1082                      }
1083                      break;
1084  
1085                  }
1086  
1087                  return $ok;
1088              }
1089  
1090              /**
1091               *    Changes \" to "
1092               *
1093               *    This function changes the character sequence  \"  to just  "
1094               *    It leaves all other slashes alone. It's really weird, but the quoting from
1095               *    preg_replace(//e) seems to require this.
1096               *
1097               *    @access private
1098               *    @param string $string The string to be stripped.
1099               *    @return string string stripped of \"
1100               *    @since PHP4 OOP 0.0.1
1101               */
1102  			function _stripslashes($string)
1103              {
1104                  return preg_replace('%\\\\"%', '"', $string);
1105              }
1106  
1107              /**
1108               *    helper method for _hair()
1109               *
1110               *    This function deals with parsing errors in _hair(). The general plan is
1111               *    to remove everything to and including some whitespace, but it deals with
1112               *    quotes and apostrophes as well.
1113               *
1114               *    @access private
1115               *    @param string $string The string to be stripped.
1116               *    @return string The string with its leading malformed chunk, and any whitespace following it, removed
1117               *    @see _hair()
1118               *    @since PHP4 OOP 0.0.1
1119               */
1120  			function _html_error($string)
1121              {
1122                  return preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $string);
1123              }
1124  
1125              /**
1126               *    Decodes numeric HTML entities
1127               *
1128               *    This method decodes numeric HTML entities (&#65; and &#x41;). It doesn't
1129               *    do anything with other entities like &auml;, but we don't need them in the
1130               *    URL protocol white listing system anyway.
1131               *
1132               *    @access private
1133               *    @param string $string Text whose numeric entities are to be decoded.
1134               *    @return string Decoded entity
1135               *    @since PHP4 OOP 0.0.1
1136               */
1137  			function _decode_entities($string)
1138              {
1139                  $string = preg_replace('/&#([0-9]+);/e', 'chr("\\1")', $string);
1140                  $string = preg_replace('/&#[Xx]([0-9A-Fa-f]+);/e', 'chr(hexdec("\\1"))', $string);
1141                  return $string;
1142              }
1143  
1144              /**
1145               *    Returns PHP4 OOP version # of kses.
1146               *
1147               *    Since this class has been refactored and documented and proven to work,
1148               *    I'm syncing the version number to procedural kses.
1149               *
1150               *    @access public
1151               *    @return string Version number
1152               *    @since PHP4 OOP 0.0.1
1153               */
1154  			function _version()
1155              {
1156                  return 'PHP4 0.2.2 (OOP fork of procedural kses 0.2.2)';
1157              }
1158          }
1159      }
1160  ?>


Généré le : Wed Nov 21 12:27:40 2007 par Balluche grâce à PHPXref 0.7
  Clicky Web Analytics