[ Index ]
 

Code source de PRADO 3.0.6

Accédez au source d'autres logiciels libres | Soutenez Angelica Josefina !

title

Body

[fermer]

/framework/3rdParty/SafeHtml/ -> TSafeHtmlParser.php (source)

   1  <?php
   2  /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4: */
   3  
   4  /**
   5   * SafeHTML Parser
   6   *
   7   * PHP versions 4 and 5
   8   *
   9   * @category   HTML
  10   * @package    System.Security
  11   * @author     Roman Ivanov <thingol@mail.ru>
  12   * @copyright  2004-2005 Roman Ivanov
  13   * @license    http://www.debian.org/misc/bsd.license  BSD License (3 Clause)
  14   * @version    1.3.7
  15   * @link       http://pixel-apes.com/safehtml/
  16   */
  17  
  18  
  19  /**
  20   * This package requires HTMLSax3 package
  21   */
  22  Prado::using('System.3rdParty.SafeHtml.HTMLSax3');
  23  
  24  
  25  /**
  26   *
  27   * SafeHTML Parser
  28   *
  29   * This parser strips down all potentially dangerous content within HTML:
  30   * <ul>
  31   * <li>opening tag without its closing tag</li>
  32   * <li>closing tag without its opening tag</li>
  33   * <li>any of these tags: "base", "basefont", "head", "html", "body", "applet",
  34   * "object", "iframe", "frame", "frameset", "script", "layer", "ilayer", "embed",
  35   * "bgsound", "link", "meta", "style", "title", "blink", "xml" etc.</li>
  36   * <li>any of these attributes: on*, data*, dynsrc</li>
  37   * <li>javascript:/vbscript:/about: etc. protocols</li>
  38   * <li>expression/behavior etc. in styles</li>
  39   * <li>any other active content</li>
  40   * </ul>
  41   * It also tries to convert code to XHTML valid, but htmltidy is far better
  42   * solution for this task.
  43   *
  44   * <b>Example:</b>
  45   * <pre>
  46   * $parser =& new SafeHTML();
  47   * $result = $parser->parse($doc);
  48   * </pre>
  49   *
  50   * @category   HTML
  51   * @package    System.Security
  52   * @author     Roman Ivanov <thingol@mail.ru>
  53   * @copyright  1997-2005 Roman Ivanov
  54   * @license    http://www.debian.org/misc/bsd.license  BSD License (3 Clause)
  55   * @version    Release: @package_version@
  56   * @link       http://pear.php.net/package/SafeHTML
  57   */
  58  class TSafeHtmlParser
  59  {
  60      /**
  61       * Storage for resulting HTML output
  62       *
  63       * @var string
  64       * @access private
  65       */
  66      private $_xhtml = '';
  67  
  68      /**
  69       * Array of counters for each tag
  70       *
  71       * @var array
  72       * @access private
  73       */
  74      private $_counter = array();
  75  
  76      /**
  77       * Stack of unclosed tags
  78       *
  79       * @var array
  80       * @access private
  81       */
  82      private $_stack = array();
  83  
  84      /**
  85       * Array of counters for tags that must be deleted with all content
  86       *
  87       * @var array
  88       * @access private
  89       */
  90      private $_dcCounter = array();
  91  
  92      /**
  93       * Stack of unclosed tags that must be deleted with all content
  94       *
  95       * @var array
  96       * @access private
  97       */
  98      private $_dcStack = array();
  99  
 100      /**
 101       * Stores level of list (ol/ul) nesting
 102       *
 103       * @var int
 104       * @access private
 105       */
 106      private $_listScope = 0;
 107  
 108      /**
 109       * Stack of unclosed list tags
 110       *
 111       * @var array
 112       * @access private
 113       */
 114      private $_liStack = array();
 115  
 116      /**
 117       * Array of prepared regular expressions for protocols (schemas) matching
 118       *
 119       * @var array
 120       * @access private
 121       */
 122      private $_protoRegexps = array();
 123  
 124      /**
 125       * Array of prepared regular expressions for CSS matching
 126       *
 127       * @var array
 128       * @access private
 129       */
 130      private $_cssRegexps = array();
 131  
 132      /**
 133       * List of single tags ("<tag />")
 134       *
 135       * @var array
 136       * @access public
 137       */
 138      public $singleTags = array('area', 'br', 'img', 'input', 'hr', 'wbr', );
 139  
 140      /**
 141       * List of dangerous tags (such tags will be deleted)
 142       *
 143       * @var array
 144       * @access public
 145       */
 146      public $deleteTags = array(
 147          'applet', 'base',   'basefont', 'bgsound', 'blink',  'body',
 148          'embed',  'frame',  'frameset', 'head',    'html',   'ilayer',
 149          'iframe', 'layer',  'link',     'meta',    'object', 'style',
 150          'title',  'script',
 151          );
 152  
 153      /**
 154       * List of dangerous tags (such tags will be deleted, and all content
 155       * inside this tags will be also removed)
 156       *
 157       * @var array
 158       * @access public
 159       */
 160      public $deleteTagsContent = array('script', 'style', 'title', 'xml', );
 161  
 162      /**
 163       * Type of protocols filtering ('white' or 'black')
 164       *
 165       * @var string
 166       * @access public
 167       */
 168      public $protocolFiltering = 'white';
 169  
 170      /**
 171       * List of "dangerous" protocols (used for blacklist-filtering)
 172       *
 173       * @var array
 174       * @access public
 175       */
 176      public $blackProtocols = array(
 177          'about',   'chrome',     'data',       'disk',     'hcp',
 178          'help',    'javascript', 'livescript', 'lynxcgi',  'lynxexec',
 179          'ms-help', 'ms-its',     'mhtml',      'mocha',    'opera',
 180          'res',     'resource',   'shell',      'vbscript', 'view-source',
 181          'vnd.ms.radio',          'wysiwyg',
 182          );
 183  
 184      /**
 185       * List of "safe" protocols (used for whitelist-filtering)
 186       *
 187       * @var array
 188       * @access public
 189       */
 190      public $whiteProtocols = array(
 191          'ed2k',   'file', 'ftp',  'gopher', 'http',  'https',
 192          'irc',    'mailto', 'news', 'nntp', 'telnet', 'webcal',
 193          'xmpp',   'callto',
 194          );
 195  
 196      /**
 197       * List of attributes that can contain protocols
 198       *
 199       * @var array
 200       * @access public
 201       */
 202      public $protocolAttributes = array(
 203          'action', 'background', 'codebase', 'dynsrc', 'href', 'lowsrc', 'src',
 204          );
 205  
 206      /**
 207       * List of dangerous CSS keywords
 208       *
 209       * Whole style="" attribute will be removed, if parser will find one of
 210       * these keywords
 211       *
 212       * @var array
 213       * @access public
 214       */
 215      public $cssKeywords = array(
 216          'absolute', 'behavior',       'behaviour',   'content', 'expression',
 217          'fixed',    'include-source', 'moz-binding',
 218          );
 219  
 220      /**
 221       * List of tags that can have no "closing tag"
 222       *
 223       * @var array
 224       * @access public
 225       * @deprecated XHTML does not allow such tags
 226       */
 227      public $noClose = array();
 228  
 229      /**
 230       * List of block-level tags that terminates paragraph
 231       *
 232       * Paragraph will be closed when this tags opened
 233       *
 234       * @var array
 235       * @access public
 236       */
 237      public $closeParagraph = array(
 238          'address', 'blockquote', 'center', 'dd',      'dir',       'div',
 239          'dl',      'dt',         'h1',     'h2',      'h3',        'h4',
 240          'h5',      'h6',         'hr',     'isindex', 'listing',   'marquee',
 241          'menu',    'multicol',   'ol',     'p',       'plaintext', 'pre',
 242          'table',   'ul',         'xmp',
 243          );
 244  
 245      /**
 246       * List of table tags, all table tags outside a table will be removed
 247       *
 248       * @var array
 249       * @access public
 250       */
 251      public $tableTags = array(
 252          'caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
 253          'thead',   'tr',
 254          );
 255  
 256      /**
 257       * List of list tags
 258       *
 259       * @var array
 260       * @access public
 261       */
 262      public $listTags = array('dir', 'menu', 'ol', 'ul', 'dl', );
 263  
 264      /**
 265       * List of dangerous attributes
 266       *
 267       * @var array
 268       * @access public
 269       */
 270      public $attributes = array('dynsrc', 'id', 'name', );
 271  
 272      /**
 273       * List of allowed "namespaced" attributes
 274       *
 275       * @var array
 276       * @access public
 277       */
 278      public $attributesNS = array('xml:lang', );
 279  
 280      /**
 281       * Constructs class
 282       *
 283       * @access public
 284       */
 285      public function __construct()
 286      {
 287          //making regular expressions based on Proto & CSS arrays
 288          foreach ($this->blackProtocols as $proto) {
 289              $preg = "/[\s\x01-\x1F]*";
 290              for ($i=0; $i<strlen($proto); $i++) {
 291                  $preg .= $proto{$i} . "[\s\x01-\x1F]*";
 292              }
 293              $preg .= ":/i";
 294              $this->_protoRegexps[] = $preg;
 295          }
 296  
 297          foreach ($this->cssKeywords as $css) {
 298              $this->_cssRegexps[] = '/' . $css . '/i';
 299          }
 300          return true;
 301      }
 302  
 303      /**
 304       * Handles the writing of attributes - called from $this->_openHandler()
 305       *
 306       * @param array $attrs array of attributes $name => $value
 307       * @return boolean
 308       * @access private
 309       */
 310      private function _writeAttrs ($attrs)
 311      {
 312          if (is_array($attrs)) {
 313              foreach ($attrs as $name => $value) {
 314  
 315                  $name = strtolower($name);
 316  
 317                  if (strpos($name, 'on') === 0) {
 318                      continue;
 319                  }
 320                  if (strpos($name, 'data') === 0) {
 321                      continue;
 322                  }
 323                  if (in_array($name, $this->attributes)) {
 324                      continue;
 325                  }
 326                  if (!preg_match("/^[a-z0-9]+$/i", $name)) {
 327                      if (!in_array($name, $this->attributesNS))
 328                      {
 329                          continue;
 330                      }
 331                  }
 332  
 333                  if (($value === TRUE) || (is_null($value))) {
 334                      $value = $name;
 335                  }
 336  
 337                  if ($name == 'style') {
 338  
 339                     // removes insignificant backslahes
 340                     $value = str_replace("\\", '', $value);
 341  
 342                     // removes CSS comments
 343                     while (1)
 344                     {
 345                       $_value = preg_replace("!/\*.*?\*/!s", '', $value);
 346                       if ($_value == $value) break;
 347                       $value = $_value;
 348                     }
 349  
 350                     // replace all & to &amp;
 351                     $value = str_replace('&amp;', '&', $value);
 352                     $value = str_replace('&', '&amp;', $value);
 353  
 354                     foreach ($this->_cssRegexps as $css) {
 355                         if (preg_match($css, $value)) {
 356                             continue 2;
 357                         }
 358                     }
 359                     foreach ($this->_protoRegexps as $proto) {
 360                         if (preg_match($proto, $value)) {
 361                             continue 2;
 362                         }
 363                     }
 364                  }
 365  
 366                  $tempval = preg_replace('/&#(\d+);?/me', "chr('\\1')", $value); //"'
 367                  $tempval = preg_replace('/&#x([0-9a-f]+);?/mei', "chr(hexdec('\\1'))", $tempval);
 368  
 369                  if ((in_array($name, $this->protocolAttributes)) &&
 370                      (strpos($tempval, ':') !== false))
 371                  {
 372                      if ($this->protocolFiltering == 'black') {
 373                          foreach ($this->_protoRegexps as $proto) {
 374                              if (preg_match($proto, $tempval)) continue 2;
 375                          }
 376                      } else {
 377                          $_tempval = explode(':', $tempval);
 378                          $proto = $_tempval[0];
 379                          if (!in_array($proto, $this->whiteProtocols)) {
 380                              continue;
 381                          }
 382                      }
 383                  }
 384  
 385                  $value = str_replace("\"", "&quot;", $value);
 386                  $this->_xhtml .= ' ' . $name . '="' . $value . '"';
 387              }
 388          }
 389          return true;
 390      }
 391  
 392      /**
 393       * Opening tag handler - called from HTMLSax
 394       *
 395       * @param object $parser HTML Parser
 396       * @param string $name   tag name
 397       * @param array  $attrs  tag attributes
 398       * @return boolean
 399       * @access private
 400       */
 401      public function _openHandler(&$parser, $name, $attrs)
 402      {
 403          $name = strtolower($name);
 404  
 405          if (in_array($name, $this->deleteTagsContent)) {
 406              array_push($this->_dcStack, $name);
 407              $this->_dcCounter[$name] = isset($this->_dcCounter[$name]) ? $this->_dcCounter[$name]+1 : 1;
 408          }
 409          if (count($this->_dcStack) != 0) {
 410              return true;
 411          }
 412  
 413          if (in_array($name, $this->deleteTags)) {
 414              return true;
 415          }
 416  
 417          if (!preg_match("/^[a-z0-9]+$/i", $name)) {
 418              if (preg_match("!(?:\@|://)!i", $name)) {
 419                  $this->_xhtml .= '&lt;' . $name . '&gt;';
 420              }
 421              return true;
 422          }
 423  
 424          if (in_array($name, $this->singleTags)) {
 425              $this->_xhtml .= '<' . $name;
 426              $this->_writeAttrs($attrs);
 427              $this->_xhtml .= ' />';
 428              return true;
 429          }
 430  
 431          // TABLES: cannot open table elements when we are not inside table
 432          if ((isset($this->_counter['table'])) && ($this->_counter['table'] <= 0)
 433              && (in_array($name, $this->tableTags)))
 434          {
 435              return true;
 436          }
 437  
 438          // PARAGRAPHS: close paragraph when closeParagraph tags opening
 439          if ((in_array($name, $this->closeParagraph)) && (in_array('p', $this->_stack))) {
 440              $this->_closeHandler($parser, 'p');
 441          }
 442  
 443          // LISTS: we should close <li> if <li> of the same level opening
 444          if ($name == 'li' && count($this->_liStack) &&
 445              $this->_listScope == $this->_liStack[count($this->_liStack)-1])
 446          {
 447              $this->_closeHandler($parser, 'li');
 448          }
 449  
 450          // LISTS: we want to know on what nesting level of lists we are
 451          if (in_array($name, $this->listTags)) {
 452              $this->_listScope++;
 453          }
 454          if ($name == 'li') {
 455              array_push($this->_liStack, $this->_listScope);
 456          }
 457  
 458          $this->_xhtml .= '<' . $name;
 459          $this->_writeAttrs($attrs);
 460          $this->_xhtml .= '>';
 461          array_push($this->_stack,$name);
 462          $this->_counter[$name] = isset($this->_counter[$name]) ? $this->_counter[$name]+1 : 1;
 463          return true;
 464      }
 465  
 466      /**
 467       * Closing tag handler - called from HTMLSax
 468       *
 469       * @param object $parsers HTML parser
 470       * @param string $name    tag name
 471       * @return boolean
 472       * @access private
 473       */
 474      public function _closeHandler(&$parser, $name)
 475      {
 476  
 477          $name = strtolower($name);
 478  
 479          if (isset($this->_dcCounter[$name]) && ($this->_dcCounter[$name] > 0) &&
 480              (in_array($name, $this->deleteTagsContent)))
 481          {
 482             while ($name != ($tag = array_pop($this->_dcStack))) {
 483              $this->_dcCounter[$tag]--;
 484             }
 485  
 486             $this->_dcCounter[$name]--;
 487          }
 488  
 489          if (count($this->_dcStack) != 0) {
 490              return true;
 491          }
 492  
 493          if ((isset($this->_counter[$name])) && ($this->_counter[$name] > 0)) {
 494             while ($name != ($tag = array_pop($this->_stack))) {
 495                 $this->_closeTag($tag);
 496             }
 497  
 498             $this->_closeTag($name);
 499          }
 500          return true;
 501      }
 502  
 503      /**
 504       * Closes tag
 505       *
 506       * @param string $tag tag name
 507       * @return boolean
 508       * @access private
 509       */
 510      public function _closeTag($tag)
 511      {
 512          if (!in_array($tag, $this->noClose)) {
 513              $this->_xhtml .= '</' . $tag . '>';
 514          }
 515  
 516          $this->_counter[$tag]--;
 517  
 518          if (in_array($tag, $this->listTags)) {
 519              $this->_listScope--;
 520          }
 521  
 522          if ($tag == 'li') {
 523              array_pop($this->_liStack);
 524          }
 525          return true;
 526      }
 527  
 528      /**
 529       * Character data handler - called from HTMLSax
 530       *
 531       * @param object $parser HTML parser
 532       * @param string $data   textual data
 533       * @return boolean
 534       * @access private
 535       */
 536      public function _dataHandler(&$parser, $data)
 537      {
 538          if (count($this->_dcStack) == 0) {
 539              $this->_xhtml .= $data;
 540          }
 541          return true;
 542      }
 543  
 544      /**
 545       * Escape handler - called from HTMLSax
 546       *
 547       * @param object $parser HTML parser
 548       * @param string $data   comments or other type of data
 549       * @return boolean
 550       * @access private
 551       */
 552      public function _escapeHandler(&$parser, $data)
 553      {
 554          return true;
 555      }
 556  
 557      /**
 558       * Returns the XHTML document
 559       *
 560       * @return string Processed (X)HTML document
 561       * @access public
 562       */
 563      public function getXHTML ()
 564      {
 565          while ($tag = array_pop($this->_stack)) {
 566              $this->_closeTag($tag);
 567          }
 568  
 569          return $this->_xhtml;
 570      }
 571  
 572      /**
 573       * Clears current document data
 574       *
 575       * @return boolean
 576       * @access public
 577       */
 578      public function clear()
 579      {
 580          $this->_xhtml = '';
 581          return true;
 582      }
 583  
 584      /**
 585       * Main parsing fuction
 586       *
 587       * @param string $doc HTML document for processing
 588       * @return string Processed (X)HTML document
 589       * @access public
 590       */
 591      public function parse($doc)
 592      {
 593         $this->clear();
 594  
 595         // Save all '<' symbols
 596         $doc = preg_replace("/<(?=[^a-zA-Z\/\!\?\%])/", '&lt;', (string)$doc);
 597  
 598         // Web documents shouldn't contains \x00 symbol
 599         $doc = str_replace("\x00", '', $doc);
 600  
 601         // Opera6 bug workaround
 602         $doc = str_replace("\xC0\xBC", '&lt;', $doc);
 603  
 604         // UTF-7 encoding ASCII decode
 605         $doc = $this->repackUTF7($doc);
 606  
 607         // Instantiate the parser
 608         $parser= new TSax3();
 609  
 610         // Set up the parser
 611         $parser->set_object($this);
 612  
 613         $parser->set_element_handler('_openHandler','_closeHandler');
 614         $parser->set_data_handler('_dataHandler');
 615         $parser->set_escape_handler('_escapeHandler');
 616  
 617         $parser->parse($doc);
 618  
 619         return $this->getXHTML();
 620  
 621      }
 622  
 623  
 624      /**
 625       * UTF-7 decoding fuction
 626       *
 627       * @param string $str HTML document for recode ASCII part of UTF-7 back to ASCII
 628       * @return string Decoded document
 629       * @access private
 630       */
 631      private function repackUTF7($str)
 632      {
 633         return preg_replace_callback('!\+([0-9a-zA-Z/]+)\-!', array($this, 'repackUTF7Callback'), $str);
 634      }
 635  
 636      /**
 637       * Additional UTF-7 decoding fuction
 638       *
 639       * @param string $str String for recode ASCII part of UTF-7 back to ASCII
 640       * @return string Recoded string
 641       * @access private
 642       */
 643      private function repackUTF7Callback($str)
 644      {
 645         $str = base64_decode($str[1]);
 646         $str = preg_replace_callback('/^((?:\x00.)*)((?:[^\x00].)+)/', array($this, 'repackUTF7Back'), $str);
 647         return preg_replace('/\x00(.)/', '$1', $str);
 648      }
 649  
 650      /**
 651       * Additional UTF-7 encoding fuction
 652       *
 653       * @param string $str String for recode ASCII part of UTF-7 back to ASCII
 654       * @return string Recoded string
 655       * @access private
 656       */
 657      private function repackUTF7Back($str)
 658      {
 659         return $str[1].'+'.rtrim(base64_encode($str[2]), '=').'-';
 660      }
 661  }
 662  
 663  /*
 664   * Local variables:
 665   * tab-width: 4
 666   * c-basic-offset: 4
 667   * c-hanging-comment-ender-p: nil
 668   * End:
 669   */
 670  
 671  ?>


Généré le : Sun Feb 25 21:07:04 2007 par Balluche grâce à PHPXref 0.7