[ Index ]
 

Code source de Typo3 4.1.3

Accédez au Source d'autres logiciels libres

Classes | Fonctions | Variables | Constantes | Tables

title

Body

[fermer]

/t3lib/ -> class.t3lib_parsehtml.php (source)

   1  <?php
   2  /***************************************************************
   3  *  Copyright notice
   4  *
   5  *  (c) 1999-2006 Kasper Skaarhoj (kasperYYYY@typo3.com)
   6  *  All rights reserved
   7  *
   8  *  This script is part of the TYPO3 project. The TYPO3 project is
   9  *  free software; you can redistribute it and/or modify
  10  *  it under the terms of the GNU General Public License as published by
  11  *  the Free Software Foundation; either version 2 of the License, or
  12  *  (at your option) any later version.
  13  *
  14  *  The GNU General Public License can be found at
  15  *  http://www.gnu.org/copyleft/gpl.html.
  16  *  A copy is found in the textfile GPL.txt and important notices to the license
  17  *  from the author is found in LICENSE.txt distributed with these scripts.
  18  *
  19  *
  20  *  This script is distributed in the hope that it will be useful,
  21  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  22  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  23  *  GNU General Public License for more details.
  24  *
  25  *  This copyright notice MUST APPEAR in all copies of the script!
  26  ***************************************************************/
  27  /**
  28   * Contains class with functions for parsing HTML code.
  29   *
  30   * $Id: class.t3lib_parsehtml.php 1421 2006-04-10 09:27:15Z mundaun $
  31   * Revised for TYPO3 3.6 July/2003 by Kasper Skaarhoj
  32   *
  33   * @author    Kasper Skaarhoj <kasperYYYY@typo3.com>
  34   */
  35  /**
  36   * [CLASS/FUNCTION INDEX of SCRIPT]
  37   *
  38   *
  39   *
  40   *  106: class t3lib_parsehtml
  41   *  123:     function getSubpart($content, $marker)
  42   *  156:     function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0)
  43   *
  44   *              SECTION: Parsing HTML code
  45   *  247:     function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0)
  46   *  308:     function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0)
  47   *  344:     function splitTags($tag,$content)
  48   *  378:     function getAllParts($parts,$tag_parts=1,$include_tag=1)
  49   *  396:     function removeFirstAndLastTag($str)
  50   *  412:     function getFirstTag($str)
  51   *  426:     function getFirstTagName($str,$preserveCase=FALSE)
  52   *  445:     function get_tag_attributes($tag,$deHSC=0)
  53   *  486:     function split_tag_attributes($tag)
  54   *  524:     function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area')
  55   *
  56   *              SECTION: Clean HTML code
  57   *  617:     function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array())
  58   *  814:     function bidir_htmlspecialchars($value,$dir)
  59   *  837:     function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='')
  60   *  919:     function prefixRelPath($prefix,$srcVal,$suffix='')
  61   *  937:     function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)
  62   *  967:     function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<')
  63   *  982:     function unprotectTags($content,$tagList='')
  64   * 1015:     function stripTagsExcept($value,$tagList)
  65   * 1038:     function caseShift($str,$flag,$cacheKey='')
  66   * 1065:     function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)
  67   * 1093:     function get_tag_attributes_classic($tag,$deHSC=0)
  68   * 1106:     function indentLines($content, $number=1, $indentChar="\t")
  69   * 1123:     function HTMLparserConfig($TSconfig,$keepTags=array())
  70   * 1247:     function XHTML_clean($content)
  71   * 1269:     function processTag($value,$conf,$endTag,$protected=0)
  72   * 1315:     function processContent($value,$dir,$conf)
  73   *
  74   * TOTAL FUNCTIONS: 28
  75   * (This index is automatically created/updated by the extension "extdeveval")
  76   *
  77   */
  78  
  79  
  80  
  81  
  82  
  83  
  84  
  85  
  86  
  87  
  88  
  89  
  90  
  91  
  92  
  93  
  94  
  95  
  96  
  97  
  98  /**
  99   * Functions for parsing HTML.
 100   * You are encouraged to use this class in your own applications
 101   *
 102   * @author    Kasper Skaarhoj <kasperYYYY@typo3.com>
 103   * @package TYPO3
 104   * @subpackage t3lib
 105   */
 106  class t3lib_parsehtml    {
 107      var $caseShift_cache=array();
 108  
 109  
 110      // *******************************************'
 111      // COPY FROM class.tslib_content.php: / BEGIN
 112      // substituteSubpart
 113      // Cleaned locally 2/2003 !!!! (so different from tslib_content version)
 114      // *******************************************'
 115  
 116      /**
 117       * Returns the first subpart encapsulated in the marker, $marker (possibly present in $content as a HTML comment)
 118       *
 119       * @param    string        Content with subpart wrapped in fx. "###CONTENT_PART###" inside.
 120       * @param    string        Marker string, eg. "###CONTENT_PART###"
 121       * @return    string
 122       */
 123  	function getSubpart($content, $marker)    {
 124          $start = strpos($content, $marker);
 125          if ($start===false)    { return ''; }
 126          $start += strlen($marker);
 127          $stop = strpos($content, $marker, $start);
 128              // Q: What shall get returned if no stop marker is given /*everything till the end*/ or nothing
 129          if ($stop===false)    { return /*substr($content, $start)*/ ''; }
 130          $content = substr($content, $start, $stop-$start);
 131          $matches = array();
 132          if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $content, $matches)===1)    {
 133              return $matches[2];
 134          }
 135          $matches = array();
 136          if (preg_match('/(.*)(\<\!\-\-[^\>]*)$/s', $content, $matches)===1)    {
 137              return $matches[1];
 138          }
 139          $matches = array();
 140          if (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $content, $matches)===1)    {
 141              return $matches[2];
 142          }
 143          return $content;
 144      }
 145  
 146      /**
 147       * Substitutes a subpart in $content with the content of $subpartContent.
 148       *
 149       * @param    string        Content with subpart wrapped in fx. "###CONTENT_PART###" inside.
 150       * @param    string        Marker string, eg. "###CONTENT_PART###"
  151       * @param    array        If $subpartContent happens to be an array, its [0] and [1] elements are wrapped around the content of the subpart (fetched by getSubpart())
  152       * @param    boolean        If $recursive is set, the function calls itself with the content set to the remaining part of the content after the second marker. This means that subsequent subparts are ALSO substituted!
 153       * @param    boolean        If set, the marker around the subpart is not removed, but kept in the output
 154       * @return    string        Processed input content
 155       */
 156  	function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0)    {
 157          $start = strpos($content, $marker);
 158          if ($start===false)    { return $content; }
 159          $startAM = $start+strlen($marker);
 160          $stop = strpos($content, $marker, $startAM);
 161          if ($stop===false)    { return $content; }
 162          $stopAM = $stop+strlen($marker);
 163          $before = substr($content, 0, $start);
 164          $after = substr($content, $stopAM);
 165          $between = substr($content, $startAM, $stop-$startAM);
 166  
 167          if ($recursive)    {
 168              $after = t3lib_parsehtml::substituteSubpart($after, $marker, $subpartContent, $recursive, $keepMarker);
 169          }
 170  
 171          if ($keepMarker)    {
 172              $matches = array();
 173              if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches)===1)    {
 174                  $before .= $marker.$matches[1];
 175                  $between = $matches[2];
 176                  $after = $matches[3].$marker.$after;
 177              } elseif (preg_match('/^(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches)===1)    {
 178                  $before .= $marker;
 179                  $between = $matches[1];
 180                  $after = $matches[2].$marker.$after;
 181              } elseif (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $between, $matches)===1)    {
 182                  $before .= $marker.$matches[1];
 183                  $between = $matches[2];
 184                  $after = $marker.$after;
 185              } else    {
 186                  $before .= $marker;
 187                  $after = $marker.$after;
 188              }
 189          } else    {
 190              $matches = array();
 191              if (preg_match('/^(.*)\<\!\-\-[^\>]*$/s', $before, $matches)===1)    {
 192                  $before = $matches[1];
 193              }
 194              if (is_array($subpartContent))    {
 195                  $matches = array();
 196                  if (preg_match('/^([^\<]*\-\-\>)(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches)===1)    {
 197                      $between = $matches[2];
 198                  } elseif (preg_match('/^(.*)(\<\!\-\-[^\>]*)$/s', $between, $matches)===1)    {
 199                      $between = $matches[1];
 200                  } elseif (preg_match('/^([^\<]*\-\-\>)(.*)$/s', $between, $matches)===1)    {
 201                      $between = $matches[2];
 202                  }
 203              }
 204              $matches = array();
 205              if (preg_match('/^[^\<]*\-\-\>(.*)$/s', $after, $matches)===1)    {
 206                  $after = $matches[1];
 207              }
 208          }
 209  
 210          if (is_array($subpartContent))    {
 211              $between = $subpartContent[0].$between.$subpartContent[1];
 212          } else    {
 213              $between = $subpartContent;
 214          }
 215  
 216          return $before.$between.$after;
 217      }
 218  
 219  
 220      // *******************************************'
 221      // COPY FROM class.tslib_content.php: / END
 222      // *******************************************'
 223  
 224  
 225  
 226  
 227  
 228  
 229  
 230      /************************************
 231       *
 232       * Parsing HTML code
 233       *
 234       ************************************/
 235  
 236      /**
 237       * Returns an array with the $content divided by tag-blocks specified with the list of tags, $tag
 238       * Even numbers in the array are outside the blocks, Odd numbers are block-content.
 239       * Use ->getAllParts() and ->removeFirstAndLastTag() to process the content if needed.
 240       *
 241       * @param    string        List of tags, comma separated.
 242       * @param    string        HTML-content
 243       * @param    boolean        If set, excessive end tags are ignored - you should probably set this in most cases.
 244       * @return    array        Even numbers in the array are outside the blocks, Odd numbers are block-content.
 245       * @see splitTags(), getAllParts(), removeFirstAndLastTag()
 246       */
 247  	function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0)    {
 248          $tags=array_unique(t3lib_div::trimExplode(',',$tag,1));
 249          $regexStr = '/\<\/?('.implode('|', $tags).')(\s*\>|\s[^\>]*\>)/si';
 250  
 251          $parts = preg_split($regexStr, $content);
 252  
 253          $newParts=array();
 254          $pointer=strlen($parts[0]);
 255          $buffer=$parts[0];
 256          $nested=0;
 257          reset($parts);
 258          next($parts);
 259          while(list($k,$v)=each($parts))    {
 260              $isEndTag= substr($content,$pointer,2)=='</' ? 1 : 0;
 261              $tagLen = strcspn(substr($content,$pointer),'>')+1;
 262  
 263              if (!$isEndTag)    {    // We meet a start-tag:
 264                  if (!$nested)    {    // Ground level:
 265                      $newParts[]=$buffer;    // previous buffer stored
 266                      $buffer='';
 267                  }
 268                  $nested++;    // We are inside now!
 269                  $mbuffer=substr($content,$pointer,strlen($v)+$tagLen);    // New buffer set and pointer increased
 270                  $pointer+=strlen($mbuffer);
 271                  $buffer.=$mbuffer;
 272              } else {    // If we meet an endtag:
 273                  $nested--;    // decrease nested-level
 274                  $eliminated=0;
 275                  if ($eliminateExtraEndTags && $nested<0)    {
 276                      $nested=0;
 277                      $eliminated=1;
 278                  } else {
 279                      $buffer.=substr($content,$pointer,$tagLen);    // In any case, add the endtag to current buffer and increase pointer
 280                  }
 281                  $pointer+=$tagLen;
 282                  if (!$nested && !$eliminated)    {    // if we're back on ground level, (and not by eliminating tags...
 283                      $newParts[]=$buffer;
 284                      $buffer='';
 285                  }
 286                  $mbuffer=substr($content,$pointer,strlen($v));    // New buffer set and pointer increased
 287                  $pointer+=strlen($mbuffer);
 288                  $buffer.=$mbuffer;
 289              }
 290  
 291          }
 292          $newParts[]=$buffer;
 293          return $newParts;
 294      }
 295  
 296      /**
 297       * Splitting content into blocks *recursively* and processing tags/content with call back functions.
 298       *
 299       * @param    string        Tag list, see splitIntoBlock()
 300       * @param    string        Content, see splitIntoBlock()
 301       * @param    object        Object where call back methods are.
 302       * @param    string        Name of call back method for content; "function callBackContent($str,$level)"
 303       * @param    string        Name of call back method for tags; "function callBackTags($tags,$level)"
 304       * @param    integer        Indent level
 305       * @return    string        Processed content
 306       * @see splitIntoBlock()
 307       */
 308  	function splitIntoBlockRecursiveProc($tag,$content,&$procObj,$callBackContent,$callBackTags,$level=0)    {
 309          $parts = $this->splitIntoBlock($tag,$content,TRUE);
 310          foreach($parts as $k => $v)    {
 311              if ($k%2)    {
 312                  $firstTagName = $this->getFirstTagName($v, TRUE);
 313                  $tagsArray = array();
 314                  $tagsArray['tag_start'] = $this->getFirstTag($v);
 315                  $tagsArray['tag_end'] = '</'.$firstTagName.'>';
 316                  $tagsArray['tag_name'] = strtolower($firstTagName);
 317                  $tagsArray['add_level'] = 1;
 318                  $tagsArray['content'] = $this->splitIntoBlockRecursiveProc($tag,$this->removeFirstAndLastTag($v),$procObj,$callBackContent,$callBackTags,$level+$tagsArray['add_level']);
 319  
 320                  if ($callBackTags)    $tagsArray = $procObj->$callBackTags($tagsArray,$level);
 321  
 322                  $parts[$k] =
 323                      $tagsArray['tag_start'].
 324                      $tagsArray['content'].
 325                      $tagsArray['tag_end'];
 326              } else {
 327                  if ($callBackContent)    $parts[$k] = $procObj->$callBackContent($parts[$k],$level);
 328              }
 329          }
 330  
 331          return implode('',$parts);
 332      }
 333  
 334      /**
 335       * Returns an array with the $content divided by tag-blocks specified with the list of tags, $tag
 336       * Even numbers in the array are outside the blocks, Odd numbers are block-content.
 337       * Use ->getAllParts() and ->removeFirstAndLastTag() to process the content if needed.
 338       *
 339       * @param    string        List of tags
 340       * @param    string        HTML-content
 341       * @return    array        Even numbers in the array are outside the blocks, Odd numbers are block-content.
 342       * @see splitIntoBlock(), getAllParts(), removeFirstAndLastTag()
 343       */
 344  	function splitTags($tag,$content)    {
 345          $tags = t3lib_div::trimExplode(',',$tag,1);
 346          $regexStr = '/\<('.implode('|', $tags).')(\s[^>]*)?\/?>/si';
 347          $parts = preg_split($regexStr, $content);
 348  
 349          $pointer = strlen($parts[0]);
 350          $newParts = array();
 351          $newParts[] = $parts[0];
 352          reset($parts);
 353          next($parts);
 354          while(list($k,$v)=each($parts))    {
 355              $tagLen = strcspn(substr($content,$pointer),'>')+1;
 356  
 357                  // Set tag:
 358              $tag = substr($content,$pointer,$tagLen);    // New buffer set and pointer increased
 359              $newParts[] = $tag;
 360              $pointer+= strlen($tag);
 361  
 362                  // Set content:
 363              $newParts[] = $v;
 364              $pointer+= strlen($v);
 365          }
 366          return $newParts;
 367      }
 368  
 369      /**
 370       * Returns an array with either tag or non-tag content of the result from ->splitIntoBlock()/->splitTags()
 371       *
  372       * @param    array        Parts generated by ->splitIntoBlock() or ->splitTags()
 373       * @param    boolean        Whether to return the tag-parts (default,true) or what was outside the tags.
 374       * @param    boolean        Whether to include the tags in the tag-parts (most useful for input made by ->splitIntoBlock())
 375       * @return    array        Tag-parts/Non-tag-parts depending on input argument settings
 376       * @see splitIntoBlock(), splitTags()
 377       */
 378  	function getAllParts($parts,$tag_parts=1,$include_tag=1)    {
 379          $newParts=array();
 380          foreach ($parts as $k => $v)    {
 381              if (($k+($tag_parts?0:1))%2)    {
 382                  if (!$include_tag)    $v=$this->removeFirstAndLastTag($v);
 383                  $newParts[]=$v;
 384              }
 385          }
 386          return $newParts;
 387      }
 388  
 389      /**
 390       * Removes the first and last tag in the string
 391       * Anything before the first and after the last tags respectively is also removed
 392       *
 393       * @param    string        String to process
 394       * @return    string
 395       */
 396  	function removeFirstAndLastTag($str)    {
 397              // End of first tag:
 398          $start = strpos($str,'>');
 399              // Begin of last tag:
 400          $end = strrpos($str,'<');
 401              // return
 402          return substr($str, $start+1, $end-$start-1);
 403      }
 404  
 405      /**
 406       * Returns the first tag in $str
  407       * Actually everything from the beginning of $str up to and including the first ">" is returned, so you had better make sure the tag is the first thing...
 408       *
 409       * @param    string        HTML string with tags
 410       * @return    string
 411       */
 412  	function getFirstTag($str)    {
 413              // First:
 414          $endLen = strpos($str,'>')+1;
 415          return substr($str,0,$endLen);
 416      }
 417  
 418      /**
 419       * Returns the NAME of the first tag in $str
 420       *
 421       * @param    string        HTML tag (The element name MUST be separated from the attributes by a space character! Just *whitespace* will not do)
  422       * @param    boolean        If set, then the tag name is NOT converted to uppercase, but its case is preserved.
  423       * @return    string        Tag name, in upper case unless $preserveCase is set
 424       * @see getFirstTag()
 425       */
 426  	function getFirstTagName($str,$preserveCase=FALSE)    {
 427          $matches = array();
 428          if (preg_match('/^\s*\<([^\s\>]+)(\s|\>)/', $str, $matches)===1)    {
 429              if (!$preserveCase)    {
 430                  return strtoupper($matches[1]);
 431              }
 432              return $matches[1];
 433          }
 434          return '';
 435      }
 436  
 437      /**
 438       * Returns an array with all attributes as keys. Attributes are only lowercase a-z
  439       * If an attribute is empty (shorthand), then the value for the key is empty. You can check if it existed with isset()
 440       *
 441       * @param    string        Tag: $tag is either a whole tag (eg '<TAG OPTION ATTRIB=VALUE>') or the parameterlist (ex ' OPTION ATTRIB=VALUE>')
 442       * @param    boolean        If set, the attribute values are de-htmlspecialchar'ed. Should actually always be set!
 443       * @return    array        array(Tag attributes,Attribute meta-data)
 444       */
 445  	function get_tag_attributes($tag,$deHSC=0)    {
 446          list($components,$metaC) = $this->split_tag_attributes($tag);
 447          $name = '';     // attribute name is stored here
 448          $valuemode = false;
 449          $attributes = array();
 450          $attributesMeta = array();
 451          if (is_array($components))    {
 452              foreach ($components as $key => $val)    {
 453                  if ($val != '=')    {    // Only if $name is set (if there is an attribute, that waits for a value), that valuemode is enabled. This ensures that the attribute is assigned it's value
 454                      if ($valuemode)    {
 455                          if ($name)    {
 456                              $attributes[$name] = $deHSC?t3lib_div::htmlspecialchars_decode($val):$val;
 457                              $attributesMeta[$name]['dashType']=$metaC[$key];
 458                              $name = '';
 459                          }
 460                      } else {
 461                          if ($namekey = preg_replace('/[^[:alnum:]_\:\-]/','',$val))    {
 462                              $name = strtolower($namekey);
 463                              $attributesMeta[$name]=array();
 464                              $attributesMeta[$name]['origTag']=$namekey;
 465                              $attributes[$name] = '';
 466                          }
 467                      }
 468                      $valuemode = false;
 469                  } else {
 470                      $valuemode = true;
 471                  }
 472              }
 473              return array($attributes,$attributesMeta);
 474          }
 475      }
 476  
 477      /**
 478       * Returns an array with the 'components' from an attribute list. The result is normally analyzed by get_tag_attributes
 479       * Removes tag-name if found
 480       *
 481       * @param    string        The tag or attributes
 482       * @return    array
 483       * @access private
 484       * @see t3lib_div::split_tag_attributes()
 485       */
 486  	function split_tag_attributes($tag)    {
 487          $matches = array();
 488          if (preg_match('/(\<[^\s]+\s+)?(.*?)\s*(\>)?$/s', $tag, $matches)!==1)    {
 489              return array(array(), array());
 490          }
 491          $tag_tmp = $matches[2];
 492  
 493          $metaValue = array();
 494          $value = array();
 495          $matches = array();
 496          if (preg_match_all('/("[^"]*"|\'[^\']*\'|[^\s"\'\=]+|\=)/s', $tag_tmp, $matches)>0)    {
 497              foreach ($matches[1] as $part)    {
 498                  $firstChar = substr($part, 0, 1);
 499                  if ($firstChar=='"' || $firstChar=="'")    {
 500                      $metaValue[] = $firstChar;
 501                      $value[] = substr($part, 1, -1);
 502                  } else    {
 503                      $metaValue[] = '';
 504                      $value[] = $part;
 505                  }
 506              }
 507          }
 508          return array($value,$metaValue);
 509      }
 510  
 511      /**
 512       * Checks whether block/solo tags are found in the correct amounts in HTML content
 513       * Block tags are tags which are required to have an equal amount of start and end tags, eg. "<table>...</table>"
 514       * Solo tags are tags which are required to have ONLY start tags (possibly with an XHTML ending like ".../>")
 515       * NOTICE: Correct XHTML might actually fail since "<br></br>" is allowed as well as "<br/>". However only the LATTER is accepted by this function (with "br" in the "solo-tag" list), the first example will result in a warning.
 516       * NOTICE: Correct XHTML might actually fail since "<p/>" is allowed as well as "<p></p>". However only the LATTER is accepted by this function (with "p" in the "block-tag" list), the first example will result in an ERROR!
 517       * NOTICE: Correct HTML version "something" allows eg. <p> and <li> to be NON-ended (implicitly ended by other tags). However this is NOT accepted by this function (with "p" and "li" in the block-tag list) and it will result in an ERROR!
 518       *
 519       * @param    string        HTML content to analyze
 520       * @param    string        Tag names for block tags (eg. table or div or p) in lowercase, commalist (eg. "table,div,p")
 521       * @param    string        Tag names for solo tags (eg. img, br or input) in lowercase, commalist ("img,br,input")
 522       * @return    array        Analyse data.
 523       */
 524  	function checkTagTypeCounts($content,$blockTags='a,b,blockquote,body,div,em,font,form,h1,h2,h3,h4,h5,h6,i,li,map,ol,option,p,pre,select,span,strong,table,td,textarea,tr,u,ul', $soloTags='br,hr,img,input,area')    {
 525          $content = strtolower($content);
 526          $analyzedOutput=array();
 527          $analyzedOutput['counts']=array();    // Counts appearances of start-tags
 528          $analyzedOutput['errors']=array();    // Lists ERRORS
 529          $analyzedOutput['warnings']=array();    // Lists warnings.
 530          $analyzedOutput['blocks']=array();    // Lists stats for block-tags
 531          $analyzedOutput['solo']=array();    // Lists stats for solo-tags
 532  
 533              // Block tags, must have endings...
 534          $blockTags = explode(',',$blockTags);
 535          foreach($blockTags as $tagName)    {
 536              $countBegin = count(preg_split('/\<'.$tagName.'(\s|\>)/s',$content))-1;
 537              $countEnd = count(preg_split('/\<\/'.$tagName.'(\s|\>)/s',$content))-1;
 538              $analyzedOutput['blocks'][$tagName]=array($countBegin,$countEnd,$countBegin-$countEnd);
 539              if ($countBegin)    $analyzedOutput['counts'][$tagName]=$countBegin;
 540              if ($countBegin-$countEnd)    {
 541                  if ($countBegin-$countEnd > 0)    {
 542                      $analyzedOutput['errors'][$tagName]='There were more start-tags ('.$countBegin.') than end-tags ('.$countEnd.') for the element "'.$tagName.'". There should be an equal amount!';
 543                  } else {
 544                      $analyzedOutput['warnings'][$tagName]='There were more end-tags ('.$countEnd.') than start-tags ('.$countBegin.') for the element "'.$tagName.'". There should be an equal amount! However the problem is not fatal.';
 545                  }
 546              }
 547          }
 548  
 549              // Solo tags, must NOT have endings...
 550          $soloTags = explode(',',$soloTags);
 551          foreach($soloTags as $tagName)    {
 552              $countBegin = count(preg_split('/\<'.$tagName.'(\s|\>)/s',$content))-1;
 553              $countEnd = count(preg_split('/\<\/'.$tagName.'(\s|\>)/s',$content))-1;
 554              $analyzedOutput['solo'][$tagName]=array($countBegin,$countEnd);
 555              if ($countBegin)    $analyzedOutput['counts'][$tagName]=$countBegin;
 556              if ($countEnd)    {
 557                  $analyzedOutput['warnings'][$tagName]='There were end-tags found ('.$countEnd.') for the element "'.$tagName.'". This was not expected (although XHTML technically allows it).';
 558              }
 559          }
 560  
 561          return $analyzedOutput;
 562      }
 563  
 564  
 565  
 566  
 567  
 568  
 569  
 570  
 571  
 572  
 573  
 574  
 575      /*********************************
 576       *
 577       * Clean HTML code
 578       *
 579       *********************************/
 580  
 581      /**
 582       * Function that can clean up HTML content according to configuration given in the $tags array.
 583       *
 584       * Initializing the $tags array to allow a list of tags (in this case <B>,<I>,<U> and <A>), set it like this:         $tags = array_flip(explode(',','b,a,i,u'))
 585       * If the value of the $tags[$tagname] entry is an array, advanced processing of the tags is initialized. These are the options:
 586       *
 587       *     $tags[$tagname] = Array(
 588       *         'overrideAttribs' => ''        If set, this string is preset as the attributes of the tag
 589       *         'allowedAttribs' =>   '0' (zero) = no attributes allowed, '[commalist of attributes]' = only allowed attributes. If blank, all attributes are allowed.
 590       *         'fixAttrib' => Array(
 591       *             '[attribute name]' => Array (
 592       *                 'set' => Force the attribute value to this value.
 593       *                 'unset' => Boolean: If set, the attribute is unset.
 594       *                 'default' =>     If no attribute exists by this name, this value is set as default value (if this value is not blank)
 595       *                 'always' =>     Boolean. If set, the attribute is always processed. Normally an attribute is processed only if it exists
 596       *                 'trim,intval,lower,upper' =>     All booleans. If any of these keys are set, the value is passed through the respective PHP-functions.
 597       *                 'range' => Array ('[low limit]','[high limit, optional]')        Setting integer range.
 598       *                 'list' => Array ('[value1/default]','[value2]','[value3]')        Attribute must be in this list. If not, the value is set to the first element.
 599       *                 'removeIfFalse' =>     Boolean/'blank'.    If set, then the attribute is removed if it is 'false'. If this value is set to 'blank' then the value must be a blank string (that means a 'zero' value will not be removed)
 600       *                 'removeIfEquals' =>     [value]    If the attribute value matches the value set here, then it is removed.
 601       *                 'casesensitiveComp' => 1    If set, then the removeIfEquals and list comparisons will be case sensitive. Otherwise not.
 602       *             )
 603       *         ),
 604       *         'protect' => '',    Boolean. If set, the tag <> is converted to &lt; and &gt;
 605       *         'remap' => '',        String. If set, the tagname is remapped to this tagname
  606       *         'rmTagIfNoAttrib' => '',    Boolean. If set, then the tag is removed if no attributes happen to be there.
 607       *         'nesting' => '',    Boolean/'global'. If set true, then this tag must have starting and ending tags in the correct order. Any tags not in this order will be discarded. Thus '</B><B><I></B></I></B>' will be converted to '<B><I></B></I>'. Is the value 'global' then true nesting in relation to other tags marked for 'global' nesting control is preserved. This means that if <B> and <I> are set for global nesting then this string '</B><B><I></B></I></B>' is converted to '<B></B>'
 608       *     )
 609       *
 610       * @param    string        $content; is the HTML-content being processed. This is also the result being returned.
 611       * @param    array        $tags; is an array where each key is a tagname in lowercase. Only tags present as keys in this array are preserved. The value of the key can be an array with a vast number of options to configure.
 612       * @param    string        $keepAll; boolean/'protect', if set, then all tags are kept regardless of tags present as keys in $tags-array. If 'protect' then the preserved tags have their <> converted to &lt; and &gt;
 613       * @param    integer        $hSC; Values -1,0,1,2: Set to zero= disabled, set to 1 then the content BETWEEN tags is htmlspecialchar()'ed, set to -1 its the opposite and set to 2 the content will be HSC'ed BUT with preservation for real entities (eg. "&amp;" or "&#234;")
 614       * @param    array        Configuration array sent along as $conf to the internal functions ->processContent() and ->processTag()
 615       * @return    string        Processed HTML content
 616       */
	function HTMLcleaner($content, $tags=array(),$keepAll=0,$hSC=0,$addConfig=array())    {
        $newContent = array();

            // Split on '<': the first chunk is plain content, every following chunk is a tag candidate.
        $tokArr = explode('<',$content);
        $newContent[] = $this->processContent(array_shift($tokArr),$hSC,$addConfig);

        $c = 1;    // Next index in $newContent; start tags are remembered by this index for nesting control
        $tagRegister = array();    // tag name => $newContent indexes of start tags still waiting for their end tag
        $tagStack = array();    // Names of open tags under 'global' nesting control, in opening order

            // foreach is used instead of each(), which was removed in PHP 8.0.
        foreach ($tokArr as $tok)    {
            $firstChar = substr($tok,0,1);
            if (preg_match('/[[:alnum:]\/]/',$firstChar)==1)    {        // It is a tag... (first char is alphanumeric or /). This also avoids triggering on XML declarations and DOCTYPE.
                $tagEnd = strpos($tok,'>');
                if ($tagEnd)    {    // There is an end-bracket. $tagEnd can't be 0 as the first character can't be a '>'
                    $endTag = $firstChar=='/' ? 1 : 0;
                    $tagContent = substr($tok,$endTag,$tagEnd-$endTag);
                    $tagParts = preg_split('/\s+/s',$tagContent,2);
                    if (!isset($tagParts[1]))    $tagParts[1]='';    // Tag without attributes: avoid reading an undefined offset below
                    $tagName = strtolower($tagParts[0]);
                    if (isset($tags[$tagName]))    {
                        if (is_array($tags[$tagName]))    {    // If there is processing to do for the tag:

                            if (!$endTag)    {    // If NOT an endtag, do attribute processing
                                    // Override attributes
                                if (strcmp($tags[$tagName]['overrideAttribs'],''))    {
                                    $tagParts[1]=$tags[$tagName]['overrideAttribs'];
                                }

                                    // Allowed attributes
                                if (strcmp($tags[$tagName]['allowedAttribs'],''))    {
                                    if (!strcmp($tags[$tagName]['allowedAttribs'],'0'))    {    // No attribs allowed
                                        $tagParts[1]='';
                                    } elseif (trim($tagParts[1])) {
                                        $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                                        $tagParts[1]='';
                                        $newTagAttrib = array();
                                        if (!($tList = $tags[$tagName]['_allowedAttribs']))    {
                                                // Explode the allowed-attribute list only once per tag (kept in the local $tags copy)
                                            $tList = $tags[$tagName]['_allowedAttribs'] = t3lib_div::trimExplode(',',strtolower($tags[$tagName]['allowedAttribs']),1);
                                        }
                                        foreach ($tList as $allowTag)    {
                                            if (isset($tagAttrib[0][$allowTag]))    $newTagAttrib[$allowTag]=$tagAttrib[0][$allowTag];
                                        }
                                        $tagParts[1]=$this->compileTagAttribs($newTagAttrib,$tagAttrib[1]);
                                    }
                                }

                                    // Fixed attrib values
                                if (is_array($tags[$tagName]['fixAttrib']))    {
                                    $tagAttrib = $this->get_tag_attributes($tagParts[1]);
                                    $tagParts[1]='';
                                    foreach ($tags[$tagName]['fixAttrib'] as $attr => $params)    {
                                        if (strlen($params['set']))    $tagAttrib[0][$attr] = $params['set'];
                                        if (strlen($params['unset']))    unset($tagAttrib[0][$attr]);
                                        if (strcmp($params['default'],'') && !isset($tagAttrib[0][$attr]))    $tagAttrib[0][$attr]=$params['default'];
                                        if ($params['always'] || isset($tagAttrib[0][$attr]))    {
                                            if ($params['trim'])    {$tagAttrib[0][$attr]=trim($tagAttrib[0][$attr]);}
                                            if ($params['intval'])    {$tagAttrib[0][$attr]=intval($tagAttrib[0][$attr]);}
                                            if ($params['lower'])    {$tagAttrib[0][$attr]=strtolower($tagAttrib[0][$attr]);}
                                            if ($params['upper'])    {$tagAttrib[0][$attr]=strtoupper($tagAttrib[0][$attr]);}
                                            if ($params['range'])    {
                                                if (isset($params['range'][1]))    {
                                                    $tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]),intval($params['range'][1]));
                                                } else {
                                                    $tagAttrib[0][$attr]=t3lib_div::intInRange($tagAttrib[0][$attr],intval($params['range'][0]));
                                                }
                                            }
                                            if (is_array($params['list']))    {
                                                    // The cache key is the list itself: keying on the tag name alone made
                                                    // two attributes of the same tag share one cached list.
                                                if (!in_array($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['list'],$params['casesensitiveComp'],serialize($params['list']))))    $tagAttrib[0][$attr]=$params['list'][0];
                                            }
                                            if (($params['removeIfFalse'] && $params['removeIfFalse']!='blank' && !$tagAttrib[0][$attr]) || ($params['removeIfFalse']=='blank' && !strcmp($tagAttrib[0][$attr],'')))    {
                                                unset($tagAttrib[0][$attr]);
                                            }
                                            if (strcmp($params['removeIfEquals'],'') && !strcmp($this->caseShift($tagAttrib[0][$attr],$params['casesensitiveComp']),$this->caseShift($params['removeIfEquals'],$params['casesensitiveComp'])))    {
                                                unset($tagAttrib[0][$attr]);
                                            }
                                            if ($params['prefixLocalAnchors'])    {
                                                if (substr($tagAttrib[0][$attr],0,1)=='#')    {
                                                    $prefix = t3lib_div::getIndpEnv('TYPO3_REQUEST_URL');
                                                    $tagAttrib[0][$attr] = $prefix.$tagAttrib[0][$attr];
                                                    if ($params['prefixLocalAnchors']==2 && t3lib_div::isFirstPartOfStr($prefix,t3lib_div::getIndpEnv('TYPO3_SITE_URL')))        {
                                                        $tagAttrib[0][$attr] = substr($tagAttrib[0][$attr],strlen(t3lib_div::getIndpEnv('TYPO3_SITE_URL')));
                                                    }
                                                }
                                            }
                                            if ($params['prefixRelPathWith'])    {
                                                $urlParts = parse_url($tagAttrib[0][$attr]);
                                                if (!$urlParts['scheme'] && substr($urlParts['path'],0,1)!='/')    {    // If it is NOT an absolute URL (by http: or starting "/")
                                                    $tagAttrib[0][$attr] = $params['prefixRelPathWith'].$tagAttrib[0][$attr];
                                                }
                                            }
                                            if ($params['userFunc'])    {
                                                $tagAttrib[0][$attr] = t3lib_div::callUserFunction($params['userFunc'],$tagAttrib[0][$attr],$this);
                                            }
                                        }
                                    }
                                    $tagParts[1]=$this->compileTagAttribs($tagAttrib[0],$tagAttrib[1]);
                                }
                            } else {    // If endTag, remove any possible attributes:
                                $tagParts[1]='';
                            }

                                // Protecting the tag by converting < and > to &lt; and &gt; ??
                            if ($tags[$tagName]['protect'])    {
                                $lt = '&lt;';    $gt = '&gt;';
                            } else {
                                $lt = '<';    $gt = '>';
                            }
                                // Remapping tag name?
                            if ($tags[$tagName]['remap'])    $tagParts[0] = $tags[$tagName]['remap'];

                                // rmTagIfNoAttrib: start tags left without attributes are dropped when this option is set
                            if ($endTag || trim($tagParts[1]) || !$tags[$tagName]['rmTagIfNoAttrib'])    {
                                $setTag=1;

                                if ($tags[$tagName]['nesting'])    {
                                    if (!isset($tagRegister[$tagName]))    $tagRegister[$tagName]=array();

                                    if ($endTag)    {
                                        $correctTag=1;
                                        if ($tags[$tagName]['nesting']=='global')    {
                                            $lastEl = end($tagStack);
                                            if (strcmp($tagName,$lastEl))    {
                                                if (in_array($tagName,$tagStack))    {
                                                        // The matching start tag is further down the stack: discard every
                                                        // start tag opened after it, so the nesting stays correct.
                                                    while(count($tagStack) && strcmp($tagName,$lastEl))    {
                                                        $elPos = end($tagRegister[$lastEl]);
                                                        unset($newContent[$elPos]);

                                                        array_pop($tagRegister[$lastEl]);
                                                        array_pop($tagStack);
                                                        $lastEl = end($tagStack);
                                                    }
                                                } else {
                                                    $correctTag=0;    // End tag with no matching start tag on the global stack: it is discarded
                                                }
                                            }
                                        }
                                        if (!count($tagRegister[$tagName]) || !$correctTag)    {
                                            $setTag=0;
                                        } else {
                                            array_pop($tagRegister[$tagName]);
                                            if ($tags[$tagName]['nesting']=='global')    {array_pop($tagStack);}
                                        }
                                    } else {
                                        array_push($tagRegister[$tagName],$c);
                                        if ($tags[$tagName]['nesting']=='global')    {array_push($tagStack,$tagName);}
                                    }
                                }

                                if ($setTag)    {
                                        // Setting the tag
                                    $newContent[$c++]=$this->processTag($lt.($endTag?'/':'').trim($tagParts[0].' '.$tagParts[1]).$gt,$addConfig,$endTag,$lt=='&lt;');
                                }
                            }
                        } else {
                                // Tag is allowed but has no configuration array: keep it as it is
                            $newContent[$c++]=$this->processTag('<'.($endTag?'/':'').$tagContent.'>',$addConfig,$endTag);
                        }
                    } elseif ($keepAll) {    // This is if the tag was not defined in the array for processing:
                        if (!strcmp($keepAll,'protect'))    {
                            $lt = '&lt;';    $gt = '&gt;';
                        } else {
                            $lt = '<';    $gt = '>';
                        }
                        $newContent[$c++]=$this->processTag($lt.($endTag?'/':'').$tagContent.$gt,$addConfig,$endTag,$lt=='&lt;');
                    }
                    $newContent[$c++]=$this->processContent(substr($tok,$tagEnd+1),$hSC,$addConfig);
                } else {
                    $newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);    // There was no end-bracket, so no tag...
                }
            } else {
                $newContent[$c++]=$this->processContent('<'.$tok,$hSC,$addConfig);    // It was not a tag anyway
            }
        }

            // Unsetting start tags which never got their end tag:
        foreach ($tagRegister as $tag => $positions)    {
            foreach ($positions as $pKey)    {
                unset($newContent[$pKey]);
            }
        }

        return implode('',$newContent);
    }
 806  
 807      /**
 808       * Converts htmlspecialchars forth ($dir=1) AND back ($dir=-1)
 809       *
 810       * @param    string        Input value
 811       * @param    integer        Direction: forth ($dir=1, dir=2 for preserving entities) AND back ($dir=-1)
 812       * @return    string        Output value
 813       */
	function bidir_htmlspecialchars($value,$dir)    {
        switch ($dir)    {
            case 1:
                    // Forth: plain htmlspecialchars()
                return htmlspecialchars($value);
            case 2:
                    // Forth, but entities found in the input are restored by deHSCentities()
                return t3lib_div::deHSCentities(htmlspecialchars($value));
            case -1:
                    // Back: replacements are applied in array order, so '&amp;' is decoded last
                return str_replace(
                    array('&gt;','&lt;','&quot;','&amp;'),
                    array('>','<','"','&'),
                    $value
                );
        }
            // Any other direction: the value is returned untouched
        return $value;
    }
 827  
 828      /**
 829       * Prefixes the relative paths of hrefs/src/action in the tags [embed,td,table,body,img,input,form,link,script,a] in the $content with the $main_prefix or an alternative given by $alternatives
 830       *
 831       * @param    string        Prefix string
 832       * @param    string        HTML content
 833       * @param    array        Array with alternative prefixes for certain of the tags. key=>value pairs where the keys are the tag element names in uppercase
 834       * @param    string        Suffix string (put after the resource).
 835       * @return    string        Processed HTML content
 836       */
	function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='')    {

            // The attribute holding the resource reference for each handled tag:
        $resourceAttribs = array(
            'td' => 'background',
            'body' => 'background',
            'table' => 'background',
            'img' => 'src',
            'input' => 'src',
            'script' => 'src',
            'embed' => 'src',
            'link' => 'href',
            'a' => 'href',
            'form' => 'action'
        );

        $parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a',$content);
        foreach ($parts as $k => $v)    {
            if ($k%2)    {    // Only the odd-keyed parts are processed as tags
                $params = $this->get_tag_attributes($v,1);
                $firstTagName = $this->getFirstTagName($v);    // The 'name' of the first tag
                $tagKey = strtolower($firstTagName);

                if (isset($resourceAttribs[$tagKey]))    {
                    $attrName = $resourceAttribs[$tagKey];
                        // The tag is rewritten only if the attribute is present and non-empty
                    if (!empty($params[0][$attrName]))    {
                            // Alternative prefixes are looked up by the uppercase tag name
                        $prefix = isset($alternatives[strtoupper($firstTagName)]) ? $alternatives[strtoupper($firstTagName)] : $main_prefix;
                        $params[0][$attrName] = $this->prefixRelPath($prefix,$params[0][$attrName],$suffix);

                        $tagEnd = substr($v,-2)=='/>' ? ' />' : '>';    // Detect tag-ending so that it is re-applied correctly.
                        $parts[$k] = '<'.trim($tagKey.' '.$this->compileTagAttribs($params[0],$params[1])).$tagEnd;
                    }
                }
            }
        }
        $content = implode('',$parts);

            // Fix <style> section (note: the alternative prefix is looked up with the lowercase key 'style'):
        $prefix = isset($alternatives['style']) ? $alternatives['style'] : $main_prefix;
        if (strlen($prefix))    {
                // preg_replace() replaces eregi_replace(), which was removed in PHP 7.0.
                // "\" and "$" in prefix/suffix are escaped and the references are written as ${n},
                // so a prefix is always inserted literally, even if it starts with a digit.
            $replacement = '${1}'.addcslashes($prefix,'\\$').'${2}'.addcslashes($suffix,'\\$').'${3}';
            $parts = $this->splitIntoBlock('style',$content);
            foreach($parts as $k => $v)    {
                if ($k%2)    {
                    $parts[$k] = preg_replace('/(url[[:space:]]*\([[:space:]]*["\']?)([^"\')]*)(["\']?[[:space:]]*\))/i',$replacement,$parts[$k]);
                }
            }
            $content = implode('',$parts);
        }

        return $content;
    }
 909  
 910      /**
 911       * Internal sub-function for ->prefixResourcePath()
 912       *
 913       * @param    string        Prefix string
 914       * @param    string        Relative path/URL
 915       * @param    string        Suffix string
 916       * @return    string        Output path, prefixed if no scheme in input string
 917       * @access private
 918       */
	function prefixRelPath($prefix,$srcVal,$suffix='')    {
            // parse_url() returns FALSE for seriously malformed URLs and leaves out the 'scheme' key
            // for relative ones, so the key must not be read unconditionally.
        $pU = parse_url($srcVal);
        $hasScheme = is_array($pU) && !empty($pU['scheme']);

            // Only values without a scheme and without a leading "/" get prefix and suffix.
        if (!$hasScheme && substr($srcVal, 0, 1)!='/')    {
            $srcVal = $prefix.$srcVal.$suffix;
        }
        return $srcVal;
    }
 926  
 927      /**
 928       * Cleans up the input $value for fonttags.
 929       * If keepFace,-Size and -Color is set then font-tags with an allowed property is kept. Else deleted.
 930       *
 931       * @param    string        HTML content with font-tags inside to clean up.
 932       * @param    boolean        If set, keep "face" attribute
 933       * @param    boolean        If set, keep "size" attribute
 934       * @param    boolean        If set, keep "color" attribute
 935       * @return    string        Processed HTML content
 936       */
	function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)    {
            // Attribute name => whether that attribute may survive
        $keepFlags = array(
            'face' => $keepFace,
            'size' => $keepSize,
            'color' => $keepColor
        );

            // NOTE: relies on splitIntoBlock(), i.e. on font end-tags being properly set.
        $blocks = $this->splitIntoBlock('font',$value);
        foreach ($blocks as $idx => $block)    {
            if (!($idx%2))    continue;    // Only odd-keyed parts are treated as font blocks

            $attribs = $this->get_tag_attributes_classic($this->getFirstTag($block));
            $kept = array();
            foreach ($keepFlags as $attrName => $keep)    {
                if ($keep && $attribs[$attrName])    {
                    $kept[] = $attrName.'="'.$attribs[$attrName].'"';
                }
            }

                // Nested font tags inside this block are cleaned recursively
            $inner = $this->cleanFontTags($this->removeFirstAndLastTag($block),$keepFace,$keepSize,$keepColor);

                // Without any surviving attribute the font tag itself is dropped; only its content stays
            $blocks[$idx] = count($kept) ? '<font '.implode(' ',$kept).'>'.$inner.'</font>' : $inner;
        }
        return implode('',$blocks);
    }
 957  
 958      /**
 959       * This is used to map certain tag-names into other names.
 960       *
 961       * @param    string        HTML content
 962       * @param    array        Array with tag key=>value pairs where key is from-tag and value is to-tag
 963       * @param    string        Alternative less-than char to search for (search regex string)
 964       * @param    string        Alternative less-than char to replace with (replace regex string)
 965       * @return    string        Processed HTML content
 966       */
	function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<')    {

        foreach($tags as $from => $to)    {
                // Group 1: optional "/" of an end tag.
                // Group 2: optional rest of the tag, either whitespace plus attributes or a bare "/".
                // Requiring whitespace, "/" or ">" right after the name keeps "b" from matching "<body>",
                // while tags without attributes ("<b>", "</b>") are still matched.
            $pattern = '/'.preg_quote($ltChar,'/').'(\/)?'.preg_quote($from,'/').'(\s[^>]*|\/)?>/';

                // ${n} is used instead of $n so that a $to starting with a digit (e.g. an md5 hash)
                // is not read as part of the group number.
            $value = preg_replace($pattern, $ltChar2.'${1}'.$to.'${2}>', $value);
        }
        return $value;
    }
 974  
 975      /**
 976       * This converts htmlspecialchar()'ed tags (from $tagList) back to real tags. Eg. '&lt;strong&gt;' would be converted back to '<strong>' if found in $tagList
 977       *
 978       * @param    string        HTML content
 979       * @param    string        Tag list, separated by comma. Lowercase!
 980       * @return    string        Processed HTML content
 981       */
	function unprotectTags($content,$tagList='')    {
        $tagsArray = t3lib_div::trimExplode(',',$tagList,1);
        $contentParts = explode('&lt;',$content);

            // foreach is used instead of each(), which was removed in PHP 8.0.
        foreach ($contentParts as $k => $tok)    {
            if ($k===0)    continue;    // The first part is the text before any '&lt;' and stays as it is

                // Unless the part turns out to be an accepted tag, the '&lt;' removed by explode() is put back.
            $restored = '&lt;'.$tok;

            $firstChar = substr($tok,0,1);
            if (strcmp(trim($firstChar),''))    {    // A tag name or "/" must follow directly after '&lt;'
                $subparts = explode('&gt;',$tok,2);
                if (count($subparts)==2)    {    // There is a closing '&gt;'
                    $tagEnd = strlen($subparts[0]);
                    $endTag = $firstChar=='/' ? 1 : 0;
                    $tagContent = substr($tok,$endTag,$tagEnd-$endTag);
                    $tagParts = preg_split('/\s+/s',$tagContent,2);
                    $tagName = strtolower($tagParts[0]);

                        // An empty tag list means that every tag is converted back
                    if (!strcmp($tagList,'') || in_array($tagName,$tagsArray,true))    {
                        $restored = '<'.$subparts[0].'>'.$subparts[1];
                    }
                }
            }
            $contentParts[$k] = $restored;
        }

        return implode('',$contentParts);
    }
1005  
1006      /**
1007       * Strips tags except the tags in the list, $tagList
1008       * OBSOLETE - use PHP function strip_tags()
1009       *
1010       * @param    string        Value to process
1011       * @param    string        List of tags
1012       * @return    string        Output value
1013       * @ignore
1014       */
1015  	function stripTagsExcept($value,$tagList)    {
1016          $tags=t3lib_div::trimExplode(',',$tagList,1);
1017          $forthArr=array();
1018          $backArr=array();
1019          foreach ($tags as $theTag)    {
1020              $forthArr[$theTag]=md5($theTag);
1021              $backArr[md5($theTag)]=$theTag;
1022          }
1023          $value = $this->mapTags($value,$forthArr,'<','_');
1024          $value=strip_tags($value);
1025          $value = $this->mapTags($value,$backArr,'_','<');
1026          return $value;
1027      }
1028  
1029      /**
1030       * Internal function for case shifting of a string or whole array
1031       *
1032       * @param    mixed        Input string/array
1033       * @param    boolean        If $str is a string AND this boolean(caseSensitive) is false, the string is returned in uppercase
1034       * @param    string        Key string used for internal caching of the results. Could be an MD5 hash of the serialized version of the input $str if that is an array.
1035       * @return    string        Output string, processed
1036       * @access private
1037       */
1038  	function caseShift($str,$flag,$cacheKey='')    {
1039          $cacheKey .= $flag?1:0;
1040          if (is_array($str))    {
1041              if (!$cacheKey || !isset($this->caseShift_cache[$cacheKey]))    {
1042                  reset($str);
1043                  foreach ($str as $k => $v)    {
1044                      if (!$flag)    {
1045                          $str[$k] = strtoupper($v);
1046                      }
1047                  }
1048                  if ($cacheKey)    $this->caseShift_cache[$cacheKey]=$str;
1049              } else {
1050                  $str = $this->caseShift_cache[$cacheKey];
1051              }
1052          } elseif (!$flag)    { $str = strtoupper($str); }
1053          return $str;
1054      }
1055  
1056      /**
1057       * Compiling an array with tag attributes into a string
1058       *
1059       * @param    array        Tag attributes
1060       * @param    array        Meta information about these attributes (like if they were quoted)
1061       * @param    boolean        If set, then the attribute names will be set in lower case, value quotes in double-quotes and the value will be htmlspecialchar()'ed
1062       * @return    string        Imploded attributes, eg: 'attribute="value" attrib2="value2"'
1063       * @access private
1064       */
1065  	function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)    {
1066          $accu=array();
1067          foreach ($tagAttrib as $k =>$v)    {
1068              if ($xhtmlClean)    {
1069                  $attr=strtolower($k);
1070                  if (strcmp($v,'') || isset($meta[$k]['dashType']))    {
1071                      $attr.='="'.htmlspecialchars($v).'"';
1072                  }
1073              } else {
1074                  $attr=$meta[$k]['origTag']?$meta[$k]['origTag']:$k;
1075                  if (strcmp($v,'') || isset($meta[$k]['dashType']))    {
1076                      $dash=$meta[$k]['dashType']?$meta[$k]['dashType']:(t3lib_div::testInt($v)?'':'"');
1077                      $attr.='='.$dash.$v.$dash;
1078                  }
1079              }
1080              $accu[]=$attr;
1081          }
1082          return implode(' ',$accu);
1083      }
1084  
1085      /**
1086       * Get tag attributes, the classic version (which had some limitations?)
1087       *
1088       * @param    string        The tag
1089       * @param    boolean        De-htmlspecialchar flag.
1090       * @return    array
1091       * @access private
1092       */
1093  	function get_tag_attributes_classic($tag,$deHSC=0)    {
1094          $attr=$this->get_tag_attributes($tag,$deHSC);
1095          return is_array($attr[0])?$attr[0]:array();
1096      }
1097  
1098      /**
1099       * Indents input content with $number instances of $indentChar
1100       *
1101       * @param    string        Content string, multiple lines.
1102       * @param    integer        Number of indents
1103       * @param    string        Indent character/string
1104       * @return    string        Indented code (typ. HTML)
1105       */
1106  	function indentLines($content, $number=1, $indentChar="\t")    {
1107          $preTab = str_pad('', $number*strlen($indentChar), $indentChar);
1108          $lines = explode(chr(10),str_replace(chr(13),'',$content));
1109          foreach ($lines as $k => $v)    {
1110              $lines[$k] = $preTab.$v;
1111          }
1112          return implode(chr(10), $lines);
1113      }
1114  
1115      /**
1116       * Converts TSconfig into an array for the HTMLcleaner function.
1117       *
1118       * @param    array        TSconfig for HTMLcleaner
1119       * @param    array        Array of tags to keep (?)
1120       * @return    array
1121       * @access private
1122       */
function HTMLparserConfig($TSconfig,$keepTags=array())	{
		// Builds the four-element array returned at the bottom of this function:
		//   [0] tag configuration ($keepTags merged with the TSconfig instructions)
		//   [1] the "keepNonMatchedTags" setting, cast to string
		//   [2] the "htmlSpecialChars" setting, cast to integer
		//   [3] additional configuration (currently only the "xhtml" flag)
		// Optional TSconfig keys are checked with isset()/empty() before being read so
		// that a sparse configuration does not raise undefined-index notices.

		// Allow tags (base list, merged with incoming array)
	$allowTags = isset($TSconfig['allowTags']) ? $TSconfig['allowTags'] : '';
	$alTags = array_flip(t3lib_div::trimExplode(',',strtolower($allowTags),1));
	$keepTags = array_merge($alTags,$keepTags);

		// Set config properties.
	if (isset($TSconfig['tags.']) && is_array($TSconfig['tags.']))	{
			// First pass: scalar entries. "0" removes the tag from the list, "1" adds it if missing.
			// This must be a separate pass: an array entry for the same tag has to be applied after it.
		foreach ($TSconfig['tags.'] as $key => $tagC)	{
			if (!is_array($tagC) && $key==strtolower($key))	{
				if (!strcmp($tagC,'0'))	unset($keepTags[$key]);
				if (!strcmp($tagC,'1') && !isset($keepTags[$key]))	$keepTags[$key]=1;
			}
		}

			// Second pass: array entries (keys with a trailing dot) carry per-tag configuration.
		foreach ($TSconfig['tags.'] as $key => $tagC)	{
			if (is_array($tagC) && $key==strtolower($key))	{
				$key=substr($key,0,-1);		// Strip the trailing dot
				if (!isset($keepTags[$key]) || !is_array($keepTags[$key]))	$keepTags[$key]=array();

				if (isset($tagC['fixAttrib.']) && is_array($tagC['fixAttrib.']))	{
					foreach ($tagC['fixAttrib.'] as $atName => $atConfig)	{
						if (is_array($atConfig))	{
							$atName=substr($atName,0,-1);		// Strip the trailing dot
							if (!isset($keepTags[$key]['fixAttrib'][$atName]) || !is_array($keepTags[$key]['fixAttrib'][$atName]))	{
								$keepTags[$key]['fixAttrib'][$atName]=array();
							}
								// Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...
							$merged = array_merge($keepTags[$key]['fixAttrib'][$atName],$atConfig);
								// "range" and "list" are turned from comma lists into arrays.
								// Values that are already arrays are left as they are.
							foreach (array('range','list') as $listKey)	{
								if (isset($merged[$listKey]) && !is_array($merged[$listKey]) && strcmp($merged[$listKey],''))	{
									$merged[$listKey] = t3lib_div::trimExplode(',',$merged[$listKey]);
								}
							}
							$keepTags[$key]['fixAttrib'][$atName] = $merged;
						}
					}
				}
					// fixAttrib was handled above and must not overwrite the result in the merge below.
				unset($tagC['fixAttrib.']);
				unset($tagC['fixAttrib']);
					// Candidate for t3lib_div::array_merge() if integer-keys will some day make trouble...
				$keepTags[$key] = array_merge($keepTags[$key],$tagC);
			}
		}
	}

		// Comma lists of tag names that set one property on tags already present in $keepTags.
		// Each row is: TSconfig key, property to set, value. The order matters, because
		// "globalNesting" overrides "localNesting" for the same tag.
	$tagListOptions = array(
		array('localNesting', 'nesting', 1),
		array('globalNesting', 'nesting', 'global'),
		array('rmTagIfNoAttrib', 'rmTagIfNoAttrib', 1),
		array('noAttrib', 'allowedAttribs', 0)
	);
	foreach ($tagListOptions as $option)	{
		list($tsKey, $property, $propertyValue) = $option;
		if (empty($TSconfig[$tsKey]))	continue;

		foreach (t3lib_div::trimExplode(',',strtolower($TSconfig[$tsKey]),1) as $tn)	{
			if (isset($keepTags[$tn]))	{
					// Tags coming from "allowTags" hold a scalar, so convert to an array before
					// setting a property (this guard was previously missing for "localNesting").
				if (!is_array($keepTags[$tn]))	$keepTags[$tn]=array();
				$keepTags[$tn][$property]=$propertyValue;
			}
		}
	}

		// removeTags: the configuration of each listed tag is replaced, whether or not it was in the list before.
	if (!empty($TSconfig['removeTags']))	{
		foreach (t3lib_div::trimExplode(',',strtolower($TSconfig['removeTags']),1) as $tn)	{
			$keepTags[$tn]=array();
			$keepTags[$tn]['allowedAttribs']=0;
			$keepTags[$tn]['rmTagIfNoAttrib']=1;
		}
	}

		// Create additional configuration:
	$addConfig=array();
	if (!empty($TSconfig['xhtml_cleaning']))	{
		$addConfig['xhtml']=1;
	}

	return array(
		$keepTags,
		isset($TSconfig['keepNonMatchedTags']) ? ''.$TSconfig['keepNonMatchedTags'] : '',
		isset($TSconfig['htmlSpecialChars']) ? intval($TSconfig['htmlSpecialChars']) : 0,
		$addConfig
	);
}
1221  
1222      /**
1223       * Tries to convert the content to be XHTML compliant and other stuff like that.
1224       * STILL EXPERIMENTAL. See comments below.
1225       *
1226       *             What it does NOT do (yet) according to XHTML specs.:
1227       *             - Wellformedness: Nesting is NOT checked
1228       *             - name/id attribute issue is not observed at this point.
1229       *             - Certain nesting of elements not allowed. Most interesting, <PRE> cannot contain img, big,small,sub,sup ...
1230       *             - Wrapping scripts and style element contents in CDATA - or alternatively they should have entities converted.
1231       *             - Setting charsets may put some special requirements on both XML declaration/ meta-http-equiv. (C.9)
1232       *             - UTF-8 encoding is in fact expected by XML!!
1233       *             - stylesheet element and attribute names are NOT converted to lowercase
1234       *             - ampersands (and entities in general I think) MUST be converted to an entity reference! (&amp;). This may mean further conversion of non-tag content before output to page. May be related to the charset issue as a whole.
1235       *             - Minimized values not allowed: Must do this: selected="selected"
1236       *
1237       *             What it does at this point:
1238       *             - All tags (frame,base,meta,link + img,br,hr,area,input) are ended with "/>" - others?
1239       *             - Lowercase for elements and attributes
1240       *             - All attributes in quotes
1241       *             - Add "alt" attribute to img-tags if it's not there already.
1242       *
1243       * @param    string        Content to clean up
1244       * @return    string        Cleaned up content returned.
1245       * @access private
1246       */
function XHTML_clean($content)	{
		// Runs HTMLcleaner() with a fixed argument set and returns its result.
	$specialTags = array();				// No tags treated specially
	$keepAllTags = 1;					// Keep ALL tags.
	$hscMode = 0;						// All content is htmlspecialchar()'ed (or ??) - if we do, <script> content will break...
	$additionalConf = array('xhtml' => 1);

	return $this->HTMLcleaner($content, $specialTags, $keepAllTags, $hscMode, $additionalConf);
}
1257  
1258      /**
1259       * Processing all tags themselves
1260       * (Some additions by Sacha Vorbeck)
1261       *
1262       * @param    string        Tag to process
1263       * @param    array        Configuration array passing instructions for processing. If count()==0, function will return value unprocessed. See source code for details
1264       * @param    boolean        Is endtag, then set this.
1265       * @param    boolean        If set, just return value straight away
1266       * @return    string        Processed value.
1267       * @access private
1268       */
function processTag($value,$conf,$endTag,$protected=0)	{
		// Rewrites a single tag string for XHTML output when $conf['xhtml'] is set.
		// In every other case the value is returned unchanged.

		// Return immediately if protected or no parameters
	if ($protected || !count($conf))	return $value;
		// XHTML output is the only processing implemented here (STILL VERY EXPERIMENTAL!!)
	if (empty($conf['xhtml']))	return $value;

		// Endtags are just set lowercase right away
	if ($endTag)	return strtolower($value);
		// ... and comments are ignored.
	if (substr($value,0,4)=='<!--')	return $value;

		// Finding inner value without < > (or without < /> for self-closed tags)
	$selfClosed = (substr($value,-2)=='/>');
	$inValue = substr($value,1,($selfClosed?-2:-1));
		// Separate tagname and attributes. A tag without attributes yields only one part,
		// so pad the result to always get a (possibly empty) attribute string.
	$parts = array_pad(preg_split('/\s+/s',$inValue,2), 2, '');
	$tagName = strtolower($parts[0]);
	$tagP = $parts[1];

		// Process attributes. Only element 0 of the get_tag_attributes() result is used.
		// Fall back to an empty list if it is not an array.
	$tagAttrib = $this->get_tag_attributes($tagP);
	$attributes = (isset($tagAttrib[0]) && is_array($tagAttrib[0])) ? $tagAttrib[0] : array();
	if (!strcmp($tagName,'img') && !isset($attributes['alt']))	$attributes['alt']='';		// Set alt attribute for all images (not XHTML though...)
	if (!strcmp($tagName,'script') && !isset($attributes['type']))	$attributes['type']='text/javascript';	// Set type attribute for all script-tags

	$outA=array();
	foreach ($attributes as $attrib_name => $attrib_value)	{
			// Set attributes: always in quotes, value passed through bidir_htmlspecialchars() with direction 2.
		$outA[]=$attrib_name.'="'.$this->bidir_htmlspecialchars($attrib_value,2).'"';
	}
	$newTag='<'.trim($tagName.' '.implode(' ',$outA));

		// All tags that are standalone (not wrapping, not having endtags) should be ended with '/>'
	if (t3lib_div::inList('img,br,hr,meta,link,base,area,input,param,col',$tagName) || $selfClosed)	{
		$newTag.=' />';
	} else {
		$newTag.='>';
	}

	return $newTag;
}
1305  
1306      /**
1307       * Processing content between tags for HTML_cleaner
1308       *
1309       * @param    string        The value
1310       * @param    integer        Direction, either -1 or +1. 0 (zero) means no change to input value.
1311       * @param    mixed        Not used, ignore.
1312       * @return    string        The processed value.
1313       * @access private
1314       */
function processContent($value,$dir,$conf)	{
		// A direction of 0 (zero) means the input value is returned unchanged.
	if ($dir==0)	{
		return $value;
	}
		// Any other direction is handed on to bidir_htmlspecialchars(); $conf is not used.
	return $this->bidir_htmlspecialchars($value,$dir);
}
1319  }
1320  
1321  
1322  
1323  if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php'])    {
1324      include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['t3lib/class.t3lib_parsehtml.php']);
1325  }
1326  ?>


Généré le : Sun Nov 25 17:13:16 2007 par Balluche grâce à PHPXref 0.7
  Clicky Web Analytics