[ Index ] |
|
Code source de eZ Publish 3.9.0 |
<?php
//
// $Id$
//
// Definition of eZXML class
//
// Created on: <13-Feb-2002 09:15:42 bf>
//
// SOFTWARE NAME: eZ publish
// SOFTWARE RELEASE: 3.9.0
// BUILD VERSION: 17785
// COPYRIGHT NOTICE: Copyright (C) 1999-2006 eZ systems AS
// SOFTWARE LICENSE: GNU General Public License v2.0
// NOTICE: >
//   This program is free software; you can redistribute it and/or
//   modify it under the terms of version 2.0  of the GNU General
//   Public License as published by the Free Software Foundation.
//
//   This program is distributed in the hope that it will be useful,
//   but WITHOUT ANY WARRANTY; without even the implied warranty of
//   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//   GNU General Public License for more details.
//
//   You should have received a copy of version 2.0 of the GNU General
//   Public License along with this program; if not, write to the Free
//   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
//   MA 02110-1301, USA.
//
//

/*! \file ezxml.php
  XML DOM parser.
*/

/*! \defgroup eZXML XML parser and DOM library */

/*!
  \class eZXML ezxml.php
  \ingroup eZXML
  \brief eZXML handles parsing of well formed XML documents.

  eZXML will create a DOM tree from well formed XML documents.

  \sa eZDOMDocument eZDOMNode
*/

include_once( "lib/ezutils/classes/ezdebug.php" );
include_once( "lib/ezxml/classes/ezdomnode.php" );
include_once( "lib/ezxml/classes/ezdomdocument.php" );

define( "EZ_NODE_TYPE_ELEMENT", 1 );
define( "EZ_NODE_TYPE_ATTRIBUTE", 2 );
define( "EZ_NODE_TYPE_TEXT", 3 );
define( "EZ_NODE_TYPE_CDATASECTION", 4 );

class eZXML
{
    /*!
      Constructor
    */
    function eZXML( )
    {

    }

    /*!
      Will return a DOM object tree from the well formed XML.

      $params["SetParentNode"] = false/true : create eZDOMDocument with setParentNode parameter set to true or false, default is false.
      $params["TrimWhiteSpace"] = false/true : should the XML parser ignore whitespaces between tags, default is true.
      $params["CharsetConversion"] = false/true : Whether charset conversion is done or not, default is true.
                                     Note that an 'encoding' (or 'charset') attribute in the XML header
                                     always selects that charset for conversion, even when this is false.
      $params["ConvertSpecialChars"] = false/true: whether to convert &lt; &gt; &amp; etc into < > &; default is true.

      If \a $native is true and the domxml extension is available the document is
      handed to domxml_open_mem() and its result is returned as is.

      \return the DOM document, \c null if the text has neither an XML header nor a
              simple <tag>, or \c false if a malformed or unmatched tag is found.
    */
    function &domTree( $xmlDoc, $params = array(), $native = false )
    {
        /* We remove all control chars from the text, although they
         * should have not be there in the first place. This is
         * iso-8859-1 and UTF-8 safe. Those characters might also never exist
         * in an XML document in the first place
         * (http://w3.org/TR/2004/REC-xml-20040204/#NT-Char) so it's safe to
         * remove them */
        $xmlDoc = preg_replace('/[\x00-\x08\x0b-\x0c\x0e-\x1f]/', '', $xmlDoc);

        if ( $native and function_exists( 'domxml_open_mem' ) )
        {
            $domDocument = domxml_open_mem( $xmlDoc );
            return $domDocument;
        }

        if ( !isset( $params["TrimWhiteSpace"] ) )
            $params["TrimWhiteSpace"] = true;

        if ( !isset( $params["SetParentNode"] ) )
            $params["SetParentNode"] = false;

        if ( !isset( $params['ConvertSpecialChars'] ) )
            $params['ConvertSpecialChars'] = true;

        $charset = 'UTF-8';
        if ( isset( $params['CharsetConversion'] ) and
             !$params['CharsetConversion'] )
            $charset = false;

        // Stack of open elements; each entry remembers the tag name and the
        // node that was current when the tag was opened.
        $TagStack = array();

        // read charset from the header
        if ( preg_match( "#<\?xml(.*?)\?>#", $xmlDoc, $matches ) )
        {
            $xmlAttributes = $this->parseAttributes( $matches[1] );
            // parseAttributes() returns false for a header without attributes
            if ( is_array( $xmlAttributes ) )
            {
                for ( $i = 0; $i < count( $xmlAttributes ); ++$i )
                {
                    $xmlAttribute =& $xmlAttributes[$i];
                    if ( $xmlAttribute->name() == 'encoding' )
                        $charset = $xmlAttribute->content();
                    // This is required due to a bug in an old xml parser
                    else if ( $xmlAttribute->name() == 'charset' )
                        $charset = $xmlAttribute->content();
                }
            }
        }
        else if ( !preg_match( "#<[a-zA-Z0-9_-]+>#", $xmlDoc ) )
        {
            // No header and no plain <tag>: not considered to be XML
            $retVal = null;
            return $retVal;
        }

        if ( $charset !== false )
        {
            include_once( 'lib/ezi18n/classes/eztextcodec.php' );
            $codec =& eZTextCodec::instance( $charset, false, false );
            if ( $codec )
            {
                $xmlDoc = $codec->convertString( $xmlDoc );
            }
        }

        // strip header and other processing instructions
        $xmlDoc = preg_replace( "#<\?.*?\?>#", "", $xmlDoc );

        // strip document type declaration
        $xmlDoc = preg_replace( "%<\!DOCTYPE.*?>%is", "", $xmlDoc );

        // convert all newline types to unix newlines
        $xmlDoc = preg_replace( "#\n|\r\n|\r#", "\n", $xmlDoc );

        // strip comments
        $xmlDoc = $this->stripComments( $xmlDoc );

        // libxml compatible object creation
        $domDocument = new eZDOMDocument( '', $params["SetParentNode"] );

        $this->DOMDocument =& $domDocument;
        $currentNode =& $domDocument;

        // The document text is not modified inside the loop
        $docLength = strlen( $xmlDoc );

        $pos = 0;
        $endTagPos = 0;
        while ( $pos < $docLength )
        {
            $char = $xmlDoc[$pos];
            if ( $char == "<" )
            {
                // CDATA is detected before any tag parsing so that its content
                // is never interpreted as a tag name or as attributes.
                $isCDATASection = ( $pos > 0 and substr( $xmlDoc, $pos, 9 ) == "<![CDATA[" );

                if ( $isCDATASection )
                {
                    $cdataStart = $pos + 9;
                    $endTagPos = strpos( $xmlDoc, "]]>", $pos );
                    if ( $endTagPos === false )
                    {
                        eZDebug::writeError( "XML parser error: Closing tag \']]>\' for <![CDATA[ not found" , "eZ xml" );
                        // Unterminated section: the rest of the document is its content
                        $endTagPos = $docLength;
                    }

                    // new CDATA node
                    unset( $subNode );
                    $subNode = $domDocument->createElementNode( "#cdata-section" );

                    // set the default namespace
                    if ( isset( $this->NamespaceStack[0] ) )
                    {
                        $subNode->setNamespaceURI( $this->NamespaceStack[0] );
                    }

                    $subNode->Name = $subNode->LocalName = "#cdata-section";
                    $subNode->Content = substr( $xmlDoc, $cdataStart, $endTagPos - $cdataStart );
                    $subNode->Type = EZ_NODE_TYPE_CDATASECTION;

                    $currentNode->appendChild( $subNode );

                    // Continue after "]]>"; $endTagPos ends up on its ">"
                    $pos = $endTagPos;
                    $endTagPos += 2;
                }
                else
                {
                    // find the end of the tag
                    $endTagPos = strpos( $xmlDoc, ">", $pos );

                    // A tag which is never closed, or an empty "<>", can not be parsed
                    if ( $endTagPos === false or $endTagPos == $pos + 1 )
                    {
                        eZDebug::writeError( "Error parsing XML, malformed tag at offset $pos", "eZ xml" );
                        $retVal = false;
                        return $retVal;
                    }

                    // tag name with attributes
                    $tagName = substr( $xmlDoc, $pos + 1, $endTagPos - ( $pos + 1 ) );

                    // check if it's an endtag </tagname>
                    if ( $tagName[0] == "/" )
                    {
                        // whitespace is allowed between the name and the closing >
                        $tagName = rtrim( substr( $tagName, 1, strlen( $tagName ) ) );

                        // strip out namespace; nameSpace:Name
                        $colonPos = strpos( $tagName, ":" );
                        if ( $colonPos !== false and $colonPos > 0 )
                            $tagName = substr( $tagName, $colonPos + 1, strlen( $tagName ) );

                        // an end tag without any open element can never match
                        if ( count( $TagStack ) == 0 )
                        {
                            eZDebug::writeError( "Error parsing XML, unmatched tags $tagName" );
                            $retVal = false;
                            return $retVal;
                        }

                        $lastNodeArray = array_pop( $TagStack );
                        $lastTag = $lastNodeArray["TagName"];

                        $lastNode =& $lastNodeArray["ParentNodeObject"];

                        // step back up to the node that was current when the tag was opened
                        unset( $currentNode );
                        $currentNode =& $lastNode;

                        if ( $lastTag !== $tagName )
                        {
                            eZDebug::writeError( "Error parsing XML, unmatched tags $tagName" );
                            $retVal = false;
                            return $retVal;
                        }
                    }
                    else
                    {
                        // The name ends at the first whitespace; 0 means the
                        // tag consists of the name only (no attribute part).
                        $tagNameEnd = strcspn( $tagName, " \n\t" );
                        if ( $tagNameEnd == strlen( $tagName ) )
                            $tagNameEnd = 0;

                        if ( $tagNameEnd > 0 )
                            $justName = substr( $tagName, 0, $tagNameEnd );
                        else
                            $justName = $tagName;

                        // strip out the namespace prefix
                        $prefix = "";
                        $colonPos = strpos( $justName, ":" );
                        if ( $colonPos !== false and $colonPos > 0 )
                        {
                            $prefix = substr( $justName, 0, $colonPos );
                            $justName = substr( $justName, $colonPos + 1, strlen( $justName ) );
                        }

                        // remove trailing / from the name if exists
                        if ( substr( $justName, -1 ) == "/" )
                        {
                            $justName = substr( $justName, 0, strlen( $justName ) - 1 );
                        }

                        // create the new XML element node
                        unset( $subNode );
                        $subNode = $domDocument->createElementNode( $justName );

                        // find attributes
                        if ( $tagNameEnd > 0 )
                        {
                            unset( $attr );
                            $attr = $this->parseAttributes( substr( $tagName, $tagNameEnd, strlen( $tagName ) ) );

                            if ( $attr != false )
                                $subNode->Attributes =& $attr;
                        }

                        if ( $prefix != false )
                        {
                            $subNode->Prefix = $prefix;

                            // find prefix
                            if ( isset( $this->NamespaceArray[$prefix] ) )
                            {
                                $subNode->setNamespaceURI( $this->NamespaceArray[$prefix] );
                            }
                            else
                            {
                                eZDebug::writeError( "Namespace: $prefix not defined", "eZ xml" );
                            }
                        }
                        else
                        {
                            // set the default namespace
                            if ( isset( $this->NamespaceStack[0] ) )
                            {
                                $subNode->setNamespaceURI( $this->NamespaceStack[0] );
                            }
                        }

                        $domDocument->registerElement( $subNode );

                        $currentNode->appendChild( $subNode );

                        // unless it's a oneliner, <tagname />, the new element
                        // becomes the current node until its end tag is found
                        if ( substr( $tagName, -1 ) != "/" )
                        {
                            $TagStack[] = array( "TagName" => $justName, "ParentNodeObject" => &$currentNode );

                            unset( $currentNode );
                            $currentNode =& $subNode;
                        }
                    }
                }
            }

            // An unterminated CDATA section leaves $pos at the end of the
            // document, searching from beyond the end is not valid.
            if ( $pos + 1 > $docLength )
                $pos = false;
            else
                $pos = strpos( $xmlDoc, "<", $pos + 1 );

            if ( $pos === false )
            {
                // end of document
                $pos = $docLength;
            }
            else
            {
                // content between the previous tag and the next one
                $tagContent = substr( $xmlDoc, $endTagPos + 1, $pos - ( $endTagPos + 1 ) );

                // Keep the whitespace consistent, parsing back and forward shouldn't change data
                $tagContent = preg_replace( "#[\n]+[\s]*$#", "", $tagContent, 1 );

                if ( ( $params["TrimWhiteSpace"] == true and trim( $tagContent ) != "" ) or ( $params["TrimWhiteSpace"] == false and $tagContent != "" ) )
                {
                    // convert special chars
                    if ( $params["ConvertSpecialChars"] == true )
                    {
                        $tagContent = $this->decodeSpecialChars( $tagContent );
                    }

                    unset( $subNode );
                    $subNode = $domDocument->createTextNode( $tagContent );

                    $domDocument->registerElement( $subNode );
                    $currentNode->appendChild( $subNode );
                }
            }
        }

        return $domDocument;
    }

    /*!
      \static
      \private
      \return \a $str with all XML comments, <!-- ... -->, removed. Comments may span several lines.
    */
    function stripComments( &$str )
    {
        return preg_replace( "#<\!--.*?-->#s", "", $str );
    }

    /*!
      \private
      Replaces the five predefined XML entities in \a $text with the characters they stand for.
      &amp; is replaced last so that an escaped entity, e.g. &amp;lt;, is decoded one level only.
      \return the decoded text
    */
    function decodeSpecialChars( $text )
    {
        return str_replace( array( "&gt;", "&lt;", "&apos;", "&quot;", "&amp;" ),
                            array( ">", "<", "'", '"', "&" ),
                            $text );
    }

    /*!
      \private
      Parses the attributes. Returns false if no attributes in the supplied string is found.

      Attributes with the xmlns prefix are stored in NamespaceArray and registered as
      namespace aliases on the current DOM document (if there is one), a plain xmlns
      attribute is appended to NamespaceStack.

      \return an array of eZDOMNode attribute nodes or \c false
    */
    function parseAttributes( $attributeString )
    {
        $attributeNodes = array();

        preg_match_all( "/([a-zA-Z0-9:_-]+\s*=\s*(\"|').*?(\\2))/i", $attributeString, $attributeArray );

        foreach ( $attributeArray[0] as $attributePart )
        {
            if ( trim( $attributePart ) == "" or trim( $attributePart ) == "/" )
                continue;

            $attributeNamespaceURI = false;
            $attributePrefix = false;

            // Split at the first name="  only, the value itself may contain =" or ='
            $attributeTmpArray = preg_split( "#\s*(=\s*(\"|'))#", $attributePart, 2 );

            $attributeName = $attributeTmpArray[0];

            // strip out namespace; nameSpace:Name
            $colonPos = strpos( $attributeName, ":" );
            if ( $colonPos !== false and $colonPos > 0 )
            {
                $attributePrefix = substr( $attributeName, 0, $colonPos );
                $attributeName = substr( $attributeName, $colonPos + 1, strlen( $attributeName ) );
            }

            $attributeValue = $attributeTmpArray[1];

            // remove the closing quote from value part
            $attributeValue = substr( $attributeValue, 0, strlen( $attributeValue ) - 1 );

            $attributeValue = $this->decodeSpecialChars( $attributeValue );

            // check for namespace definition
            if ( $attributePrefix === "xmlns" )
            {
                $attributeNamespaceURI = $attributeValue;
                $this->NamespaceArray[$attributeName] = $attributeValue;

                // There is no document yet when the XML header is parsed
                if ( is_object( $this->DOMDocument ) )
                {
                    $this->DOMDocument->registerNamespaceAlias( $attributeName, $attributeValue );
                }
            }

            // check for default namespace definition
            if ( $attributeName === "xmlns" )
            {
                $attributeNamespaceURI = $attributeValue;

                // change the default namespace
                $this->NamespaceStack[] = $attributeNamespaceURI;
            }

            unset( $attrNode );
            $attrNode = new eZDOMNode();
            $attrNode->Name = $attributeName;

            if ( $attributePrefix != false && $attributePrefix != "xmlns" )
            {
                $attrNode->Prefix = $attributePrefix;
                $attrNode->LocalName = $attributeName;

                // find prefix
                if ( isset( $this->NamespaceArray["$attributePrefix"] ) )
                {
                    $attrNode->NamespaceURI = $this->NamespaceArray["$attributePrefix"];
                }
                else
                {
                    eZDebug::writeError( "Namespace: $attributePrefix not found", "eZ xml" );
                }
            }
            else if ( $attributePrefix === "xmlns" )
            {
                $attrNode->LocalName = $attributeName;
                $attrNode->NamespaceURI = $attributeNamespaceURI;
                $attrNode->Prefix = $attributePrefix;
            }
            else
            {
                // check for default namespace definition
                if ( $attributeName === "xmlns" )
                {
                    $attrNode->LocalName = $attributeName;
                    $attrNode->NamespaceURI = $attributeNamespaceURI;
                }
                else
                {
                    $attrNode->NamespaceURI = false;
                    $attrNode->LocalName = false;
                }
                $attrNode->Prefix = false;
            }

            $attrNode->Type = EZ_NODE_TYPE_ATTRIBUTE;
            $attrNode->Content = $attributeValue;

            $attributeNodes[] = $attrNode;
        }

        if ( count( $attributeNodes ) == 0 )
            return false;

        return $attributeNodes;
    }

    /// Contains the namespaces
    var $NamespaceStack = array();

    /// Contains the available namespaces
    var $NamespaceArray = array();

    /// Contains the current namespace
    var $CurrentNameSpace;

    /// Contains a reference to the DOM document object
    var $DOMDocument;
}

?>
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Sat Feb 24 10:30:04 2007 | par Balluche grâce à PHPXref 0.7 |