[ Index ] |
|
Code source de vtiger CRM 5.0.2 |
<?php

/**
 * Project:     MagpieRSS: a simple RSS integration tool
 * File:        rss_parse.inc  - parse an RSS or Atom feed
 *               return as a simple object.
 *
 * Handles RSS 0.9x, RSS 2.0, RSS 1.0, and Atom 0.3
 *
 * The latest version of MagpieRSS can be obtained from:
 * http://magpierss.sourceforge.net
 *
 * For questions, help, comments, discussion, etc., please join the
 * Magpie mailing list:
 * magpierss-general@lists.sourceforge.net
 *
 * @author           Kellan Elliott-McCrea <kellan@protest.net>
 * @version          0.7a
 * @license          GPL
 *
 */

define('RSS', 'RSS');
define('ATOM', 'Atom');

require_once (MAGPIE_DIR . 'rss_utils.inc');

/**
 * Hybrid parser, and object, takes RSS as a string and returns a simple object.
 *
 * The constructor parses the feed immediately; afterwards the results are
 * available in $items, $channel, $textinput and $image, and any problem is
 * reported through $ERROR / $WARNING.
 *
 * see: rss_fetch.inc for a simpler interface with integrated caching support
 *
 */
class MagpieRSS {
    var $parser;

    var $current_item   = array();  // item currently being parsed
    var $items          = array();  // collection of parsed items
    var $channel        = array();  // hash of channel fields
    var $textinput      = array();
    var $image          = array();
    var $feed_type;
    var $feed_version;
    var $encoding       = '';       // output encoding of parsed rss

    var $_source_encoding = '';     // only set if we have to parse xml prolog

    var $ERROR = "";
    var $WARNING = "";

    // define some constants

    var $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright');
    var $_KNOWN_ENCODINGS    = array('UTF-8', 'US-ASCII', 'ISO-8859-1');

    // parser variables, useless if you're not a parser, treat as private
    var $stack              = array(); // parser stack
    var $inchannel          = false;
    var $initem             = false;
    var $incontent          = false; // if in Atom <content mode="xml"> field
    var $intextinput        = false;
    var $inimage            = false;
    var $current_namespace  = false;


    /**
     * Set up XML parser, parse source, and return populated RSS object.
     *
     * @param string $source           string containing the RSS to be parsed
     *
     * NOTE:  Probably a good idea to leave the encoding options alone unless
     *        you know what you're doing as PHP's character set support is
     *        a little weird.
     *
     * NOTE:  A lot of this is unnecessary but harmless with PHP5
     *
     *
     * @param string $output_encoding  output the parsed RSS in this character
     *                                 set defaults to ISO-8859-1 as this is PHP's
     *                                 default.
     *
     *                                 NOTE: might be changed to UTF-8 in future
     *                                 versions.
     *
     * @param string $input_encoding   the character set of the incoming RSS source.
     *                                 Leave blank and Magpie will try to figure it
     *                                 out.
     *
     *
     * @param bool   $detect_encoding  if false Magpie won't attempt to detect
     *                                 source encoding. (caveat emptor)
     *
     */
    function __construct ($source, $output_encoding='ISO-8859-1',
                          $input_encoding=null, $detect_encoding=true)
    {
        # if PHP xml isn't compiled in, report it and stop: nothing below
        # can work without the extension
        #
        if (!function_exists('xml_parser_create')) {
            $this->error( "Failed to load PHP's XML Extension. " .
                          "http://www.php.net/manual/en/ref.xml.php",
                          E_USER_ERROR );
            return;
        }

        list($parser, $source) = $this->create_parser($source,
                $output_encoding, $input_encoding, $detect_encoding);

        # xml_parser_create() yields a resource before PHP 8 and an
        # XMLParser object from PHP 8 on, so accept either
        #
        if (!is_resource($parser) and !is_object($parser)) {
            $this->error( "Failed to create an instance of PHP's XML parser. " .
                          "http://www.php.net/manual/en/ref.xml.php",
                          E_USER_ERROR );
            return;
        }

        $this->parser = $parser;

        # pass in parser, and a reference to this object
        # setup handlers
        #
        xml_set_object( $this->parser, $this );
        xml_set_element_handler($this->parser,
                'feed_start_element', 'feed_end_element' );

        xml_set_character_data_handler( $this->parser, 'feed_cdata' );

        $status = xml_parse( $this->parser, $source );

        if (! $status ) {
            $errorcode = xml_get_error_code( $this->parser );
            if ( $errorcode != XML_ERROR_NONE ) {
                $xml_error  = xml_error_string( $errorcode );
                $error_line = xml_get_current_line_number($this->parser);
                $error_col  = xml_get_current_column_number($this->parser);
                $errormsg   = "$xml_error at line $error_line, column $error_col";

                $this->error( $errormsg );
            }
        }

        xml_parser_free( $this->parser );

        $this->normalize();
    }

    /**
     * PHP4-style constructor, kept so that PHP4 and any code calling
     * MagpieRSS() explicitly (e.g. parent::MagpieRSS(...)) keeps working.
     * Takes the same arguments as __construct() and simply delegates to it.
     */
    function MagpieRSS ($source, $output_encoding='ISO-8859-1',
                        $input_encoding=null, $detect_encoding=true)
    {
        $this->__construct($source, $output_encoding,
                           $input_encoding, $detect_encoding);
    }

    /**
     * XML parser callback: an element has been opened.
     *
     * Identifies the feed type from the root element, tracks which container
     * (channel, item/entry, textinput, image, Atom content construct) is being
     * parsed, and pushes ordinary elements onto the parser stack.
     *
     * @param mixed  $p        the XML parser (unused)
     * @param string $element  element name as reported by the parser
     * @param array  $attrs    element attributes; keys are lower-cased in place
     */
    function feed_start_element($p, $element, &$attrs) {
        $el = $element = strtolower($element);
        $attrs = array_change_key_case($attrs, CASE_LOWER);

        // check for a namespace, and split if found
        // (a leading ':' gives position 0, which is deliberately ignored)
        $ns = false;
        if ( strpos( $element, ':' ) ) {
            // explode() instead of split(): split() was removed in PHP 7
            list($ns, $el) = explode( ':', $element, 2);
        }
        if ( $ns and $ns != 'rdf' ) {
            $this->current_namespace = $ns;
        }

        # if feed type isn't set, then this is first element of feed
        # identify feed from root element
        #
        if (!isset($this->feed_type) ) {
            // the version attribute is optional; stays null when absent
            $version = isset($attrs['version']) ? $attrs['version'] : null;

            if ( $el == 'rdf' ) {
                $this->feed_type = RSS;
                $this->feed_version = '1.0';
            }
            elseif ( $el == 'rss' ) {
                $this->feed_type = RSS;
                $this->feed_version = $version;
            }
            elseif ( $el == 'feed' ) {
                $this->feed_type = ATOM;
                $this->feed_version = $version;
                $this->inchannel = true;
            }
            return;
        }

        if ( $el == 'channel' )
        {
            $this->inchannel = true;
        }
        elseif ($el == 'item' or $el == 'entry' )
        {
            $this->initem = true;
            if ( isset($attrs['rdf:about']) ) {
                $this->current_item['about'] = $attrs['rdf:about'];
            }
        }

        // if we're in the default namespace of an RSS feed,
        //  record textinput or image fields
        elseif (
            $this->feed_type == RSS and
            $this->current_namespace == '' and
            $el == 'textinput' )
        {
            $this->intextinput = true;
        }

        elseif (
            $this->feed_type == RSS and
            $this->current_namespace == '' and
            $el == 'image' )
        {
            $this->inimage = true;
        }

        # handle atom content constructs
        elseif ( $this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
        {
            // avoid clashing w/ RSS mod_content
            if ($el == 'content' ) {
                $el = 'atom_content';
            }

            $this->incontent = $el;
        }

        // if inside an Atom content construct (e.g. content or summary) field treat tags as text
        elseif ($this->feed_type == ATOM and $this->incontent )
        {
            // if tags are inlined, then flatten
            $attrs_str = join(' ',
                    array_map('map_attrs',
                    array_keys($attrs),
                    array_values($attrs) ) );

            $this->append_content( "<$element $attrs_str>"  );

            array_unshift( $this->stack, $el );
        }

        // Atom supports many links per containing element.
        // Magpie treats link elements of type rel='alternate'
        // as being equivalent to RSS's simple link element.
        //
        elseif ($this->feed_type == ATOM and $el == 'link' )
        {
            // rel and href may be absent; treat them as empty strings
            $rel  = isset($attrs['rel'])  ? $attrs['rel']  : '';
            $href = isset($attrs['href']) ? $attrs['href'] : '';

            if ( $rel == 'alternate' ) {
                $link_el = 'link';
            }
            else {
                $link_el = 'link_' . $rel;
            }

            $this->append($link_el, $href);
        }
        // set stack[0] to current element
        else {
            array_unshift($this->stack, $el);
        }
    }


    /**
     * XML parser callback: character data.
     *
     * Inside an Atom content construct the text is added to that construct;
     * otherwise it is appended to the field named by the current parser stack
     * (outermost element first, joined with '_').
     *
     * @param mixed  $p     the XML parser (unused)
     * @param string $text  character data
     */
    function feed_cdata ($p, $text) {
        if ($this->feed_type == ATOM and $this->incontent)
        {
            $this->append_content( $text );
        }
        else {
            $current_el = join('_', array_reverse($this->stack));
            $this->append($current_el, $text);
        }
    }

    /**
     * XML parser callback: an element has been closed.
     *
     * Stores a finished item/entry, leaves the matching container state, or
     * pops the parser stack; always resets the current namespace.
     *
     * @param mixed  $p   the XML parser (unused)
     * @param string $el  element name as reported by the parser
     */
    function feed_end_element ($p, $el) {
        $el = strtolower($el);

        if ( $el == 'item' or $el == 'entry' )
        {
            $this->items[] = $this->current_item;
            $this->current_item = array();
            $this->initem = false;
        }
        elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'textinput' )
        {
            $this->intextinput = false;
        }
        elseif ($this->feed_type == RSS and $this->current_namespace == '' and $el == 'image' )
        {
            $this->inimage = false;
        }
        elseif ($this->feed_type == ATOM and in_array($el, $this->_CONTENT_CONSTRUCTS) )
        {
            $this->incontent = false;
        }
        elseif ($el == 'channel' or $el == 'feed' )
        {
            $this->inchannel = false;
        }
        elseif ($this->feed_type == ATOM and $this->incontent  ) {
            // balance tags properly
            // note:  i don't think this is actually necessary
            // (isset guard: the stack may be empty here)
            if ( isset($this->stack[0]) and $this->stack[0] == $el )
            {
                $this->append_content("</$el>");
            }
            else {
                $this->append_content("<$el />");
            }

            array_shift( $this->stack );
        }
        else {
            array_shift( $this->stack );
        }

        $this->current_namespace = false;
    }

    /**
     * Append $str2 to $str1 in place, initialising $str1 to "" when unset.
     *
     * @param string $str1  target, passed by reference
     * @param string $str2  text to append
     */
    function concat (&$str1, $str2="") {
        if (!isset($str1) ) {
            $str1="";
        }
        $str1 .= $str2;
    }


    /**
     * Append text to the Atom content construct currently being parsed
     * ($this->incontent), on the current item if inside one, else on the channel.
     *
     * @param string $text  text or flattened markup to append
     */
    function append_content($text) {
        if ( $this->initem ) {
            $this->concat( $this->current_item[ $this->incontent ], $text );
        }
        elseif ( $this->inchannel ) {
            $this->concat( $this->channel[ $this->incontent ], $text );
        }
    }

    /**
     * smart append - field and namespace aware
     *
     * Appends $text to field $el of whichever container is currently open.
     * With a current namespace the field is stored under [namespace][el].
     * Does nothing when $el is empty.
     *
     * @param string $el    field name
     * @param string $text  text to append
     */
    function append($el, $text) {
        if (!$el) {
            return;
        }
        if ( $this->current_namespace )
        {
            if ( $this->initem ) {
                $this->concat(
                    $this->current_item[ $this->current_namespace ][ $el ], $text);
            }
            elseif ($this->inchannel) {
                $this->concat(
                    $this->channel[ $this->current_namespace][ $el ], $text );
            }
            elseif ($this->intextinput) {
                $this->concat(
                    $this->textinput[ $this->current_namespace][ $el ], $text );
            }
            elseif ($this->inimage) {
                $this->concat(
                    $this->image[ $this->current_namespace ][ $el ], $text );
            }
        }
        else {
            if ( $this->initem ) {
                $this->concat(
                    $this->current_item[ $el ], $text);
            }
            elseif ($this->intextinput) {
                $this->concat(
                    $this->textinput[ $el ], $text );
            }
            elseif ($this->inimage) {
                $this->concat(
                    $this->image[ $el ], $text );
            }
            elseif ($this->inchannel) {
                $this->concat(
                    $this->channel[ $el ], $text );
            }
        }
    }

    /**
     * Mirror Atom fields into their RSS equivalents (and vice versa) so that
     * callers can read either naming, and add 'date_timestamp' to items whose
     * date could be parsed.
     */
    function normalize () {
        $item_count = count($this->items);

        // if atom populate rss fields
        if ( $this->is_atom() ) {
            // the key is always created; it is null when the feed has no tagline
            $this->channel['description'] =
                isset($this->channel['tagline']) ? $this->channel['tagline'] : null;

            for ( $i = 0; $i < $item_count; $i++) {
                $item = $this->items[$i];
                if ( isset($item['summary']) )
                    $item['description'] = $item['summary'];
                if ( isset($item['atom_content']))
                    $item['content']['encoded'] = $item['atom_content'];

                // prefer 'issued', fall back to 'modified', else no date
                if ( isset($item['issued']) ) {
                    $atom_date = $item['issued'];
                }
                elseif ( isset($item['modified']) ) {
                    $atom_date = $item['modified'];
                }
                else {
                    $atom_date = null;
                }

                if ( $atom_date ) {
                    $epoch = @parse_w3cdtf($atom_date);
                    if ($epoch and $epoch > 0) {
                        $item['date_timestamp'] = $epoch;
                    }
                }

                $this->items[$i] = $item;
            }
        }
        elseif ( $this->is_rss() ) {
            // the key is always created; it is null when the feed has no description
            $this->channel['tagline'] =
                isset($this->channel['description']) ? $this->channel['description'] : null;

            for ( $i = 0; $i < $item_count; $i++) {
                $item = $this->items[$i];
                if ( isset($item['description']))
                    $item['summary'] = $item['description'];
                if ( isset($item['content']['encoded'] ) )
                    $item['atom_content'] = $item['content']['encoded'];

                if ( $this->is_rss() == '1.0' and isset($item['dc']['date']) ) {
                    $epoch = @parse_w3cdtf($item['dc']['date']);
                    if ($epoch and $epoch > 0) {
                        $item['date_timestamp'] = $epoch;
                    }
                }
                elseif ( isset($item['pubdate']) ) {
                    $epoch = @strtotime($item['pubdate']);
                    if ($epoch > 0) {
                        $item['date_timestamp'] = $epoch;
                    }
                }

                $this->items[$i] = $item;
            }
        }
    }


    /**
     * @return mixed  the feed version if this is an RSS feed, false otherwise
     */
    function is_rss () {
        if ( $this->feed_type == RSS ) {
            return $this->feed_version;
        }
        else {
            return false;
        }
    }

    /**
     * @return mixed  the feed version if this is an Atom feed, false otherwise
     */
    function is_atom() {
        if ( $this->feed_type == ATOM ) {
            return $this->feed_version;
        }
        else {
            return false;
        }
    }

    /**
     * return XML parser, and possibly re-encoded source
     *
     * @param string $source   feed source
     * @param string $out_enc  output (target) encoding; skipped when empty
     * @param string $in_enc   input encoding, may be empty
     * @param bool   $detect   whether to detect the input encoding
     * @return array           array(parser, source)
     */
    function create_parser($source, $out_enc, $in_enc, $detect) {
        // compare real version numbers: testing only the first character of
        // phpversion() for "5" sent PHP 7 and later down the PHP4 path
        if ( version_compare(PHP_VERSION, '5.0.0', '>=') ) {
            $parser = $this->php5_create_parser($in_enc, $detect);
        }
        else {
            list($parser, $source) = $this->php4_create_parser($source, $in_enc, $detect);
        }
        if ($out_enc) {
            $this->encoding = $out_enc;
            // only touch the parser if one was actually created;
            // the caller reports the failure
            if ($parser) {
                xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $out_enc);
            }
        }

        return array($parser, $source);
    }

    /**
     * Instantiate an XML parser under PHP5
     *
     * PHP5 will do a fine job of detecting input encoding
     * if passed an empty string as the encoding.
     *
     * All hail libxml2!
     *
     * @param string $in_enc  input encoding, used only when detection is off
     * @param bool   $detect  whether to let the parser detect the encoding
     * @return mixed          the value returned by xml_parser_create()
     */
    function php5_create_parser($in_enc, $detect) {
        // by default php5 does a fine job of detecting input encodings
        if(!$detect && $in_enc) {
            return xml_parser_create($in_enc);
        }
        else {
            return xml_parser_create('');
        }
    }

    /**
     * Instantiate an XML parser under PHP4
     *
     * Unfortunately PHP4's support for character encodings
     * and especially XML and character encodings sucks.  As
     * long as the documents you parse only contain characters
     * from the ISO-8859-1 character set (a superset of ASCII,
     * and a subset of UTF-8) you're fine.  However once you
     * step out of that comfy little world things get mad, bad,
     * and dangerous to know.
     *
     * The following code is based on SJM's work with FoF
     * @see http://minutillo.com/steve/weblog/2004/6/17/php-xml-and-character-encodings-a-tale-of-sadness-rage-and-data-loss
     *
     * @param string $source  feed source
     * @param string $in_enc  input encoding, may be empty
     * @param bool   $detect  whether to detect the input encoding
     * @return array          array(parser, possibly re-encoded source)
     */
    function php4_create_parser($source, $in_enc, $detect) {
        if ( !$detect ) {
            return array(xml_parser_create($in_enc), $source);
        }

        if (!$in_enc) {
            // read the encoding from the XML prolog; the '?' characters are
            // escaped so the pattern matches a literal "<?xml ... ?>"
            if (preg_match('/<\?xml.*encoding=[\'"](.*?)[\'"].*?\?>/m', $source, $m)) {
                $in_enc = strtoupper($m[1]);
                // record it in the declared property
                $this->_source_encoding = $in_enc;
            }
            else {
                $in_enc = 'UTF-8';
            }
        }

        if ($this->known_encoding($in_enc)) {
            return array(xml_parser_create($in_enc), $source);
        }

        // the detected encoding is not one of the simple encodings PHP knows

        // attempt to use the iconv extension to
        // cast the XML to a known encoding
        // @see http://php.net/iconv

        if (function_exists('iconv'))  {
            $encoded_source = iconv($in_enc,'UTF-8', $source);
            if ($encoded_source) {
                return array(xml_parser_create('UTF-8'), $encoded_source);
            }
        }

        // iconv didn't work, try mb_convert_encoding
        // @see http://php.net/mbstring
        if(function_exists('mb_convert_encoding')) {
            $encoded_source = mb_convert_encoding($source, 'UTF-8', $in_enc );
            if ($encoded_source) {
                return array(xml_parser_create('UTF-8'), $encoded_source);
            }
        }

        // else
        $this->error("Feed is in an unsupported character encoding. ($in_enc) " .
                     "You may see strange artifacts, and mangled characters.",
                     E_USER_NOTICE);

        return array(xml_parser_create(), $source);
    }

    /**
     * @param string $enc  encoding name, compared case-insensitively
     * @return mixed       the upper-cased name if it is one of
     *                     $_KNOWN_ENCODINGS, false otherwise
     */
    function known_encoding($enc) {
        $enc = strtoupper($enc);
        if ( in_array($enc, $this->_KNOWN_ENCODINGS) ) {
            return $enc;
        }
        else {
            return false;
        }
    }

    /**
     * Report a problem and remember it on the object.
     *
     * The message goes to trigger_error() when MAGPIE_DEBUG is defined and
     * true, otherwise to error_log().  Notice-level messages are stored in
     * $WARNING, everything else in $ERROR.
     *
     * @param string $errormsg  message text
     * @param int    $lvl       PHP error level, defaults to E_USER_WARNING
     */
    function error ($errormsg, $lvl=E_USER_WARNING) {
        // append PHP's error message if track_errors enabled
        // NOTE(review): $php_errormsg is a local of this function here, so
        // this branch presumably never fires -- confirm before relying on it
        if ( isset($php_errormsg) ) {
            $errormsg .= " ($php_errormsg)";
        }
        // defined() guard: using an undefined constant is an Error on PHP 8
        if ( defined('MAGPIE_DEBUG') and MAGPIE_DEBUG ) {
            trigger_error( $errormsg, $lvl);
        }
        else {
            error_log( $errormsg, 0);
        }

        $notices = E_USER_NOTICE|E_NOTICE;
        if ( $lvl&$notices ) {
            $this->WARNING = $errormsg;
        } else {
            $this->ERROR = $errormsg;
        }
    }


} // end class RSS

/**
 * Format one attribute as name="value"; used to flatten inline markup found
 * inside Atom content constructs.
 *
 * NOTE(review): the value is inserted without escaping -- confirm whether
 * consumers of the flattened content expect that.
 *
 * @param string $k  attribute name
 * @param string $v  attribute value
 * @return string
 */
function map_attrs($k, $v) {
    return "$k=\"$v\"";
}

// patch to support medieval versions of PHP4.1.x,
// courtesy, Ryan Currie, ryan@digibliss.com

if (!function_exists('array_change_key_case')) {
    if (!defined('CASE_UPPER')) {
        define("CASE_UPPER",1);
    }
    if (!defined('CASE_LOWER')) {
        define("CASE_LOWER",0);
    }


    /**
     * Fallback for array_change_key_case(): returns a copy of $array with
     * every key lower-cased (default) or upper-cased (CASE_UPPER).
     *
     * @param array $array  input array
     * @param int   $case   CASE_LOWER or CASE_UPPER
     * @return array
     */
    function array_change_key_case($array,$case=CASE_LOWER) {
        // compare (==) rather than assign (=): the old assignments meant
        // every call upper-cased the keys
        $cmd = ($case == CASE_UPPER) ? 'strtoupper' : 'strtolower';

        $output = array();  // so an empty input returns an empty array
        foreach($array as $key=>$value) {
            $output[$cmd($key)]=$value;
        }
        return $output;
    }

}

?>
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Sun Feb 25 10:22:19 2007 | par Balluche grâce à PHPXref 0.7 |