[ Index ] |
|
Code source de vtiger CRM 5.0.2 |
<?php
/*
 * Project:     MagpieRSS: a simple RSS integration tool
 * File:        rss_fetch.inc, a simple functional interface
 *              to fetching and parsing RSS files, via the
 *              function fetch_rss()
 * Author:      Kellan Elliott-McCrea <kellan@protest.net>
 * License:     GPL
 *
 * The latest version of MagpieRSS can be obtained from:
 * http://magpierss.sourceforge.net
 *
 * For questions, help, comments, discussion, etc., please join the
 * Magpie mailing list:
 * magpierss-general@lists.sourceforge.net
 *
 */

// Setup MAGPIE_DIR for use on hosts that don't include
// the current path in include_path.
// with thanks to rajiv and smarty
if (!defined('DIR_SEP')) {
    define('DIR_SEP', DIRECTORY_SEPARATOR);
}

if (!defined('MAGPIE_DIR')) {
    define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP);
}

require_once( MAGPIE_DIR . 'rss_parse.inc' );
require_once( MAGPIE_DIR . 'rss_cache.inc' );

// for including 3rd party libraries
define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP);
require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc');


/*
 * CONSTANTS - redefine these in your script to change the
 * behaviour of fetch_rss(). Currently, most options affect the cache.
 *
 * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects?
 * For me a built in cache was essential to creating a "PHP-like"
 * feel to Magpie, see rss_cache.inc for rationale
 *
 *
 * MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects?
 * This should be a location that the webserver can write to. If this
 * directory does not already exist Magpie will try to be smart and create
 * it. This will often fail for permissions reasons.
 *
 *
 * MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds.
 *
 *
 * MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error
 * instead of returning stale object?
 *
 * MAGPIE_DEBUG - Display debugging notices?
 *
 */


/*=======================================================================*\
    Function:   fetch_rss
    Purpose:    return RSS object for the given url,
                maintaining the cache
    Input:      url of RSS file
    Output:     parsed RSS object (see rss_parse.inc), or false on failure
                (the message is then available through magpie_error())

    NOTES ON CACHING:
    If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache.

    NOTES ON RETRIEVING REMOTE FILES:
    When the cached object is STALE and carries an ETag and/or a
    Last-Modified value, fetch_rss sends a conditional GET. On receiving a
    304 it re-stores the cached object (refreshing it) and returns it.

    NOTES ON FAILED REQUESTS:
    If there is an HTTP error while fetching an RSS object, the cached
    version will be returned, if it exists (and if MAGPIE_CACHE_FRESH_ONLY
    is off)
\*=======================================================================*/

define('MAGPIE_VERSION', '0.72');

$MAGPIE_ERROR = "";

function fetch_rss ($url) {
    // initialize constants
    init();

    if ( !isset($url) ) {
        error("fetch_rss called without a url");
        return false;
    }

    // if cache is disabled
    if ( !MAGPIE_CACHE_ON ) {
        // fetch file, and parse it
        $resp = _fetch_remote_file( $url );
        if ( is_success( $resp->status ) ) {
            return _response_to_rss( $resp );
        }
        else {
            error("Failed to fetch $url and cache is off");
            return false;
        }
    }
    // else cache is ON
    else {
        // Flow
        // 1. check cache
        // 2. if there is a hit, make sure its fresh
        // 3. if cached obj fails freshness check, fetch remote
        // 4. if remote fails, return stale object, or error

        $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

        if ( MAGPIE_DEBUG and $cache->ERROR ) {
            debug($cache->ERROR, E_USER_WARNING);
        }

        // Start from an empty string rather than integer 0: on PHP < 8
        // the loose comparison 0 == 'HIT' is true, which made an errored
        // cache look like a hit.
        $cache_status    = '';       // response of check_cache
        $request_headers = array();  // HTTP headers to send with fetch
        $rss             = 0;        // parsed RSS object
        $errormsg        = '';       // errors, if any

        // store parsed XML by desired output encoding
        // as character munging happens at parse time
        $cache_key = $url . MAGPIE_OUTPUT_ENCODING;

        if ( !$cache->ERROR ) {
            // return cache HIT, MISS, or STALE
            $cache_status = $cache->check_cache( $cache_key );
        }

        // if object cached, and cache is fresh, return cached obj
        if ( $cache_status === 'HIT' ) {
            $rss = $cache->get( $cache_key );
            if ( isset($rss) and $rss ) {
                // should be cache age
                $rss->from_cache = 1;
                if ( MAGPIE_DEBUG > 1 ) {
                    debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
                }
                return $rss;
            }
        }

        // else attempt a conditional get

        // setup headers: send whichever validators the stale object has
        if ( $cache_status === 'STALE' ) {
            $rss = $cache->get( $cache_key );
            if ( $rss ) {
                if ( !empty($rss->etag) ) {
                    $request_headers['If-None-Match'] = $rss->etag;
                }
                if ( !empty($rss->last_modified) ) {
                    // the request header paired with Last-Modified is
                    // If-Modified-Since ("If-Last-Modified" does not exist)
                    $request_headers['If-Modified-Since'] = $rss->last_modified;
                }
            }
        }

        $resp = _fetch_remote_file( $url, $request_headers );

        if ( isset($resp) and $resp ) {
            if ( $resp->status == '304' ) {
                // we have the most current copy
                if ( MAGPIE_DEBUG > 1 ) {
                    debug("Got 304 for $url");
                }
                // reset cache on 304 (at minutillo insistent prodding)
                $cache->set( $cache_key, $rss );
                return $rss;
            }
            elseif ( is_success( $resp->status ) ) {
                $rss = _response_to_rss( $resp );
                if ( $rss ) {
                    if ( MAGPIE_DEBUG > 1 ) {
                        debug("Fetch successful");
                    }
                    // add object to cache
                    $cache->set( $cache_key, $rss );
                    return $rss;
                }
            }
            else {
                $errormsg = "Failed to fetch $url ";
                if ( $resp->status == '-100' ) {
                    $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
                }
                elseif ( $resp->error ) {
                    # compensate for Snoopy's annoying habit of tacking
                    # on '\n'
                    $http_error = substr($resp->error, 0, -2);
                    $errormsg .= "(HTTP Error: $http_error)";
                }
                else {
                    $errormsg .= "(HTTP Response: " . $resp->response_code .')';
                }
            }
        }
        else {
            $errormsg = "Unable to retrieve RSS file for unknown reasons.";
        }

        // else fetch failed

        // attempt to return cached object, unless the caller asked for
        // fresh objects only (MAGPIE_CACHE_FRESH_ONLY)
        if ( $rss and !MAGPIE_CACHE_FRESH_ONLY ) {
            if ( MAGPIE_DEBUG ) {
                debug("Returning STALE object for $url");
            }
            return $rss;
        }

        // else we totally failed
        error( $errormsg );

        return false;

    } // end if ( !MAGPIE_CACHE_ON ) {
} // end fetch_rss()

/*=======================================================================*\
    Function:   error
    Purpose:    record the message in the global MAGPIE_ERROR
    Input:      error message
                error level (optional); currently unused because the
                trigger_error() call is disabled in this copy
    Output:     none
\*=======================================================================*/

function error ($errormsg, $lvl=E_USER_WARNING) {
    global $MAGPIE_ERROR;

    // append PHP's error message if track_errors enabled
    // NOTE(review): $php_errormsg is looked up in this function's local
    // scope, so this branch is probably never taken -- confirm.
    if ( isset($php_errormsg) ) {
        $errormsg .= " ($php_errormsg)";
    }
    if ( $errormsg ) {
        $errormsg = "MagpieRSS: $errormsg";
        $MAGPIE_ERROR = $errormsg;
        //trigger_error( $errormsg, $lvl);
    }
}

/*=======================================================================*\
    Function:   debug
    Purpose:    raise a PHP user-level error carrying a debug message
    Input:      debug message
                error level (optional, defaults to E_USER_NOTICE)
    Output:     none
\*=======================================================================*/
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
    trigger_error("MagpieRSS [debug] $debugmsg", $lvl);
}

/*=======================================================================*\
    Function:   magpie_error
    Purpose:    accessor for the magpie error variable
    Input:      new error message (optional); a non-empty value
                overwrites the stored message
    Output:     the current value of MAGPIE_ERROR
\*=======================================================================*/
function magpie_error ($errormsg="") {
    global $MAGPIE_ERROR;

    if ( isset($errormsg) and $errormsg ) {
        $MAGPIE_ERROR = $errormsg;
    }

    return $MAGPIE_ERROR;
}

/*=======================================================================*\
    Function:   _fetch_remote_file
    Purpose:    retrieve an arbitrary remote file
    Input:      url of the remote file
                headers to send along with the request (optional);
                only applied when given as an array
    Output:     an HTTP response object (see Snoopy.class.inc)
\*=======================================================================*/
function _fetch_remote_file ($url, $headers = "" ) {
    // Snoopy is an HTTP client in PHP
    $client = new Snoopy();
    $client->agent = MAGPIE_USER_AGENT;
    $client->read_timeout = MAGPIE_FETCH_TIME_OUT;
    $client->use_gzip = MAGPIE_USE_GZIP;
    if ( is_array($headers) ) {
        $client->rawheaders = $headers;
    }

    $client->fetch($url);
    return $client;
}

/*=======================================================================*\
    Function:   _response_to_rss
    Purpose:    parse an HTTP response object into an RSS object
    Input:      an HTTP response object (see Snoopy)
    Output:     parsed RSS object (see rss_parse), or false when parsing
                failed (the reason is recorded through error())
\*=======================================================================*/
function _response_to_rss ($resp) {
    $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );

    // if RSS parsed successfully
    if ( $rss and !$rss->ERROR ) {

        // find Etag, and Last-Modified
        foreach ( $resp->headers as $h ) {
            // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1"
            // compare against false explicitly: strpos() may return 0
            if ( strpos($h, ": ") !== false ) {
                list($field, $val) = explode(": ", $h, 2);
            }
            else {
                $field = $h;
                $val = "";
            }

            // HTTP header names are case-insensitive; trim defensively in
            // case the raw header line still carries its trailing CR/LF,
            // so the value can be replayed cleanly in a conditional GET.
            $field = strtolower(trim($field));
            $val   = trim($val);

            if ( $field == 'etag' ) {
                $rss->etag = $val;
            }

            if ( $field == 'last-modified' ) {
                $rss->last_modified = $val;
            }
        }

        return $rss;
    } // else construct error message
    else {
        $errormsg = "Failed to parse RSS file.";

        if ( $rss ) {
            $errormsg .= " (" . $rss->ERROR . ")";
        }
        error($errormsg);

        return false;
    } // end if ($rss and !$rss->error)
}

/*=======================================================================*\
    Function:   init
    Purpose:    setup constants with default values
                check for user overrides
    Input:      none
    Output:     none; runs once per request, guarded by MAGPIE_INITALIZED
\*=======================================================================*/
function init () {
    // the (misspelled) constant name is kept as-is: it is the run-once
    // guard and other code may test for it
    if ( defined('MAGPIE_INITALIZED') ) {
        return;
    }
    else {
        define('MAGPIE_INITALIZED', true);
    }

    if ( !defined('MAGPIE_CACHE_ON') ) {
        define('MAGPIE_CACHE_ON', true);
    }

    if ( !defined('MAGPIE_CACHE_DIR') ) {
        define('MAGPIE_CACHE_DIR', './cache');
    }

    if ( !defined('MAGPIE_CACHE_AGE') ) {
        define('MAGPIE_CACHE_AGE', 60*60); // one hour
    }

    if ( !defined('MAGPIE_CACHE_FRESH_ONLY') ) {
        define('MAGPIE_CACHE_FRESH_ONLY', false);
    }

    if ( !defined('MAGPIE_OUTPUT_ENCODING') ) {
        define('MAGPIE_OUTPUT_ENCODING', 'ISO-8859-1');
    }

    if ( !defined('MAGPIE_INPUT_ENCODING') ) {
        define('MAGPIE_INPUT_ENCODING', null);
    }

    if ( !defined('MAGPIE_DETECT_ENCODING') ) {
        define('MAGPIE_DETECT_ENCODING', true);
    }

    if ( !defined('MAGPIE_DEBUG') ) {
        define('MAGPIE_DEBUG', 0);
    }

    if ( !defined('MAGPIE_USER_AGENT') ) {
        $ua = 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net';

        if ( MAGPIE_CACHE_ON ) {
            $ua = $ua . ')';
        }
        else {
            $ua = $ua . '; No cache)';
        }

        define('MAGPIE_USER_AGENT', $ua);
    }

    if ( !defined('MAGPIE_FETCH_TIME_OUT') ) {
        define('MAGPIE_FETCH_TIME_OUT', 5); // 5 second timeout
    }

    // use gzip encoding to fetch rss files if supported?
    if ( !defined('MAGPIE_USE_GZIP') ) {
        define('MAGPIE_USE_GZIP', true);
    }
}

// NOTE: the following code should really be in Snoopy, or at least
// somewhere other than rss_fetch!
/*=======================================================================*\
    HTTP STATUS CODE PREDICATES
    These functions attempt to classify an HTTP status code
    based on RFC 2616 and RFC 2518.

    All of them take an HTTP status code as input, and return true or false

    All this code is adapted from LWP's HTTP::Status.
\*=======================================================================*/


/*=======================================================================*\
    Function:   is_info
    Purpose:    return true if Informational status code (100-199)
\*=======================================================================*/
function is_info ($sc) {
    return $sc >= 100 && $sc < 200;
}

/*=======================================================================*\
    Function:   is_success
    Purpose:    return true if Successful status code (200-299)
\*=======================================================================*/
function is_success ($sc) {
    return $sc >= 200 && $sc < 300;
}

/*=======================================================================*\
    Function:   is_redirect
    Purpose:    return true if Redirection status code (300-399)
\*=======================================================================*/
function is_redirect ($sc) {
    return $sc >= 300 && $sc < 400;
}

/*=======================================================================*\
    Function:   is_error
    Purpose:    return true if Error status code (400-599)
\*=======================================================================*/
function is_error ($sc) {
    return $sc >= 400 && $sc < 600;
}

/*=======================================================================*\
    Function:   is_client_error
    Purpose:    return true if Error status code, and it's a client
                error (400-499)
\*=======================================================================*/
function is_client_error ($sc) {
    return $sc >= 400 && $sc < 500;
}

/*=======================================================================*\
    Function:   is_server_error
    Purpose:    return true if Error status code, and it's a server
                error (500-599)
\*=======================================================================*/
function is_server_error ($sc) {
    return $sc >= 500 && $sc < 600;
}

?>
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Sun Feb 25 10:22:19 2007 | par Balluche grâce à PHPXref 0.7 |