[ Index ]
 

Code source de Dolibarr 2.0.1

Accédez au Source d'autres logiciels libres

Classes | Fonctions | Variables | Constantes | Tables

title

Body

[fermer]

/htdocs/includes/magpierss/ -> rss_fetch.inc (source)

   1  <?php
   2  /*
   3   * Project:     MagpieRSS: a simple RSS integration tool
   4   * File:        rss_fetch.inc, a simple functional interface
   5                   to fetching and parsing RSS files, via the
   6                  function fetch_rss()
   7   * Author:      Kellan Elliott-McCrea <kellan@protest.net>
   8   *              Modified by Laurent Destailleur <eldy@users.sourceforge.net> for Dolibarr
   9   * License:        GPL
  10   *
  11   * The latest version of MagpieRSS can be obtained from:
  12   * http://magpierss.sourceforge.net
  13   *
  14   * For questions, help, comments, discussion, etc., please join the
  15   * Magpie mailing list:
  16   * magpierss-general@lists.sourceforge.net
  17   *
  18   */
  19   
  20  // Hosts that do not list the current directory in include_path need an
  21  // absolute base path, so MAGPIE_DIR is derived from this file's location.
  22  // (with thanks to rajiv and smarty)
  23  defined('DIR_SEP')
  24      or define('DIR_SEP', DIRECTORY_SEPARATOR);
  25  
  26  // directory holding this file, with a trailing separator
  27  defined('MAGPIE_DIR')
  28      or define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP);
  29  
  30  // rss_parse.inc and rss_cache.inc are loaded from MAGPIE_DIR
  31  require_once( MAGPIE_DIR . 'rss_parse.inc' );
  32  require_once( MAGPIE_DIR . 'rss_cache.inc' );
  33  
  34  // third-party libraries are loaded from the extlib/ subdirectory
  35  define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP);
  36  require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc');
  37  
  38  
  39  /* 
  40   * CONSTANTS - redefine these in your script to change the
  41   * behaviour of fetch_rss(). Currently, most options affect the cache
  42   *
  43   * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects? 
  44   * For me a built in cache was essential to creating a "PHP-like" 
  45   * feel to Magpie, see rss_cache.inc for rationale
  46   *
  47   *
  48   * MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects?
  49   * This should be a location that the webserver can write to.   If this 
  50   * directory does not already exist Magpie will try to be smart and create 
  51   * it.  This will often fail for permissions reasons.
  52   *
  53   *
  54   * MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds.
  55   *
  56   *
  57   * MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error
  58   * instead of returning stale object?
  59   *
  60   * MAGPIE_DEBUG - Display debugging notices?
  61   *
  62  */
  63  
  64  
  65  /*=======================================================================*\
  66      Function: fetch_rss: 
  67      Purpose:  return RSS object for the given url
  68                maintain the cache
  69      Input:      url of RSS file
  70      Output:      parsed RSS object (see rss_parse.inc)
  71  
  72      NOTES ON CACHEING:  
  73      If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache.
  74      
  75      NOTES ON RETRIEVING REMOTE FILES:
  76      If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
  77      return a cached object, and touch the cache object upon receiving a
  78      304.
  79      
  80      NOTES ON FAILED REQUESTS:
  81      If there is an HTTP error while fetching an RSS object, the cached
  82      version will be returned, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off)
  83  \*=======================================================================*/
  84  
  85  define('MAGPIE_VERSION', '0.7'); // embedded by init() in the default User-Agent string
  86  
  87  $MAGPIE_ERROR = ""; // last message recorded by error() / magpie_error()
  88  
  89  function fetch_rss ($url) {
          // Returns the parsed RSS object for $url, or false when nothing usable
          // could be obtained.
          //
          // Cache off: the feed is fetched and parsed on every call.
          // Cache on:  a fresh cache entry (HIT) is returned directly; a STALE
          //            entry triggers a conditional GET. A 304 re-stores and
          //            returns the cached object, a 2xx response is parsed,
          //            cached and returned. On any other response the error
          //            text is stored in ->ERROR of the stale object (or of the
          //            RSSCache object when no cached feed exists) and that
          //            object is returned.
          //
          // NOTE: MAGPIE_CACHE_FRESH_ONLY is not consulted by this function.
  90      // initialize constants
  91      init();
  92      
  93      if ( !isset($url) ) {
  94          error("fetch_rss called without a url");
  95          return false;
  96      }
  97  
  98      // if cache is disabled
  99      if ( !MAGPIE_CACHE_ON ) {
 100          // fetch file, and parse it
 101          $resp = _fetch_remote_file( $url );
 102          if ( is_success( $resp->status ) ) {
 103              return _response_to_rss( $resp );
 104          }
 105          else {
 106              error("Failed to fetch $url and cache is off");
 107              return false;
 108          }
 109      } 
 110      // else cache is ON
 111      else {
 112          // Flow
 113          // 1. check cache
 114          // 2. if there is a hit, make sure its fresh
 115          // 3. if cached obj fails freshness check, fetch remote
 116          // 4. if remote fails, return stale object, or error
 117          
 118          $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );
 119          
 120          if (MAGPIE_DEBUG and $cache->ERROR) {
 121              debug($cache->ERROR, E_USER_WARNING);
 122          }
 123          
 124          
 125          $cache_status      = 0;        // response of check_cache
 126          $request_headers = array(); // HTTP headers to send with fetch
 127          $rss              = 0;        // parsed RSS object
 128          $errormsg         = 0;        // errors, if any
 129          
 130          // store parsed XML by desired output encoding
 131          // as character munging happens at parse time
 132          $cache_key       = $url . MAGPIE_OUTPUT_ENCODING;
 133          
 134          if (!$cache->ERROR) {
 135              // return cache HIT, MISS, or STALE
 136              $cache_status = $cache->check_cache( $cache_key);
 137          }
 138          
 139          // if object cached, and cache is fresh, return cached obj
 140          if ( $cache_status == 'HIT' ) {
 141              $rss = $cache->get( $cache_key );
 142              if ( isset($rss) and $rss ) {
 143                  // should be cache age
 144                  $rss->from_cache = 1;
 145                  if ( MAGPIE_DEBUG > 1) {
 146                      debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
 147                  }
 148                  return $rss;
 149              }
 150          }
 151          
 152          // else attempt a conditional get
 153          
 154          // setup headers
 155          if ( $cache_status == 'STALE' ) {
 156              $rss = $cache->get( $cache_key );
 157              if ( $rss and $rss->etag and $rss->last_modified ) {
 158                  $request_headers['If-None-Match'] = $rss->etag;
                      // The HTTP validator for a Last-Modified date is
                      // If-Modified-Since; "If-Last-Modified" is not an HTTP header.
 159                  $request_headers['If-Modified-Since'] = $rss->last_modified;
 160              }
 161          }
 162          
 163          $resp = _fetch_remote_file( $url, $request_headers );
 164  
 165          if (isset($resp) and $resp) {
 166              if ($resp->status == '304' ) {
 167                  // we have the most current copy
 168                  if ( MAGPIE_DEBUG > 1) {
 169                      debug("Got 304 for $url");
 170                  }
 171                  // reset cache on 304 (at minutillo insistent prodding)
 172                  $cache->set($cache_key, $rss);
 173                  return $rss;
 174              }
 175              elseif ( is_success( $resp->status ) ) {
 176                  $rss = _response_to_rss( $resp );
 177                  if ( $rss ) {
 178                      if (MAGPIE_DEBUG > 1) {
 179                          debug("Fetch successful");
 180                      }
 181                      // add object to cache
 182                      $cache->set( $cache_key, $rss );
 183                      return $rss;
 184                  }
 185              }
 186              else {
 187                  $errormsg = "Failed to fetch $url ";
 188                  if ( $resp->status == '-100' ) {
 189                      $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
 190                  }
 191                  elseif ( $resp->error ) {
 192                      # compensate for Snoopy's annoying habit of tacking
 193                      # on '\n'
 194                      $http_error = substr($resp->error, 0, -2); 
 195                      
 196                      // LDR bug fix (no longer needed: handled by the line above)
 197                      //$http_error = eregi_replace("\n","",$resp->error); 
 198  
 199                      $errormsg .= "(HTTP Error: $http_error)";
 200                  }
 201                  else {
 202                      $errormsg .=  "(HTTP Response: " . $resp->response_code .')';
 203                  }
 204                  
 205                  // LDR bug fix: if the HTTP fetch failed but the cache could be
 206                  // read, the error is stored in the rss object
 207                  if ($rss)
 208                  {
 209                      if ($cache && $cache_key) $rss->date=filemtime($cache->file_name($cache_key));
 210                  }
 211                  else
 212                  {
                          // no cached feed: the cache object itself carries the error
 213                      $rss=$cache;
 214                  }
 215                  $rss->ERROR=$errormsg;
 216              }
 217          }
 218          else {
 219              $errormsg = "Unable to retrieve RSS file for unknown reasons.";
 220          }
 221          
 222          // else fetch failed
 223          
 224          // attempt to return cached object
 225          if ($rss) {
 226              if ( MAGPIE_DEBUG ) {
 227                  debug("Returning STALE object for $url");
 228              }
 229              return $rss;
 230          }
 231  
 232          // else we totally failed
 233          error( $errormsg );    
 234          
 235          return false;
 236          
 237      } // end if ( !MAGPIE_CACHE_ON ) {
 238  } // end fetch_rss()
 239  
 240  /*=======================================================================*\
 241      Function:    error
 242      Purpose:    store the message in $MAGPIE_ERROR; trigger it in debug mode
 243  \*=======================================================================*/
 244  
 245  function error ($errormsg, $lvl=E_USER_WARNING) {
 246      global $MAGPIE_ERROR;
 247      // append PHP's own error text when $php_errormsg is set (track_errors)
 248      if ( isset($php_errormsg) ) {
 249          $errormsg .= " ($php_errormsg)";
 250      }
 251      // an empty message leaves the stored error untouched
 252      if ( !$errormsg ) {
 253          return;
 254      }
 255      $MAGPIE_ERROR = "MagpieRSS: $errormsg";
 256      // LDR bug fix: the error is only raised when debug mode is on
 257      if ( MAGPIE_DEBUG ) {
 258          trigger_error( $MAGPIE_ERROR, $lvl);
 259      }
 260  }
 261  
 262  function debug ($debugmsg, $lvl=E_USER_NOTICE) { // LDR bug fix: silent unless MAGPIE_DEBUG is on
 263      if ( !MAGPIE_DEBUG ) {
 264          return;
 265      }
 266      trigger_error("MagpieRSS [debug] $debugmsg", $lvl);
 267  }
 268              
 269  /*=======================================================================*\
 270      Function:    magpie_error
 271      Purpose:    read, and optionally overwrite, the magpie error variable
 272  \*=======================================================================*/
 273  function magpie_error ($errormsg="") {
 274      global $MAGPIE_ERROR;
 275  
 276      // a non-empty argument replaces the stored message before it is returned
 277      if ( !empty($errormsg) ) {
 278          $MAGPIE_ERROR = $errormsg;
 279      }
 280      return $MAGPIE_ERROR;
 281  }
 282  
 283  /*=======================================================================*\
 284      Function:    _fetch_remote_file
 285      Purpose:    fetch a URL with a freshly configured Snoopy client
 286      Input:        url of the remote file
 287                  request headers as an array (optional; ignored otherwise)
 288      Output:        the Snoopy object after fetch() (see Snoopy.class.inc)
 289  \*=======================================================================*/
 290  function _fetch_remote_file ($url, $headers = "" ) {
 291      // configure the HTTP client from the MAGPIE_* constants
 292      $snoopy = new Snoopy();
 293      $snoopy->agent = MAGPIE_USER_AGENT;
 294      $snoopy->read_timeout = MAGPIE_FETCH_TIME_OUT;
 295      $snoopy->use_gzip = MAGPIE_USE_GZIP;
 296      if ( is_array($headers) ) {
 297          $snoopy->rawheaders = $headers;
 298      }
 299      // PHP errors raised by fetch() are suppressed; the caller inspects
 300      // the returned object instead
 301      @$snoopy->fetch($url);
 302      return $snoopy;
 303  }
 304  
 305  /*=======================================================================*\
 306      Function:    _response_to_rss
 307      Purpose:    parse an HTTP response object into an RSS object
 308      Input:        an HTTP response object (see Snoopy)
 309      Output:        parsed RSS object (see rss_parse), or false on parse failure
 310  \*=======================================================================*/
 311  function _response_to_rss ($resp) {
 312      $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );
 313      
 314      // if RSS parsed successfully        
 315      if ( $rss and !$rss->ERROR) {
 316          
 317          // find Etag, and Last-Modified
 318          foreach($resp->headers as $h) {
 319              // 2003-03-02 - Nicola Asuni (www.tecnick.com) - fixed bug "Undefined offset: 1"
 320              if (strpos($h, ": ")) {
 321                  list($field, $val) = explode(": ", $h, 2);
 322              }
 323              else {
 324                  $field = $h;
 325                  $val = "";
 326              }
 327              $val = trim($val); // drop the line terminator / padding around the value
 328              if ( strcasecmp($field, 'ETag') == 0 ) {
 329                  $rss->etag = $val;
 330              }
 331              // HTTP header field names are case-insensitive
 332              if ( strcasecmp($field, 'Last-Modified') == 0 ) {
 333                  $rss->last_modified = $val;
 334              }
 335          }
 336          
 337          return $rss;    
 338      } // else construct error message
 339      else {
 340          $errormsg = "Failed to parse RSS file.";
 341          
 342          if ($rss) {
 343              $errormsg .= " (" . $rss->ERROR . ")";
 344          }
 345          error($errormsg);
 346          
 347          return false;
 348      } // end if ($rss and !$rss->error)
 349  }
 350  
 351  /*=======================================================================*\
 352      Function:    init
 353      Purpose:    define every MAGPIE_* constant the caller has not already
 354                  defined, using default values; runs only once per request
 355  \*=======================================================================*/
 356  function init () {
 357      if ( defined('MAGPIE_INITALIZED') ) {
 358          return;
 359      }
 360      else {
 361          define('MAGPIE_INITALIZED', true);
 362      }
 363      
 364      if ( !defined('MAGPIE_CACHE_ON') ) {
 365          define('MAGPIE_CACHE_ON', true);
 366      }
 367  
 368      if ( !defined('MAGPIE_CACHE_DIR') ) {
 369  
 370          // The cache directory lives under DOL_DATA_ROOT
 371          $ret=true;
 372          if (! file_exists(DOL_DATA_ROOT)) {
 373              $ret=mkdir(DOL_DATA_ROOT);
 374          }
 375          if ($ret && ! file_exists(DOL_DATA_ROOT.'/rsscache')) {
 376              $ret=mkdir(DOL_DATA_ROOT.'/rsscache');
 377          }
 378  
 379          // If the cache directory could not be found or created, fall back to
 380          // the legacy location under DOL_DOCUMENT_ROOT for compatibility with
 381          // older versions. The constant is defined exactly once because PHP
 382          // ignores a second define() of an already-defined name.
 383          $cache_dir = $ret ? DOL_DATA_ROOT.'/rsscache' : DOL_DOCUMENT_ROOT.'/rsscache';
 384          define('MAGPIE_CACHE_DIR', $cache_dir);
 385  
 386  
 387      }
 388  
 389      if ( !defined('MAGPIE_CACHE_AGE') ) {
 390          define('MAGPIE_CACHE_AGE', 60*60); // one hour
 391      }
 392  
 393      if ( !defined('MAGPIE_CACHE_FRESH_ONLY') ) {
 394          define('MAGPIE_CACHE_FRESH_ONLY', false);
 395      }
 396  
 397      if ( !defined('MAGPIE_OUTPUT_ENCODING') ) {
 398          define('MAGPIE_OUTPUT_ENCODING', 'ISO-8859-1');
 399      }
 400      
 401      if ( !defined('MAGPIE_INPUT_ENCODING') ) {
 402          define('MAGPIE_INPUT_ENCODING', null);
 403      }
 404      
 405      if ( !defined('MAGPIE_DETECT_ENCODING') ) {
 406          define('MAGPIE_DETECT_ENCODING', true);
 407      }
 408      
 409      if ( !defined('MAGPIE_DEBUG') ) {
 410          define('MAGPIE_DEBUG', 0);
 411      }
 412      
 413      if ( !defined('MAGPIE_USER_AGENT') ) {
 414          $ua = 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net';
 415          
 416          if ( MAGPIE_CACHE_ON ) {
 417              $ua = $ua . ')';
 418          }
 419          else {
 420              $ua = $ua . '; No cache)';
 421          }
 422          
 423          define('MAGPIE_USER_AGENT', $ua);
 424      }
 425      
 426      if ( !defined('MAGPIE_FETCH_TIME_OUT') ) {
 427          define('MAGPIE_FETCH_TIME_OUT', 5);    // 5 second timeout
 428      }
 429      
 430      // use gzip encoding to fetch rss files if supported?
 431      if ( !defined('MAGPIE_USE_GZIP') ) {
 432          define('MAGPIE_USE_GZIP', true);    
 433      }
 434  }
 435  
 436  // NOTE: the following code should really be in Snoopy, or at least
 437  // somewhere other than rss_fetch!
 438  
 439  /*=======================================================================*\
 440      HTTP STATUS CODE PREDICATES
 441      These functions attempt to classify an HTTP status code
 442      based on RFC 2616 and RFC 2518.
 443      
 444      All of them take an HTTP status code as input, and return true or false
 445  
 446      All this code is adapted from LWP's HTTP::Status.
 447  \*=======================================================================*/
 448  
 449  
 450  /*=======================================================================*\
 451      Function:    is_info
 452      Purpose:    true when the status code is Informational (100 to 199)
 453  \*=======================================================================*/
 454  function is_info ($sc) {
 455      return ( $sc >= 100 ) ? ( $sc < 200 ) : false;
 456  }
 457  
 458  /*=======================================================================*\
 459      Function:    is_success
 460      Purpose:    true when the status code is Successful (200 to 299)
 461  \*=======================================================================*/
 462  function is_success ($sc) {
 463      return ( $sc >= 200 ) ? ( $sc < 300 ) : false;
 464  }
 465  
 466  /*=======================================================================*\
 467      Function:    is_redirect
 468      Purpose:    true when the status code is a Redirection (300 to 399)
 469  \*=======================================================================*/
 470  function is_redirect ($sc) {
 471      return ( $sc >= 300 ) ? ( $sc < 400 ) : false;
 472  }
 473  
 474  /*=======================================================================*\
 475      Function:    is_error
 476      Purpose:    true when the status code is an Error (400 to 599)
 477  \*=======================================================================*/
 478  function is_error ($sc) {
 479      return ( $sc >= 400 ) ? ( $sc < 600 ) : false;
 480  }
 481  
 482  /*=======================================================================*\
 483      Function:    is_client_error
 484      Purpose:    true when the status code is a client error (400 to 499)
 485  \*=======================================================================*/
 486  function is_client_error ($sc) {
 487      return ( $sc >= 400 ) ? ( $sc < 500 ) : false;
 488  }
 489  
 490  /*=======================================================================*\
 491      Function:    is_server_error
 492      Purpose:    true when the status code is a server error (500 to 599)
 493  \*=======================================================================*/
 494  function is_server_error ($sc) {
 495      return ( $sc >= 500 ) ? ( $sc < 600 ) : false;
 496  }
 497  
 498  ?>


Généré le : Mon Nov 26 12:29:37 2007 par Balluche grâce à PHPXref 0.7
  Clicky Web Analytics