SPIP 1.9.2c : /ecrire/inc/feedfinder.php source

[Sommaire] [Imprimer]
   1  <?php
   2  
   3  /**********************************
   4  adaptation en php de feedfinder.py :
   5  
   6  """Ultra-liberal feed finder, de Mark Pilgrim
   7  <http://diveintomark.org/projects/feed_finder/>
   8  
   9  Par: courcy.michael@wanadoo.fr
  10  
   11  adaptation en php, je ne reprends qu'une partie de cet algorithme
  12  
   13  0) À chaque étape, on vérifie si les feeds indiqués sont réellement des feeds
   14  1) Si l'URI passée est un feed, on retourne le résultat tout simplement
   15  2) Si le header de la page contient des balises LINK qui renvoient vers des feeds, on les retourne
   16  3) On cherche les liens <a> qui se terminent par ".rss", ".rdf", ".xml" ou ".atom"
   17  4) On cherche les liens <a> contenant "rss", "rdf", "xml" ou "atom"
  18  
   19  Je n'intègre pas l'interrogation de syndic8 par XML-RPC, mais on peut le faire assez facilement.
   20  Dans la phase de test sur différentes URL, je n'ai constaté aucune différence entre les réponses
   21  données par feedfinder.py et les miennes, donc je ne suis pas sûr d'en voir l'intérêt.
  22  
  23  Je ne me préoccupe pas comme l'auteur de savoir si mes liens de feed sont sur le même serveur ou pas
  24  
  25  exemple d'utilisation
  26  
  27  print_r (get_feed_from_url("http://willy.boerland.com/myblog/"));
  28  
  29  on obtient
  30  
  31  Array
  32  (
  33      [0] => http://willy.boerland.com/myblog/atom/feed
  34      [1] => http://willy.boerland.com/myblog/blogapi/rsd
  35      [2] => http://willy.boerland.com/myblog/rss.xml
  36      [3] => http://willy.boerland.com/myblog/node/feed
  37  )
  38  
  39  *****************************************************************/
  40  
  41  $verif_complete = 0; //mettez le à 1 si vous voulez controler la validité des feed trouvés mais le temps d'execution
  42                       //est alors plus long
  43  
  44  //une fonction qui permet de si un lien est un feed ou nom, si c'est un feed elle retourne son type
  45  //si c'est pas un feed elle retourne 0, cette vérification est évidemment tres tres légère
  46  // http://doc.spip.org/@is_feed
  47  function is_feed($url){
  48  
  49      # methode SPIP
  50      if (function_exists('recuperer_page')) {
  51          $feed = recuperer_page($url);
  52          if (preg_match("/<(\w*) .*/", $buffer, $matches)){
  53                  //ici on détecte la premiere balise
  54                  $type_feed = $matches[1];
  55                  switch ($type_feed) {
  56                         case "rss": return "rss";
  57                         case "feed": return "atom";
  58                         case "rdf": return "rdf";
  59                  }
  60          }
  61          return '';
  62      }
  63  
  64        $fp = @fopen($url, "r");
  65        if (!$fp ) {
  66             return 0;
  67        }
  68        //vérifion la nature de ce fichier
  69        while (!feof($fp)) {
  70             $buffer = fgets($fp, 4096);
  71             if (preg_match("/<(\w*) .*/", $buffer, $matches)){
  72                  //ici on détecte la premiere balise
  73                  $type_feed = $matches[1];
  74                  switch ($type_feed) {
  75                         case "rss": fclose($fp); return "rss";
  76                         case "feed": fclose($fp); return "atom";
  77                         case "rdf": fclose($fp); return "rdf";
  78                         default : fclose($fp); return 0;
  79                  }
  80             }
  81        }
  82  }
  83  
  84  /*****************test is_feed******************************
   85  echo is_feed("http://spip-contrib.net/backend" . _EXTENSION_PHP) . "<br />"; //retourne rss
   86  echo is_feed("http://liberation.fr/rss.php") . "<br />"; //retourne rss
   87  echo is_feed("http://liberation.fr/rss.php") . "<br />"; //retourne rss
   88  echo is_feed("http://willy.boerland.com/myblog/atom/feed") . "<br />"; //retourne atom
   89  echo is_feed("http://spip.net/") . "<br />"; //retourne 0
   90  //pas trouvé d'exemples avec rdf, j'ai encore du mal à saisir ce que rdf apporte de plus que rss
   91  //mais bon, je n'ai pas approfondi
  92  ************************************************************/
  93  
  94  //fonction sans finesse mais efficace
  95  //on parcourt ligne par ligne à la recherche de balise <a> ou <link>
  96  //si dans le corps de celle-ci on trouve les mots rss, xml, atom ou rdf
  97  //alors on recupere la valeur href='<url>', on adapte celle-ci si elle
  98  //est relative et on vérifie que c'est bien un feed si oui on l'ajoute
  99  //au tableau des feed si on ne trouve rien ou si aucun feed est trouvé on retourne 
 100  //un tableau vide
 101  // http://doc.spip.org/@get_feed_from_url
 102  function get_feed_from_url($url, $buffer=false){
 103           global $verif_complete;
 104           //j'ai prévenu ce sera pas fin
 105           if (!preg_match("/^http:\/\/.*/", $url)) $url = "http://www." . $url;
 106           if (!$buffer) $buffer = @file_get_contents($url);
 107  
 108           $feed_list = array();
 109           //extraction des <link>
 110           if (preg_match_all("/<link [^>]*>/i", $buffer, $matches)){
 111                      //y a t-y rss atom rdf ou xml dans ces balises
 112                      foreach($matches[0] as $link){
 113                        if (  strpos($link, "rss")
 114                           || strpos($link, "rdf")
 115                           || strpos($link, "atom")
 116                           || strpos($link, "xml") ){
 117                              //voila un candidat on va extraire sa partie href et la placer dans notre tableau
 118                              if (preg_match("/href=['|\"]?([^\s'\"]*)['|\"]?/",$link,$matches2)){
 119                                   //on aura pris soin de vérifier si ce lien est relatif d'en faire un absolu
 120                                   if (!preg_match("/^http:\/\/.*/", $matches2[1])){
 121                                          $matches2[1] = concat_url($url,$matches2[1]);
 122                                   }
 123                                   if($verif_complete){
 124                                          if (is_feed($matches2[1])) $feed_list[] = $matches2[1];
 125                                   }else  $feed_list[] = $matches2[1];
 126                              }
 127                        }
 128                      }
 129                      //print_r($matches);
 130           }
 131           //extraction des <a>
 132           if (preg_match_all("/<a [^>]*>/i", $buffer, $matches)){
 133                      //y a t-y rss atom rdf ou xml dans ces balises
 134                      foreach($matches[0] as $link){
 135                         if (  strpos($link, "rss")
 136                           || strpos($link, "rdf")
 137                           || strpos($link, "atom")
 138                           || strpos($link, "xml") ){
 139                              //voila un candidat on va extraire sa partie href et la placer dans notre tableau
 140                              if (preg_match("/href=['|\"]?([^\s'\"]*)['|\"]?/",$link,$matches2)){
 141                                   //on aura pris soin de vérifier si ce lien est relatif d'en faire un absolu
 142                                   if (!preg_match("/^http:\/\/.*/", $matches2[1])){
 143                                          $matches2[1] = concat_url($url,$matches2[1]);
 144                                   }
 145                                   if($verif_complete){
 146                                          if (is_feed($matches2[1])) $feed_list[] = $matches2[1];
 147                                   }else  $feed_list[] = $matches2[1];
 148                              }
 149                         }
 150                      }
 151           }
 152           return $feed_list;
 153  }
 154  /************************************ getFeed ****************************
 155  print_r (get_feed_from_url("spip-contrib.net"));
 156  print_r (get_feed_from_url("http://liberation.fr/"));
 157  print_r (get_feed_from_url("cnn.com"));
 158  print_r (get_feed_from_url("http://willy.boerland.com/myblog/"));
 159  *****************************    Résultat *****************************************
 160  Array
 161  (
 162      [0] => http://www.spip-contrib.net/backend.php
 163  )
 164  Array
 165  (
 166      [0] => http://www.liberation.fr/rss.php
 167  )
 168  Array
 169  (
 170      [0] => http://rss.cnn.com/rss/cnn_topstories.rss
 171      [1] => http://rss.cnn.com/rss/cnn_latest.rss
 172      [2] => http://www.cnn.com/services/rss/
 173      [3] => http://www.cnn.com/services/rss/
 174      [4] => http://www.cnn.com/services/rss/
 175  )
 176  Array
 177  (
 178      [0] => http://willy.boerland.com/myblog/atom/feed
 179      [1] => http://willy.boerland.com/myblog/blogapi/rsd
 180      [2] => http://willy.boerland.com/myblog/rss.xml
 181      [3] => http://willy.boerland.com/myblog/node/feed
 182  )
 183  ************************************************************************/
 184  
 185  //petite fonction qui prend en charge les problemes de double slash
 186  //qunad on concatene les lien 
 187  // http://doc.spip.org/@concat_url
 188  function concat_url($url1, $path){
 189      # methode spip
 190      if(function_exists('suivre_lien')) {
 191          return suivre_lien($url1,$path);
 192      }
 193          $url = $url1 . "/" . $path;
 194          //cette opération peut très facilement avoir généré // ou /// 
 195          $url = str_replace("///", "/", $url);
 196          $url = str_replace("//", "/", $url); 
 197          //cas particulier de http://
 198          $url = str_replace("http:/", "http://", $url);
 199          return $url;
 200  }
 201  
 202  /****************************test concat**********************
 203  echo concat_url("http://spip.net" , "ecrire")."<br />";
 204  echo concat_url("http://spip.net/" , "ecrire")."<br />";
 205  echo concat_url("http://spip.net" , "/ecrire")."<br />";
 206  echo concat_url("http://spip.net/" , "/ecrire")."<br />";
 207  *************************************************************/
 208  
 209  
 210  
 211  
 212  ?>
Code source de SPIP 1.9.2c

/ecrire/inc/ -> feedfinder.php (source)

Généré le : Wed Nov 21 10:20:27 2007	par Balluche grâce à PHPXref 0.7