SPIP 1.8.3 : /ecrire/feedfinder.php source

[Sommaire] [Imprimer]
   1  <?php
   2  
   3  /**********************************
   4  adaptation en php de feedfinder.py :
   5  
   6  """Ultra-liberal feed finder, de Mark Pilgrim
   7  <http://diveintomark.org/projects/feed_finder/>
   8  
   9  Par: courcy.michael@wanadoo.fr
  10  
  11  adaptation en php, je ne reprends qu'une partie de cet algorithme
  12  
  13  0) À chaque étape on vérifie si les feeds indiqués sont réellement des feeds
  14  1) Si l'uri passé est un feed on retourne le résultat tout simplement
  15  2) Si le header de la page contient des balises LINK qui renvoient vers des feed on les retourne
  16  3) on cherche les liens <a> qui se termine par  ".rss", ".rdf", ".xml", ou ".atom"
  17  4) on cherche les liens <a> contenant "rss", "rdf", "xml", ou "atom"
  18  
  19  j'intègre pas l'interrogation  avec xml_rpc de syndic8, mais on peut le faire assez facilement
  20  dans la phase de test sur différentes url je n'ai constaté aucune différence entre les réponses
  21  données par feedfinder.py et les miennes donc je ne suis pas sûr de voir l'intérêt
  22  
  23  Je ne me préoccupe pas comme l'auteur de savoir si mes liens de feed sont sur le même serveur ou pas
  24  
  25  exemple d'utilisation
  26  
  27  print_r (get_feed_from_url("http://willy.boerland.com/myblog/"));
  28  
  29  on obtient
  30  
  31  Array
  32  (
  33      [0] => http://willy.boerland.com/myblog/atom/feed
  34      [1] => http://willy.boerland.com/myblog/blogapi/rsd
  35      [2] => http://willy.boerland.com/myblog/rss.xml
  36      [3] => http://willy.boerland.com/myblog/node/feed
  37  )
  38  
  39  *****************************************************************/
  40  
  41  $verif_complete = 0; // set to 1 to have get_feed_from_url() check every candidate with is_feed();
  42                       // execution then takes longer
  43  
  /**
   * Very lightweight check of whether a URL points at a syndication feed.
   *
   * The document is fetched and the first tag matched by the detection
   * pattern (a tag name immediately followed by a space) decides the result.
   *
   * NOTE: because the pattern requires a space right after the tag name, a
   * namespaced root such as <rdf:RDF ...> or an attribute-less <feed> is
   * not recognised.
   *
   * @param string $url Address (or stream path) of the document to inspect.
   * @return string|int "rss", "atom" or "rdf" when the first matched tag is a
   *                    feed root; otherwise a falsy value ('' when SPIP's
   *                    recuperer_page() is available, 0 in the fopen fallback).
   */
  function is_feed($url){
      $pattern = "/<(\w*) .*/";
      // root tag name => feed type reported to the caller
      $types = array("rss" => "rss", "feed" => "atom", "rdf" => "rdf");

      # SPIP method
      if (function_exists('recuperer_page')) {
          $feed = recuperer_page($url);
          // Fix: inspect the page that was just fetched. The previous code
          // matched against an undefined variable, so nothing was ever detected.
          if (preg_match($pattern, $feed, $matches) && isset($types[$matches[1]])) {
              return $types[$matches[1]];
          }
          return '';
      }

      $fp = @fopen($url, "r");
      if (!$fp) {
          return 0;
      }

      // Read line by line; the first line holding a matching tag settles the answer.
      $result = 0;
      while (!feof($fp)) {
          $buffer = fgets($fp, 4096);
          if ($buffer === false) {
              break; // read error or nothing left to read
          }
          if (preg_match($pattern, $buffer, $matches)) {
              if (isset($types[$matches[1]])) {
                  $result = $types[$matches[1]];
              }
              break;
          }
      }

      // Single exit point: the handle is closed on every path, and a document
      // without any matching tag yields 0 rather than an implicit null.
      fclose($fp);
      return $result;
  }
  82  
  83  /*****************test is_feed******************************
  84  echo is_feed("http://spip-contrib.net/backend.php3") . "<br>"; //retourne rss
  85  echo is_feed("http://liberation.fr/rss.php") . "<br>"; //retourne rss
  86  echo is_feed("http://liberation.fr/rss.php") . "<br>"; //retourne rss
  87  echo is_feed("http://willy.boerland.com/myblog/atom/feed") //retourne atom
  88  echo is_feed("http://spip.net/") . "<br>"; //retourne 0
  89  //pas trouvé d'exemples avec rdf, j'ai encore du mal à saisir ce que rdf apporte de plus que rss
  90  //mais bon je n'ai pas approfondi
  91  ************************************************************/
  92  
  /**
   * Collects candidate feed URLs from the <link> and <a> tags of a page.
   *
   * Every <link ...> tag, then every <a ...> tag, whose text contains "rss",
   * "rdf", "atom" or "xml" contributes its href value. Hrefs that are not
   * absolute http(s) URLs are made absolute with concat_url(). When the
   * global $verif_complete is truthy, a candidate is kept only if is_feed()
   * accepts it.
   *
   * @param string       $url    Page address; "http://www." is prepended when
   *                             it does not start with http:// or https://.
   * @param string|false $buffer Page content if already fetched; when falsy
   *                             the page is downloaded from $url.
   * @return array List of URLs (from <link> tags first, then from <a> tags);
   *               empty when nothing was found or the page could not be read.
   */
  function get_feed_from_url($url, $buffer=false){
      global $verif_complete;

      // Fix: https:// is an absolute URL too; it used to be rewritten to
      // "http://www.https://...".
      if (!preg_match("/^https?:\/\//i", $url)) $url = "http://www." . $url;
      if (!$buffer) $buffer = @file_get_contents($url);

      $feed_list = array();
      if (!$buffer) {
          return $feed_list; // download failed or empty page: nothing to scan
      }

      // The same extraction is applied to both kinds of tags, links first.
      foreach (array("link", "a") as $tag_name) {
          if (!preg_match_all("/<" . $tag_name . " [^>]*>/i", $buffer, $tags)) {
              continue;
          }
          foreach ($tags[0] as $tag) {
              // Is one of the feed keywords present anywhere in the tag?
              if (!preg_match("/rss|rdf|atom|xml/", $tag)) {
                  continue;
              }
              // Pull out the href value. The match is case-insensitive like the
              // tag match, and stops at '>' so an unquoted value does not
              // swallow the end of the tag.
              if (!preg_match("/href=[\"']?([^\s\"'>]*)/i", $tag, $href)) {
                  continue;
              }
              $candidate = $href[1];
              if ($candidate === '') {
                  continue; // empty href carries no address
              }
              // Anything that is not an absolute http(s) URL is resolved against the page.
              if (!preg_match("/^https?:\/\//i", $candidate)) {
                  $candidate = concat_url($url, $candidate);
              }
              if (!$verif_complete || is_feed($candidate)) {
                  $feed_list[] = $candidate;
              }
          }
      }
      return $feed_list;
  }
 152  /************************************ getFeed ****************************
 153  print_r (get_feed_from_url("spip-contrib.net"));
 154  print_r (get_feed_from_url("http://liberation.fr/"));
 155  print_r (get_feed_from_url("cnn.com"));
 156  print_r (get_feed_from_url("http://willy.boerland.com/myblog/"));
 157  *****************************    Résultat *****************************************
 158  Array
 159  (
 160      [0] => http://www.spip-contrib.net/backend.php3
 161  )
 162  Array
 163  (
 164      [0] => http://www.liberation.fr/rss.php
 165  )
 166  Array
 167  (
 168      [0] => http://rss.cnn.com/rss/cnn_topstories.rss
 169      [1] => http://rss.cnn.com/rss/cnn_latest.rss
 170      [2] => http://www.cnn.com/services/rss/
 171      [3] => http://www.cnn.com/services/rss/
 172      [4] => http://www.cnn.com/services/rss/
 173  )
 174  Array
 175  (
 176      [0] => http://willy.boerland.com/myblog/atom/feed
 177      [1] => http://willy.boerland.com/myblog/blogapi/rsd
 178      [2] => http://willy.boerland.com/myblog/rss.xml
 179      [3] => http://willy.boerland.com/myblog/node/feed
 180  )
 181  ************************************************************************/
 182  
 /**
  * Joins a base URL and a path, taking care of duplicated slashes.
  *
  * When SPIP's suivre_lien() is available the work is delegated to it.
  * Otherwise the two parts are glued with a "/", every run of slashes is
  * reduced to a single one, and the "//" following an http or https scheme
  * is put back. The fallback is plain concatenation, not full relative-URL
  * resolution.
  *
  * @param string $url1 Base URL.
  * @param string $path Path or relative link to append.
  * @return string The combined URL.
  */
 function concat_url($url1, $path){
     # SPIP method
     if (function_exists('suivre_lien')) {
         return suivre_lien($url1, $path);
     }

     $url = $url1 . "/" . $path;
     // Gluing the parts may have produced "//", "///" or longer runs:
     // collapse each run, whatever its length, in one pass.
     $url = preg_replace('#/{2,}#', '/', $url);
     // The collapse also flattened the scheme separator; restore it.
     // Fix: https is handled as well as http.
     return preg_replace('#(https?):/#i', '$1://', $url);
 }
 198  
 199  /****************************test concat**********************
 200  echo concat_url("http://spip.net" , "ecrire")."<br>";
 201  echo concat_url("http://spip.net/" , "ecrire")."<br>";
 202  echo concat_url("http://spip.net" , "/ecrire")."<br>";
 203  echo concat_url("http://spip.net/" , "/ecrire")."<br>";
 204  *************************************************************/
 205  
 206  
 207  
 208  
 209  ?>
Code source de SPIP 1.8.3

/ecrire/ -> feedfinder.php (source)