[ Index ]
 

Code source de Cr@wltr@ck 2.2.1

Accédez au Source d'autres logiciels libres | Soutenez Angelica Josefina !

title

Body

[fermer]

/include/ -> cleaning-double-entry.php (source)

   1  <?php
   2  //----------------------------------------------------------------------

   3  //  CrawlTrack 2.2.1

   4  //----------------------------------------------------------------------

   5  // Crawler Tracker for website

   6  //----------------------------------------------------------------------

   7  // Author: Jean-Denis Brun

   8  //----------------------------------------------------------------------

   9  // Website: www.crawltrack.fr

  10  //----------------------------------------------------------------------

  11  // That script is distributed under GNU GPL license

  12  //----------------------------------------------------------------------

  13  // file: cleaning-double-entry.php

  14  //----------------------------------------------------------------------

  15  
  // Purpose: delete duplicate rows from crawlt_visits_human among the visits
  // recorded during the last hour.
  //
  // A row is a duplicate when an earlier row of the same result set has the
  // same site, keyword, crawler (search engine) and page, and the two visits
  // are less than 5 minutes (300 s) apart.
  //
  // This script is written to be included: it uses $connexion as the link
  // passed to mysql_query() and calls sql_quote(); neither is defined in this
  // file. NOTE(review): sql_quote() is presumably a string escaper - confirm.

  // Parallel arrays, all indexed by the position of the row in the result set.
  $testunique = array();    // composite key of each fetched visit
  $table = array();         // id_visit of each fetched visit
  $date = array();          // Unix timestamp of each fetched visit
  $idtosuppress = array();  // id_visit values that will be deleted

  // Record the new "last cleaning" date (now - 1 hour) in crawlt_config.
  // The same value is the lower bound of the window scanned below.
  $datecleaning = date("Y-m-d H:i:s", time() - 3600);
  $sqlupdate = "UPDATE crawlt_config SET datelastcleaning='" . sql_quote($datecleaning) . "'";
  $requeteupdate = mysql_query($sqlupdate, $connexion) or die("MySQL query error");

  // Fetch every human visit newer than $datecleaning together with its keyword.
  // ORDER BY makes "first occurrence" mean "earliest visit"; without it the
  // row that is kept would depend on an unspecified row order.
  $sqlcleaning = "SELECT id_visit, crawlt_site_id_site, keyword, crawlt_id_crawler, date, crawlt_id_page"
      . " FROM crawlt_visits_human, crawlt_keyword"
      . " WHERE crawlt_visits_human.crawlt_keyword_id_keyword = crawlt_keyword.id_keyword"
      . " AND date > '" . sql_quote($datecleaning) . "'"
      . " ORDER BY date, id_visit";

  $requetecleaning = mysql_query($sqlcleaning, $connexion) or die("MySQL query error");
  $visitstotal = mysql_num_rows($requetecleaning);
  $somethingtosuppress = 0;

  if ($visitstotal >= 1) {
      // composite key => index of the first row that produced it
      $firstseen = array();

      while ($ligne = mysql_fetch_row($requetecleaning)) {
          // $ligne: 0 id_visit, 1 site id, 2 keyword, 3 crawler id, 4 date, 5 page id
          //
          // The fields are joined with '|' so that two different tuples can
          // never collapse into the same key (e.g. site 1 + keyword "2" versus
          // site 12 + empty keyword). urlencode() never emits '|', so the
          // keyword cannot forge a separator; the id columns are assumed to be
          // numeric - TODO confirm against the schema.
          $key = $ligne[1] . '|' . urlencode($ligne[2]) . '|' . $ligne[3] . '|' . $ligne[5];

          $index = count($table);
          $testunique[] = $key;
          $table[] = $ligne[0];
          $date[] = strtotime($ligne[4]);

          if (!isset($firstseen[$key])) {
              // First visit with this key: it is the reference and is kept.
              $firstseen[$key] = $index;
          } elseif (abs($date[$index] - $date[$firstseen[$key]]) < 300) {
              // Same key as the reference visit and less than 5 minutes away.
              $idtosuppress[] = $table[$index];
              $somethingtosuppress = 1;
          }
      }

      if ($somethingtosuppress == 1) {
          // Delete all duplicates with a single request on the visit table.
          $listidtosuppress = implode("','", $idtosuppress);
          $sqlsuppress = "DELETE FROM crawlt_visits_human WHERE id_visit IN ('$listidtosuppress')";
          $requetesuppress = mysql_query($sqlsuppress, $connexion) or die("MySQL query error");
      }
  }
  75  
  76  ?>


Généré le : Thu Sep 6 14:14:11 2007 par Balluche grâce à PHPXref 0.7