[ Index ] |
|
Code source de Cr@wltr@ck 2.2.1 |
<?php
//----------------------------------------------------------------------
//  CrawlTrack 2.2.1
//----------------------------------------------------------------------
// Crawler Tracker for website
//----------------------------------------------------------------------
// Author: Jean-Denis Brun
//----------------------------------------------------------------------
// Website: www.crawltrack.fr
//----------------------------------------------------------------------
// That script is distributed under GNU GPL license
//----------------------------------------------------------------------
// file: cleaning-double-entry.php
//----------------------------------------------------------------------
//
// Purpose: delete double entries from crawlt_visits_human. Two rows are a
// double entry when they share the same site, keyword, crawler and page and
// their dates are less than 5 minutes apart. Only rows dated within the
// last hour are examined.
//
// Requirements: $connexion and sql_quote() are not defined in this file;
// they must be provided by the including script.
// NOTE(review): presumably $connexion is an open mysql link resource and
// sql_quote() escapes a value without adding the surrounding quotes -
// confirm against their definitions.

//initialize array
$testunique = array();   // one composite key per fetched visit
$table = array();        // id_visit of each fetched visit (same index as $testunique)
$date = array();         // Unix timestamp of each fetched visit (same index as $testunique)
$idtosuppress = array(); // id_visit values to delete

//update the crawlt_config table to enter the last cleaning date (now - 1 hour)
$datecleaning = date("Y-m-d H:i:s", (strtotime("now") - 3600));
$sqlupdate = "UPDATE crawlt_config SET datelastcleaning='" . sql_quote($datecleaning) . "'";
$requeteupdate = mysql_query($sqlupdate, $connexion) or die("MySQL query error");

/*cleaning of the crawlt_visits_human table
to suppress double entry (same search engine, same keyword, same site, same page view, with less than 5mn between visit)
among the visits dated after $datecleaning (now - 1 hour)*/
$sqlcleaning = "SELECT id_visit,crawlt_site_id_site,keyword,crawlt_id_crawler, date, crawlt_id_page FROM crawlt_visits_human, crawlt_keyword
WHERE crawlt_visits_human.crawlt_keyword_id_keyword = crawlt_keyword.id_keyword
AND date >'" . sql_quote($datecleaning) . "'";

$requetecleaning = mysql_query($sqlcleaning, $connexion) or die("MySQL query error");
$visitstotal = mysql_num_rows($requetecleaning);
$somethingtosuppress = 0;

if ($visitstotal >= 1) {
    while ($ligne = mysql_fetch_row($requetecleaning)) {
        // Join the key parts with '|'. Without a separator, different
        // tuples collapse to the same key (site 1 + keyword "1foo" versus
        // site 11 + keyword "foo"). urlencode() never emits a literal '|'
        // (it becomes %7C), so the keyword cannot forge a separator.
        // NOTE(review): assumes the three id columns never contain '|'
        // (they look like numeric ids) - confirm against the schema.
        $testunique[] = $ligne[1] . '|' . urlencode($ligne[2]) . '|' . $ligne[3] . '|' . $ligne[5];
        $table[] = $ligne[0];
        $date[] = strtotime($ligne[4]);
    }

    // Map each composite key to the index of its first occurrence. Every
    // later row with the same key is compared with that first occurrence
    // only, which is the rule the original array_unique()/array_diff_assoc()
    // code applied. The array-key lookup is an exact string match (the key
    // always contains '|', so it is never treated as a number) and replaces
    // the former nested loop and its loose '==' comparison.
    $firstseen = array();
    foreach ($testunique as $i => $value) {
        if (!isset($firstseen[$value])) {
            $firstseen[$value] = $i;
            continue;
        }

        // Repeated key: a double entry only if less than 5 minutes (300 s)
        // away from the first occurrence.
        if (abs($date[$i] - $date[$firstseen[$value]]) < 300) {
            $idtosuppress[] = $table[$i];
            $somethingtosuppress = 1;
        }
    }

    if ($somethingtosuppress == 1) {
        //request to suppress double entry in the visit table
        $listidtosuppress = implode("','", $idtosuppress);
        $sqlsuppress = "DELETE FROM crawlt_visits_human WHERE id_visit IN ('$listidtosuppress')";
        $requetesuppress = mysql_query($sqlsuppress, $connexion) or die("MySQL query error");
    }
}

?>
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Thu Sep 6 14:14:11 2007 | par Balluche grâce à PHPXref 0.7 |