[ Index ] |
|
Code source de GeekLog 1.4.1 |
<?php

/**
 * File: SLVbase.class.php
 * Spam Link Verification (SLV) Base Class
 *
 * Copyright (C) 2006 by the following authors:
 * Author   Dirk Haun   dirk AT haun-online DOT de
 *
 * Licensed under the GNU General Public License
 *
 * $Id: SLVbase.class.php,v 1.5 2006/12/10 08:57:49 dhaun Exp $
 */

// Refuse to run when this file is requested directly instead of being included.
if (strpos ($_SERVER['PHP_SELF'], 'SLVbase.class.php') !== false) {
    die ('This file can not be used on its own!');
}

/**
 * Sends posts to SLV (linksleeve.org) for examination
 *
 * Only the URLs found in a post are submitted; see prepareLinks().
 *
 * @author Dirk Haun     dirk AT haun-online DOT de
 *
 * based on the works of Tom Willet (Spam-X) and Russ Jones (SLV)
 */

class SLVbase {

    /** @var boolean when true, debugging is switched on for the XML-RPC client */
    var $_debug = false;

    /** @var boolean when true, outgoing links and "no spam" results are logged */
    var $_verbose = false;

    /**
     * Constructor
     *
     * Resets both flags to their defaults (off).
     */
    function SLVbase()
    {
        $this->_debug = false;
        $this->_verbose = false;
    }

    /**
     * Check for spam links
     *
     * @param    string  $post   post to check for spam
     * @return   boolean         true = spam found, false = no spam
     *
     * Note: Also returns 'false' in case of problems communicating with SLV.
     *       Error messages are logged in Geeklog's error.log
     *
     */
    function CheckForSpam ($post)
    {
        global $_SPX_CONF;

        $retval = false;

        if (empty ($post)) {
            return $retval;
        }

        $links = $this->prepareLinks ($post);
        if (empty ($links)) {
            // nothing left to verify (no links, or all of them whitelisted)
            return $retval;
        }

        // Load the PEAR XML-RPC classes only once we know a request is needed.
        require_once ('XML/RPC.php');

        if (!isset ($_SPX_CONF['timeout'])) {
            $_SPX_CONF['timeout'] = 5; // seconds
        }

        if ($this->_verbose) {
            SPAMX_log ("Sending to SLV: $links");
        }

        $params = array (new XML_RPC_Value ($links, 'string'));
        $msg = new XML_RPC_Message ('slv', $params);
        $cli = new XML_RPC_Client ('/slv.php', 'http://www.linksleeve.org');

        if ($this->_debug) {
            // was "$client->setDebug (1)": $client is never defined in this
            // method, so enabling debug mode aborted with a fatal error
            $cli->setDebug (1);
        }

        $resp = $cli->send ($msg, $_SPX_CONF['timeout']);
        if (!$resp) {
            COM_errorLog ('Error communicating with SLV: ' . $cli->errstr
                          . '; Message was ' . $msg->serialize());
        } else if ($resp->faultCode ()) {
            COM_errorLog ('Error communicating with SLV. Fault code: '
                          . $resp->faultCode() . ', Fault reason: '
                          . $resp->faultString() . '; Message was '
                          . $msg->serialize());
        } else {
            $val = $resp->value();
            // note that SLV returns '1' for acceptable posts and '0' for spam
            if ($val->scalarval() != '1') {
                $retval = true;
                SPAMX_log ("SLV: spam detected");
            } else if ($this->_verbose) {
                SPAMX_log ("SLV: no spam detected");
            }
        }

        return $retval;
    }

    /**
     * Check whitelist
     *
     * Check against our whitelist of sites not to report to SLV. Note that
     * URLs starting with $_CONF['site_url'] have already been removed earlier.
     *
     * Every whitelist entry is used as a case-insensitive regular expression.
     * Matching links are not removed from the array but overwritten with an
     * empty string, so the array keys stay untouched.
     *
     * @param    array   &$links     array of URLs from a post
     * @return   void    ($links is passed by reference and modified in place)
     *
     */
    function checkWhitelist (&$links)
    {
        global $_TABLES;

        $result = DB_query ("SELECT value FROM {$_TABLES['spamx']} WHERE name='SLVwhitelist'", 1);
        $nrows = DB_numRows ($result);

        for ($i = 0; $i < $nrows; $i++) {
            $A = DB_fetchArray ($result);
            // '#' is the pattern delimiter used below, so escape it in the entry
            $val = str_replace ('#', '\\#', $A['value']);

            foreach ($links as $key => $link) {
                if (!empty ($link) && preg_match ("#$val#i", $link)) {
                    $links[$key] = '';
                }
            }
        }
    }

    /**
     * Extract links
     *
     * Extracts all the links from a post; expects HTML links, i.e. <a> tags
     *
     * Links that start with $_CONF['site_url'] are skipped.
     *
     * @param    string  $comment    The post to check
     * @return   array               All the URLs in the post
     *
     */
    function getLinks ($comment)
    {
        global $_CONF;

        $links = array();

        // An empty needle makes strpos() match at offset 0 on PHP 8 (and emit
        // a warning on older versions), which would discard every link - so
        // only compare against the site URL when one is actually configured.
        $site_url = '';
        if (!empty ($_CONF['site_url'])) {
            $site_url = $_CONF['site_url'];
        }

        preg_match_all( "/<a[^>]*href=[\"']([^\"']*)[\"'][^>]*>(.*?)<\/a>/i",
                        $comment, $matches );
        foreach ($matches[1] as $url) {
            if (($site_url != '') && (strpos ($url, $site_url) === 0)) {
                // skip links to our own site
                continue;
            }
            $links[] = $url;
        }

        return $links;
    }

    /**
     * Extract only the links from the post
     *
     * SLV has a problem with non-ASCII character sets, so we feed it the URLs
     * only. We also remove all URLs containing our site's URL.
     *
     * Since we don't know if the post is in HTML or plain ASCII, we run it
     * through getLinks() twice.
     *
     * @param    string  $comment    The post to check
     * @return   string              All the URLs in the post, sep. by linefeeds
     *
     */
    function prepareLinks ($comment)
    {
        $linklist = '';

        // some spam posts have extra backslashes
        $comment = stripslashes ($comment);

        // some spammers have yet to realize that we're not supporting BBcode
        // but since we want the URLs, convert it here ...
        $comment = preg_replace ('/\[url=([^\]]*)\]/i', '<a href="\1">',
                                 $comment);
        $comment = str_replace (array ('[/url]', '[/URL]'),
                                array ('</a>',   '</a>'  ), $comment);

        // get all links from <a href="..."> tags
        $links = $this->getLinks ($comment);

        // strip all HTML, then get all the plain text links
        $comment = COM_makeClickableLinks (strip_tags ($comment));

        // Both lists are indexed from 0, so the array union operator (+) would
        // silently drop every plain text link whose index already exists in
        // the first list. Append instead, and drop URLs found by both passes.
        $links = array_unique (array_merge ($links, $this->getLinks ($comment)));

        if (count ($links) > 0) {
            $this->checkWhitelist ($links);

            // checkWhitelist() blanks whitelisted entries instead of removing
            // them; leave those out so that no empty lines are sent to SLV
            $remaining = array();
            foreach ($links as $link) {
                if ($link != '') {
                    $remaining[] = $link;
                }
            }
            $linklist = implode ("\n", $remaining);
        }

        return trim ($linklist);
    }
}

?>
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Wed Nov 21 12:27:40 2007 | par Balluche grâce à PHPXref 0.7 |
![]() |