[ Index ]
 

Code source de GeekLog 1.4.1

Accédez au Source d'autres logiciels libres

title

Body

[fermer]

/plugins/spamx/ -> SLVbase.class.php (source)

   1  <?php
   2  
   3  /**
   4  * File: SLVbase.class.php
   5  * Spam Link Verification (SLV) Base Class
   6  *
   7  * Copyright (C) 2006 by the following authors:
   8  * Author        Dirk Haun       dirk AT haun-online DOT de
   9  *
  10  * Licensed under the GNU General Public License
  11  *
  12  * $Id: SLVbase.class.php,v 1.5 2006/12/10 08:57:49 dhaun Exp $
  13  */
  14  
  15  if (strpos ($_SERVER['PHP_SELF'], 'SLVbase.class.php') !== false) {
  16      die ('This file can not be used on its own!');
  17  }
  18  
  19  /**
  20  * Sends posts to SLV (linksleeve.org) for examination
  21  *
  22  * @author Dirk Haun     dirk AT haun-online DOT de
  23  *
  24  * based on the works of Tom Willett (Spam-X) and Russ Jones (SLV)
  25  */
  26  
  27  class SLVbase {
  28  
  29      var $_debug = false;
  30  
  31      var $_verbose = false;
  32  
  33      /**
  34      * Constructor
  35      */
  36      function SLVbase()
  37      {
  38          $this->_debug = false;
  39          $this->_verbose = false;
  40      }
  41  
  42      /**
  43      * Check for spam links
  44      *
  45      * @param    string  $post   post to check for spam
  46      * @return   boolean         true = spam found, false = no spam
  47      *
  48      * Note: Also returns 'false' in case of problems communicating with SLV.
  49      *       Error messages are logged in Geeklog's error.log
  50      *
  51      */
  52      function CheckForSpam ($post)
  53      {
  54          global $_SPX_CONF;
  55  
  56          require_once ('XML/RPC.php');
  57  
  58          $retval = false;
  59  
  60          if (empty ($post)) {
  61              return $retval;
  62          }
  63  
  64          $links = $this->prepareLinks ($post);
  65          if (empty ($links)) {
  66              return $retval;
  67          }
  68  
  69          if (!isset ($_SPX_CONF['timeout'])) {
  70              $_SPX_CONF['timeout'] = 5; // seconds
  71          }
  72  
  73          if ($this->_verbose) {
  74              SPAMX_log ("Sending to SLV: $links");
  75          }
  76  
  77          $params = array (new XML_RPC_Value ($links, 'string'));
  78          $msg = new XML_RPC_Message ('slv', $params);
  79          $cli = new XML_RPC_Client ('/slv.php', 'http://www.linksleeve.org');
  80  
  81          if ($this->_debug) {
  82              $client->setDebug (1);
  83          }
  84  
  85          $resp = $cli->send ($msg, $_SPX_CONF['timeout']);
  86          if (!$resp) {
  87              COM_errorLog ('Error communicating with SLV: ' . $cli->errstr
  88                            . '; Message was ' . $msg->serialize());
  89          } else if ($resp->faultCode ()) {
  90              COM_errorLog ('Error communicating with SLV. Fault code: '
  91                            . $resp->faultCode() . ', Fault reason: '
  92                            . $resp->faultString() . '; Message was '
  93                            . $msg->serialize());
  94          } else {
  95              $val = $resp->value();
  96              // note that SLV returns '1' for acceptable posts and '0' for spam
  97              if ($val->scalarval() != '1') {
  98                  $retval = true;
  99                  SPAMX_log ("SLV: spam detected");
 100              } else if ($this->_verbose) {
 101                  SPAMX_log ("SLV: no spam detected");
 102              }
 103          }
 104  
 105          return $retval;
 106      }
 107  
 108      /**
 109      * Check whitelist
 110      *
 111      * Check against our whitelist of sites not to report to SLV. Note that
 112      * URLs starting with $_CONF['site_url'] have already been removed earlier.
 113      *
 114      * @param    array   &$links     array of URLs from a post
 115      * @return   void ($links is passed by reference and modified in place)
 116      *
 117      */
 118      function checkWhitelist (&$links)
 119      {
 120          global $_TABLES;
 121  
 122          $result = DB_query ("SELECT value FROM {$_TABLES['spamx']} WHERE name='SLVwhitelist'", 1);
 123          $nrows = DB_numRows ($result);
 124  
 125          for ($i = 0; $i < $nrows; $i++) {
 126              $A = DB_fetchArray ($result);
 127              $val = $A['value'];
 128              $val = str_replace ('#', '\\#', $val);
 129  
 130              foreach ($links as $key => $link) {
 131                  if (!empty ($link)) {
 132                      if (preg_match ("#$val#i", $link)) {
 133                          $links[$key] = '';
 134                      }
 135                  }
 136              }
 137          }
 138      }
 139  
 140      /**
 141      * Extract links
 142      *
 143      * Extracts all the links from a post; expects HTML links, i.e. <a> tags
 144      *
 145      * @param    string  $comment    The post to check
 146      * @return   array               All the URLs in the post
 147      *
 148      */
 149      function getLinks ($comment)
 150      {
 151          global $_CONF;
 152  
 153          $links = array();
 154  
 155          preg_match_all( "/<a[^>]*href=[\"']([^\"']*)[\"'][^>]*>(.*?)<\/a>/i",
 156                          $comment, $matches );
 157          for ($i = 0; $i < count ($matches[0]); $i++) {
 158              $url = $matches[1][$i];
 159              if (strpos ($url, $_CONF['site_url']) === 0) {
 160                  // skip links to our own site
 161                  continue;
 162              } else {
 163                  $links[] = $url;
 164              }
 165          }
 166  
 167          return $links;
 168      }
 169  
 170      /**
 171      * Extract only the links from the post
 172      *
 173      * SLV has a problem with non-ASCII character sets, so we feed it the URLs
 174      * only. We also remove all URLs containing our site's URL.
 175      *
 176      * Since we don't know if the post is in HTML or plain ASCII, we run it
 177      * through getLinks() twice.
 178      *
 179      * @param    string  $comment    The post to check
 180      * @return   string              All the URLs in the post, sep. by linefeeds
 181      *
 182      */
 183      function prepareLinks ($comment)
 184      {
 185          $links = array();
 186          $linklist = '';
 187  
 188          // some spam posts have extra backslashes
 189          $comment = stripslashes ($comment);
 190  
 191          // some spammers have yet to realize that we're not supporting BBcode
 192          // but since we want the URLs, convert it here ...
 193          $comment = preg_replace ('/\[url=([^\]]*)\]/i', '<a href="\1">',
 194                                   $comment);
 195          $comment = str_replace (array ('[/url]', '[/URL]'),
 196                                  array ('</a>',   '</a>'  ), $comment);
 197  
 198          // get all links from <a href="..."> tags
 199          $links = $this->getLinks ($comment);
 200  
 201          // strip all HTML, then get all the plain text links
 202          $comment = COM_makeClickableLinks (strip_tags ($comment));
 203          $links += $this->getLinks ($comment);
 204  
 205          if (count ($links) > 0) {
 206              $this->checkWhitelist ($links);
 207              $linklist = implode ("\n", $links);
 208          }
 209  
 210          return trim ($linklist);
 211      }
 212  }
 213  
 214  ?>


Généré le : Wed Nov 21 12:27:40 2007 par Balluche grâce à PHPXref 0.7
  Clicky Web Analytics