[ Index ] |
|
Code source de b2evolution 2.1.0-beta |
<?php
/**
 * This file implements the basic Antispam plugin.
 *
 * This file is part of the b2evolution project - {@link http://b2evolution.net/}
 *
 * @copyright (c)2003-2007 by Francois PLANQUE - {@link http://fplanque.net/}
 * Parts of this file are copyright (c)2004-2006 by Daniel HAHLER - {@link http://thequod.de/contact}.
 *
 * {@internal License choice
 * - If you have received this file as part of a package, please find the license.txt file in
 *   the same folder or the closest folder above for complete license terms.
 * - If you have received this file individually (e-g: from http://evocms.cvs.sourceforge.net/)
 *   then you must choose one of the following licenses before using the file:
 *   - GNU General Public License 2 (GPL) - http://www.opensource.org/licenses/gpl-license.php
 *   - Mozilla Public License 1.1 (MPL) - http://www.opensource.org/licenses/mozilla1.1.php
 * }}
 *
 * {@internal Open Source relicensing agreement:
 * Daniel HAHLER grants Francois PLANQUE the right to license
 * Daniel HAHLER's contributions to this file and the b2evolution project
 * under any OSI approved OSS license (http://www.opensource.org/licenses/).
 * }}
 *
 * @package plugins
 *
 * {@internal Below is a list of authors who have contributed to design/coding of this file: }}
 * @author blueyed: Daniel HAHLER - {@link http://daniel.hahler.de/}
 *
 * @version $Id: _basic_antispam.plugin.php,v 1.30 2007/06/25 11:02:32 fplanque Exp $
 */
if( !defined('EVO_MAIN_INIT') ) die( 'Please, do not access this page directly.' );


/**
 * Basic Antispam Plugin
 *
 * This plugin doublechecks referers/referrers for Hit logging and trackbacks.
 *
 * @todo Ideas:
 *  - forbid cloned comments (same content) (on the same entry or all entries)
 *  - detect same/similar URLs in a short period (also look at author name: if it differs, it's more likely to be spam)
 */
class basic_antispam_plugin extends Plugin
{
	/**
	 * Variables below MUST be overriden by plugin implementations,
	 * either in the subclass declaration or in the subclass constructor.
	 */
	var $name = 'Basic Antispam';
	var $code = '';
	var $priority = 60;
	var $version = '2.0-dev';
	var $author = 'The b2evo Group';
	var $group = 'antispam';
	var $number_of_installs = 1;


	/**
	 * Init: set the translated short and long descriptions of the plugin.
	 */
	function PluginInit( & $params )
	{
		$this->short_desc = T_('Basic antispam methods');
		$this->long_desc = T_('This plugin provides basic methods to detect & block spam on referers, comments & trackbacks.');
	}


	/**
	 * Declare the plugin settings (type, label, note and default value of each).
	 *
	 * @return array
	 */
	function GetDefaultSettings()
	{
		return array(
				'allow_anon_comments' => array(
					'type' => 'checkbox',
					'label' => T_('Allow anonymous comments'),
					'note' => T_('Allow non-registered visitors to leave comments.'),
					'defaultvalue' => '1',
				),
				'check_dupes' => array(
					'type' => 'checkbox',
					'label' => T_('Detect feedback duplicates'),
					'note' => T_('Check this to check comments and trackback for duplicate content.'),
					'defaultvalue' => '1',
				),
				'max_number_of_links_feedback' => array(
					'type' => 'integer',
					'label' => T_('Feedback sensitivity to links'),
					'note' => T_('If a comment has more than this number of links in it, it will get 100 percent spam karma. -1 to disable it.'),
					'help' => '#set_max_number_of_links',
					'defaultvalue' => '4',
					'size' => 3,
				),
				'nofollow_for_hours' => array(
					'type' => 'integer',
					'label' => T_('Apply rel="nofollow"'),
					'note'=>T_('hours. For how long should rel="nofollow" be applied to comment links? (0 means never, -1 means always)'),
					'defaultvalue' => '-1', // use "nofollow" infinitely by default so lazy admins won't promote spam
					'size' => 5,
				),

				'check_url_referers' => array(
					'type' => 'checkbox',
					'label' => T_('Check referers for URL'),
					'note' => T_('Check refering pages, if they contain our URL. This may generate a lot of additional traffic!'),
					'defaultvalue' => '0',
				),

			);
	}


	/**
	 * We check if this is an anonymous visitor and do not allow comments, if we're setup
	 * to do so.
	 *
	 * @return string|NULL a message if commenting is refused, NULL otherwise
	 */
	function ItemCanComment( & $params )
	{
		if( ! is_logged_in() && ! $this->Settings->get('allow_anon_comments') )
		{
			return T_('Comments are not allowed from anonymous visitors.');
		}

		// return NULL
	}


	/**
	 * Handle max_number_of_links_feedback setting.
	 *
	 * Try to detect as many links as possible: every "http://", "https://" or "ftp://"
	 * occurrence in the comment content counts as one link.
	 *
	 * @return integer|float|NULL 100 if there are more links than allowed, 0 if there are none,
	 *         a proportional value in between; NULL if the check is disabled (setting is -1).
	 */
	function GetSpamKarmaForComment( & $params )
	{
		$max_links = $this->Settings->get('max_number_of_links_feedback');
		if( $max_links != -1 )
		{ // not deactivated:
			$count = preg_match_all( '~(https?|ftp)://~i', $params['Comment']->content, $matches );

			if( $count > $max_links )
			{
				return 100;
			}

			if( $count == 0 )
			{ // (also reached before the division below when the limit is 0)
				return 0;
			}

			return (100/$max_links) * $count;
		}
	}


	/**
	 * Disable/Enable events according to settings.
	 *
	 * "AppendHitLog" gets enabled according to check_url_referers setting.
	 * "BeforeTrackbackInsert" gets disabled, if we do not check for duplicate content.
	 *
	 * @return true
	 */
	function BeforeEnable()
	{
		if( $this->Settings->get('check_url_referers') )
		{
			$this->enable_event( 'AppendHitLog' );
		}
		else
		{
			$this->disable_event( 'AppendHitLog' );
		}

		if( ! $this->Settings->get('check_dupes') )
		{
			$this->disable_event( 'BeforeTrackbackInsert' );
		}
		else
		{
			$this->enable_event( 'BeforeTrackbackInsert' );
		}

		return true;
	}


	/**
	 * - Check for duplicate trackbacks.
	 */
	function BeforeTrackbackInsert( & $params )
	{
		if( $this->is_duplicate_comment( $params['Comment'] ) )
		{
			$this->msg( T_('The trackback seems to be a duplicate.'), 'error' );
		}
	}


	/**
	 * Check for duplicate comments.
	 */
	function BeforeCommentFormInsert( & $params )
	{
		if( $this->is_duplicate_comment( $params['Comment'] ) )
		{
			$this->msg( T_('The comment seems to be a duplicate.'), 'error' );
		}
	}


	/**
	 * If we use "makelink", handle nofollow rel attrib.
	 *
	 * @uses basic_antispam_plugin::apply_nofollow()
	 * @return false|NULL false if the author is not rendered as a link
	 */
	function FilterCommentAuthor( & $params )
	{
		if( ! $params['makelink'] )
		{
			return false;
		}

		$this->apply_nofollow( $params['data'], $params['Comment'] );
	}


	/**
	 * Handle nofollow in author URL (if it's made clickable)
	 *
	 * @uses basic_antispam_plugin::FilterCommentAuthor()
	 */
	function FilterCommentAuthorUrl( & $params )
	{
		$this->FilterCommentAuthor( $params );
	}


	/**
	 * Handle nofollow rel attrib in comment content.
	 *
	 * @uses basic_antispam_plugin::apply_nofollow()
	 */
	function FilterCommentContent( & $params )
	{
		$this->apply_nofollow( $params['data'], $params['Comment'] );
	}


	/**
	 * Add rel="nofollow" to all A tags in $data, if the "nofollow_for_hours" setting
	 * asks for it: 0 means never, -1 means always, any positive value means
	 * "only while the comment is younger than that many hours".
	 *
	 * @uses basic_antispam_plugin::add_nofollow_to_link()
	 * @param string HTML to filter (by reference, modified in place)
	 * @param Comment the comment the HTML belongs to (its date is used for the age check)
	 */
	function apply_nofollow( & $data, $Comment )
	{
		global $localtimenow;

		$hours = $this->Settings->get('nofollow_for_hours'); // 0=never, -1 always, otherwise for x hours

		if( $hours == 0 )
		{ // "never"
			return;
		}

		if( $hours > 0 // -1 is "always"
				&& mysql2timestamp( $Comment->date ) <= ( $localtimenow - $hours*3600 ) )
		{ // the comment is older than the configured period
			return;
		}

		// A regular method is used as callback instead of create_function(),
		// which is deprecated as of PHP 7.2 and has been removed in PHP 8.0:
		$data = preg_replace_callback( '~(<a\s)([^>]+)>~i', array( &$this, 'add_nofollow_to_link' ), $data );
	}


	/**
	 * Callback for {@link preg_replace_callback()}, used by {@link apply_nofollow()}.
	 *
	 * @param array match of '~(<a\s)([^>]+)>~i': [1] is the tag opening, [2] its attributes
	 * @return string the A tag, with "nofollow" merged into an existing rel attribute
	 *         or with a new rel="nofollow" attribute appended
	 */
	function add_nofollow_to_link( $m )
	{
		if( preg_match( '~\brel=([\'"])(.*?)\1~', $m[2], $match ) )
		{ // there is already a rel attrib:
			$rel_values = explode( ' ', $match[2] );

			if( ! in_array( 'nofollow', $rel_values ) )
			{
				$rel_values[] = 'nofollow';
			}

			return $m[1]
				.preg_replace(
					'~\brel=([\'"]).*?\1~',
					'rel=$1'.implode( ' ', $rel_values ).'$1',
					$m[2] )
				.'>';
		}

		return $m[1].$m[2].' rel="nofollow">';
	}


	/**
	 * Check if the deprecated hit_doublecheck_referer setting is set and then
	 * do not disable the AppendHitLog event. Also removes the old setting.
	 */
	function AfterInstall()
	{
		global $Settings;

		if( $Settings->get('hit_doublecheck_referer') )
		{ // old general settings, "transform it"
			$this->Settings->set( 'check_url_referers', '1' );
			$this->Settings->dbupdate();
		}

		$Settings->delete('hit_doublecheck_referer');
		$Settings->dbupdate();
	}


	/**
	 * Check if our Host+URI is in the referred page, preferrably through
	 * {@link register_shutdown_function()}.
	 *
	 * @return boolean true, if we handle {@link Hit::record_the_hit() recording of the Hit} ourself
	 */
	function AppendHitLog( & $params )
	{
		global $debug_no_register_shutdown;

		$Hit = & $params['Hit'];

		if( $Hit->referer_type != 'referer' )
		{
			return false;
		}

		if( empty($debug_no_register_shutdown) && function_exists( 'register_shutdown_function' ) )
		{ // register it as a shutdown function, because it will be slow!
			$this->debug_log( 'AppendHitLog: loading referering page.. (through register_shutdown_function())' );

			register_shutdown_function( array( &$this, 'double_check_referer' ), $Hit->referer ); // this will also call Hit::record_the_hit()
		}
		else
		{
			// flush now, so that the meat of the page will get shown before it tries to check back against the refering URL.
			flush();

			$this->debug_log( 'AppendHitLog: loading referering page..' );

			$this->double_check_referer($Hit->referer); // this will also call Hit::record_the_hit()
		}

		return true; // we handle recording
	}


	/**
	 * This function gets called (as a {@link register_shutdown_function() shutdown function}, if possible) and checks
	 * if the referering URL's content includes the current URL - if not it is probably spam!
	 *
	 * On success, this methods records the hit.
	 *
	 * @uses Hit::record_the_hit()
	 * @param string the referer URL to check
	 */
	function double_check_referer( $referer )
	{
		global $Hit, $ReqURI;

		if( $this->is_referer_linking_us( $referer, $ReqURI ) )
		{
			$Hit->record_the_hit();
		}

		return;
	}


	/**
	 * Check the content of a given URL (referer), if the requested URI (with different hostname variations)
	 * is present.
	 *
	 * NOTE(review): this makes the server fetch a URL that is supplied by the visitor (the referer).
	 * NOTE(review): the page is always requested in plain HTTP, on port 80 unless the URL names a port,
	 * even for "https" referers - confirm whether those should be fetched through "ssl://".
	 *
	 * @todo Use DB cache to avoid checking the same page again and again! (Plugin DB table)
	 *
	 * @param string the referer URL
	 * @param string URI to append to matching pattern for hostnames
	 * @return boolean
	 */
	function is_referer_linking_us( $referer, $uri )
	{
		global $ReqHost;

		if( empty($referer) )
		{
			return false;
		}

		// Load page content (max. 500kb), using fsockopen:
		$url_parsed = parse_url($referer);
		if( empty($url_parsed['scheme']) )
		{
			$url_parsed = parse_url('http://'.$referer);
		}

		if( empty($url_parsed['host']) )
		{ // parse_url() failed or found no host: there is nothing we could connect to
			$this->debug_log( 'is_referer_linking_us(): could not parse referer «'.$referer.'»' );
			return false;
		}

		$host = $url_parsed['host'];
		$port = ( empty($url_parsed['port']) ? 80 : $url_parsed['port'] );
		$path = empty($url_parsed['path']) ? '/' : $url_parsed['path'];
		if( ! empty($url_parsed['query']) )
		{
			$path .= '?'.$url_parsed['query'];
		}
		// The path goes into the request line below: make sure it cannot add lines to the request.
		$path = str_replace( array( "\r", "\n" ), '', $path );

		$fp = @fsockopen($host, $port, $errno, $errstr, 30);
		if( ! $fp )
		{ // could not access referring page
			$this->debug_log( 'is_referer_linking_us(): could not access «'.$referer.'» (host: '.$host.'): '.$errstr.' (#'.$errno.')' );
			return false;
		}

		// Set timeout for data:
		if( function_exists('stream_set_timeout') )
			stream_set_timeout( $fp, 20 ); // PHP 4.3.0
		else
			socket_set_timeout( $fp, 20 ); // PHP 4

		// Send request:
		$out = "GET $path HTTP/1.0\r\n";
		$out .= "Host: $host:$port\r\n";
		$out .= "Connection: Close\r\n\r\n";
		fwrite($fp, $out);

		// Skip headers:
		$i = 0;
		$source_charset = 'iso-8859-1'; // default
		while( ($s = fgets($fp, 4096)) !== false )
		{
			$i++;
			// The headers end with an empty line; accept a bare "\n" as well as "\r\n",
			// so that the body does not get eaten as "headers" with lenient servers.
			if( rtrim( $s, "\r\n" ) === '' || $i > 100 /* max 100 head lines */ )
			{
				break;
			}
			if( preg_match('~^Content-Type:.*?charset=([\w-]+)~i', $s, $match ) )
			{
				$source_charset = $match[1];
			}
		}

		// Get the refering page's content
		$content_ref_page = '';
		$bytes_read = 0;
		while( ($s = fgets($fp, 4096)) !== false )
		{
			$content_ref_page .= $s;
			$bytes_read += strlen($s);
			if( $bytes_read > 512000 )
			{ // do not pull more than 500kb of data!
				break;
			}
		}
		fclose($fp);

		if( ! strlen($content_ref_page) )
		{
			$this->debug_log( 'is_referer_linking_us(): empty $content_ref_page ('.bytesreadable($bytes_read).' read)' );
			return false;
		}


		/**
		 * IDNA converter class
		 */
		load_funcs('_ext/idna/_idna_convert.class.php');
		$IDNA = new Net_IDNA_php4();

		$have_idn_name = false;

		// Build the search pattern:
		// We match for basically for 'href="[SERVER][URI]', where [SERVER] is a list of possible hosts (especially IDNA)
		$search_pattern = '~\shref=["\']?https?://(';
		$possible_hosts = array( $_SERVER['HTTP_HOST'] );
		if( $_SERVER['SERVER_NAME'] != $_SERVER['HTTP_HOST'] )
		{
			$possible_hosts[] = $_SERVER['SERVER_NAME'];
		}
		$search_pattern_hosts = array();
		foreach( $possible_hosts as $l_host )
		{
			if( preg_match( '~^([^.]+\.)(.*?)([^.]+\.[^.]+)$~', $l_host, $match ) )
			{ // we have subdomains in this hostname
				if( stristr( $match[1], 'www' ) )
				{ // search also for hostname without 'www.'
					$search_pattern_hosts[] = $match[2].$match[3];
				}
			}
			$search_pattern_hosts[] = $l_host;
		}
		// array_unique() preserves keys: re-index, so that the list has no gaps.
		$search_pattern_hosts = array_values( array_unique($search_pattern_hosts) );

		$idn_hosts = array();
		foreach( $search_pattern_hosts as $l_host )
		{ // add IDN, because this could be linked:
			$l_idn_host = $IDNA->decode( $l_host ); // the decoded puny-code ("xn--..") name (utf8)

			if( $l_idn_host != $l_host )
			{
				$have_idn_name = true;
				$idn_hosts[] = $l_idn_host;
			}
		}
		$search_pattern_hosts = array_merge( $search_pattern_hosts, $idn_hosts );

		// add hosts to pattern, preg_quoted
		// (iterating over the values, so that every host gets quoted, whatever its key is)
		$quoted_hosts = array();
		foreach( $search_pattern_hosts as $l_host )
		{
			$quoted_hosts[] = preg_quote( $l_host, '~' );
		}
		$search_pattern .= implode( '|', $quoted_hosts ).')';
		if( empty($uri) )
		{ // host(s) should end with "/", "'", '"', "?" or whitespace
			$search_pattern .= '[/"\'\s?]';
		}
		else
		{
			$search_pattern .= preg_quote($uri, '~');
			// URI should end with "'", '"' or whitespace
			$search_pattern .= '["\'\s]';
		}
		$search_pattern .= '~i';

		if( $have_idn_name )
		{ // Convert charset to UTF-8, because the decoded domain name is UTF-8, too:
			if( can_convert_charsets( 'utf-8', $source_charset ) )
			{
				$content_ref_page = convert_charset( $content_ref_page, 'utf-8', $source_charset );
			}
			else
			{
				$this->debug_log( 'is_referer_linking_us(): warning: cannot convert charset of referring page' );
			}
		}

		if( preg_match( $search_pattern, $content_ref_page ) )
		{
			$this->debug_log( 'is_referer_linking_us(): found current URL in page ('.bytesreadable($bytes_read).' read)' );

			return true;
		}

		if( strpos( $referer, $ReqHost ) === 0 && ! empty($uri) )
		{ // Referer is the same host.. just search for $uri
			if( strpos( $content_ref_page, $uri ) !== false )
			{
				$this->debug_log( 'is_referer_linking_us(): found current URI in page ('.bytesreadable($bytes_read).' read)' );

				return true;
			}
		}
		$this->debug_log( 'is_referer_linking_us(): '.sprintf('did not find «%s» in «%s» (%s bytes read).', $search_pattern, $referer, bytesreadable($bytes_read) ) );

		return false;
	}


	/**
	 * Simple check for duplicate comment/content from same author
	 *
	 * A feedback is looked up on the same item, with the same content and either the same
	 * registered author or (for visitors/trackbacks) the same author name, email or URL.
	 *
	 * @param Comment
	 * @return mixed false if the "check_dupes" setting is off, otherwise the result of $DB->get_var()
	 *         for the comment_ID of a matching comment
	 */
	function is_duplicate_comment( $Comment )
	{
		global $DB;

		if( ! $this->Settings->get('check_dupes') )
		{
			return false;
		}

		// IDs get concatenated into the query unquoted: force them to be integers.
		$sql = '
				SELECT comment_ID
				  FROM T_comments
				 WHERE comment_post_ID = '.intval($Comment->item_ID);

		if( isset($Comment->author_user_ID) )
		{ // registered user:
			$sql .= ' AND comment_author_ID = '.intval($Comment->author_user_ID);
		}
		else
		{ // visitor (also trackback):
			$sql_ors = array();
			if( ! empty($Comment->author) )
			{
				$sql_ors[] = 'comment_author = '.$DB->quote($Comment->author);
			}
			if( ! empty($Comment->author_email) )
			{
				$sql_ors[] = 'comment_author_email = '.$DB->quote($Comment->author_email);
			}
			if( ! empty($Comment->author_url) )
			{
				$sql_ors[] = 'comment_author_url = '.$DB->quote($Comment->author_url);
			}

			if( ! empty($sql_ors) )
			{
				$sql .= ' AND ( '.implode( ' OR ', $sql_ors ).' )';
			}
		}

		$sql .= ' AND comment_content = '.$DB->quote($Comment->content).' LIMIT 1';

		return $DB->get_var( $sql, 0, 0, 'Checking for duplicate feedback content.' );
	}


	/**
	 * A little housekeeping: remove the obsolete "check_url_trackbacks" setting.
	 *
	 * @return true
	 */
	function PluginVersionChanged( & $params )
	{
		$this->Settings->delete('check_url_trackbacks');
		$this->Settings->dbupdate();
		return true;
	}

}


/*
 * $Log: _basic_antispam.plugin.php,v $
 * Revision 1.30  2007/06/25 11:02:32  fplanque
 * MODULES (refactored MVC)
 *
 * Revision 1.29  2007/04/26 00:11:05  fplanque
 * (c) 2007
 *
 * Revision 1.28  2007/04/20 02:53:13  fplanque
 * limited number of installs
 *
 * Revision 1.27  2007/01/30 19:55:04  blueyed
 * Return explictly true in PluginVersionChanged
 *
 * Revision 1.26  2006/12/26 03:19:12  fplanque
 * assigned a few significant plugin groups
 *
 * Revision 1.25  2006/12/21 16:14:25  blueyed
 * Basic Antispam Plugin:
 * - Use fsockopen instead of url fopen to get refering page contents
 * - Removed "check_url_trackbacks" setting: it has been unreliable and is against the trackback specs anyway. This is what pingbacks are for.
 * - Convert charset of the refering page contents, if we have a decoded/utf-8 encoded IDN
 * - Some improvements to matching pattern
 *
 * Revision 1.24  2006/11/24 18:27:27  blueyed
 * Fixed link to b2evo CVS browsing interface in file docblocks
 *
 * Revision 1.23  2006/07/10 20:19:31  blueyed
 * Fixed PluginInit behaviour. It now gets called on both installed and non-installed Plugins, but with the "is_installed" param appropriately set.
 *
 * Revision 1.22  2006/07/07 21:26:49  blueyed
 * Bumped to 1.9-dev
 *
 * Revision 1.21  2006/07/07 19:28:32  blueyed
 * Trans fix. "%" would need to be escaped.. :/
 *
 * Revision 1.20  2006/06/22 19:47:06  blueyed
 * "Block spam referers" as global option
 *
 * Revision 1.19  2006/06/16 21:30:57  fplanque
 * Started clean numbering of plugin versions (feel free do add dots...)
 *
 * Revision 1.18  2006/06/05 17:45:06  blueyed
 * Disable events at settings time, according to Settings checkboxes.
 *
 * Revision 1.17  2006/06/01 18:36:10  fplanque
 * no message
 *
 * Revision 1.16  2006/05/30 21:25:27  blueyed
 * todo-question
 *
 * Revision 1.15  2006/05/30 20:32:57  blueyed
 * Lazy-instantiate "expensive" properties of Comment and Item.
 *
 * Revision 1.14  2006/05/30 19:39:56  fplanque
 * plugin cleanup
 *
 * Revision 1.13  2006/05/30 00:18:29  blueyed
 * http://dev.b2evolution.net/todo.php?p=87686
 *
 * Revision 1.12  2006/05/29 21:13:19  fplanque
 * no message
 *
 * Revision 1.11  2006/05/29 21:03:07  fplanque
 * Also count links if < tags have been filtered before!
 *
 * Revision 1.10  2006/05/20 01:56:07  blueyed
 * ItemCanComment hook; "disable anonymous feedback" through basic antispam plugin
 *
 * Revision 1.9  2006/05/14 16:30:37  blueyed
 * SQL error fixed with empty visitor comments
 *
 * Revision 1.8  2006/05/12 21:35:24  blueyed
 * Apply karma by number of links in a comment. Note: currently the default is to not allow A tags in comments!
 *
 * Revision 1.7  2006/05/02 22:43:39  blueyed
 * typo
 *
 * Revision 1.6  2006/05/02 15:32:01  blueyed
 * Moved blocking of "spam referers" into basic antispam plugin: does not block backoffice requests in general and can be easily get disabled.
 *
 * Revision 1.5  2006/05/02 04:36:25  blueyed
 * Spam karma changed (-100..100 instead of abs/max); Spam weight for plugins; publish/delete threshold
 *
 * Revision 1.4  2006/05/02 01:27:55  blueyed
 * Moved nofollow handling to basic antispam plugin; added Filter events to Comment class
 *
 * Revision 1.3  2006/05/01 05:20:38  blueyed
 * Check for duplicate content in comments/trackback.
 *
 * Revision 1.2  2006/05/01 04:25:07  blueyed
 * Normalization
 *
 * Revision 1.1  2006/04/29 23:11:23  blueyed
 * Added basic_antispam_plugin; Moved double-check-referers there; added check, if trackback links to us
 *
 */
?>
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Thu Nov 29 23:58:50 2007 | par Balluche grâce à PHPXref 0.7 |
![]() |