[ Index ] |
|
Code source de Drupal 5.3 |
<?php
// $Id: search.module,v 1.209.2.5 2007/07/26 19:16:48 drumm Exp $

/**
 * @file
 * Enables site-wide keyword searching.
 */

/**
 * Character-class body listing every code point that is kept OUT of the
 * search index. It is meant to be placed between '[' and ']' in a pattern
 * compiled with the /u modifier (see search_simplify()).
 *
 * See: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
 *
 * Only the following Unicode general categories end up in the index:
 * Lu     Letter, Uppercase
 * Ll     Letter, Lowercase
 * Lt     Letter, Titlecase
 * Lo     Letter, Other
 * Nd     Number, Decimal Digit
 * No     Number, Other
 */
define('PREG_CLASS_SEARCH_EXCLUDE',
'\x{0}-\x{2f}\x{3a}-\x{40}\x{5b}-\x{60}\x{7b}-\x{bf}\x{d7}\x{f7}\x{2b0}-'.
'\x{385}\x{387}\x{3f6}\x{482}-\x{489}\x{559}-\x{55f}\x{589}-\x{5c7}\x{5f3}-'.
'\x{61f}\x{640}\x{64b}-\x{65e}\x{66a}-\x{66d}\x{670}\x{6d4}\x{6d6}-\x{6ed}'.
'\x{6fd}\x{6fe}\x{700}-\x{70f}\x{711}\x{730}-\x{74a}\x{7a6}-\x{7b0}\x{901}-'.
'\x{903}\x{93c}\x{93e}-\x{94d}\x{951}-\x{954}\x{962}-\x{965}\x{970}\x{981}-'.
'\x{983}\x{9bc}\x{9be}-\x{9cd}\x{9d7}\x{9e2}\x{9e3}\x{9f2}-\x{a03}\x{a3c}-'.
'\x{a4d}\x{a70}\x{a71}\x{a81}-\x{a83}\x{abc}\x{abe}-\x{acd}\x{ae2}\x{ae3}'.
'\x{af1}-\x{b03}\x{b3c}\x{b3e}-\x{b57}\x{b70}\x{b82}\x{bbe}-\x{bd7}\x{bf0}-'.
'\x{c03}\x{c3e}-\x{c56}\x{c82}\x{c83}\x{cbc}\x{cbe}-\x{cd6}\x{d02}\x{d03}'.
'\x{d3e}-\x{d57}\x{d82}\x{d83}\x{dca}-\x{df4}\x{e31}\x{e34}-\x{e3f}\x{e46}-'.
'\x{e4f}\x{e5a}\x{e5b}\x{eb1}\x{eb4}-\x{ebc}\x{ec6}-\x{ecd}\x{f01}-\x{f1f}'.
'\x{f2a}-\x{f3f}\x{f71}-\x{f87}\x{f90}-\x{fd1}\x{102c}-\x{1039}\x{104a}-'.
'\x{104f}\x{1056}-\x{1059}\x{10fb}\x{10fc}\x{135f}-\x{137c}\x{1390}-\x{1399}'.
'\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16f0}\x{1712}-\x{1714}'.
'\x{1732}-\x{1736}\x{1752}\x{1753}\x{1772}\x{1773}\x{17b4}-\x{17db}\x{17dd}'.
'\x{17f0}-\x{180e}\x{1843}\x{18a9}\x{1920}-\x{1945}\x{19b0}-\x{19c0}\x{19c8}'.
'\x{19c9}\x{19de}-\x{19ff}\x{1a17}-\x{1a1f}\x{1d2c}-\x{1d61}\x{1d78}\x{1d9b}-'.
'\x{1dc3}\x{1fbd}\x{1fbf}-\x{1fc1}\x{1fcd}-\x{1fcf}\x{1fdd}-\x{1fdf}\x{1fed}-'.
'\x{1fef}\x{1ffd}-\x{2070}\x{2074}-\x{207e}\x{2080}-\x{2101}\x{2103}-\x{2106}'.
'\x{2108}\x{2109}\x{2114}\x{2116}-\x{2118}\x{211e}-\x{2123}\x{2125}\x{2127}'.
'\x{2129}\x{212e}\x{2132}\x{213a}\x{213b}\x{2140}-\x{2144}\x{214a}-\x{2b13}'.
'\x{2ce5}-\x{2cff}\x{2d6f}\x{2e00}-\x{3005}\x{3007}-\x{303b}\x{303d}-\x{303f}'.
'\x{3099}-\x{309e}\x{30a0}\x{30fb}-\x{30fe}\x{3190}-\x{319f}\x{31c0}-\x{31cf}'.
'\x{3200}-\x{33ff}\x{4dc0}-\x{4dff}\x{a015}\x{a490}-\x{a716}\x{a802}\x{a806}'.
'\x{a80b}\x{a823}-\x{a82b}\x{d800}-\x{f8ff}\x{fb1e}\x{fb29}\x{fd3e}\x{fd3f}'.
'\x{fdfc}-\x{fe6b}\x{feff}-\x{ff0f}\x{ff1a}-\x{ff20}\x{ff3b}-\x{ff40}\x{ff5b}-'.
'\x{ff65}\x{ff70}\x{ff9e}\x{ff9f}\x{ffe0}-\x{fffd}');

/**
 * Character-class body covering the 'N' (number) Unicode classes.
 */
define('PREG_CLASS_NUMBERS',
'\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}'.
'\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}'.
'\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}'.
'\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-'.
'\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}'.
'\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}'.
'\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}'.
'\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-'.
'\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}');

/**
 * Character-class body covering the 'P' (punctuation) Unicode classes.
 */
define('PREG_CLASS_PUNCTUATION',
'\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}'.
'\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}'.
'\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}'.
'\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}'.
'\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}'.
'\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}'.
'\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}'.
'\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}'.
'\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-'.
'\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}'.
'\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}'.
'\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}'.
'\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}'.
'\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-'.
'\x{ff65}');

/**
 * Character-class body covering the CJK (Chinese, Japanese, Korean)
 * characters that are candidates for auto-splitting.
 * Contains kana and BMP ideographs.
 */
define('PREG_CLASS_CJK', '\x{3041}-\x{30ff}\x{31f0}-\x{31ff}\x{3400}-\x{4db5}'.
'\x{4e00}-\x{9fbb}\x{f900}-\x{fad9}');

/**
 * Implementation of hook_help().
 *
 * @param $section
 *   Identifier of the help text being requested.
 *
 * @return
 *   The translated help markup for the three sections handled here; nothing
 *   (NULL) for any other section.
 */
function search_help($section) {
  if ($section == 'admin/help#search') {
    // Each entry becomes one <p>...</p> paragraph of the help page.
    $paragraphs = array();
    $paragraphs[] = t('The search module adds the ability to search for content by keywords. Search is often the only practical way to find content on a large site. Search is useful for finding users and posts by searching on keywords.');
    $paragraphs[] = t('The search engine works by maintaining an index of the words in your site\'s content. It indexes the posts and users. You can adjust the settings to tweak the indexing behaviour. Note that the search requires cron to be set up correctly. The index percentage sets the maximum amount of items that will be indexed in one cron run. Set this number lower if your cron is timing out or if PHP is running out of memory.');
    $paragraphs[] = t('For more information please read the configuration and customization handbook <a href="@search">Search page</a>.', array('@search' => 'http://drupal.org/handbook/modules/search/'));
    return '<p>'. implode('</p><p>', $paragraphs) .'</p>';
  }

  if ($section == 'admin/settings/search') {
    return '<p>'. t('The search engine works by maintaining an index of the words in your site\'s content. You can adjust the settings below to tweak the indexing behaviour. Note that the search requires cron to be set up correctly.') .'</p>';
  }

  if ($section == 'search#noresults') {
    return t('<ul>
<li>Check if your spelling is correct.</li>
<li>Remove quotes around phrases to match each word individually: <em>"blue smurf"</em> will match less than <em>blue smurf</em>.</li>
<li>Consider loosening your query with <em>OR</em>: <em>blue smurf</em> will match less than <em>blue OR smurf</em>.</li>
</ul>');
  }
}

/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names defined by this module.
 */
function search_perm() {
  $permissions = array('search content', 'use advanced search', 'administer search');
  return $permissions;
}

/**
 * Implementation of hook_block().
 *
 * @param $op
 *   'list' returns the block definitions; 'view' returns the search form
 *   block, but only for users with the 'search content' permission.
 * @param $delta
 *   Not used by this implementation.
 *
 * @return
 *   An array for the two operations above, nothing (NULL) otherwise.
 */
function search_block($op = 'list', $delta = 0) {
  if ($op == 'list') {
    return array(0 => array('info' => t('Search form')));
  }

  if ($op == 'view' && user_access('search content')) {
    return array(
      'content' => drupal_get_form('search_block_form'),
      'subject' => t('Search'),
    );
  }
}

/**
 * Implementation of hook_menu().
*/
function search_menu($may_cache) {
  $items = array();

  if ($may_cache) {
    // Static (cacheable) items: the search page and its admin screens.
    $items[] = array(
      'path' => 'search',
      'title' => t('Search'),
      'callback' => 'search_view',
      'access' => user_access('search content'),
      'type' => MENU_SUGGESTED_ITEM,
    );
    $items[] = array(
      'path' => 'admin/settings/search',
      'title' => t('Search settings'),
      'description' => t('Configure relevance settings for search and other indexing options'),
      'callback' => 'drupal_get_form',
      'callback arguments' => array('search_admin_settings'),
      'access' => user_access('administer search'),
      'type' => MENU_NORMAL_ITEM,
    );
    $items[] = array(
      'path' => 'admin/settings/search/wipe',
      'title' => t('Clear index'),
      'callback' => 'drupal_get_form',
      'callback arguments' => array('search_wipe_confirm'),
      'access' => user_access('administer search'),
      'type' => MENU_CALLBACK,
    );
    $items[] = array(
      'path' => 'admin/logs/search',
      'title' => t('Top search phrases'),
      'description' => t('View most popular search phrases.'),
      'callback' => 'watchdog_top',
      'callback arguments' => array('search'),
    );
    return $items;
  }

  if (arg(0) != 'search') {
    return $items;
  }

  // Dynamic items: one local task (tab) per module that implements
  // hook_search() and reports a name. The current keywords are appended to
  // each tab path so they survive switching between tabs.
  $keys = search_get_keys();
  $suffix = '';
  if (strlen($keys)) {
    $suffix = '/'. $keys;
  }
  foreach (module_list() as $name) {
    if (!module_hook($name, 'search')) {
      continue;
    }
    $title = module_invoke($name, 'search', 'name');
    if (!$title) {
      continue;
    }
    $items[] = array(
      'path' => 'search/'. $name . $suffix,
      'title' => $title,
      'callback' => 'search_view',
      'access' => user_access('search content'),
      'type' => MENU_LOCAL_TASK,
    );
  }

  return $items;
}

/**
 * Validate callback.
*/
function search_admin_settings_validate($form_id, $form_values) {
  // The "Re-index site" button leads to the confirmation form instead of
  // saving the settings.
  if ($form_values['op'] == t('Re-index site')) {
    drupal_goto('admin/settings/search/wipe');
  }

  // Changing either of these two settings invalidates the existing index.
  $word_size_changed = variable_get('minimum_word_size', 3) != $form_values['minimum_word_size'];
  $cjk_changed = variable_get('overlap_cjk', TRUE) != $form_values['overlap_cjk'];
  if ($word_size_changed || $cjk_changed) {
    drupal_set_message(t('The index will be rebuilt.'));
    search_wipe();
  }
}

/**
 * Menu callback; displays the search module settings page.
 *
 * @return
 *   The settings form, as returned by system_settings_form().
 */
function search_admin_settings() {
  // Add up the indexing status reported by every module implementing
  // hook_search().
  $remaining = 0;
  $total = 0;
  foreach (module_list() as $module) {
    if (!module_hook($module, 'search')) {
      continue;
    }
    $module_status = module_invoke($module, 'search', 'status');
    $remaining += $module_status['remaining'];
    $total += $module_status['total'];
  }
  $left = format_plural($remaining, 'There is 1 item left to index.', 'There are @count items left to index.');
  // max(1, ...) avoids a division by zero when nothing is indexable.
  $percentage = ((int)min(100, 100 * ($total - $remaining) / max(1, $total))) .'%';
  $summary = '<p><strong>'. t('%percentage of the site has been indexed.', array('%percentage' => $percentage)) .' '. $left .'</strong></p>';

  $form = array();
  $form['status'] = array(
    '#type' => 'fieldset',
    '#title' => t('Indexing status'),
  );
  $form['status']['status'] = array('#value' => $summary);
  $form['status']['wipe'] = array(
    '#type' => 'submit',
    '#value' => t('Re-index site'),
  );

  $limits = drupal_map_assoc(array(10, 20, 50, 100, 200, 500));

  // Indexing throttle:
  $form['indexing_throttle'] = array(
    '#type' => 'fieldset',
    '#title' => t('Indexing throttle'),
  );
  $form['indexing_throttle']['search_cron_limit'] = array(
    '#type' => 'select',
    '#title' => t('Items to index per cron run'),
    '#default_value' => variable_get('search_cron_limit', 100),
    '#options' => $limits,
    '#description' => t('The maximum amount of items that will be indexed in one cron run. Set this number lower if your cron is timing out or if PHP is running out of memory.'),
  );

  // Indexing settings:
  $form['indexing_settings'] = array(
    '#type' => 'fieldset',
    '#title' => t('Indexing settings'),
  );
  $form['indexing_settings']['info'] = array(
    '#value' => '<em>'. t('<p>Changing the settings below will cause the site index to be rebuilt. The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed.</p><p>The default settings should be appropriate for the majority of sites.</p>') .'</em>',
  );
  $form['indexing_settings']['minimum_word_size'] = array(
    '#type' => 'textfield',
    '#title' => t('Minimum word length to index'),
    '#default_value' => variable_get('minimum_word_size', 3),
    '#size' => 5,
    '#maxlength' => 3,
    '#description' => t('The number of characters a word has to be to be indexed. A lower setting means better search result ranking, but also a larger database. Each search query must contain at least one keyword that is this size (or longer).'),
  );
  $form['indexing_settings']['overlap_cjk'] = array(
    '#type' => 'checkbox',
    '#title' => t('Simple CJK handling'),
    '#default_value' => variable_get('overlap_cjk', TRUE),
    '#description' => t('Whether to apply a simple Chinese/Japanese/Korean tokenizer based on overlapping sequences. Turn this off if you want to use an external preprocessor for this instead. Does not affect other languages.'),
  );

  // Per module settings
  $module_settings = module_invoke_all('search', 'admin');
  $form = array_merge($form, $module_settings);

  return system_settings_form($form);
}

/**
 * Menu callback: confirm wiping of the index.
 *
 * @return
 *   The confirmation form, as returned by confirm_form().
 */
function search_wipe_confirm() {
  $question = t('Are you sure you want to re-index the site?');
  $description = t(' The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed. This action cannot be undone.');
  $yes = t('Re-index site');
  $no = t('Cancel');
  return confirm_form(array(), $question, 'admin/settings/search', $description, $yes, $no);
}

/**
 * Handler for wipe confirmation.
 *
 * @return
 *   The path to redirect to after the index was reset; nothing when the
 *   'confirm' value of the submitted form is empty.
 */
function search_wipe_confirm_submit($form_id, &$form) {
  if (!$form['confirm']) {
    return;
  }
  search_wipe();
  drupal_set_message(t('The index will be rebuilt.'));
  return 'admin/settings/search';
}

/**
 * Wipes a part of or the entire search index.
 *
 * When neither $sid nor $type is given, every module is asked to reset its
 * index through hook_search('reset'); no rows are deleted here in that case.
 *
 * @param $sid
 *  (optional) The SID of the item to wipe. If specified, $type must be passed
 *  too.
 * @param $type
 *  (optional) The type of item to wipe.
 * @param $reindex
 *  (optional) When TRUE, only the item's own index rows (fromsid = 0) are
 *  removed, so that words contributed by links from other items are kept.
 */
function search_wipe($sid = NULL, $type = NULL, $reindex = FALSE) {
  if ($type == NULL && $sid == NULL) {
    module_invoke_all('search', 'reset');
    return;
  }

  db_query("DELETE FROM {search_dataset} WHERE sid = %d AND type = '%s'", $sid, $type);
  // Words this item contributed to other items through links.
  db_query("DELETE FROM {search_index} WHERE fromsid = %d AND fromtype = '%s'", $sid, $type);
  // When re-indexing, keep link references
  $own_rows_only = $reindex ? " AND fromsid = 0" : '';
  db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'". $own_rows_only, $sid, $type);
}

/**
 * Marks a word as dirty (or retrieves the list of dirty words). This is used
 * during indexing (cron). Words which are dirty have outdated total counts in
 * the search_total table, and need to be recounted.
 *
 * @param $word
 *   (optional) The word to mark. When omitted, nothing is marked and the
 *   collected list is returned instead.
 *
 * @return
 *   When called without a word: an array whose keys are the dirty words.
 */
function search_dirty($word = NULL) {
  static $dirty = array();

  if ($word === NULL) {
    return $dirty;
  }
  $dirty[$word] = TRUE;
}

/**
 * Implementation of hook_cron().
 *
 * Fires hook_update_index() in all modules and cleans up dirty words (see
 * search_dirty).
 */
function search_cron() {
  // The totals are refreshed from a shutdown function so that search_total
  // is brought up to date at the end of the request.
  register_shutdown_function('search_update_totals');

  // Update word index
  $modules = module_list();
  foreach ($modules as $name) {
    module_invoke($name, 'update_index');
  }
}

/**
 * This function is called on shutdown to ensure that search_total is always
 * up to date (even if cron times out or otherwise fails).
 */
function search_update_totals() {
  // Update word IDF (Inverse Document Frequency) counts for new/changed words
  $dirty_words = array_keys(search_dirty());
  foreach ($dirty_words as $word) {
    // Summed score of the word over the whole index.
    $sum = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $word));
    // Apply Zipf's law to equalize the probability distribution
    $weight = log10(1 + 1/(max(1, $sum)));
    // Update the existing row; insert one when no row was affected.
    db_query("UPDATE {search_total} SET count = %f WHERE word = '%s'", $weight, $word);
    if (!db_affected_rows()) {
      db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %f)", $word, $weight);
    }
  }

  // Remove words that no longer occur in search_index from search_total: the
  // LEFT JOIN keeps exactly the search_total rows without an index match.
  $orphans = db_query("SELECT t.word AS realword, i.word FROM {search_total} t LEFT JOIN {search_index} i ON t.word = i.word WHERE i.word IS NULL");
  while ($orphan = db_fetch_object($orphans)) {
    db_query("DELETE FROM {search_total} WHERE word = '%s'", $orphan->realword);
  }
}

/**
 * Simplifies a string according to indexing rules.
 *
 * @param $text
 *   The text to simplify.
 *
 * @return
 *   The lowercased text in which everything that is not indexable has been
 *   turned into word boundaries (spaces).
 */
function search_simplify($text) {
  // Decode entities to UTF-8, then lowercase.
  $text = drupal_strtolower(decode_entities($text));

  // Call an external processor for word handling ($text is passed by
  // reference).
  search_preprocess($text);

  // Simple CJK handling
  if (variable_get('overlap_cjk', TRUE)) {
    $cjk_run = '/['. PREG_CLASS_CJK .']+/u';
    $text = preg_replace_callback($cjk_run, 'search_expand_cjk', $text);
  }

  // To improve searching for numerical data such as dates, IP addresses
  // or version numbers, a group of numerical characters separated only by
  // punctuation characters is glued into one piece. This also means that
  // searching for e.g. '20/03/1984' also returns results with '20-03-1984'
  // in them.
  // Readable regexp: ([number]+)[punctuation]+(?=[number])
  $numeric_separator = '/(['. PREG_CLASS_NUMBERS .']+)['. PREG_CLASS_PUNCTUATION .']+(?=['. PREG_CLASS_NUMBERS .'])/u';
  $text = preg_replace($numeric_separator, '\1', $text);

  // The dot, underscore and dash are simply removed. This allows meaningful
  // search behaviour with acronyms and URLs.
  $text = preg_replace('/[._-]+/', '', $text);

  // With the exception of the rules above, all punctuation, marks, spacers,
  // etc. are considered to be a word boundary.
  $boundary = '/['. PREG_CLASS_SEARCH_EXCLUDE .']+/u';
  $text = preg_replace($boundary, ' ', $text);

  return $text;
}

/**
 * Basic CJK tokenizer. Simply splits a string into consecutive, overlapping
 * sequences of characters ('minimum_word_size' long).
 *
 * Used as the preg_replace_callback() callback in search_simplify().
 *
 * @param $matches
 *   The match array; only the full match ($matches[0]) is used.
 *
 * @return
 *   The space-separated tokens, with a leading and a trailing space.
 */
function search_expand_cjk($matches) {
  $size = variable_get('minimum_word_size', 3);
  $remaining = $matches[0];
  $length = drupal_strlen($remaining);

  // Runs no longer than the minimum word size are passed through unchanged.
  if ($length <= $size) {
    return ' '. $remaining .' ';
  }

  // Sliding window (FIFO queue) over the characters of the run.
  $window = array();
  $output = ' ';
  for ($position = 0; $position < $length; ++$position) {
    // Take the next character off the front of the string; strlen() gives
    // its size in bytes so that substr() removes exactly that character.
    $char = drupal_substr($remaining, 0, 1);
    $remaining = substr($remaining, strlen($char));
    $window[] = $char;
    if ($position < $size - 1) {
      // Window not filled yet.
      continue;
    }
    $output .= implode('', $window) .' ';
    array_shift($window);
  }
  return $output;
}

/**
 * Splits a string into tokens for indexing.
*/
function search_index_split($text) {
  // Memoize the most recent call: the raw input and its resulting word list.
  static $last = NULL;
  static $lastsplit = NULL;

  // Compare strictly against the raw input of the previous call. A loose
  // comparison would treat e.g. '' as equal to the initial NULL (returning
  // NULL instead of an array) and numeric strings such as '1e3' and '1000'
  // as equal.
  if ($last === $text) {
    return $lastsplit;
  }

  // Process words
  $words = explode(' ', search_simplify($text));
  array_walk($words, '_search_index_truncate');

  // Remember the unmodified input, not the simplified text, so that a
  // repeated call with the same argument actually hits the cache.
  $last = $text;
  $lastsplit = $words;

  return $words;
}

/**
 * Helper function for array_walk in search_index_split.
 *
 * Truncates the given word in place with truncate_utf8($text, 50).
 */
function _search_index_truncate(&$text) {
  $text = truncate_utf8($text, 50);
}

/**
 * Invokes hook_search_preprocess() in modules.
 *
 * @param $text
 *   The text to process; it is replaced in place by the return value of each
 *   implementing module in turn.
 */
function search_preprocess(&$text) {
  foreach (module_implements('search_preprocess') as $module) {
    $text = module_invoke($module, 'search_preprocess', $text);
  }
}

/**
 * Update the full-text search index for a particular item.
 *
 * The text is split into HTML tags and plain text. Words are scored according
 * to the tags they appear in; words inside links to nodes of this site are
 * additionally recorded against the linked node.
 *
 * @param $sid
 *   A number identifying this particular item (e.g. node id).
 *
 * @param $type
 *   A string defining this type of item (e.g. 'node')
 *
 * @param $text
 *   The content of this item. Must be a piece of HTML text.
 *
 * @ingroup search
 */
function search_index($sid, $type, $text) {
  $minimum_word_size = variable_get('minimum_word_size', 3);

  // Link matching
  global $base_url;
  $node_regexp = '@href=[\'"]?(?:'. preg_quote($base_url, '@') .'/|'. preg_quote(base_path(), '@') .')(?:\?q=)?/?((?![a-z]+:)[^\'">]+)[\'">]@i';

  // Multipliers for scores of words inside certain HTML tags.
  // Note: 'a' must be included for link ranking to work.
  $tags = array('h1' => 25,
                'h2' => 18,
                'h3' => 15,
                'h4' => 12,
                'h5' => 9,
                'h6' => 6,
                'u' => 3,
                'b' => 3,
                'i' => 3,
                'strong' => 3,
                'em' => 3,
                'a' => 10);

  // Strip off all ignored tags to speed up processing, but insert space before/after
  // them to keep word boundaries.
  $text = str_replace(array('<', '>'), array(' <', '> '), $text);
  $text = strip_tags($text, '<'. implode('><', array_keys($tags)) .'>');

  // Split HTML tags from plain text.
  $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
  // Note: PHP ensures the array consists of alternating delimiters and literals
  // and begins and ends with a literal (inserting $null as required).

  $tag = FALSE; // Odd/even counter. Tag or no tag.
  $link = FALSE; // State variable for link analyser
  $score = 1; // Starting score per word
  $accum = ' '; // Accumulator for cleaned up data
  $tagstack = array(); // Stack with open tags
  $tagwords = 0; // Counter for consecutive words
  $focus = 1; // Focus state

  // Accumulator for words for index. Key 0 holds the item's own words, any
  // other key is the nid of a linked node.
  $results = array(0 => array());

  foreach ($split as $value) {
    if ($tag) {
      // Increase or decrease score per word based on tag
      list($tagname) = explode(' ', $value, 2);
      $tagname = drupal_strtolower($tagname);
      // Closing or opening tag?
      if ($tagname[0] == '/') {
        $tagname = substr($tagname, 1);
        // If we encounter unexpected tags, reset score to avoid incorrect boosting.
        if (!count($tagstack) || $tagstack[0] != $tagname) {
          $tagstack = array();
          $score = 1;
        }
        else {
          // Remove from tag stack and decrement score
          $score = max(1, $score - $tags[array_shift($tagstack)]);
        }
        if ($tagname == 'a') {
          $link = FALSE;
        }
      }
      else {
        // Only look at the top of the stack when there is one; reading
        // $tagstack[0] on an empty stack raises an undefined offset notice.
        if (count($tagstack) && $tagstack[0] == $tagname) {
          // None of the tags we look for make sense when nested identically.
          // If they are, it's probably broken HTML.
          $tagstack = array();
          $score = 1;
        }
        else {
          // Add to open tag stack and increment score
          array_unshift($tagstack, $tagname);
          $score += $tags[$tagname];
        }
        if ($tagname == 'a') {
          // Check if link points to a node on this site
          if (preg_match($node_regexp, $value, $match)) {
            $path = drupal_get_normal_path($match[1]);
            if (preg_match('!(?:node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
              $linknid = $match[1];
              if ($linknid > 0) {
                // Note: ignore links to uncachable nodes to avoid redirect bugs.
                $node = db_fetch_object(db_query('SELECT n.title, n.nid, n.vid, r.format FROM {node} n INNER JOIN {node_revisions} r ON n.vid = r.vid WHERE n.nid = %d', $linknid));
                // The lookup yields no object when the node does not exist;
                // such links are ignored instead of dereferencing a non-object.
                if ($node && filter_format_allowcache($node->format)) {
                  $link = TRUE;
                  $linktitle = $node->title;
                }
              }
            }
          }
        }
      }
      // A tag change occurred, reset counter.
      $tagwords = 0;
    }
    else {
      // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
      if ($value != '') {
        if ($link) {
          // Check to see if the node link text is its URL. If so, we use the target node title instead.
          if (preg_match('!^https?://!i', $value)) {
            $value = $linktitle;
          }
        }
        $words = search_index_split($value);
        foreach ($words as $word) {
          // Add word to accumulator
          $accum .= $word .' ';
          $num = is_numeric($word);
          // Check wordlength
          if ($num || drupal_strlen($word) >= $minimum_word_size) {
            // Normalize numbers
            if ($num) {
              $word = (int)ltrim($word, '-0');
            }

            if ($link) {
              if (!isset($results[$linknid])) {
                $results[$linknid] = array();
              }
              // Start every word at 0 so that += never reads an unset index.
              if (!isset($results[$linknid][$word])) {
                $results[$linknid][$word] = 0;
              }
              $results[$linknid][$word] += $score * $focus;
            }
            else {
              if (!isset($results[0][$word])) {
                $results[0][$word] = 0;
              }
              $results[0][$word] += $score * $focus;
              // Focus is a decaying value in terms of the amount of unique words up to this point.
              // From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words.
              $focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015));
            }
          }
          $tagwords++;
          // Too many words inside a single tag probably mean a tag was accidentally left open.
          if (count($tagstack) && $tagwords >= 15) {
            $tagstack = array();
            $score = 1;
          }
        }
      }
    }
    $tag = !$tag;
  }

  // Remove the item's previous entries, keeping link references from other
  // items (see the $reindex argument of search_wipe()).
  search_wipe($sid, $type, TRUE);

  // Insert cleaned up data into dataset
  db_query("INSERT INTO {search_dataset} (sid, type, data) VALUES (%d, '%s', '%s')", $sid, $type, $accum);

  // Insert results into search index
  foreach ($results[0] as $word => $score) {
    db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %f)", $word, $sid, $type, $score);
    search_dirty($word);
  }
  unset($results[0]);

  // Now insert links to nodes
  foreach ($results as $nid => $words) {
    foreach ($words as $word => $score) {
      db_query("INSERT INTO {search_index} (word, sid, type, fromsid, fromtype, score) VALUES ('%s', %d, '%s', %d, '%s', %f)", $word, $nid, 'node', $sid, $type, $score);
      search_dirty($word);
    }
  }
}

/**
 * Extract a module-specific search option from a search query. e.g. 'type:book'
 *
 * @param $keys
 *   The search query string.
 * @param $option
 *   The option name (the part before the colon). It is inserted into the
 *   regular expression as is.
 *
 * @return
 *   The value of the option (possibly an empty string), or nothing (NULL)
 *   when the option does not occur in the query.
 */
function search_query_extract($keys, $option) {
  if (preg_match('/(^| )'. $option .':([^ ]*)( |$)/i', $keys, $matches)) {
    return $matches[2];
  }
}

/**
 * Return a query with the given module-specific search option inserted in.
 * e.g. 'type:book'.
 *
 * @param $keys
 *   The search query string.
 * @param $option
 *   The option name.
 * @param $value
 *   (optional) The new value. When empty, the option is only removed.
 *
 * @return
 *   The query string with any previous occurrence of the option removed and
 *   the new 'option:value' pair appended.
 */
function search_query_insert($keys, $option, $value = '') {
  // Test for presence, not truthiness: an existing option with an empty or
  // '0' value must be removed too, otherwise it would end up duplicated.
  if (search_query_extract($keys, $option) !== NULL) {
    $keys = trim(preg_replace('/(^| )'. $option .':[^ ]*/i', '', $keys));
  }
  if ($value != '') {
    $keys .= ' '. $option .':'. $value;
  }
  return $keys;
}

/**
 * Parse a search query into SQL conditions.
 *
 * We build a query that matches the dataset bodies.
*/
function search_parse_query($text) {
  $keys = array('positive' => array(), 'negative' => array());

  // Tokenize query string: an optional leading '-' followed by either a
  // double-quoted phrase or a single word.
  preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' '. $text, $matches, PREG_SET_ORDER);

  if (count($matches) < 1) {
    return NULL;
  }

  // Classify tokens
  $or = FALSE;
  foreach ($matches as $match) {
    $phrase = FALSE;
    // Strip off phrase quotes. Square-bracket offset syntax is used because
    // the curly-brace form ($str{0}) no longer parses on PHP 8.
    if ($match[2][0] == '"') {
      $match[2] = substr($match[2], 1, -1);
      $phrase = TRUE;
    }
    // Simplify keyword according to indexing rules and external preprocessors
    $words = search_simplify($match[2]);
    // Re-explode in case simplification added more words, except when matching a phrase
    $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);
    // Negative matches
    if ($match[1] == '-') {
      $keys['negative'] = array_merge($keys['negative'], $words);
    }
    // OR operator: instead of a single keyword, we store an array of all
    // OR'd keywords.
    elseif ($match[2] == 'OR' && count($keys['positive'])) {
      $last = array_pop($keys['positive']);
      // Starting a new OR?
      if (!is_array($last)) {
        $last = array($last);
      }
      $keys['positive'][] = $last;
      $or = TRUE;
      continue;
    }
    // Plain keyword
    else {
      if ($or) {
        // Add to last element (which is an array)
        $keys['positive'][count($keys['positive']) - 1] = array_merge($keys['positive'][count($keys['positive']) - 1], $words);
      }
      else {
        $keys['positive'] = array_merge($keys['positive'], $words);
      }
    }
    $or = FALSE;
  }

  // Convert keywords into SQL statements.
  // $query / $arguments: conditions on the dataset text (second pass).
  // $query2 / $arguments2: conditions on the word index (first pass).
  $query = array();
  $query2 = array();
  $arguments = array();
  $arguments2 = array();
  // Number of index words an item has to match at least.
  $matches = 0;
  // Positive matches
  foreach ($keys['positive'] as $key) {
    // Group of ORed terms
    if (is_array($key) && count($key)) {
      $queryor = array();
      $any = FALSE;
      foreach ($key as $or) {
        list($q, $count) = _search_parse_query($or, $arguments2);
        $any |= $count;
        if ($q) {
          $queryor[] = $q;
          $arguments[] = $or;
        }
      }
      if (count($queryor)) {
        $query[] = '('. implode(' OR ', $queryor) .')';
        // A group of OR keywords only needs to match once
        $matches += ($any > 0);
      }
    }
    // Single ANDed term
    else {
      list($q, $count) = _search_parse_query($key, $arguments2);
      if ($q) {
        $query[] = $q;
        $arguments[] = $key;
        // Each AND keyword needs to match at least once
        $matches += $count;
      }
    }
  }
  // Negative matches
  foreach ($keys['negative'] as $key) {
    list($q) = _search_parse_query($key, $arguments2, TRUE);
    if ($q) {
      $query[] = $q;
      $arguments[] = $key;
    }
  }
  $query = implode(' AND ', $query);

  // Build word-index conditions for the first pass: one "i.word = '%s'" per
  // index word, joined by OR (the trailing ' OR ' is cut off).
  $query2 = substr(str_repeat("i.word = '%s' OR ", count($arguments2)), 0, -4);

  return array($query, $arguments, $query2, $arguments2, $matches);
}

/**
 * Helper function for search_parse_query();
 *
 * @param $word
 *   A simplified keyword or phrase.
 * @param $scores
 *   Array of index words collected so far, keyed by word. Words of $word that
 *   are numeric or at least 'minimum_word_size' long are added to it, unless
 *   $not is TRUE.
 * @param $not
 *   (optional) TRUE for a negative keyword.
 *
 * @return
 *   An array of the SQL snippet matching the keyword against the dataset text
 *   and the number of words newly added to $scores.
 */
function _search_parse_query(&$word, &$scores, $not = FALSE) {
  $count = 0;
  // Determine the scorewords of this word/phrase
  if (!$not) {
    // The setting does not change inside the loop; read it once.
    $minimum_word_size = variable_get('minimum_word_size', 3);
    $split = explode(' ', $word);
    foreach ($split as $s) {
      $num = is_numeric($s);
      if ($num || drupal_strlen($s) >= $minimum_word_size) {
        // Normalize numbers the same way search_index() does.
        $s = $num ? ((int)ltrim($s, '-0')) : $s;
        if (!isset($scores[$s])) {
          $scores[$s] = $s;
          $count++;
        }
      }
    }
  }
  // Return matching snippet and number of added words
  return array("d.data ". ($not ? 'NOT ' : '') ."LIKE '%% %s %%'", $count);
}

/**
 * Do a query on the full-text search index for a word or words.
 *
 * This function is normally only called by each module that supports the
 * indexed search (and thus, implements hook_update_index()).
 *
 * Two queries are performed which can be extended by the caller.
 *
 * The first query selects a set of possible matches based on the search index
 * and any extra given restrictions. This is the classic "OR" search.
 *
 * SELECT i.type, i.sid, SUM(i.score*t.count) AS relevance
 * FROM {search_index} i
 * INNER JOIN {search_total} t ON i.word = t.word
 * $join1
 * WHERE $where1 AND (...)
 * GROUP BY i.type, i.sid
 *
 * The second query further refines this set by verifying advanced text
 * conditions (such as AND, negative or phrase matches), and orders the results
 * on the column or expression 'score':
 *
 * SELECT i.type, i.sid, $select2
 * FROM temp_search_sids i
 * INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type
 * $join2
 * WHERE (...)
 * ORDER BY score DESC
 *
 * @param $keywords
 *   A search string as entered by the user.
 *
 * @param $type
 *   A string identifying the calling module.
 *
 * @param $join1
 *   (optional) Inserted into the JOIN part of the first SQL query.
 *   For example "INNER JOIN {node} n ON n.nid = i.sid".
 *
 * @param $where1
 *   (optional) Inserted into the WHERE part of the first SQL query.
 *   For example "(n.status > %d)".
 *
 * @param $arguments1
 *   (optional) Extra SQL arguments belonging to the first query.
 *
 * @param $select2
 *   (optional) Inserted into the SELECT part of the second query. Must contain
 *   a column selected as 'score'.
 *   defaults to 'i.relevance AS score'
 *
 * @param $join2
 *   (optional) Inserted into the JOIN part of the second SQL query.
*   For example "INNER JOIN {node_comment_statistics} n ON n.nid = i.sid"
 *
 * @param $arguments2
 *   (optional) Extra SQL arguments belonging to the second query parameter.
 *
 * @param $sort_parameters
 *   (optional) SQL arguments for sorting the final results.
 *   Default: 'ORDER BY score DESC'
 *
 * @return
 *   An array of result row objects fetched from the second query (each row
 *   carries the selected type, sid and $select2 columns). An empty array is
 *   returned when the keywords are unusable or nothing matches.
 *
 * @ingroup search
 */
function do_search($keywords, $type, $join1 = '', $where1 = '1', $arguments1 = array(), $select2 = 'i.relevance AS score', $join2 = '', $arguments2 = array(), $sort_parameters = 'ORDER BY score DESC') {
  $query = search_parse_query($keywords);

  // Test for a missing parse result before reading its elements. A missing
  // result and an empty set of index conditions both report the error.
  if ($query === NULL || $query[2] == '') {
    form_set_error('keys', t('You must include at least one positive keyword with @count characters or more.', array('@count' => variable_get('minimum_word_size', 3))));
    return array();
  }
  if ($query[0] == '') {
    return array();
  }

  // First pass: select all possible matching sids, doing a simple index-based OR matching on the keywords.
  // 'matches' is used to reject those items that cannot possibly match the query.
  $conditions = $where1 .' AND ('. $query[2] .") AND i.type = '%s'";
  $arguments = array_merge($arguments1, $query[3], array($type, $query[4]));
  db_query_temporary("SELECT i.type, i.sid, SUM(i.score * t.count) AS relevance, COUNT(*) AS matches FROM {search_index} i INNER JOIN {search_total} t ON i.word = t.word $join1 WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d", $arguments, 'temp_search_sids');

  // Calculate maximum relevance, to normalize it
  $normalize = db_result(db_query('SELECT MAX(relevance) FROM temp_search_sids'));
  if (!$normalize) {
    return array();
  }
  $select2 = str_replace('i.relevance', '('. (1.0 / $normalize) .' * i.relevance)', $select2);

  // Second pass: only keep items that match the complicated keywords conditions (phrase search, negative keywords, ...)
  $conditions = '('. $query[0] .')';
  $arguments = array_merge($arguments2, $query[1]);
  db_query_temporary("SELECT i.type, i.sid, $select2 FROM temp_search_sids i INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type $join2 WHERE $conditions $sort_parameters", $arguments, 'temp_search_results');
  if (($count = db_result(db_query('SELECT COUNT(*) FROM temp_search_results'))) == 0) {
    return array();
  }
  // The total is already known, so the pager's count query just selects it.
  $count_query = "SELECT $count";

  // Do actual search query
  $result = pager_query("SELECT * FROM temp_search_results", 10, 0, $count_query);
  $results = array();
  while ($item = db_fetch_object($result)) {
    $results[] = $item;
  }
  return $results;
}

/**
 * Helper function for grabbing search keys.
 *
 * @return
 *   The part of $_GET['q'] after its second slash when there is one;
 *   otherwise $_REQUEST['keys'], or NULL when that is not set either.
 */
function search_get_keys() {
  // Extract keys as remainder of path
  // Note: support old GET format of searches for existing links.
  $path = explode('/', $_GET['q'], 3);
  if (count($path) == 3) {
    return $path[2];
  }
  // Same NULL as before for a missing 'keys' value, without the notice.
  return isset($_REQUEST['keys']) ? $_REQUEST['keys'] : NULL;
}

/**
 * Menu callback; presents the search form and/or search results.
 */
function search_view() {
  $type = arg(1);
  // Defined up front so that neither return path reads an undefined variable:
  // $results stays empty for a blank search, $keys stays empty on POST.
  $keys = '';
  $results = '';

  // Search form submits with POST but redirects to GET. This way we can keep
  // the search query URL clean as a whistle:
  // search/type/keyword+keyword
  if (!isset($_POST['form_id'])) {
    if ($type == '') {
      // Note: search/node can not be a default tab because it would take on the
      // path of its parent (search). It would prevent remembering keywords when
      // switching tabs. This is why we drupal_goto to it from the parent instead.
      drupal_goto('search/node');
    }

    $keys = search_get_keys();
    // Only perform search if there is non-whitespace search term:
    if (trim($keys)) {
      // Log the search keys:
      watchdog('search', t('%keys (@type).', array('%keys' => $keys, '@type' => module_invoke($type, 'search', 'name'))), WATCHDOG_NOTICE, l(t('results'), 'search/'. $type .'/'. $keys));

      // Collect the search results:
      $results = search_data($keys, $type);

      if ($results) {
        $results = theme('box', t('Search results'), $results);
      }
      else {
        $results = theme('box', t('Your search yielded no results'), search_help('search#noresults'));
      }
    }

    // Construct the search form.
    $output = drupal_get_form('search_form', NULL, $keys, $type);
    $output .= $results;

    return $output;
  }

  return drupal_get_form('search_form', NULL, $keys, $type);
}

/**
 * @defgroup search Search interface
 * @{
 * The Drupal search interface manages a global search mechanism.
 *
 * Modules may plug into this system to provide searches of different types of
 * data. Most of the system is handled by search.module, so this must be enabled
 * for all of the search features to work.
 *
 * There are three ways to interact with the search system:
 * - Specifically for searching nodes, you can implement nodeapi('update index')
 *   and nodeapi('search result'). However, note that the search system already
 *   indexes all visible output of a node, i.e. everything displayed normally
 *   by hook_view() and hook_nodeapi('view'). This is usually sufficient.
 *   You should only use this mechanism if you want additional, non-visible data
 *   to be indexed.
 * - Implement hook_search(). This will create a search tab for your module on
 *   the /search page with a simple keyword search form. You may optionally
 *   implement hook_search_item() to customize the display of your results.
* - Implement hook_update_index(). This allows your module to use Drupal's
 *   HTML indexing mechanism for searching full text efficiently.
 *
 * If your module needs to provide a more complicated search form, then you need
 * to implement it yourself without hook_search(). In that case, you should
 * define it as a local task (tab) under the /search page (e.g. /search/mymodule)
 * so that users can easily find it.
 */

/**
 * Render a search form.
 *
 * @param $action
 *   Form action. When empty, the URL of "search/$type" is used.
 * @param $keys
 *   The search string entered by the user, containing keywords for the search.
 * @param $type
 *   The type of search to render the form for. Must be the name of module
 *   which implements hook_search(). When empty, search_form_submit() falls
 *   back to 'node'.
 * @param $prompt
 *   A piece of text to put before the form (e.g. "Enter your keywords").
 *   NULL selects the translated default prompt.
 * @return
 *   A form array describing the search form.
 */
function search_form($action = '', $keys = '', $type = NULL, $prompt = NULL) {

  // Add CSS
  drupal_add_css(drupal_get_path('module', 'search') .'/search.css', 'module', 'all', FALSE);

  if (!$action) {
    $action = url('search/'. $type);
  }
  if (is_null($prompt)) {
    $prompt = t('Enter your keywords');
  }

  $form = array(
    '#action' => $action,
    '#attributes' => array('class' => 'search-form'),
  );
  $form['module'] = array('#type' => 'value', '#value' => $type);
  $form['basic'] = array('#type' => 'item', '#title' => $prompt);
  $form['basic']['inline'] = array('#prefix' => '<div class="container-inline">', '#suffix' => '</div>');
  $form['basic']['inline']['keys'] = array(
    '#type' => 'textfield',
    '#title' => '',
    '#default_value' => $keys,
    // The keyword field is wider when a prompt is shown.
    '#size' => $prompt ? 40 : 20,
    '#maxlength' => 255,
  );
  // processed_keys is used to coordinate keyword passing between other forms
  // that hook into the basic search form.
  $form['basic']['inline']['processed_keys'] = array('#type' => 'value', '#value' => array());
  $form['basic']['inline']['submit'] = array('#type' => 'submit', '#value' => t('Search'));

  return $form;
}

/**
 * As the search form collates keys from other modules hooked in via
 * hook_form_alter, the validation takes place in _submit.
 * search_form_validate() is used solely to set the 'processed_keys' form
 * value for the basic search form.
 */
function search_form_validate($form_id, $form_values, $form) {
  form_set_value($form['basic']['inline']['processed_keys'], trim($form_values['keys']));
}

/**
 * Process a search form submission.
 *
 * @return
 *   The path "search/<type>/<keys>", where <type> is the submitted 'module'
 *   value or 'node' when that value is empty.
 */
function search_form_submit($form_id, $form_values) {
  $keys = $form_values['processed_keys'];
  if ($keys == '') {
    form_set_error('keys', t('Please enter some keywords.'));
    // Fall through to the drupal_goto() call.
  }

  $type = $form_values['module'] ? $form_values['module'] : 'node';
  return 'search/'. $type .'/'. $keys;
}

/**
 * Output a search form for the search block and the theme's search box.
 *
 * @param $form_id
 *   The form ID; it prefixes the name of the keyword field.
 * @return
 *   A form array.
 */
function search_box($form_id) {
  $form = array();
  // Use search_keys instead of keys to avoid ID conflicts with the search block.
  $form[$form_id .'_keys'] = array(
    '#type' => 'textfield',
    '#size' => 15,
    '#default_value' => '',
    '#attributes' => array('title' => t('Enter the terms you wish to search for.')),
  );
  $form['submit'] = array('#type' => 'submit', '#value' => t('Search'));
  // Always go to the search page since the search form is not guaranteed to be
  // on every page.
  $form['#action'] = url('search/node');
  $form['#base'] = 'search_box_form';

  return $form;
}

/**
 * Process a block search form submission.
 *
 * @return
 *   The path "search/node/<keys>", with the submitted keys trimmed.
 */
function search_box_form_submit($form_id, $form_values) {
  return 'search/node/'. trim($form_values[$form_id .'_keys']);
}

/**
 * Theme the theme search form.
 */
function theme_search_theme_form($form) {
  return '<div id="search" class="container-inline">'. drupal_render($form) .'</div>';
}

/**
 * Theme the block search form.
 */
function theme_search_block_form($form) {
  return '<div class="container-inline">'. drupal_render($form) .'</div>';
}

/**
 * Perform a standard search on the given keys, and return the formatted results.
 *
 * @param $keys
 *   The search keywords. Nothing is searched when this is NULL.
 * @param $type
 *   Name of the module whose search hook is invoked.
 * @return
 *   The output of the module's search_page hook if it has one, otherwise of
 *   theme('search_page'). NULL when the module has no search hook or the hook
 *   returns no non-empty array of results.
 */
function search_data($keys = NULL, $type = 'node') {
  if (!isset($keys) || !module_hook($type, 'search')) {
    return NULL;
  }

  $results = module_invoke($type, 'search', 'search', $keys);
  if (!isset($results) || !is_array($results) || !count($results)) {
    return NULL;
  }

  if (module_hook($type, 'search_page')) {
    return module_invoke($type, 'search_page', $results);
  }
  return theme('search_page', $results, $type);
}

/**
 * Returns snippets from a piece of text, with certain keywords highlighted.
 * Used for formatting search results.
 *
 * @param $keys
 *   A string containing a search query.
 *
 * @param $text
 *   The text to extract fragments from.
 *
 * @return
 *   A string containing HTML for the excerpt.
 */
function search_excerpt($keys, $text) {
  // We highlight around non-indexable or CJK characters.
  $boundary = '(?:(?<=['. PREG_CLASS_SEARCH_EXCLUDE . PREG_CLASS_CJK .'])|(?=['. PREG_CLASS_SEARCH_EXCLUDE . PREG_CLASS_CJK .']))';

  // Extract positive keywords and phrases
  preg_match_all('/ ("([^"]+)"|(?!OR)([^" ]+))/', ' '. $keys, $matches);
  // Group 2 holds quoted phrases and group 3 bare words; each match leaves the
  // other group as an empty string, which the loop below discards.
  $keys = array_merge($matches[2], $matches[3]);

  // Prepare text
  $text = ' '. strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text)) .' ';
  array_walk($keys, '_search_excerpt_replace');
  $workkeys = $keys;

  // Extract a fragment per keyword for at most 4 keywords.
  // First we collect ranges of text around each keyword, starting/ending
  // at spaces.
  // If the sum of all fragments is too short, we look for second occurrences.
  $ranges = array();
  $included = array();
  $length = 0;
  while ($length < 256 && count($workkeys)) {
    foreach ($workkeys as $k => $key) {
      if (strlen($key) == 0) {
        unset($workkeys[$k]);
        unset($keys[$k]);
        continue;
      }
      if ($length >= 256) {
        break;
      }
      // Remember occurrence of key so we can skip over it if more occurrences
      // are desired.
      if (!isset($included[$key])) {
        $included[$key] = 0;
      }
      // Locate a keyword (position $p), then locate a space in front (position
      // $q) and behind it (position $s)
      if (preg_match('/'. $boundary . $key . $boundary .'/iu', $text, $match, PREG_OFFSET_CAPTURE, $included[$key])) {
        $p = $match[0][1];
        if (($q = strpos($text, ' ', max(0, $p - 60))) !== FALSE) {
          $end = substr($text, $p, 80);
          if (($s = strrpos($end, ' ')) !== FALSE) {
            // Ranges are keyed by start offset and hold the end offset.
            $ranges[$q] = $p + $s;
            $length += $p + $s - $q;
            $included[$key] = $p + 1;
          }
          else {
            unset($workkeys[$k]);
          }
        }
        else {
          unset($workkeys[$k]);
        }
      }
      else {
        // No (further) occurrence: stop looking for this keyword.
        unset($workkeys[$k]);
      }
    }
  }

  // If we didn't find anything, return the beginning.
  if (count($ranges) == 0) {
    return truncate_utf8($text, 256) .' ...';
  }

  // Sort the text ranges by starting position.
  ksort($ranges);

  // Now we collapse overlapping text ranges into one. The sorting makes it O(n).
  $newranges = array();
  foreach ($ranges as $from2 => $to2) {
    if (!isset($from1)) {
      $from1 = $from2;
      $to1 = $to2;
      continue;
    }
    if ($from2 <= $to1) {
      $to1 = max($to1, $to2);
    }
    else {
      $newranges[$from1] = $to1;
      $from1 = $from2;
      $to1 = $to2;
    }
  }
  $newranges[$from1] = $to1;

  // Fetch text
  $out = array();
  foreach ($newranges as $from => $to) {
    $out[] = substr($text, $from, $to - $from);
  }
  // A leading ellipsis is added unless the first fragment starts at offset 0.
  $text = (isset($newranges[0]) ? '' : '... '). implode(' ... ', $out) .' ...';

  // Highlight keywords. Must be done at once to prevent conflicts ('strong' and '<strong>').
  $text = preg_replace('/'. $boundary .'('. implode('|', $keys) .')'. $boundary .'/iu', '<strong>\0</strong>', $text);
  return $text;
}

/**
 * @} End of "defgroup search".
 */

/**
 * Helper function for array_walk in search_excerpt.
 *
 * Escapes a keyword in place for use inside a '/'-delimited regular expression.
 */
function _search_excerpt_replace(&$text) {
  $text = preg_quote($text, '/');
}

/**
 * Format a single result entry of a search query. This function is normally
 * called by theme_search_page() or hook_search_page().
 *
 * @param $item
 *   A single search result as returned by hook_search(). The result should be
 *   an array with keys "link", "title", "type", "user", "date", and "snippet".
 *   Optionally, "extra" can be an array of extra info to show along with the
 *   result. Apart from "link" and "title", missing or empty entries are
 *   simply left out of the output.
 * @param $type
 *   The type of item found, such as "user" or "node".
 *
 * @ingroup themeable
 */
function theme_search_item($item, $type) {
  $output = ' <dt class="title"><a href="'. check_url($item['link']) .'">'. check_plain($item['title']) .'</a></dt>';
  $info = array();
  // !empty() gives the same result as the plain truth test for present keys,
  // and does not raise a notice for absent ones.
  if (!empty($item['type'])) {
    $info[] = check_plain($item['type']);
  }
  if (!empty($item['user'])) {
    $info[] = $item['user'];
  }
  if (!empty($item['date'])) {
    $info[] = format_date($item['date'], 'small');
  }
  if (isset($item['extra']) && is_array($item['extra'])) {
    $info = array_merge($info, $item['extra']);
  }
  $output .= ' <dd>'. (!empty($item['snippet']) ? '<p>'. $item['snippet'] .'</p>' : '') .'<p class="search-info">'. implode(' - ', $info) .'</p></dd>';
  return $output;
}

/**
 * Format the result page of a search query.
 *
 * Modules may implement hook_search_page() in order to override this default
 * function to display search results. In that case it is expected they provide
 * their own themeable functions.
 *
 * @param $results
 *   All search result as returned by hook_search().
 * @param $type
 *   The type of item found, such as "user" or "node".
 *
 * @ingroup themeable
 */
function theme_search_page($results, $type) {
  $output = '<dl class="search-results">';

  foreach ($results as $entry) {
    $output .= theme('search_item', $entry, $type);
  }
  $output .= '</dl>';
  $output .= theme('pager', NULL, 10, 0);

  return $output;
}

/**
 * Maps the theme and block search form IDs to the shared search_box() builder.
 *
 * @return
 *   An array keyed by form ID, each entry naming search_box as the callback
 *   and passing the form ID as its argument.
 */
function search_forms() {
  $forms = array();
  $forms['search_theme_form'] = array(
    'callback' => 'search_box',
    'callback arguments' => array('search_theme_form'),
  );
  $forms['search_block_form'] = array(
    'callback' => 'search_box',
    'callback arguments' => array('search_block_form'),
  );
  return $forms;
}
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Fri Nov 30 16:20:15 2007 | par Balluche grâce à PHPXref 0.7 |
![]() |