[ Index ]
 

Code source de Drupal 5.3

Accédez au Source d'autres logiciels libres

Classes | Fonctions | Variables | Constantes | Tables

title

Body

[fermer]

/modules/search/ -> search.module (source)

   1  <?php
   2  // $Id: search.module,v 1.209.2.5 2007/07/26 19:16:48 drumm Exp $
   3  
   4  /**
   5   * @file
   6   * Enables site-wide keyword searching.
   7   */
   8  
   9  /**
  10   * Matches Unicode character classes to exclude from the search index.
  11   *
  12   * See: http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values
  13   *
  14   * The index only contains the following character classes:
  15   * Lu     Letter, Uppercase
  16   * Ll     Letter, Lowercase
  17   * Lt     Letter, Titlecase
  18   * Lo     Letter, Other
  19   * Nd     Number, Decimal Digit
  20   * No     Number, Other
  21   */
  22  define('PREG_CLASS_SEARCH_EXCLUDE',
  23  '\x{0}-\x{2f}\x{3a}-\x{40}\x{5b}-\x{60}\x{7b}-\x{bf}\x{d7}\x{f7}\x{2b0}-'.
  24  '\x{385}\x{387}\x{3f6}\x{482}-\x{489}\x{559}-\x{55f}\x{589}-\x{5c7}\x{5f3}-'.
  25  '\x{61f}\x{640}\x{64b}-\x{65e}\x{66a}-\x{66d}\x{670}\x{6d4}\x{6d6}-\x{6ed}'.
  26  '\x{6fd}\x{6fe}\x{700}-\x{70f}\x{711}\x{730}-\x{74a}\x{7a6}-\x{7b0}\x{901}-'.
  27  '\x{903}\x{93c}\x{93e}-\x{94d}\x{951}-\x{954}\x{962}-\x{965}\x{970}\x{981}-'.
  28  '\x{983}\x{9bc}\x{9be}-\x{9cd}\x{9d7}\x{9e2}\x{9e3}\x{9f2}-\x{a03}\x{a3c}-'.
  29  '\x{a4d}\x{a70}\x{a71}\x{a81}-\x{a83}\x{abc}\x{abe}-\x{acd}\x{ae2}\x{ae3}'.
  30  '\x{af1}-\x{b03}\x{b3c}\x{b3e}-\x{b57}\x{b70}\x{b82}\x{bbe}-\x{bd7}\x{bf0}-'.
  31  '\x{c03}\x{c3e}-\x{c56}\x{c82}\x{c83}\x{cbc}\x{cbe}-\x{cd6}\x{d02}\x{d03}'.
  32  '\x{d3e}-\x{d57}\x{d82}\x{d83}\x{dca}-\x{df4}\x{e31}\x{e34}-\x{e3f}\x{e46}-'.
  33  '\x{e4f}\x{e5a}\x{e5b}\x{eb1}\x{eb4}-\x{ebc}\x{ec6}-\x{ecd}\x{f01}-\x{f1f}'.
  34  '\x{f2a}-\x{f3f}\x{f71}-\x{f87}\x{f90}-\x{fd1}\x{102c}-\x{1039}\x{104a}-'.
  35  '\x{104f}\x{1056}-\x{1059}\x{10fb}\x{10fc}\x{135f}-\x{137c}\x{1390}-\x{1399}'.
  36  '\x{166d}\x{166e}\x{1680}\x{169b}\x{169c}\x{16eb}-\x{16f0}\x{1712}-\x{1714}'.
  37  '\x{1732}-\x{1736}\x{1752}\x{1753}\x{1772}\x{1773}\x{17b4}-\x{17db}\x{17dd}'.
  38  '\x{17f0}-\x{180e}\x{1843}\x{18a9}\x{1920}-\x{1945}\x{19b0}-\x{19c0}\x{19c8}'.
  39  '\x{19c9}\x{19de}-\x{19ff}\x{1a17}-\x{1a1f}\x{1d2c}-\x{1d61}\x{1d78}\x{1d9b}-'.
  40  '\x{1dc3}\x{1fbd}\x{1fbf}-\x{1fc1}\x{1fcd}-\x{1fcf}\x{1fdd}-\x{1fdf}\x{1fed}-'.
  41  '\x{1fef}\x{1ffd}-\x{2070}\x{2074}-\x{207e}\x{2080}-\x{2101}\x{2103}-\x{2106}'.
  42  '\x{2108}\x{2109}\x{2114}\x{2116}-\x{2118}\x{211e}-\x{2123}\x{2125}\x{2127}'.
  43  '\x{2129}\x{212e}\x{2132}\x{213a}\x{213b}\x{2140}-\x{2144}\x{214a}-\x{2b13}'.
  44  '\x{2ce5}-\x{2cff}\x{2d6f}\x{2e00}-\x{3005}\x{3007}-\x{303b}\x{303d}-\x{303f}'.
  45  '\x{3099}-\x{309e}\x{30a0}\x{30fb}-\x{30fe}\x{3190}-\x{319f}\x{31c0}-\x{31cf}'.
  46  '\x{3200}-\x{33ff}\x{4dc0}-\x{4dff}\x{a015}\x{a490}-\x{a716}\x{a802}\x{a806}'.
  47  '\x{a80b}\x{a823}-\x{a82b}\x{d800}-\x{f8ff}\x{fb1e}\x{fb29}\x{fd3e}\x{fd3f}'.
  48  '\x{fdfc}-\x{fe6b}\x{feff}-\x{ff0f}\x{ff1a}-\x{ff20}\x{ff3b}-\x{ff40}\x{ff5b}-'.
  49  '\x{ff65}\x{ff70}\x{ff9e}\x{ff9f}\x{ffe0}-\x{fffd}');
  50  
  51  /**
  52   * Matches all 'N' Unicode character classes (numbers)
  53   */
  54  define('PREG_CLASS_NUMBERS',
  55  '\x{30}-\x{39}\x{b2}\x{b3}\x{b9}\x{bc}-\x{be}\x{660}-\x{669}\x{6f0}-\x{6f9}'.
  56  '\x{966}-\x{96f}\x{9e6}-\x{9ef}\x{9f4}-\x{9f9}\x{a66}-\x{a6f}\x{ae6}-\x{aef}'.
  57  '\x{b66}-\x{b6f}\x{be7}-\x{bf2}\x{c66}-\x{c6f}\x{ce6}-\x{cef}\x{d66}-\x{d6f}'.
  58  '\x{e50}-\x{e59}\x{ed0}-\x{ed9}\x{f20}-\x{f33}\x{1040}-\x{1049}\x{1369}-'.
  59  '\x{137c}\x{16ee}-\x{16f0}\x{17e0}-\x{17e9}\x{17f0}-\x{17f9}\x{1810}-\x{1819}'.
  60  '\x{1946}-\x{194f}\x{2070}\x{2074}-\x{2079}\x{2080}-\x{2089}\x{2153}-\x{2183}'.
  61  '\x{2460}-\x{249b}\x{24ea}-\x{24ff}\x{2776}-\x{2793}\x{3007}\x{3021}-\x{3029}'.
  62  '\x{3038}-\x{303a}\x{3192}-\x{3195}\x{3220}-\x{3229}\x{3251}-\x{325f}\x{3280}-'.
  63  '\x{3289}\x{32b1}-\x{32bf}\x{ff10}-\x{ff19}');
  64  
  65  /**
  66   * Matches all 'P' Unicode character classes (punctuation)
  67   */
  68  define('PREG_CLASS_PUNCTUATION',
  69  '\x{21}-\x{23}\x{25}-\x{2a}\x{2c}-\x{2f}\x{3a}\x{3b}\x{3f}\x{40}\x{5b}-\x{5d}'.
  70  '\x{5f}\x{7b}\x{7d}\x{a1}\x{ab}\x{b7}\x{bb}\x{bf}\x{37e}\x{387}\x{55a}-\x{55f}'.
  71  '\x{589}\x{58a}\x{5be}\x{5c0}\x{5c3}\x{5f3}\x{5f4}\x{60c}\x{60d}\x{61b}\x{61f}'.
  72  '\x{66a}-\x{66d}\x{6d4}\x{700}-\x{70d}\x{964}\x{965}\x{970}\x{df4}\x{e4f}'.
  73  '\x{e5a}\x{e5b}\x{f04}-\x{f12}\x{f3a}-\x{f3d}\x{f85}\x{104a}-\x{104f}\x{10fb}'.
  74  '\x{1361}-\x{1368}\x{166d}\x{166e}\x{169b}\x{169c}\x{16eb}-\x{16ed}\x{1735}'.
  75  '\x{1736}\x{17d4}-\x{17d6}\x{17d8}-\x{17da}\x{1800}-\x{180a}\x{1944}\x{1945}'.
  76  '\x{2010}-\x{2027}\x{2030}-\x{2043}\x{2045}-\x{2051}\x{2053}\x{2054}\x{2057}'.
  77  '\x{207d}\x{207e}\x{208d}\x{208e}\x{2329}\x{232a}\x{23b4}-\x{23b6}\x{2768}-'.
  78  '\x{2775}\x{27e6}-\x{27eb}\x{2983}-\x{2998}\x{29d8}-\x{29db}\x{29fc}\x{29fd}'.
  79  '\x{3001}-\x{3003}\x{3008}-\x{3011}\x{3014}-\x{301f}\x{3030}\x{303d}\x{30a0}'.
  80  '\x{30fb}\x{fd3e}\x{fd3f}\x{fe30}-\x{fe52}\x{fe54}-\x{fe61}\x{fe63}\x{fe68}'.
  81  '\x{fe6a}\x{fe6b}\x{ff01}-\x{ff03}\x{ff05}-\x{ff0a}\x{ff0c}-\x{ff0f}\x{ff1a}'.
  82  '\x{ff1b}\x{ff1f}\x{ff20}\x{ff3b}-\x{ff3d}\x{ff3f}\x{ff5b}\x{ff5d}\x{ff5f}-'.
  83  '\x{ff65}');
  84  
  85  /**
  86   * Matches all CJK characters that are candidates for auto-splitting
  87   * (Chinese, Japanese, Korean).
  88   * Contains kana and BMP ideographs.
  89   */
  90  define('PREG_CLASS_CJK', '\x{3041}-\x{30ff}\x{31f0}-\x{31ff}\x{3400}-\x{4db5}'.
  91  '\x{4e00}-\x{9fbb}\x{f900}-\x{fad9}');
  92  
  93  /**
  94   * Implementation of hook_help().
  95   */
  96  function search_help($section) {
  97    switch ($section) {
  98      case 'admin/help#search':
  99        $output = '<p>'. t('The search module adds the ability to search for content by keywords. Search is often the only practical way to find content on a large site. Search is useful for finding users and posts by searching on keywords.') .'</p>';
 100        $output .= '<p>'. t('The search engine works by maintaining an index of the words in your site\'s content. It indexes the posts and users. You can adjust the settings to tweak the indexing behaviour. Note that the search requires cron to be set up correctly. The index percentage sets the maximum amount of items that will be indexed in one cron run. Set this number lower if your cron is timing out or if PHP is running out of memory.') .'</p>';
 101        $output .= '<p>'. t('For more information please read the configuration and customization handbook <a href="@search">Search page</a>.', array('@search' => 'http://drupal.org/handbook/modules/search/')) .'</p>';
 102        return $output;
 103      case 'admin/settings/search':
 104        return '<p>'. t('The search engine works by maintaining an index of the words in your site\'s content. You can adjust the settings below to tweak the indexing behaviour. Note that the search requires cron to be set up correctly.') .'</p>';
 105      case 'search#noresults':
 106        return t('<ul>
 107  <li>Check if your spelling is correct.</li>
 108  <li>Remove quotes around phrases to match each word individually: <em>"blue smurf"</em> will match less than <em>blue smurf</em>.</li>
 109  <li>Consider loosening your query with <em>OR</em>: <em>blue smurf</em> will match less than <em>blue OR smurf</em>.</li>
 110  </ul>');
 111    }
 112  }
 113  
 114  /**
 115   * Implementation of hook_perm().
 116   */
 117  function search_perm() {
 118    return array('search content', 'use advanced search', 'administer search');
 119  }
 120  
 121  /**
 122   * Implementation of hook_block().
 123   */
 124  function search_block($op = 'list', $delta = 0) {
 125    if ($op == 'list') {
 126      $blocks[0]['info'] = t('Search form');
 127      return $blocks;
 128    }
 129    else if ($op == 'view' && user_access('search content')) {
 130      $block['content'] = drupal_get_form('search_block_form');
 131      $block['subject'] = t('Search');
 132      return $block;
 133    }
 134  }
 135  
 136  /**
 137   * Implementation of hook_menu().
 138   */
 139  function search_menu($may_cache) {
 140    $items = array();
 141  
 142    if ($may_cache) {
 143      $items[] = array('path' => 'search',
 144        'title' => t('Search'),
 145        'callback' => 'search_view',
 146        'access' => user_access('search content'),
 147        'type' => MENU_SUGGESTED_ITEM);
 148      $items[] = array('path' => 'admin/settings/search',
 149        'title' => t('Search settings'),
 150        'description' => t('Configure relevance settings for search and other indexing options'),
 151        'callback' => 'drupal_get_form',
 152        'callback arguments' => array('search_admin_settings'),
 153        'access' => user_access('administer search'),
 154        'type' => MENU_NORMAL_ITEM);
 155      $items[] = array('path' => 'admin/settings/search/wipe',
 156        'title' => t('Clear index'),
 157        'callback' => 'drupal_get_form',
 158        'callback arguments' => array('search_wipe_confirm'),
 159        'access' => user_access('administer search'),
 160        'type' => MENU_CALLBACK);
 161      $items[] = array('path' => 'admin/logs/search', 'title' => t('Top search phrases'),
 162        'description' => t('View most popular search phrases.'),
 163        'callback' => 'watchdog_top',
 164        'callback arguments' => array('search'));
 165    }
 166    else if (arg(0) == 'search') {
 167      // To remember the user's search keywords when switching across tabs,
 168      // we dynamically add the keywords to the search tabs' paths.
 169      $keys = search_get_keys();
 170      $keys = strlen($keys) ? '/'. $keys : '';
 171      foreach (module_list() as $name) {
 172        if (module_hook($name, 'search') && $title = module_invoke($name, 'search', 'name')) {
 173          $items[] = array('path' => 'search/'. $name . $keys, 'title' => $title,
 174            'callback' => 'search_view',
 175            'access' => user_access('search content'),
 176            'type' => MENU_LOCAL_TASK);
 177        }
 178      }
 179    }
 180  
 181    return $items;
 182  }
 183  
 184  /**
 185   * Validate callback.
 186   */
 187  function search_admin_settings_validate($form_id, $form_values) {
 188    if ($form_values['op'] == t('Re-index site')) {
 189      drupal_goto('admin/settings/search/wipe');
 190    }
 191    // If these settings change, the index needs to be rebuilt.
 192    if ((variable_get('minimum_word_size', 3) != $form_values['minimum_word_size']) ||
 193        (variable_get('overlap_cjk', TRUE) != $form_values['overlap_cjk'])) {
 194      drupal_set_message(t('The index will be rebuilt.'));
 195      search_wipe();
 196    }
 197  }
 198  
 199  /**
 200   * Menu callback; displays the search module settings page.
 201   */
 202  function search_admin_settings() {
 203    // Collect some stats
 204    $remaining = 0;
 205    $total = 0;
 206    foreach (module_list() as $module) {
 207      if (module_hook($module, 'search')) {
 208        $status = module_invoke($module, 'search', 'status');
 209        $remaining += $status['remaining'];
 210        $total += $status['total'];
 211      }
 212    }
 213    $count = format_plural($remaining, 'There is 1 item left to index.', 'There are @count items left to index.');
 214    $percentage = ((int)min(100, 100 * ($total - $remaining) / max(1, $total))) .'%';
 215    $status = '<p><strong>'. t('%percentage of the site has been indexed.', array('%percentage' => $percentage)) .' '. $count .'</strong></p>';
 216    $form['status'] = array('#type' => 'fieldset', '#title' => t('Indexing status'));
 217    $form['status']['status'] = array('#value' => $status);
 218    $form['status']['wipe'] = array('#type' => 'submit', '#value' => t('Re-index site'));
 219  
 220    $items = drupal_map_assoc(array(10, 20, 50, 100, 200, 500));
 221  
 222    // Indexing throttle:
 223    $form['indexing_throttle'] = array('#type' => 'fieldset', '#title' => t('Indexing throttle'));
 224    $form['indexing_throttle']['search_cron_limit'] = array('#type' => 'select', '#title' => t('Items to index per cron run'), '#default_value' => variable_get('search_cron_limit', 100), '#options' => $items, '#description' => t('The maximum amount of items that will be indexed in one cron run. Set this number lower if your cron is timing out or if PHP is running out of memory.'));
 225    // Indexing settings:
 226    $form['indexing_settings'] = array('#type' => 'fieldset', '#title' => t('Indexing settings'));
 227    $form['indexing_settings']['info'] = array('#value' => '<em>'. t('<p>Changing the settings below will cause the site index to be rebuilt. The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed.</p><p>The default settings should be appropriate for the majority of sites.</p>') .'</em>');
 228    $form['indexing_settings']['minimum_word_size'] = array('#type' => 'textfield', '#title' => t('Minimum word length to index'), '#default_value' => variable_get('minimum_word_size', 3), '#size' => 5, '#maxlength' => 3, '#description' => t('The number of characters a word has to be to be indexed. A lower setting means better search result ranking, but also a larger database. Each search query must contain at least one keyword that is this size (or longer).'));
 229    $form['indexing_settings']['overlap_cjk'] = array('#type' => 'checkbox', '#title' => t('Simple CJK handling'), '#default_value' => variable_get('overlap_cjk', TRUE), '#description' => t('Whether to apply a simple Chinese/Japanese/Korean tokenizer based on overlapping sequences. Turn this off if you want to use an external preprocessor for this instead. Does not affect other languages.'));
 230  
 231    // Per module settings
 232    $form = array_merge($form, module_invoke_all('search', 'admin'));
 233  
 234    return system_settings_form($form);
 235  }
 236  
 237  /**
 238   * Menu callback: confirm wiping of the index.
 239   */
 240  function search_wipe_confirm() {
 241    return confirm_form(array(), t('Are you sure you want to re-index the site?'),
 242                    'admin/settings/search', t(' The search index is not cleared but systematically updated to reflect the new settings. Searching will continue to work but new content won\'t be indexed until all existing content has been re-indexed. This action cannot be undone.'), t('Re-index site'), t('Cancel'));
 243  }
 244  
 245  /**
 246   * Handler for wipe confirmation
 247   */
 248  function search_wipe_confirm_submit($form_id, &$form) {
 249    if ($form['confirm']) {
 250      search_wipe();
 251      drupal_set_message(t('The index will be rebuilt.'));
 252      return 'admin/settings/search';
 253    }
 254  }
 255  
 256  /**
 257   * Wipes a part of or the entire search index.
 258   *
 259   * @param $sid
 260   *  (optional) The SID of the item to wipe. If specified, $type must be passed
 261   *  too.
 262   * @param $type
 263   *  (optional) The type of item to wipe.
 264   */
 265  function search_wipe($sid = NULL, $type = NULL, $reindex = FALSE) {
 266    if ($type == NULL && $sid == NULL) {
 267      module_invoke_all('search', 'reset');
 268    }
 269    else {
 270      db_query("DELETE FROM {search_dataset} WHERE sid = %d AND type = '%s'", $sid, $type);
 271      db_query("DELETE FROM {search_index} WHERE fromsid = %d AND fromtype = '%s'", $sid, $type);
 272      // When re-indexing, keep link references
 273      db_query("DELETE FROM {search_index} WHERE sid = %d AND type = '%s'". ($reindex ? " AND fromsid = 0" : ''), $sid, $type);
 274    }
 275  }
 276  
 277  /**
 278   * Marks a word as dirty (or retrieves the list of dirty words). This is used
 279   * during indexing (cron). Words which are dirty have outdated total counts in
 280   * the search_total table, and need to be recounted.
 281   */
 282  function search_dirty($word = NULL) {
 283    static $dirty = array();
 284    if ($word !== NULL) {
 285      $dirty[$word] = TRUE;
 286    }
 287    else {
 288      return $dirty;
 289    }
 290  }
 291  
 292  /**
 293   * Implementation of hook_cron().
 294   *
 295   * Fires hook_update_index() in all modules and cleans up dirty words (see
 296   * search_dirty).
 297   */
 298  function search_cron() {
 299    // We register a shutdown function to ensure that search_total is always up
 300    // to date.
 301    register_shutdown_function('search_update_totals');
 302  
 303    // Update word index
 304    foreach (module_list() as $module) {
 305      module_invoke($module, 'update_index');
 306    }
 307  }
 308  
 309  /**
 310   * This function is called on shutdown to ensure that search_total is always
 311   * up to date (even if cron times out or otherwise fails).
 312   */
 313  function search_update_totals() {
 314    // Update word IDF (Inverse Document Frequency) counts for new/changed words
 315    foreach (search_dirty() as $word => $dummy) {
 316      // Get total count
 317      $total = db_result(db_query("SELECT SUM(score) FROM {search_index} WHERE word = '%s'", $word));
 318      // Apply Zipf's law to equalize the probability distribution
 319      $total = log10(1 + 1/(max(1, $total)));
 320      db_query("UPDATE {search_total} SET count = %f WHERE word = '%s'", $total, $word);
 321      if (!db_affected_rows()) {
 322        db_query("INSERT INTO {search_total} (word, count) VALUES ('%s', %f)", $word, $total);
 323      }
 324    }
 325    // Find words that were deleted from search_index, but are still in
 326    // search_total. We use a LEFT JOIN between the two tables and keep only the
 327    // rows which fail to join.
 328    $result = db_query("SELECT t.word AS realword, i.word FROM {search_total} t LEFT JOIN {search_index} i ON t.word = i.word WHERE i.word IS NULL");
 329    while ($word = db_fetch_object($result)) {
 330      db_query("DELETE FROM {search_total} WHERE word = '%s'", $word->realword);
 331    }
 332  }
 333  
 334  /**
 335   * Simplifies a string according to indexing rules.
 336   */
 337  function search_simplify($text) {
 338    // Decode entities to UTF-8
 339    $text = decode_entities($text);
 340  
 341    // Lowercase
 342    $text = drupal_strtolower($text);
 343  
 344    // Call an external processor for word handling.
 345    search_preprocess($text);
 346  
 347    // Simple CJK handling
 348    if (variable_get('overlap_cjk', TRUE)) {
 349      $text = preg_replace_callback('/['. PREG_CLASS_CJK .']+/u', 'search_expand_cjk', $text);
 350    }
 351  
 352    // To improve searching for numerical data such as dates, IP addresses
 353    // or version numbers, we consider a group of numerical characters
 354    // separated only by punctuation characters to be one piece.
 355    // This also means that searching for e.g. '20/03/1984' also returns
 356    // results with '20-03-1984' in them.
 357    // Readable regexp: ([number]+)[punctuation]+(?=[number])
 358    $text = preg_replace('/(['. PREG_CLASS_NUMBERS .']+)['. PREG_CLASS_PUNCTUATION .']+(?=['. PREG_CLASS_NUMBERS .'])/u', '\1', $text);
 359  
 360    // The dot, underscore and dash are simply removed. This allows meaningful
 361    // search behaviour with acronyms and URLs.
 362    $text = preg_replace('/[._-]+/', '', $text);
 363  
 364    // With the exception of the rules above, we consider all punctuation,
 365    // marks, spacers, etc, to be a word boundary.
 366    $text = preg_replace('/['. PREG_CLASS_SEARCH_EXCLUDE .']+/u', ' ', $text);
 367  
 368    return $text;
 369  }
 370  
 371  /**
 372   * Basic CJK tokenizer. Simply splits a string into consecutive, overlapping
 373   * sequences of characters ('minimum_word_size' long).
 374   */
 375  function search_expand_cjk($matches) {
 376    $min = variable_get('minimum_word_size', 3);
 377    $str = $matches[0];
 378    $l = drupal_strlen($str);
 379    // Passthrough short words
 380    if ($l <= $min) {
 381      return ' '. $str .' ';
 382    }
 383    $tokens = ' ';
 384    // FIFO queue of characters
 385    $chars = array();
 386    // Begin loop
 387    for ($i = 0; $i < $l; ++$i) {
 388      // Grab next character
 389      $current = drupal_substr($str, 0, 1);
 390      $str = substr($str, strlen($current));
 391      $chars[] = $current;
 392      if ($i >= $min - 1) {
 393        $tokens .= implode('', $chars) .' ';
 394        array_shift($chars);
 395      }
 396    }
 397    return $tokens;
 398  }
 399  
 400  /**
 401   * Splits a string into tokens for indexing.
 402   */
 403  function search_index_split($text) {
 404    static $last = NULL;
 405    static $lastsplit = NULL;
 406  
 407    if ($last == $text) {
 408      return $lastsplit;
 409    }
 410    // Process words
 411    $text = search_simplify($text);
 412    $words = explode(' ', $text);
 413    array_walk($words, '_search_index_truncate');
 414  
 415    // Save last keyword result
 416    $last = $text;
 417    $lastsplit = $words;
 418  
 419    return $words;
 420  }
 421  
 422  /**
 423   * Helper function for array_walk in search_index_split.
 424   */
 425  function _search_index_truncate(&$text) {
 426    $text = truncate_utf8($text, 50);
 427  }
 428  
 429  /**
 430   * Invokes hook_search_preprocess() in modules.
 431   */
 432  function search_preprocess(&$text) {
 433    foreach (module_implements('search_preprocess') as $module) {
 434      $text = module_invoke($module, 'search_preprocess', $text);
 435    }
 436  }
 437  
 438  /**
 439   * Update the full-text search index for a particular item.
 440   *
 441   * @param $sid
 442   *   A number identifying this particular item (e.g. node id).
 443   *
 444   * @param $type
 445   *   A string defining this type of item (e.g. 'node')
 446   *
 447   * @param $text
 448   *   The content of this item. Must be a piece of HTML text.
 449   *
 450   * @ingroup search
 451   */
 452  function search_index($sid, $type, $text) {
 453    $minimum_word_size = variable_get('minimum_word_size', 3);
 454  
 455    // Link matching
 456    global $base_url;
 457    $node_regexp = '@href=[\'"]?(?:'. preg_quote($base_url, '@') .'/|'. preg_quote(base_path(), '@') .')(?:\?q=)?/?((?![a-z]+:)[^\'">]+)[\'">]@i';
 458  
 459    // Multipliers for scores of words inside certain HTML tags.
 460    // Note: 'a' must be included for link ranking to work.
 461    $tags = array('h1' => 25,
 462                  'h2' => 18,
 463                  'h3' => 15,
 464                  'h4' => 12,
 465                  'h5' => 9,
 466                  'h6' => 6,
 467                  'u' => 3,
 468                  'b' => 3,
 469                  'i' => 3,
 470                  'strong' => 3,
 471                  'em' => 3,
 472                  'a' => 10);
 473  
 474    // Strip off all ignored tags to speed up processing, but insert space before/after
 475    // them to keep word boundaries.
 476    $text = str_replace(array('<', '>'), array(' <', '> '), $text);
 477    $text = strip_tags($text, '<'. implode('><', array_keys($tags)) .'>');
 478  
 479    // Split HTML tags from plain text.
 480    $split = preg_split('/\s*<([^>]+?)>\s*/', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 481    // Note: PHP ensures the array consists of alternating delimiters and literals
 482    // and begins and ends with a literal (inserting $null as required).
 483  
 484    $tag = FALSE; // Odd/even counter. Tag or no tag.
 485    $link = FALSE; // State variable for link analyser
 486    $score = 1; // Starting score per word
 487    $accum = ' '; // Accumulator for cleaned up data
 488    $tagstack = array(); // Stack with open tags
 489    $tagwords = 0; // Counter for consecutive words
 490    $focus = 1; // Focus state
 491  
 492    $results = array(0 => array()); // Accumulator for words for index
 493  
 494    foreach ($split as $value) {
 495      if ($tag) {
 496        // Increase or decrease score per word based on tag
 497        list($tagname) = explode(' ', $value, 2);
 498        $tagname = drupal_strtolower($tagname);
 499        // Closing or opening tag?
 500        if ($tagname[0] == '/') {
 501          $tagname = substr($tagname, 1);
 502          // If we encounter unexpected tags, reset score to avoid incorrect boosting.
 503          if (!count($tagstack) || $tagstack[0] != $tagname) {
 504            $tagstack = array();
 505            $score = 1;
 506          }
 507          else {
 508            // Remove from tag stack and decrement score
 509            $score = max(1, $score - $tags[array_shift($tagstack)]);
 510          }
 511          if ($tagname == 'a') {
 512            $link = FALSE;
 513          }
 514        }
 515        else {
 516          if ($tagstack[0] == $tagname) {
 517            // None of the tags we look for make sense when nested identically.
 518            // If they are, it's probably broken HTML.
 519            $tagstack = array();
 520            $score = 1;
 521          }
 522          else {
 523            // Add to open tag stack and increment score
 524            array_unshift($tagstack, $tagname);
 525            $score += $tags[$tagname];
 526          }
 527          if ($tagname == 'a') {
 528            // Check if link points to a node on this site
 529            if (preg_match($node_regexp, $value, $match)) {
 530              $path = drupal_get_normal_path($match[1]);
 531              if (preg_match('!(?:node|book)/(?:view/)?([0-9]+)!i', $path, $match)) {
 532                $linknid = $match[1];
 533                if ($linknid > 0) {
 534                  // Note: ignore links to uncachable nodes to avoid redirect bugs.
 535                  $node = db_fetch_object(db_query('SELECT n.title, n.nid, n.vid, r.format FROM {node} n INNER JOIN {node_revisions} r ON n.vid = r.vid WHERE n.nid = %d', $linknid));
 536                  if (filter_format_allowcache($node->format)) {
 537                    $link = TRUE;
 538                    $linktitle = $node->title;
 539                  }
 540                }
 541              }
 542            }
 543          }
 544        }
 545        // A tag change occurred, reset counter.
 546        $tagwords = 0;
 547      }
 548      else {
 549        // Note: use of PREG_SPLIT_DELIM_CAPTURE above will introduce empty values
 550        if ($value != '') {
 551          if ($link) {
 552            // Check to see if the node link text is its URL. If so, we use the target node title instead.
 553            if (preg_match('!^https?://!i', $value)) {
 554              $value = $linktitle;
 555            }
 556          }
 557          $words = search_index_split($value);
 558          foreach ($words as $word) {
 559            // Add word to accumulator
 560            $accum .= $word .' ';
 561            $num = is_numeric($word);
 562            // Check wordlength
 563            if ($num || drupal_strlen($word) >= $minimum_word_size) {
 564              // Normalize numbers
 565              if ($num) {
 566                $word = (int)ltrim($word, '-0');
 567              }
 568  
 569              if ($link) {
 570                if (!isset($results[$linknid])) {
 571                  $results[$linknid] = array();
 572                }
 573                $results[$linknid][$word] += $score * $focus;
 574              }
 575              else {
 576                $results[0][$word] += $score * $focus;
 577                // Focus is a decaying value in terms of the amount of unique words up to this point.
 578                // From 100 words and more, it decays, to e.g. 0.5 at 500 words and 0.3 at 1000 words.
 579                $focus = min(1, .01 + 3.5 / (2 + count($results[0]) * .015));
 580              }
 581            }
 582            $tagwords++;
 583            // Too many words inside a single tag probably mean a tag was accidentally left open.
 584            if (count($tagstack) && $tagwords >= 15) {
 585              $tagstack = array();
 586              $score = 1;
 587            }
 588          }
 589        }
 590      }
 591      $tag = !$tag;
 592    }
 593  
 594    search_wipe($sid, $type, TRUE);
 595  
 596    // Insert cleaned up data into dataset
 597    db_query("INSERT INTO {search_dataset} (sid, type, data) VALUES (%d, '%s', '%s')", $sid, $type, $accum);
 598  
 599    // Insert results into search index
 600    foreach ($results[0] as $word => $score) {
 601      db_query("INSERT INTO {search_index} (word, sid, type, score) VALUES ('%s', %d, '%s', %f)", $word, $sid, $type, $score);
 602      search_dirty($word);
 603    }
 604    unset($results[0]);
 605  
 606    // Now insert links to nodes
 607    foreach ($results as $nid => $words) {
 608      foreach ($words as $word => $score) {
 609        db_query("INSERT INTO {search_index} (word, sid, type, fromsid, fromtype, score) VALUES ('%s', %d, '%s', %d, '%s', %f)", $word, $nid, 'node', $sid, $type, $score);
 610        search_dirty($word);
 611      }
 612    }
 613  }
 614  
 615  /**
 616   * Extract a module-specific search option from a search query. e.g. 'type:book'
 617   */
 618  function search_query_extract($keys, $option) {
 619    if (preg_match('/(^| )'. $option .':([^ ]*)( |$)/i', $keys, $matches)) {
 620      return $matches[2];
 621    }
 622  }
 623  
 624  /**
 625   * Return a query with the given module-specific search option inserted in.
 626   * e.g. 'type:book'.
 627   */
 628  function search_query_insert($keys, $option, $value = '') {
 629    if (search_query_extract($keys, $option)) {
 630      $keys = trim(preg_replace('/(^| )'. $option .':[^ ]*/i', '', $keys));
 631    }
 632    if ($value != '') {
 633      $keys .= ' '. $option .':'. $value;
 634    }
 635    return $keys;
 636  }
 637  
 638  /**
 639   * Parse a search query into SQL conditions.
 640   *
 641   * We build a query that matches the dataset bodies.
 642   */
 643  function search_parse_query($text) {
 644    $keys = array('positive' => array(), 'negative' => array());
 645  
 646    // Tokenize query string
 647    preg_match_all('/ (-?)("[^"]+"|[^" ]+)/i', ' '. $text, $matches, PREG_SET_ORDER);
 648  
 649    if (count($matches) < 1) {
 650      return NULL;
 651    }
 652  
 653    // Classify tokens
 654    $or = FALSE;
 655    foreach ($matches as $match) {
 656      $phrase = FALSE;
 657      // Strip off phrase quotes
 658      if ($match[2]{0} == '"') {
 659        $match[2] = substr($match[2], 1, -1);
 660        $phrase = TRUE;
 661      }
 662      // Simplify keyword according to indexing rules and external preprocessors
 663      $words = search_simplify($match[2]);
 664      // Re-explode in case simplification added more words, except when matching a phrase
 665      $words = $phrase ? array($words) : preg_split('/ /', $words, -1, PREG_SPLIT_NO_EMPTY);
 666      // Negative matches
 667      if ($match[1] == '-') {
 668        $keys['negative'] = array_merge($keys['negative'], $words);
 669      }
 670      // OR operator: instead of a single keyword, we store an array of all
 671      // OR'd keywords.
 672      elseif ($match[2] == 'OR' && count($keys['positive'])) {
 673        $last = array_pop($keys['positive']);
 674        // Starting a new OR?
 675        if (!is_array($last)) {
 676          $last = array($last);
 677        }
 678        $keys['positive'][] = $last;
 679        $or = TRUE;
 680        continue;
 681      }
 682      // Plain keyword
 683      else {
 684        if ($or) {
 685          // Add to last element (which is an array)
 686          $keys['positive'][count($keys['positive']) - 1] = array_merge($keys['positive'][count($keys['positive']) - 1], $words);
 687        }
 688        else {
 689          $keys['positive'] = array_merge($keys['positive'], $words);
 690        }
 691      }
 692      $or = FALSE;
 693    }
 694  
 695    // Convert keywords into SQL statements.
 696    $query = array();
 697    $query2 = array();
 698    $arguments = array();
 699    $arguments2 = array();
 700    $matches = 0;
 701    // Positive matches
 702    foreach ($keys['positive'] as $key) {
 703      // Group of ORed terms
 704      if (is_array($key) && count($key)) {
 705        $queryor = array();
 706        $any = FALSE;
 707        foreach ($key as $or) {
 708          list($q, $count) = _search_parse_query($or, $arguments2);
 709          $any |= $count;
 710          if ($q) {
 711            $queryor[] = $q;
 712            $arguments[] = $or;
 713          }
 714        }
 715        if (count($queryor)) {
 716          $query[] = '('. implode(' OR ', $queryor) .')';
 717          // A group of OR keywords only needs to match once
 718          $matches += ($any > 0);
 719        }
 720      }
 721      // Single ANDed term
 722      else {
 723        list($q, $count) = _search_parse_query($key, $arguments2);
 724        if ($q) {
 725          $query[] = $q;
 726          $arguments[] = $key;
 727          // Each AND keyword needs to match at least once
 728          $matches += $count;
 729        }
 730      }
 731    }
 732    // Negative matches
 733    foreach ($keys['negative'] as $key) {
 734      list($q) = _search_parse_query($key, $arguments2, TRUE);
 735      if ($q) {
 736        $query[] = $q;
 737        $arguments[] = $key;
 738      }
 739    }
 740    $query = implode(' AND ', $query);
 741  
 742    // Build word-index conditions for the first pass
 743    $query2 = substr(str_repeat("i.word = '%s' OR ", count($arguments2)), 0, -4);
 744  
 745    return array($query, $arguments, $query2, $arguments2, $matches);
 746  }
 747  
 748  /**
 749   * Helper function for search_parse_query();
 750   */
 751  function _search_parse_query(&$word, &$scores, $not = FALSE) {
 752    $count = 0;
 753    // Determine the scorewords of this word/phrase
 754    if (!$not) {
 755      $split = explode(' ', $word);
 756      foreach ($split as $s) {
 757        $num = is_numeric($s);
 758        if ($num || drupal_strlen($s) >= variable_get('minimum_word_size', 3)) {
 759          $s = $num ? ((int)ltrim($s, '-0')) : $s;
 760          if (!isset($scores[$s])) {
 761            $scores[$s] = $s;
 762            $count++;
 763          }
 764        }
 765      }
 766    }
 767    // Return matching snippet and number of added words
 768    return array("d.data ". ($not ? 'NOT ' : '') ."LIKE '%% %s %%'", $count);
 769  }
 770  
 771  /**
 772   * Do a query on the full-text search index for a word or words.
 773   *
 774   * This function is normally only called by each module that support the
 775   * indexed search (and thus, implements hook_update_index()).
 776   *
 777   * Two queries are performed which can be extended by the caller.
 778   *
 779   * The first query selects a set of possible matches based on the search index
 780   * and any extra given restrictions. This is the classic "OR" search.
 781   *
 782   * SELECT i.type, i.sid, SUM(i.score*t.count) AS relevance
 783   * FROM {search_index} i
 784   * INNER JOIN {search_total} t ON i.word = t.word
 785   * $join1
 786   * WHERE $where1 AND (...)
 787   * GROUP BY i.type, i.sid
 788   *
 789   * The second query further refines this set by verifying advanced text
 790   * conditions (such as AND, negative or phrase matches), and orders the results
 791   * on a the column or expression 'score':
 792   *
 793   * SELECT i.type, i.sid, $select2
 794   * FROM temp_search_sids i
 795   * INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type
 796   * $join2
 797   * WHERE (...)
 798   * ORDER BY score DESC
 799   *
 800   * @param $keywords
 801   *   A search string as entered by the user.
 802   *
 803   * @param $type
 804   *   A string identifying the calling module.
 805   *
 806   * @param $join1
 807   *   (optional) Inserted into the JOIN part of the first SQL query.
 808   *   For example "INNER JOIN {node} n ON n.nid = i.sid".
 809   *
 810   * @param $where1
 811   *   (optional) Inserted into the WHERE part of the first SQL query.
 812   *   For example "(n.status > %d)".
 813   *
 814   * @param $arguments1
 815   *   (optional) Extra SQL arguments belonging to the first query.
 816   *
 817   * @param $select2
 818   *   (optional) Inserted into the SELECT pat of the second query. Must contain
 819   *   a column selected as 'score'.
 820   *   defaults to 'i.relevance AS score'
 821   *
 822   * @param $join2
 823   *   (optional) Inserted into the JOIN par of the second SQL query.
 824   *   For example "INNER JOIN {node_comment_statistics} n ON n.nid = i.sid"
 825   *
 826   * @param $arguments2
 827   *   (optional) Extra SQL arguments belonging to the second query parameter.
 828   *
 829   * @param $sort_parameters
 830   *   (optional) SQL arguments for sorting the final results.
 831   *              Default: 'ORDER BY score DESC'
 832   *
 833   * @return
 834   *   An array of SIDs for the search results.
 835   *
 836   * @ingroup search
 837   */
 838  function do_search($keywords, $type, $join1 = '', $where1 = '1', $arguments1 = array(), $select2 = 'i.relevance AS score', $join2 = '', $arguments2 = array(), $sort_parameters = 'ORDER BY score DESC') {
 839    $query = search_parse_query($keywords);
 840  
 841    if ($query[2] == '') {
 842      form_set_error('keys', t('You must include at least one positive keyword with @count characters or more.', array('@count' => variable_get('minimum_word_size', 3))));
 843    }
 844    if ($query === NULL || $query[0] == '' || $query[2] == '') {
 845      return array();
 846    }
 847  
 848    // First pass: select all possible matching sids, doing a simple index-based OR matching on the keywords.
 849    // 'matches' is used to reject those items that cannot possibly match the query.
 850    $conditions = $where1 .' AND ('. $query[2] .") AND i.type = '%s'";
 851    $arguments = array_merge($arguments1, $query[3], array($type, $query[4]));
 852    $result = db_query_temporary("SELECT i.type, i.sid, SUM(i.score * t.count) AS relevance, COUNT(*) AS matches FROM {search_index} i INNER JOIN {search_total} t ON i.word = t.word $join1 WHERE $conditions GROUP BY i.type, i.sid HAVING COUNT(*) >= %d", $arguments, 'temp_search_sids');
 853  
 854    // Calculate maximum relevance, to normalize it
 855    $normalize = db_result(db_query('SELECT MAX(relevance) FROM temp_search_sids'));
 856    if (!$normalize) {
 857      return array();
 858    }
 859    $select2 = str_replace('i.relevance', '('. (1.0 / $normalize) .' * i.relevance)', $select2);
 860  
 861    // Second pass: only keep items that match the complicated keywords conditions (phrase search, negative keywords, ...)
 862    $conditions = '('. $query[0] .')';
 863    $arguments = array_merge($arguments2, $query[1]);
 864    $result = db_query_temporary("SELECT i.type, i.sid, $select2 FROM temp_search_sids i INNER JOIN {search_dataset} d ON i.sid = d.sid AND i.type = d.type $join2 WHERE $conditions $sort_parameters", $arguments, 'temp_search_results');
 865    if (($count = db_result(db_query('SELECT COUNT(*) FROM temp_search_results'))) == 0) {
 866      return array();
 867    }
 868    $count_query = "SELECT $count";
 869  
 870    // Do actual search query
 871    $result = pager_query("SELECT * FROM temp_search_results", 10, 0, $count_query);
 872    $results = array();
 873    while ($item = db_fetch_object($result)) {
 874      $results[] = $item;
 875    }
 876    return $results;
 877  }
 878  
 879  /**
 880   * Helper function for grabbing search keys.
 881   */
 882  function search_get_keys() {
 883    // Extract keys as remainder of path
 884    // Note: support old GET format of searches for existing links.
 885    $path = explode('/', $_GET['q'], 3);
 886    return count($path) == 3 ? $path[2] : $_REQUEST['keys'];
 887  }
 888  
 889  /**
 890   * Menu callback; presents the search form and/or search results.
 891   */
 892  function search_view() {
 893    $type = arg(1);
 894  
 895    // Search form submits with POST but redirects to GET. This way we can keep
 896    // the search query URL clean as a whistle:
 897    // search/type/keyword+keyword
 898    if (!isset($_POST['form_id'])) {
 899      if ($type == '') {
 900        // Note: search/node can not be a default tab because it would take on the
 901        // path of its parent (search). It would prevent remembering keywords when
 902        // switching tabs. This is why we drupal_goto to it from the parent instead.
 903        drupal_goto('search/node');
 904      }
 905  
 906      $keys = search_get_keys();
 907      // Only perform search if there is non-whitespace search term:
 908      if (trim($keys)) {
 909        // Log the search keys:
 910        watchdog('search', t('%keys (@type).', array('%keys' => $keys, '@type' => module_invoke($type, 'search', 'name'))), WATCHDOG_NOTICE, l(t('results'), 'search/'. $type .'/'. $keys));
 911  
 912        // Collect the search results:
 913        $results = search_data($keys, $type);
 914  
 915        if ($results) {
 916          $results = theme('box', t('Search results'), $results);
 917        }
 918        else {
 919          $results = theme('box', t('Your search yielded no results'), search_help('search#noresults'));
 920        }
 921      }
 922  
 923      // Construct the search form.
 924      $output = drupal_get_form('search_form', NULL, $keys, $type);
 925      $output .= $results;
 926  
 927      return $output;
 928    }
 929  
 930    return drupal_get_form('search_form', NULL, $keys, $type);
 931  }
 932  
 933  /**
 934   * @defgroup search Search interface
 935   * @{
 936   * The Drupal search interface manages a global search mechanism.
 937   *
 938   * Modules may plug into this system to provide searches of different types of
 939   * data. Most of the system is handled by search.module, so this must be enabled
 940   * for all of the search features to work.
 941   *
 942   * There are three ways to interact with the search system:
 943   * - Specifically for searching nodes, you can implement nodeapi('update index')
 944   *   and nodeapi('search result'). However, note that the search system already
 945   *   indexes all visible output of a node, i.e. everything displayed normally
 946   *   by hook_view() and hook_nodeapi('view'). This is usually sufficient.
 947   *   You should only use this mechanism if you want additional, non-visible data
 948   *   to be indexed.
 949   * - Implement hook_search(). This will create a search tab for your module on
 950   *   the /search page with a simple keyword search form. You may optionally
 951   *   implement hook_search_item() to customize the display of your results.
 952   * - Implement hook_update_index(). This allows your module to use Drupal's
 953   *   HTML indexing mechanism for searching full text efficiently.
 954   *
 955   * If your module needs to provide a more complicated search form, then you need
 956   * to implement it yourself without hook_search(). In that case, you should
 957   * define it as a local task (tab) under the /search page (e.g. /search/mymodule)
 958   * so that users can easily find it.
 959   */
 960  
 961  /**
 962   * Render a search form.
 963   *
 964   * @param $action
 965   *   Form action. Defaults to "search".
 966   * @param $keys
 967   *   The search string entered by the user, containing keywords for the search.
 968   * @param $type
 969   *   The type of search to render the node for. Must be the name of module
 970   *   which implements hook_search(). Defaults to 'node'.
 971   * @param $prompt
 972   *   A piece of text to put before the form (e.g. "Enter your keywords")
 973   * @return
 974   *   An HTML string containing the search form.
 975   */
 976  function search_form($action = '', $keys = '', $type = NULL, $prompt = NULL) {
 977  
 978    // Add CSS
 979    drupal_add_css(drupal_get_path('module', 'search') .'/search.css', 'module', 'all', FALSE);
 980  
 981    if (!$action) {
 982      $action = url('search/'. $type);
 983    }
 984    if (is_null($prompt)) {
 985      $prompt = t('Enter your keywords');
 986    }
 987  
 988    $form = array(
 989      '#action' => $action,
 990      '#attributes' => array('class' => 'search-form'),
 991    );
 992    $form['module'] = array('#type' => 'value', '#value' => $type);
 993    $form['basic'] = array('#type' => 'item', '#title' => $prompt);
 994    $form['basic']['inline'] = array('#prefix' => '<div class="container-inline">', '#suffix' => '</div>');
 995    $form['basic']['inline']['keys'] = array(
 996      '#type' => 'textfield',
 997      '#title' => '',
 998      '#default_value' => $keys,
 999      '#size' => $prompt ? 40 : 20,
1000      '#maxlength' => 255,
1001    );
1002    // processed_keys is used to coordinate keyword passing between other forms
1003    // that hook into the basic search form.
1004    $form['basic']['inline']['processed_keys'] = array('#type' => 'value', '#value' => array());
1005    $form['basic']['inline']['submit'] = array('#type' => 'submit', '#value' => t('Search'));
1006  
1007    return $form;
1008  }
1009  
/**
 * Validation handler of the basic search form.
 *
 * As the search form collates keys from other modules hooked in via
 * hook_form_alter, the actual validation takes place in _submit. This handler
 * only stores the trimmed 'keys' value as the 'processed_keys' form value.
 */
function search_form_validate($form_id, $form_values, $form) {
  $trimmed = trim($form_values['keys']);
  $element = $form['basic']['inline']['processed_keys'];
  form_set_value($element, $trimmed);
}
1019  
/**
 * Process a search form submission.
 *
 * @return
 *   The path 'search/<type>/<keys>', where <type> falls back to 'node' when
 *   the form carries no module value.
 */
function search_form_submit($form_id, $form_values) {
  if ($form_values['module']) {
    $type = $form_values['module'];
  }
  else {
    $type = 'node';
  }

  $keys = $form_values['processed_keys'];
  if ($keys == '') {
    // Flag the error but still return the path below.
    form_set_error('keys', t('Please enter some keywords.'));
  }

  return 'search/'. $type .'/'. $keys;
}
1033  
/**
 * Output a search form for the search block and the theme's search box.
 *
 * @param $form_id
 *   The id of the form being built; it prefixes the name of the keyword field.
 * @return
 *   The form definition array.
 */
function search_box($form_id) {
  return array(
    // The field is named <form_id>_keys instead of keys to avoid ID conflicts
    // between the search block and the theme search box.
    $form_id .'_keys' => array(
      '#type' => 'textfield',
      '#size' => 15,
      '#default_value' => '',
      '#attributes' => array('title' => t('Enter the terms you wish to search for.')),
    ),
    'submit' => array('#type' => 'submit', '#value' => t('Search')),
    // Always go to the search page since the search form is not guaranteed to
    // be on every page.
    '#action' => url('search/node'),
    '#base' => 'search_box_form',
  );
}
1053  
/**
 * Process a block search form submission.
 *
 * @return
 *   The path 'search/node/<keys>' built from the trimmed keyword field.
 */
function search_box_form_submit($form_id, $form_values) {
  $field = $form_id .'_keys';
  $keys = trim($form_values[$field]);
  return 'search/node/'. $keys;
}
1060  
/**
 * Theme the theme search form: the rendered form wrapped in div#search.
 */
function theme_search_theme_form($form) {
  $rendered = drupal_render($form);
  return '<div id="search" class="container-inline">'. $rendered .'</div>';
}
1067  
/**
 * Theme the block search form: the rendered form wrapped in an inline container.
 */
function theme_search_block_form($form) {
  $rendered = drupal_render($form);
  return '<div class="container-inline">'. $rendered .'</div>';
}
1074  
/**
 * Perform a standard search on the given keys, and return the formatted results.
 *
 * @param $keys
 *   The search keys; nothing is done when they are not set.
 * @param $type
 *   Name of the module whose hook_search() implementation is invoked.
 * @return
 *   The output of the module's hook_search_page() when it implements one,
 *   otherwise the themed 'search_page'. NULL when the module has no
 *   hook_search() or the search produced no non-empty result array.
 */
function search_data($keys = NULL, $type = 'node') {
  if (!isset($keys) || !module_hook($type, 'search')) {
    return;
  }

  $results = module_invoke($type, 'search', 'search', $keys);
  if (!isset($results) || !is_array($results) || !count($results)) {
    return;
  }

  // Let the module format its own results when it wants to.
  if (module_hook($type, 'search_page')) {
    return module_invoke($type, 'search_page', $results);
  }
  return theme('search_page', $results, $type);
}
1094  
/**
 * Returns snippets from a piece of text, with certain keywords highlighted.
 * Used for formatting search results.
 *
 * @param $keys
 *   A string containing a search query.
 *
 * @param $text
 *   The text to extract fragments from.
 *
 * @return
 *   A string containing HTML for the excerpt. When none of the keywords is
 *   found, the beginning of the tag-stripped text is returned instead.
 */
function search_excerpt($keys, $text) {
  // We highlight around non-indexable or CJK characters.
  $boundary = '(?:(?<=['. PREG_CLASS_SEARCH_EXCLUDE . PREG_CLASS_CJK .'])|(?=['. PREG_CLASS_SEARCH_EXCLUDE . PREG_CLASS_CJK .']))';

  // Extract keywords and phrases. Only a token that is exactly "OR" is
  // skipped: the inner lookahead requires that no further keyword character
  // follows it, so words that merely start with "OR" are kept.
  preg_match_all('/ ("([^"]+)"|(?!OR(?![^" ]))([^" ]+))/', ' '. $keys, $matches);
  // Group 2 holds phrases, group 3 plain keywords; the group that did not
  // take part in a match contributes an empty string, dropped further down.
  $keys = array_merge($matches[2], $matches[3]);

  // Prepare text: pad tags with spaces before stripping them so that words
  // separated only by markup do not run together.
  $text = ' '. strip_tags(str_replace(array('<', '>'), array(' <', '> '), $text)) .' ';
  // Escape the keywords for use inside the regular expressions below.
  array_walk($keys, '_search_excerpt_replace');
  $workkeys = $keys;

  // Extract fragments around the keywords until about 256 bytes are collected.
  // First we collect ranges of text around each keyword, starting/ending
  // at spaces.
  // If the sum of all fragments is too short, we look for second occurrences.
  $ranges = array();
  $included = array();
  $length = 0;
  while ($length < 256 && count($workkeys)) {
    foreach ($workkeys as $k => $key) {
      if (strlen($key) == 0) {
        unset($workkeys[$k]);
        unset($keys[$k]);
        continue;
      }
      if ($length >= 256) {
        break;
      }
      // Remember occurrence of key so we can skip over it if more occurrences
      // are desired.
      if (!isset($included[$key])) {
        $included[$key] = 0;
      }
      // Locate a keyword (position $p), then locate a space in front (position
      // $q) and behind it (position $s)
      if (preg_match('/'. $boundary . $key . $boundary .'/iu', $text, $match, PREG_OFFSET_CAPTURE, $included[$key])) {
        $p = $match[0][1];
        if (($q = strpos($text, ' ', max(0, $p - 60))) !== FALSE) {
          $end = substr($text, $p, 80);
          if (($s = strrpos($end, ' ')) !== FALSE) {
            $ranges[$q] = $p + $s;
            $length += $p + $s - $q;
            // Continue behind this occurrence on the next round.
            $included[$key] = $p + 1;
          }
          else {
            unset($workkeys[$k]);
          }
        }
        else {
          unset($workkeys[$k]);
        }
      }
      else {
        // No (further) occurrence of this keyword.
        unset($workkeys[$k]);
      }
    }
  }

  // If we didn't find anything, return the beginning.
  if (count($ranges) == 0) {
    return truncate_utf8($text, 256) .' ...';
  }

  // Sort the text ranges by starting position.
  ksort($ranges);

  // Now we collapse overlapping text ranges into one. The sorting makes it O(n).
  $newranges = array();
  foreach ($ranges as $from2 => $to2) {
    if (!isset($from1)) {
      $from1 = $from2;
      $to1 = $to2;
      continue;
    }
    if ($from2 <= $to1) {
      $to1 = max($to1, $to2);
    }
    else {
      $newranges[$from1] = $to1;
      $from1 = $from2;
      $to1 = $to2;
    }
  }
  // Store the range that was still open when the loop ended.
  $newranges[$from1] = $to1;

  // Fetch text
  $out = array();
  foreach ($newranges as $from => $to) {
    $out[] = substr($text, $from, $to - $from);
  }
  // A leading ellipsis is only added when the excerpt does not start at the
  // very beginning of the text.
  $text = (isset($newranges[0]) ? '' : '... '). implode(' ... ', $out) .' ...';

  // Highlight keywords. Must be done at once to prevent conflicts ('strong' and '<strong>').
  $text = preg_replace('/'. $boundary .'('. implode('|', $keys) .')'. $boundary .'/iu', '<strong>\0</strong>', $text);
  return $text;
}
1206  
1207  /**
1208   * @} End of "defgroup search".
1209   */
1210  
/**
 * Helper function for array_walk in search_excerpt().
 *
 * Escapes a keyword in place for use in a '/'-delimited regular expression.
 */
function _search_excerpt_replace(&$text) {
  $escaped = preg_quote($text, '/');
  $text = $escaped;
}
1217  
/**
 * Format a single result entry of a search query. This function is normally
 * called by theme_search_page() or hook_search_page().
 *
 * @param $item
 *   A single search result as returned by hook_search(). The result should be
 *   an array with keys "link" and "title"; the keys "type", "user", "date"
 *   and "snippet" are shown when present and non-empty. Optionally, "extra"
 *   can be an array of extra info to show along with the result.
 * @param $type
 *   The type of item found, such as "user" or "node".
 *
 * @return
 *   The HTML of a <dt>/<dd> pair for the result.
 *
 * @ingroup themeable
 */
function theme_search_item($item, $type) {
  $output = ' <dt class="title"><a href="'. check_url($item['link']) .'">'. check_plain($item['title']) .'</a></dt>';
  $info = array();
  // The keys below are optional; !empty() avoids undefined index notices for
  // results that only provide a link and a title.
  if (!empty($item['type'])) {
    $info[] = check_plain($item['type']);
  }
  if (!empty($item['user'])) {
    // NOTE(review): emitted without escaping, so this presumably holds
    // already-sanitized HTML - verify against the hook_search() callers.
    $info[] = $item['user'];
  }
  if (!empty($item['date'])) {
    $info[] = format_date($item['date'], 'small');
  }
  if (isset($item['extra']) && is_array($item['extra'])) {
    $info = array_merge($info, $item['extra']);
  }
  // NOTE(review): the snippet is also emitted as-is (presumably the
  // highlighted HTML excerpt) - confirm it is sanitized upstream.
  $output .= ' <dd>'. (!empty($item['snippet']) ? '<p>'. $item['snippet'] .'</p>' : '') .'<p class="search-info">'. implode(' - ', $info) .'</p></dd>';
  return $output;
}
1250  
/**
 * Format the result page of a search query.
 *
 * Modules may implement hook_search_page() in order to override this default
 * function to display search results. In that case it is expected they provide
 * their own themeable functions.
 *
 * @param $results
 *   All search results as returned by hook_search().
 * @param $type
 *   The type of item found, such as "user" or "node".
 *
 * @return
 *   A definition list holding one themed 'search_item' per result, followed
 *   by the themed pager.
 *
 * @ingroup themeable
 */
function theme_search_page($results, $type) {
  $items = '';
  foreach ($results as $entry) {
    $items .= theme('search_item', $entry, $type);
  }

  return '<dl class="search-results">'. $items .'</dl>'. theme('pager', NULL, 10, 0);
}
1276  
/**
 * Maps the theme and block search form ids to the shared search_box() builder.
 *
 * @return
 *   An array keyed by form id; each entry names 'search_box' as callback and
 *   passes the form id as its only callback argument.
 */
function search_forms() {
  $forms = array();
  foreach (array('search_theme_form', 'search_block_form') as $form_id) {
    $forms[$form_id] = array(
      'callback' => 'search_box',
      'callback arguments' => array($form_id),
    );
  }
  return $forms;
}


Généré le : Fri Nov 30 16:20:15 2007 par Balluche grâce à PHPXref 0.7
  Clicky Web Analytics