DokuWiki 2006-11-06 : /inc/search.php source

[Sommaire] [Imprimer]
   1  <?php
   2  /**
   3   * DokuWiki search functions
   4   *
   5   * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
   6   * @author     Andreas Gohr <andi@splitbrain.org>
   7   */
   8  
   9    if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../').'/');
  10    require_once (DOKU_INC.'inc/common.php');
  11  
  /**
   * Recursively walk a directory tree
   *
   * Reads the directory $base/$dir and hands every subdirectory and every
   * file found there to the supplied callback. A subdirectory is descended
   * into whenever the callback returns a true value for it.
   *
   * Entries whose name starts with a dot or an underscore are ignored, as
   * are files ending in '.lock'. Within one directory the subdirectories
   * are processed first (sorted), then the files (sorted).
   *
   * @param   array ref $data The results of the search are stored here
   * @param   string    $base Where to start the search
   * @param   callback  $func Callback (function name or array with object,method)
   * @param   mixed     $opts Options, handed to the callback untouched
   * @param   string    $dir  Current directory beyond $base
   * @param   int       $lvl  Recursion Level
   * @author  Andreas Gohr <andi@splitbrain.org>
   */
  function search(&$data,$base,$func,$opts,$dir='',$lvl=1){
    // unreadable or missing directories are silently skipped
    $handle = @opendir($base.'/'.$dir);
    if(!$handle) return;

    // collect the entries, split into directories and plain files
    $subdirs = array();
    $plain   = array();
    while(false !== ($entry = readdir($handle))){
      // hidden entries as well as '.' and '..'
      if($entry[0] === '.' || $entry[0] === '_') continue;

      $relative = $dir.'/'.$entry;
      if(is_dir($base.'/'.$relative)){
        $subdirs[] = $relative;
      }elseif(substr($entry,-5) != '.lock'){
        // everything but lockfiles
        $plain[] = $relative;
      }
    }
    closedir($handle);
    sort($plain);
    sort($subdirs);

    // directories first: the callback decides whether to descend
    foreach($subdirs as $sub){
      $descend = search_callback($func,$data,$base,$sub,'d',$lvl,$opts);
      if($descend){
        search($data,$base,$func,$opts,$sub,$lvl+1);
      }
    }

    // then the files of this level
    foreach($plain as $path){
      search_callback($func,$data,$base,$path,'f',$lvl,$opts);
    }
  }
  58  
  /**
   * Used to run a user callback
   *
   * Calls $func with ($data,$base,$file,$type,$lvl,$opts) and returns
   * whatever the callback returns. $data is handed over by reference, so
   * a callback declaring its first parameter as &$data can add results.
   *
   * Every callback form goes through call_user_func_array(): a plain
   * function name, array(object,method) and array(classname,method).
   * The former hand written dispatch used $func[0]->$func[1](...), which
   * PHP 7 and later read as ($func[0]->$func)[1](...), breaking object
   * callbacks.
   *
   * @param  callback  $func Callback to run
   * @param  array ref $data Result store handed to the callback by reference
   * @param  string    $base Base directory of the search
   * @param  string    $file Current file or directory relative to $base
   * @param  string    $type 'd' for directory or 'f' for file
   * @param  int       $lvl  Current recursion level
   * @param  mixed     $opts Options as given to search()
   * @return mixed     The return value of the callback
   *
   * @todo If this can be generalized it may be useful elsewhere in the code
   * @author Andreas Gohr <andi@splitbrain.org>
   */
  function search_callback($func,&$data,$base,$file,$type,$lvl,$opts){
    // a reference stored in the argument array is passed on as a reference
    $args = array(&$data,$base,$file,$type,$lvl,$opts);
    return call_user_func_array($func,$args);
  }
  82  
  83  /**
  84   * The following functions are userfunctions to use with the search
  85   * function above. This function is called for every found file or
  86   * directory. When a directory is given to the function it has to
  87   * decide if this directory should be traversed (true) or not (false)
  88   * The function has to accept the following parameters:
  89   *
  90   * &$data - Reference to the result data structure
  91   * $base  - Base usually $conf['datadir']
  92   * $file  - current file or directory relative to $base
  93   * $type  - Type either 'd' for directory or 'f' for file
  94   * $lvl   - Current recursion depth
  95   * $opts  - option array as given to search()
  96   *
  97   * return values for files are ignored
  98   *
  99   * All functions should check the ACL for document READ rights
 100   * namespaces (directories) are NOT checked as this would break
 101   * the recursion (You can have a nonreadable dir over a readable
 102   * one deeper nested)
 103   */
 104  
 /**
  * Searches for pages beginning with the given query
  *
  * Callback for search(). A file is added to $data when its ID starts
  * with $opts['query'] and the ID passes the READ check. Directories
  * are answered with false, so search() does not descend into them.
  *
  * @author Andreas Gohr <andi@splitbrain.org>
  */
 function search_qsearch(&$data,$base,$file,$type,$lvl,$opts){
   // no handling of directories yet
   if($type == 'd') return false;

   $id      = pathID($file);
   $pattern = '/^'.preg_quote($opts['query'],'/').'/u';

   // the ID has to start with the query and must be readable
   $wanted = preg_match($pattern,$id) && !(auth_quickaclcheck($id) < AUTH_READ);
   if(!$wanted) return false;

   $entry = array();
   $entry['id']    = $id;
   $entry['type']  = $type;
   $entry['level'] = 1;
   $entry['open']  = true;
   $data[] = $entry;

   return true;
 }
 136  
 /**
  * Build the browsable index of pages
  *
  * Callback for search(). Directories are always added; they are only
  * opened (descended into) when they lie on the path to the current
  * namespace. Files are added when they end in .txt, $opts['nofiles']
  * is not set and the page passes the READ check. Hidden pages and
  * namespaces are skipped.
  *
  * $opts['ns']      is the current namespace. It is compared against the
  *                  directory path, so it is presumably expected in path
  *                  form with '/' separators - verify against the caller
  * $opts['nofiles'] if set to a true value no files are added
  *
  * @return bool  for directories: whether search() should descend
  * @author  Andreas Gohr <andi@splitbrain.org>
  */
 function search_index(&$data,$base,$file,$type,$lvl,$opts){
   $return = true;

   if($type == 'd'){
     $ns = isset($opts['ns']) ? $opts['ns'] : '';
     // $file is a literal path - quote it, otherwise characters like
     // '.', '+' or '#' in a directory name change or break the pattern
     if(!preg_match('#^'.preg_quote($file,'#').'(/|$)#','/'.$ns)){
       //add but don't recurse
       $return = false;
     }
   }elseif($type == 'f'){
     if(!empty($opts['nofiles']) || !preg_match('#\.txt$#',$file)){
       //don't add
       return false;
     }
   }

   $id = pathID($file);

   //check hidden
   if(isHiddenPage($id)){
     return false;
   }

   //check ACL (only for files, namespaces are not checked)
   if($type=='f' && auth_quickaclcheck($id) < AUTH_READ){
     return false;
   }

   $data[]=array( 'id'    => $id,
                  'type'  => $type,
                  'level' => $lvl,
                  'open'  => $return );
   return $return;
 }
 175  
 /**
  * List all namespaces
  *
  * Callback for search(). Every directory is recorded with its ID, type
  * and recursion level; files are ignored. Always returns true, so the
  * whole tree is traversed.
  *
  * @author  Andreas Gohr <andi@splitbrain.org>
  */
 function search_namespaces(&$data,$base,$file,$type,$lvl,$opts){
   // nothing to do on files
   if($type != 'f'){
     $entry = array();
     $entry['id']    = pathID($file);
     $entry['type']  = $type;
     $entry['level'] = $lvl;
     $data[] = $entry;
   }
   return true;
 }
 190  
 /**
  * List all mediafiles in a namespace
  *
  * Callback for search(). Directories are answered with false, so only
  * the files of the start directory are listed. For each file whose
  * namespace passes the READ check an entry with the keys id, file,
  * size, writable and isimg is added; files ending in jpg, jpeg, gif or
  * png additionally get a JpegMeta object in 'meta'.
  *
  * @author  Andreas Gohr <andi@splitbrain.org>
  */
 function search_media(&$data,$base,$file,$type,$lvl,$opts){
   // directories are neither listed nor entered
   if($type == 'd') return false;

   $id = pathID($file,true);

   // there is no ACL for mediafiles, so the namespace is checked instead
   if(auth_quickaclcheck(getNS($id).':*') < AUTH_READ) return false;

   $fullpath = $base.'/'.$file;
   $entry = array(
     'id'       => $id,
     'file'     => basename($file),
     'size'     => filesize($fullpath),
     'writable' => is_writable($fullpath),
   );
   $entry['isimg'] = (bool) preg_match("/\.(jpe?g|gif|png)$/",$file);
   if($entry['isimg']){
     require_once (DOKU_INC.'inc/JpegMeta.php');
     $entry['meta'] = new JpegMeta($fullpath);
   }
   $data[] = $entry;

   return false;
 }
 222  
 /**
  * This function just lists documents (for RSS namespace export)
  *
  * Callback for search(). Adds the ID of every readable .txt file to
  * $data. Always returns false, so directories are never entered.
  *
  * @author  Andreas Gohr <andi@splitbrain.org>
  */
 function search_list(&$data,$base,$file,$type,$lvl,$opts){
   // only .txt files are of interest, directories are skipped
   $ispage = ($type != 'd') && preg_match('#\.txt$#',$file);
   if($ispage){
     $id = pathID($file);
     // add the page only if the ACL grants READ
     if(!(auth_quickaclcheck($id) < AUTH_READ)){
       $data[] = array('id' => $id);
     }
   }
   return false;
 }
 241  
 /**
  * Quicksearch for searching matching pagenames
  *
  * Callback for search(). Adds the ID of every readable .txt file whose
  * path contains $opts['query']. The query is compared against the file
  * path as given (including the .txt extension), not against the ID.
  *
  * $opts['query'] is the search query
  *
  * @author  Andreas Gohr <andi@splitbrain.org>
  */
 function search_pagename(&$data,$base,$file,$type,$lvl,$opts){
   // directories are always entered, non-txt files are ignored
   if($type == 'd' || !preg_match('#\.txt$#',$file)) return true;

   // without a query nothing can match
   if(empty($opts['query'])) return true;

   // simple stringmatching
   if(strpos($file,$opts['query']) === false) return true;

   // check ACL
   $id = pathID($file);
   if(auth_quickaclcheck($id) < AUTH_READ) return false;

   $data[] = array('id' => $id);
   return true;
 }
 268  
 /**
  * Just lists all documents
  *
  * Callback for search(). Adds the ID of every .txt file to $data; no
  * ACL check is made here. Always returns true, so the whole tree is
  * traversed.
  *
  * @author  Andreas Gohr <andi@splitbrain.org>
  */
 function search_allpages(&$data,$base,$file,$type,$lvl,$opts){
   // directories carry no page, only .txt files do
   $ispage = ($type != 'd') && preg_match('#\.txt$#',$file);
   if($ispage){
     $data[] = array('id' => pathID($file));
   }
   return true;
 }
 283  
 /**
  * Search for backlinks to a given page
  *
  * Callback for search(). Loads the parser instructions of every
  * readable .txt file and adds the file's ID to $data as soon as one of
  * its internal links (or CamelCase links, when $conf['camelcase'] is
  * enabled) resolves to the searched page.
  *
  * $opts['ns']    namespace of the page
  * $opts['name']  name of the page without namespace
  *
  * @author  Andreas Gohr <andi@splitbrain.org>
  * @deprecated Replaced by ft_backlinks()
  */
 function search_backlinks(&$data,$base,$file,$type,$lvl,$opts){
   // without this $conf would be an undefined local variable and
   // CamelCase links would never be looked at
   global $conf;

   //we do nothing with directories
   if($type == 'd') return true;
   //only search txt files
   if(!preg_match('#\.txt$#',$file)) return true;

   //absolute search id
   $sid = cleanID($opts['ns'].':'.$opts['name']);

   //current id and namespace
   $cid = pathID($file);
   $cns = getNS($cid);

   //check ACL
   if(auth_quickaclcheck($cid) < AUTH_READ){
     return false;
   }

   //fetch instructions ($file as built by search() starts with a slash)
   require_once (DOKU_INC.'inc/parserutils.php');
   $instructions = p_cached_instructions($base.$file,true);
   if(is_null($instructions)) return false;

   $camelcase = !empty($conf['camelcase']);

   //check all links for match
   foreach($instructions as $ins){
     if($ins[0] == 'internallink' || ($camelcase && $ins[0] == 'camelcaselink') ){
       $mid = $ins[1][0];
       resolve_pageid($cns,$mid,$exists); //exists is not used
       if($mid == $sid){
         //we have a match - finish
         $data[]['id'] = $cid;
         break;
       }
     }
   }

   return false;
 }
 331  
 /**
  * Fulltextsearch
  *
  * Callback for search(). Splits $opts['query'] into words, builds a
  * regular expression from them and runs it on every readable .txt file
  * through search_regex(). Words prefixed with '-' must not occur, all
  * other words (optionally prefixed with '+') must occur.
  *
  * Empty words, as produced by an empty query or by leading or trailing
  * whitespace, are dropped. Otherwise an empty string would count as a
  * positive word and defeat the "no posword" check below.
  *
  * $opts['query'] is the search query
  *
  * @author  Andreas Gohr <andi@splitbrain.org>
  * @deprecated - fulltext indexer is used instead
  */
 function search_fulltext(&$data,$base,$file,$type,$lvl,$opts){
   //we do nothing with directories
   if($type == 'd') return true;
   //only search txt files
   if(!preg_match('#\.txt$#',$file)) return true;

   //check ACL
   $id = pathID($file);
   if(auth_quickaclcheck($id) < AUTH_READ){
     return false;
   }

   //create regexp from queries
   $poswords = array();
   $negwords = array();
   $query = isset($opts['query']) ? $opts['query'] : '';
   $qpreg = preg_split('/\s+/',$query,-1,PREG_SPLIT_NO_EMPTY);

   foreach($qpreg as $word){
     switch(substr($word,0,1)){
       case '-':
         if(strlen($word) > 1){  // catch single '-'
           array_push($negwords,preg_quote(substr($word,1),'#'));
         }
         break;
       case '+':
         if(strlen($word) > 1){  // catch single '+'
           array_push($poswords,preg_quote(substr($word,1),'#'));
         }
         break;
       default:
         array_push($poswords,preg_quote($word,'#'));
         break;
     }
   }

   // a search without any posword is useless
   if (!count($poswords)) return true;

   // one lookahead per positive word, then a body free of negative words
   $reg  = '^(?=.*?'.join(')(?=.*?',$poswords).')';
   $reg .= count($negwords) ? '((?!'.join('|',$negwords).').)*$' : '.*$';
   search_regex($data,$base,$file,$reg,$poswords);
   return true;
 }
 383  
 /**
  * Reference search
  * This function searches for existing references to a given media file
  * and returns an array with the found pages. It doesn't pay any
  * attention to ACL permissions to find every reference. The caller
  * must check if the user has the appropriate rights to see the found
  * page and eventually have to prevent the result from displaying.
  *
  * The search stops collecting once $conf['refshow'] references (at
  * least one) were found. The limit is checked before anything else, so
  * remaining directories are no longer entered either.
  *
  * @param array  $data Reference to the result data structure
  * @param string $base Base usually $conf['datadir']
  * @param string $file current file or directory relative to $base
  * @param char   $type Type either 'd' for directory or 'f' for file
  * @param int    $lvl  Current recursion depth
  * @param mixed  $opts option array as given to search()
  * @return bool  false once enough references were found, true otherwise
  *
  * $opts['query'] is the demanded media file name
  *
  * @author  Andreas Gohr <andi@splitbrain.org>
  * @author  Matthias Grimm <matthiasgrimm@users.sourceforge.net>
  */
 function search_reference(&$data,$base,$file,$type,$lvl,$opts){
   global $conf;

   //we finish after 'cnt' references found. Returning false for a
   //directory makes search() skip it, which speeds the search up.
   $cnt = (isset($conf['refshow']) && $conf['refshow'] > 0) ? $conf['refshow'] : 1;
   if(count($data) >= $cnt) return false;

   //we do nothing with directories
   if($type == 'd') return true;

   //only search txt files
   if(!preg_match('#\.txt$#',$file)) return true;

   //the media name is literal text: quote it for the '#' delimited
   //pattern, otherwise e.g. the dot of the extension matches any character
   $reg = '\{\{ *\:?'.preg_quote($opts['query'],'#').' *(\|.*)?\}\}';
   //the word list is handed over unchanged to keep the result entries as before
   search_regex($data,$base,$file,$reg,array($opts['query']));
   return true;
 }
 422  
 423  /* ------------- helper functions below -------------- */
 424  
 /**
  * fulltext search helper
  * searches a text file with a given regular expression
  * no ACL checks are performed. This has to be done by
  * the caller if necessary.
  *
  * On a match one entry with the keys 'id', 'count', 'poswords' and
  * 'snippet' is added to $data. The snippet is an HTML escaped excerpt
  * of the text around the first occurrence of the first word, with all
  * words wrapped in a search_hit span. Only the excerpt is highlighted,
  * never the surrounding search_sep markup, so that words like "span"
  * or "class" cannot damage the generated HTML.
  *
  * @param array  $data  reference to array for results
  * @param string $base  base directory
  * @param string $file  file name to search in
  * @param string $reg   regular expression to search for (without delimiters)
  * @param array  $words words that should be marked in the results. They are
  *                      joined into a pattern as given; empty words are ignored
  * @return bool  always true
  *
  * @author  Andreas Gohr <andi@splitbrain.org>
  * @author  Matthias Grimm <matthiasgrimm@users.sourceforge.net>
  *
  * @deprecated - fulltext indexer is used instead
  */
 function search_regex(&$data,$base,$file,$reg,$words){

   //get text
   $text = io_readfile($base.'/'.$file);
   //lowercase text (u modifier does not help with case)
   $lctext = utf8_strtolower($text);

   //do the fulltext search
   $matches = array();
   $hits = preg_match_all('#'.$reg.'#usi',$lctext,$matches);
   if(!$hits) return true;

   //an empty word would make the mark pattern match at every position
   $marks = array();
   foreach((array) $words as $word){
     if((string) $word !== '') $marks[] = $word;
   }

   //this is not the best way for snippet generation but the fastest I could find
   $start  = 0;
   $length = 200;
   if(count($marks)){
     //use first word for snippet creation. The text was lowercased,
     //so the word has to be lowercased as well to be found
     $pos = utf8_strpos($lctext,utf8_strtolower($marks[0]));
     if($pos !== false && $pos > 100) $start = $pos - 100;
     $length += utf8_strlen($marks[0]);
   }
   $excerpt = htmlspecialchars(utf8_substr($text,$start,$length));

   $count = $hits;
   if(count($marks)){
     $mark   = '('.join('|',$marks).')';
     //same modifiers as the counting below
     $marked = preg_replace('#'.$mark.'#usi','<span class="search_hit">\\1</span>',$excerpt);
     //keep the plain excerpt if the pattern could not be applied
     if(is_string($marked)) $excerpt = $marked;
     $count = preg_match_all('#'.$mark.'#usi',$lctext,$matches);
   }

   $sep = '<span class="search_sep"> ... </span>';

   $data[] = array(
     'id'       => pathID($file),
     'count'    => $count,
     'poswords' => join(' ',$marks),
     'snippet'  => $sep.$excerpt.$sep,
   );

   return true;
 }
 474  
 475  
 /**
  * fulltext sort
  *
  * Comparison callback for usort() on the entries created by
  * search_fulltext. Entries with a higher 'count' come first; entries
  * with the same count are ordered by 'id' using strcmp().
  *
  * @author  Andreas Gohr <andi@splitbrain.org>
  */
 function sort_search_fulltext($a,$b){
   // more hits sort to the front
   if($b['count'] < $a['count']) return -1;
   if($b['count'] > $a['count']) return 1;

   // same number of hits: fall back to the id
   return strcmp($a['id'],$b['id']);
 }
 493  
 /**
  * translates a document path to an ID
  *
  * The path is run through utf8_decodeFN(), slashes become colons, a
  * trailing .txt is removed unless $keeptxt is set, and colons at the
  * start and at the end are stripped.
  *
  * @param  string $path    document path
  * @param  bool   $keeptxt keep a trailing .txt extension
  * @return string the ID
  *
  * @author  Andreas Gohr <andi@splitbrain.org>
  * @todo    move to pageutils
  */
 function pathID($path,$keeptxt=false){
   $id = strtr(utf8_decodeFN($path),'/',':');

   // patterns are applied one after another, in this order
   $strip = array('#^:+#','#:+$#');
   if(!$keeptxt) array_unshift($strip,'#\.txt$#');

   return preg_replace($strip,'',$id);
 }
 508  
 509  
 510  //Setup VIM: ex: et ts=2 enc=utf-8 :
Code source de DokuWiki 2006-11-06

/inc/ -> search.php (source)