[ Index ]
 

Code source de DokuWiki 2006-11-06

Accédez au Source d'autres logiciels libres | Soutenez Angelica Josefina !

title

Body

[fermer]

/lib/exe/ -> indexer.php (source)

   1  <?php
   2  /**
   3   * DokuWiki indexer
   4   *
   5   * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
   6   * @author     Andreas Gohr <andi@splitbrain.org>
   7   */
   // Resolve the installation root (two directories above this script) unless
   // an including script has defined it already.
   if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/');
   // NOTE(review): presumably tells inc/init.php not to gzip the response,
   // which would invalidate the Content-Length sent by sendGIF() - confirm.
   define('DOKU_DISABLE_GZIP_OUTPUT',1);
   require_once (DOKU_INC.'inc/init.php');
   require_once (DOKU_INC.'inc/auth.php');
   require_once (DOKU_INC.'inc/events.php');
   session_write_close();  // the session is not needed by the background jobs
   if(!defined('NL')) define('NL',"\n");

   // keep running after browser closes connection
   @ignore_user_abort(true);

   // If ignoring user aborts is in effect, answer the browser right away and do
   // the work afterwards. In debug mode sendGIF() only emits a text/plain
   // header, which has to happen before the jobs print anything, so it is
   // called up front as well. Otherwise the answer is deferred until the
   // selected job has finished.
   if(@ignore_user_abort() || !empty($_REQUEST['debug'])){
       sendGIF();
       $defer = false;
   }else{
       $defer = true;
   }

   // Catch any possible output (e.g. errors) unless debugging was requested.
   // empty() is used because 'debug' is normally absent from the request and a
   // plain array read would raise a notice on every call.
   if(empty($_REQUEST['debug'])) ob_start();

   // run one of the jobs: "or" short-circuits, so the first job that returns
   // true ends the chain
   runIndexer() or metaUpdate() or runSitemapper() or runTrimRecentChanges();

   // Discard the captured job output BEFORE sending a deferred GIF. flush()
   // does not touch user-level output buffers, so a GIF printed while the
   // buffer is still open would be thrown away by ob_end_clean() and the
   // browser would receive a Content-Length header without a body.
   if(empty($_REQUEST['debug'])) ob_end_clean();
   if($defer) sendGIF();
   exit;
  36  
  37  // --------------------------------------------------------------------
  38  
  /**
   * Trims the recent changes cache (or imports the old changelog) as needed.
   *
   * Two independent tasks are handled, in this order:
   *
   *  1. If $conf['changelog_old'] names an existing file and neither the new
   *     changelog nor its '_importing' / '_tmp' companions exist, the
   *     TEMPORARY_CHANGELOG_UPGRADE_EVENT event is triggered.
   *  2. If the changelog exists, its ctime is more than a day old and no
   *     '_tmp' file exists, the changelog is rewritten so that it contains the
   *     entries of the last $conf['recent_days'] days, topped up with the
   *     newest older entries until $conf['recent'] lines are reached.
   *
   * @author Ben Coburn <btcoburn@silicodon.net>
   * @return bool true when one of the two tasks was started, false when
   *              nothing was done
   */
  function runTrimRecentChanges() {
      global $conf;

      // Import old changelog (if needed)
      // Uses the importoldchangelog plugin to upgrade the changelog automatically.
      // FIXME: Remove this from runTrimRecentChanges when it is no longer needed.
      if (isset($conf['changelog_old']) &&
          @file_exists($conf['changelog_old']) && !@file_exists($conf['changelog']) &&
          !@file_exists($conf['changelog'].'_importing') && !@file_exists($conf['changelog'].'_tmp')) {
              $tmp = array(); // no event data
              trigger_event('TEMPORARY_CHANGELOG_UPGRADE_EVENT', $tmp);
              return true;
      }

      // Trim the Recent Changes
      // Trims the recent changes cache to the last $conf['recent_days'] days of
      // changes or $conf['recent'] items, whichever is larger.
      // The trimming is only done once a day.
      if (@file_exists($conf['changelog']) &&
          (filectime($conf['changelog'])+86400)<time() &&
          !@file_exists($conf['changelog'].'_tmp')) {
              io_lock($conf['changelog']);
              $lines = file($conf['changelog']);
              if ($lines === false || count($lines)<$conf['recent']) {
                  // unreadable, or nothing to trim
                  io_unlock($conf['changelog']);
                  return true;
              }

              io_saveFile($conf['changelog'].'_tmp', '');          // presave tmp as 2nd lock
              $trim_time = time() - $conf['recent_days']*86400;
              $out_lines = array();
              // Must start out as an array: when no entry is older than
              // $trim_time but junk lines leave fewer than $conf['recent']
              // entries, the top-up below would otherwise hand an undefined
              // variable to ksort()/array_slice() and the merge result (and
              // with it the saved changelog) would be lost.
              $old_lines = array();

              $total = count($lines);
              for ($i=0; $i<$total; $i++) {
                  $log = parseChangelogLine($lines[$i]);
                  if ($log === false) continue;                    // discard junk
                  $key = $log['date'].".$i";                       // .$i prevents key collisions
                  if ($log['date'] < $trim_time) {
                      $old_lines[$key] = $lines[$i];               // keep old lines for now
                  } else {
                      $out_lines[$key] = $lines[$i];               // definitely keep these lines
                  }
              }

              // sort the final result, it shouldn't be necessary,
              //   however the extra robustness in making the changelog cache self-correcting is worth it
              ksort($out_lines);
              $extra = $conf['recent'] - count($out_lines);        // do we need extra lines to bring us up to minimum
              if ($extra > 0) {
                  ksort($old_lines);
                  // newest $extra old entries go in front of the recent ones
                  $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
              }

              // save trimmed changelog
              $trimmed = implode('', $out_lines);
              io_saveFile($conf['changelog'].'_tmp', $trimmed);
              @unlink($conf['changelog']);
              if (!rename($conf['changelog'].'_tmp', $conf['changelog'])) {
                  // rename failed so try another way...
                  io_unlock($conf['changelog']);
                  io_saveFile($conf['changelog'], $trimmed);
                  @unlink($conf['changelog'].'_tmp');
              } else {
                  io_unlock($conf['changelog']);
              }
              return true;
      }

      // nothing done
      return false;
  }
 113  
 /**
  * Runs the indexer for the current page
  *
  * The page is taken from the 'id' request parameter. Indexing is skipped
  * when the page's '.indexed' marker is newer than the page file, or when
  * another process holds a lock directory that is not older than five
  * minutes.
  *
  * @author Andreas Gohr <andi@splitbrain.org>
  * @return bool true if the page was indexed, false otherwise
  */
 function runIndexer(){
     global $conf;
     print "runIndexer(): started".NL;

     $ID = cleanID($_REQUEST['id']);
     if(!$ID){
         return false;
     }

     // compare the marker's timestamp with the page's timestamp
     $indexedAt = @filemtime(metaFN($ID,'.indexed'));
     $changedAt = @filemtime(wikiFN($ID));
     if($indexedAt > $changedAt){
         print "runIndexer(): index for $ID up to date".NL;
         return false;
     }

     // the lock is a directory: creating it succeeds for one process only
     $lockDir = $conf['lockdir'].'/_indexer.lock';
     while(true){
         if(@mkdir($lockDir,$conf['dmode'])){
             break;
         }
         usleep(50);
         $lockAge = time()-@filemtime($lockDir);
         if($lockAge <= 60*5){
             // somebody else is working right now
             print "runIndexer(): indexer locked".NL;
             return false;
         }
         // older than five minutes: treat as stale, remove it and retry
         @rmdir($lockDir);
         print "runIndexer(): stale lock removed".NL;
     }
     if($conf['dperm']){
         chmod($lockDir, $conf['dperm']);
     }

     require_once (DOKU_INC.'inc/indexer.php');

     // the actual indexing
     idx_addPage($ID);

     // write the marker file, then give the lock back
     io_saveFile(metaFN($ID,'.indexed'),' ');
     @rmdir($lockDir);
     print "runIndexer(): finished".NL;
     return true;
 }
 159  
 /**
  * Renders and stores the metadata of the current page if none exists yet
  *
  * Pages created outside of DokuWiki get their metadata this way the first
  * time they are viewed. Creation date, modification date and contributors
  * are seeded from matching changelog lines before rendering.
  *
  * @return bool true if a meta file was written, false if there was nothing
  *              to do
  */
 function metaUpdate(){
     global $conf;
     print "metaUpdate(): started".NL;

     $ID = cleanID($_REQUEST['id']);
     if(!$ID){
         return false;
     }
     $file = metaFN($ID, '.meta');
     print "meta file: $file".NL;

     // nothing to do when the meta file is already there or the page is missing
     if(@file_exists($file) || !@file_exists(wikiFN($ID))){
         return false;
     }

     require_once (DOKU_INC.'inc/common.php');
     require_once (DOKU_INC.'inc/parserutils.php');

     // changelog lines of this page; groups: 1 = first number (used as date),
     // 2 = dotted quad, 3 = field stored as contributor, 4 = last field
     $pattern = '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/';
     $changes = io_grep($conf['changelog'], $pattern, 0, true);

     $meta = array();
     if(count($changes)){
         $meta['date']['created'] = $changes[0][1];
         foreach($changes as $entry){
             // entries whose last field is '*' are not counted as modifications
             if($entry[4] == '*'){
                 continue;
             }
             $meta['date']['modified'] = $entry[1];
             if(!$entry[3]){
                 continue;
             }
             $meta['contributor'][$entry[3]] = $entry[3];
         }
     }

     $meta = p_render_metadata($ID, $meta);
     io_saveFile($file, serialize($meta));

     print "metaUpdate(): finished".NL;
     return true;
 }
 207  
 /**
  * Builds a Google Sitemap of all public pages known to the indexer
  *
  * The map is placed in the DokuWiki root directory and is named
  * sitemap.xml.gz when $conf['compression'] is 'bz2' or 'gz', sitemap.xml
  * otherwise. The file has to exist already and be writable. It is rebuilt
  * only when it is empty or older than $conf['sitemap'] days; after a
  * rebuild Google is pinged with the sitemap URL.
  *
  * @author Andreas Gohr
  * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
  * @return bool true if a new sitemap was written, false otherwise
  */
 function runSitemapper(){
     global $conf;
     print "runSitemapper(): started".NL;
     if(!$conf['sitemap']) return false;

     if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){
         $sitemap = 'sitemap.xml.gz';
     }else{
         $sitemap = 'sitemap.xml';
     }
     print "runSitemapper(): using $sitemap".NL;

     // is_writable() is false for a missing file, so the map must pre-exist
     if(!is_writable(DOKU_INC.$sitemap)) return false;
     if(@filesize(DOKU_INC.$sitemap) &&
        @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
        print 'runSitemapper(): Sitemap up to date'.NL;
        return false;
     }

     // Without a readable page index there is nothing to list. Bail out
     // instead of iterating over false, which would store an empty sitemap
     // and announce it to Google.
     $pages = @file($conf['cachedir'].'/page.idx');
     if($pages === false){
         print 'runSitemapper(): page index not readable'.NL;
         return false;
     }
     print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

     // build the sitemap in a nested output buffer
     ob_start();
     print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
     print '<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">'.NL;
     foreach($pages as $id){
         $id = trim($id);
         $file = wikiFN($id);

         //skip hidden, non existing and restricted files
         if(isHiddenPage($id)) continue;
         $date = @filemtime($file);
         if(!$date) continue;
         // checked with empty user and groups, i.e. only pages readable
         // without any credentials are listed
         if(auth_aclcheck($id,'','') < AUTH_READ) continue;

         // NOTE(review): the URL is written into the XML as returned by wl();
         // confirm that wl() already yields XML-safe output.
         print '  <url>'.NL;
         print '    <loc>'.wl($id,'',true).'</loc>'.NL;
         print '    <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
         print '  </url>'.NL;
     }
     print '</urlset>'.NL;
     $data = ob_get_contents();
     ob_end_clean();

     //save the new sitemap
     io_saveFile(DOKU_INC.$sitemap,$data);

     print 'runSitemapper(): pinging google'.NL;
     //ping google
     $url  = 'http://www.google.com/webmasters/sitemaps/ping?sitemap=';
     $url .= urlencode(DOKU_URL.$sitemap);
     $http = new DokuHTTPClient();
     $http->get($url);
     if($http->error) print 'runSitemapper(): '.$http->error.NL;

     print 'runSitemapper(): finished'.NL;
     return true;
 }
 276  
 /**
  * Formats a timestamp as ISO 8601 date
  *
  * The result has the form YYYY-MM-DDTHH:MM:SS+HH:MM, using the timezone PHP
  * is currently configured with.
  *
  * @author <ungu at terong dot com>
  * @link http://www.php.net/manual/en/function.date.php#54072
  * @param int $int_date UNIX timestamp to format
  * @return string the formatted date
  */
 function date_iso8601($int_date) {
    // date('O') yields the offset as "+hhmm"; ISO 8601 wants "+hh:mm"
    $offset  = date('O', $int_date);
    $hours   = substr($offset, 0, 3);
    $minutes = substr($offset, 3, 2);

    return date('Y-m-d\TH:i:s', $int_date).$hours.":".$minutes;
 }
 291  
 /**
  * Just send a 1x1 pixel blank gif to the browser
  *
  * When the 'debug' request parameter is set to a non-empty value, no image
  * is sent; only a text/plain content type header is emitted so that the
  * jobs' messages can be read in the browser.
  *
  * @author Andreas Gohr <andi@splitbrain.org>
  * @author Harry Fuecks <fuecks@gmail.com>
  * @return void
  */
 function sendGIF(){
     // empty() instead of a plain array read: 'debug' is normally absent and
     // the resulting notice could be printed ahead of the header() calls
     if(!empty($_REQUEST['debug'])){
         header('Content-Type: text/plain');
         return;
     }
     $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
     header('Content-Type: image/gif');
     header('Content-Length: '.strlen($img));
     header('Connection: Close');
     print $img;
     flush();
     // Browser should drop connection after this
     // Thinks it's got the whole image
 }
 312  
 313  //Setup VIM: ex: et ts=4 enc=utf-8 :
 314  // No trailing PHP closing tag - no output please!
 315  // See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php


Généré le : Tue Apr 3 20:47:31 2007 par Balluche grâce à PHPXref 0.7