[ Index ] |
|
Code source de DokuWiki 2006-11-06 |
<?php
/**
 * DokuWiki indexer
 *
 * Background job dispatcher. It answers the request with a 1x1 GIF and then
 * runs at most one maintenance job: indexing the requested page, rendering
 * missing metadata, rebuilding the sitemap or trimming the recent changes
 * cache.
 *
 * Passing a non-empty "debug" request parameter disables output buffering
 * and switches the response to plain text so the job messages become visible.
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
if(!defined('DOKU_INC')) define('DOKU_INC',realpath(dirname(__FILE__).'/../../').'/');
define('DOKU_DISABLE_GZIP_OUTPUT',1);
require_once(DOKU_INC.'inc/init.php');
require_once(DOKU_INC.'inc/auth.php');
require_once(DOKU_INC.'inc/events.php');
session_write_close();  //close session
if(!defined('NL')) define('NL',"\n");

// keep running after browser closes connection
@ignore_user_abort(true);

// check if user abort worked, if yes send output early
if(@ignore_user_abort()){
    sendGIF(); // send gif
    $defer = false;
}else{
    $defer = true;
}

// Catch any possible output (e.g. errors)
// empty() instead of a bare array read avoids an undefined index notice
if(empty($_REQUEST['debug'])) ob_start();

// run one of the jobs
// the "or" chain short-circuits: the first job returning true ends the chain
runIndexer() or metaUpdate() or runSitemapper() or runTrimRecentChanges();
if($defer) sendGIF();

if(empty($_REQUEST['debug'])) ob_end_clean();
exit;

// --------------------------------------------------------------------

/**
 * Trims the recent changes cache (or imports the old changelog) as needed.
 *
 * @author Ben Coburn <btcoburn@silicodon.net>
 *
 * @return bool true if an import was triggered or the changelog was
 *              examined for trimming, false if there was nothing to do
 */
function runTrimRecentChanges() {
    global $conf;

    // Import old changelog (if needed)
    // Uses the importoldchangelog plugin to upgrade the changelog automatically.
    // FIXME: Remove this from runTrimRecentChanges when it is no longer needed.
    if (isset($conf['changelog_old']) &&
        @file_exists($conf['changelog_old']) && !@file_exists($conf['changelog']) &&
        !@file_exists($conf['changelog'].'_importing') && !@file_exists($conf['changelog'].'_tmp')) {
        $tmp = array(); // no event data
        trigger_event('TEMPORARY_CHANGELOG_UPGRADE_EVENT', $tmp);
        return true;
    }

    // Trim the Recent Changes
    // Trims the recent changes cache to the last $conf['recent_days'] days of
    // changes or $conf['recent'] items, which ever is larger.
    // The trimming is only done once a day.
    if (@file_exists($conf['changelog']) &&
        (filectime($conf['changelog'])+86400)<time() &&
        !@file_exists($conf['changelog'].'_tmp')) {
        io_lock($conf['changelog']);
        $lines = file($conf['changelog']);
        if (count($lines)<$conf['recent']) {
            // nothing to trim
            io_unlock($conf['changelog']);
            return true;
        }

        io_saveFile($conf['changelog'].'_tmp', '');          // presave tmp as 2nd lock
        $trim_time = time() - $conf['recent_days']*86400;
        $out_lines = array();
        // must be initialised: it is sorted and sliced below even when no
        // line turned out to be older than the trim time
        $old_lines = array();

        for ($i=0; $i<count($lines); $i++) {
            $log = parseChangelogLine($lines[$i]);
            if ($log === false) continue;                      // discard junk
            if ($log['date'] < $trim_time) {
                $old_lines[$log['date'].".$i"] = $lines[$i];     // keep old lines for now (append .$i to prevent key collisions)
            } else {
                $out_lines[$log['date'].".$i"] = $lines[$i];     // definitely keep these lines
            }
        }

        // sort the final result, it shouldn't be necessary,
        //   however the extra robustness in making the changelog cache self-correcting is worth it
        ksort($out_lines);
        $extra = $conf['recent'] - count($out_lines);        // do we need extra lines to bring us up to minimum
        if ($extra > 0) {
            ksort($old_lines);
            $out_lines = array_merge(array_slice($old_lines,-$extra),$out_lines);
        }

        // save trimmed changelog
        io_saveFile($conf['changelog'].'_tmp', implode('', $out_lines));
        @unlink($conf['changelog']);
        if (!rename($conf['changelog'].'_tmp', $conf['changelog'])) {
            // rename failed so try another way...
            io_unlock($conf['changelog']);
            io_saveFile($conf['changelog'], implode('', $out_lines));
            @unlink($conf['changelog'].'_tmp');
        } else {
            io_unlock($conf['changelog']);
        }
        return true;
    }

    // nothing done
    return false;
}

/**
 * Runs the indexer for the current page
 *
 * The page is taken from the "id" request parameter. Nothing is done when
 * the id is empty, when the page's ".indexed" marker is newer than the page
 * file, or when another process holds a fresh indexer lock.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @return bool true if the page was indexed, false otherwise
 */
function runIndexer(){
    global $conf;
    print "runIndexer(): started".NL;

    // guard the array read so a request without id raises no notice
    $ID = cleanID(isset($_REQUEST['id']) ? $_REQUEST['id'] : '');
    if(!$ID) return false;

    // check if indexing needed
    $last = @filemtime(metaFN($ID,'.indexed'));
    if($last > @filemtime(wikiFN($ID))){
        print "runIndexer(): index for $ID up to date".NL;
        return false;
    }

    // try to acquire a lock
    // the lock is a directory: mkdir() fails while it already exists
    $lock = $conf['lockdir'].'/_indexer.lock';
    while(!@mkdir($lock,$conf['dmode'])){
        usleep(50);
        if(time()-@filemtime($lock) > 60*5){
            // looks like a stale lock - remove it
            @rmdir($lock);
            print "runIndexer(): stale lock removed".NL;
        }else{
            print "runIndexer(): indexer locked".NL;
            return false;
        }
    }
    if($conf['dperm']) chmod($lock, $conf['dperm']);

    require_once(DOKU_INC.'inc/indexer.php');

    // do the work
    idx_addPage($ID);

    // we're finished - save and free lock
    io_saveFile(metaFN($ID,'.indexed'),' ');
    @rmdir($lock);
    print "runIndexer(): finished".NL;
    return true;
}

/**
 * Will render the metadata for the page if not exists yet
 *
 * This makes sure pages which are created from outside DokuWiki will
 * gain their data when viewed for the first time.
 *
 * The page is taken from the "id" request parameter. Creation date,
 * modification date and contributors are seeded from matching changelog
 * lines before the metadata is rendered and saved.
 *
 * @return bool true if metadata was rendered and saved, false if the id is
 *              empty, the meta file already exists or the page is missing
 */
function metaUpdate(){
    print "metaUpdate(): started".NL;

    // guard the array read so a request without id raises no notice
    $ID = cleanID(isset($_REQUEST['id']) ? $_REQUEST['id'] : '');
    if(!$ID) return false;
    $file = metaFN($ID, '.meta');
    echo "meta file: $file".NL;

    // rendering needed?
    if (@file_exists($file)) return false;
    if (!@file_exists(wikiFN($ID))) return false;

    require_once(DOKU_INC.'inc/common.php');
    require_once(DOKU_INC.'inc/parserutils.php');
    global $conf;


    // gather some additional info from changelog
    $info = io_grep($conf['changelog'],
                    '/^(\d+)\t(\d+\.\d+\.\d+\.\d+)\t'.preg_quote($ID,'/').'\t([^\t]+)\t([^\t\n]+)/',
                    0,true);

    $meta = array();
    if(count($info)){
        // group 1 of the first match is used as the creation date
        $meta['date']['created'] = $info[0][1];
        foreach($info as $item){
            // entries whose last captured field is '*' are skipped
            if($item[4] != '*'){
                $meta['date']['modified'] = $item[1];
                if($item[3]){
                    $meta['contributor'][$item[3]] = $item[3];
                }
            }
        }
    }

    $meta = p_render_metadata($ID, $meta);
    io_saveFile($file, serialize($meta));

    echo "metaUpdate(): finished".NL;
    return true;
}

/**
 * Builds a Google Sitemap of all public pages known to the indexer
 *
 * The map is placed in the root directory. It is named sitemap.xml.gz when
 * $conf['compression'] is 'bz2' or 'gz' and sitemap.xml otherwise - This
 * file needs to be writable!
 *
 * $conf['sitemap'] disables the job when empty; otherwise it is the maximum
 * age of the sitemap in days before it is rebuilt.
 *
 * @author Andreas Gohr
 * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
 *
 * @return bool true if a new sitemap was written, false otherwise
 */
function runSitemapper(){
    global $conf;
    print "runSitemapper(): started".NL;
    if(!$conf['sitemap']) return false;

    if($conf['compression'] == 'bz2' || $conf['compression'] == 'gz'){
        $sitemap = 'sitemap.xml.gz';
    }else{
        $sitemap = 'sitemap.xml';
    }
    print "runSitemapper(): using $sitemap".NL;

    if(!is_writable(DOKU_INC.$sitemap)) return false;
    if(@filesize(DOKU_INC.$sitemap) &&
       @filemtime(DOKU_INC.$sitemap) > (time()-($conf['sitemap']*60*60*24))){
        print 'runSitemapper(): Sitemap up to date'.NL;
        return false;
    }

    // without a readable page index there is nothing to build; bail out
    // instead of overwriting the existing sitemap with an empty one
    $pages = @file($conf['cachedir'].'/page.idx');
    if($pages === false){
        print 'runSitemapper(): page index not readable'.NL;
        return false;
    }
    print 'runSitemapper(): creating sitemap using '.count($pages).' pages'.NL;

    // build the sitemap
    // the XML is printed into a nested output buffer and collected below
    ob_start();
    print '<?xml version="1.0" encoding="UTF-8"?>'.NL;
    print '<urlset xmlns="http://www.google.com/schemas/sitemap/0.84">'.NL;
    foreach($pages as $id){
        $id = trim($id);
        $file = wikiFN($id);

        //skip hidden, non existing and restricted files
        if(isHiddenPage($id)) continue;
        $date = @filemtime($file);
        if(!$date) continue;
        if(auth_aclcheck($id,'','') < AUTH_READ) continue;

        print ' <url>'.NL;
        print ' <loc>'.wl($id,'',true).'</loc>'.NL;
        print ' <lastmod>'.date_iso8601($date).'</lastmod>'.NL;
        print ' </url>'.NL;
    }
    print '</urlset>'.NL;
    $data = ob_get_contents();
    ob_end_clean();

    //save the new sitemap
    io_saveFile(DOKU_INC.$sitemap,$data);

    print 'runSitemapper(): pinging google'.NL;
    //ping google
    $url  = 'http://www.google.com/webmasters/sitemaps/ping?sitemap=';
    $url .= urlencode(DOKU_URL.$sitemap);
    $http = new DokuHTTPClient();
    $http->get($url);
    if($http->error) print 'runSitemapper(): '.$http->error.NL;

    print 'runSitemapper(): finished'.NL;
    return true;
}

/**
 * Formats a timestamp as ISO 8601 date
 *
 * The result has the form YYYY-MM-DDTHH:MM:SS+HH:MM, i.e. the offset
 * reported by date('O') with a colon inserted between hours and minutes.
 *
 * @author <ungu at terong dot com>
 * @link http://www.php.net/manual/en/function.date.php#54072
 *
 * @param int $int_date UNIX timestamp to format
 * @return string the formatted date
 */
function date_iso8601($int_date) {
    //$int_date: current date in UNIX timestamp
    $date_mod = date('Y-m-d\TH:i:s', $int_date);
    $pre_timezone = date('O', $int_date);
    // turn "+0100" into "+01:00"
    $time_zone = substr($pre_timezone, 0, 3).":".substr($pre_timezone, 3, 2);
    $date_mod .= $time_zone;
    return $date_mod;
}

/**
 * Just send a 1x1 pixel blank gif to the browser
 *
 * In debug mode (non-empty "debug" request parameter) only a text/plain
 * content type header is sent and no image is written.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @author Harry Fuecks <fuecks@gmail.com>
 */
function sendGIF(){
    // empty() instead of a bare array read avoids an undefined index notice
    if(!empty($_REQUEST['debug'])){
        header('Content-Type: text/plain');
        return;
    }
    $img = base64_decode('R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAEALAAAAAABAAEAAAIBTAA7');
    header('Content-Type: image/gif');
    header('Content-Length: '.strlen($img));
    header('Connection: Close');
    print $img;
    flush();
    // Browser should drop connection after this
    // Thinks it's got the whole image
}

//Setup VIM: ex: et ts=4 enc=utf-8 :
// No trailing PHP closing tag - no output please!
// See Note at http://www.php.net/manual/en/language.basic-syntax.instruction-separation.php
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Tue Apr 3 20:47:31 2007 | par Balluche grâce à PHPXref 0.7 |