[ Index ] |
|
Code source de SPIP Agora 1.4 |
<?php
/*****************************************************
* This file is part of Agora, web based content management system.
*
* Agora is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; version 2 of the License.
*
* Agora is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details (file "COPYING").
*
* Copyright © Arnaud Martin, Antoine Pitrou et Philippe Rivière.
* List of authors detailed in "copyright_fr.html" file.
* E-mail : agora@sig.premier-ministre.gouv.fr
* Web site : http://www.agora.gouv.fr
*****************************************************/
// Base class for Indexer
// $Id$

require_once dirname(__FILE__). "/../bd/inc_index_dico_factory.php";

require_once dirname(__FILE__). "/Indexer_Utils.php";

/**
 * Base class for indexers.
 *
 * index() drives the indexation of one object: it resets the working
 * buffers, lets _indexData() fill them (typically through
 * _indexer_chaine()), pushes the collected words to the dictionary and
 * finally calls _createIndex(). alreadyIndex(), _indexData() and
 * _createIndex() are empty hooks in this class.
 *
 * The code deliberately sticks to constructs that parse on PHP 4 as well
 * as on current PHP versions (no visibility keywords, old-style constructor
 * kept for subclasses that may call it).
 */
class Indexer {

    // {{{ properties

    /**
    * type of Indexation (only used here to build the log message).
    * @var String
    * @access private
    */
    var $_type;

    /**
    * $mots contains the list of indexed words, each entry being
    * array("hash" => "0x<16 hex chars>", "dico" => <word>).
    * @var Array
    * @access private
    */
    var $_mots;

    /**
    * $index maps a word hash (16 hex chars) to its accumulated weight.
    * @var Array
    * @access private
    */
    var $_index;

    /**
    * $poids contains the definition of weight.
    * Not read by this base class.
    * @var Array
    * @access private
    */
    var $_poids;

    /**
    * $full is a flag to index more data.
    * @var bool
    * @access private
    */
    var $_full = true;

    // }}}

    // {{{ getFull()

    /**
    * @return bool current value of the "full indexation" flag
    * @access public
    */
    function getFull () {
        return $this->_full;
    }

    // }}}

    // {{{ setFull($full)

    /**
    * @param $full new value of the "full indexation" flag
    * @access public
    */
    function setFull ($full) {
        $this->_full = $full;
    }

    // }}}

    // {{{ factory($typeIndexer)

    /**
    * Indexer Factory.
    *
    * Looks for
    * <this dir>/<type_indexation>/Indexer_<typeIndexer>_<type_indexation>.php
    * (type_indexation being $GLOBALS['type_indexation']) and falls back to
    * <this dir>/common/Indexer_<typeIndexer>_common.php when that file does
    * not exist. The class named after the included file is instantiated.
    *
    * Does not use $this; it is not declared "static" only to stay parseable
    * by PHP 4.
    *
    * NOTE(review): both $typeIndexer and $GLOBALS['type_indexation'] end up
    * in an include path; callers must make sure neither comes from
    * untrusted input.
    *
    * @param $typeIndexer middle part of the indexer class / file name
    * @return object the new indexer, or the result of PEAR::raiseError()
    *                when the expected class is not defined after the include
    * @access public
    */
    function &factory ($typeIndexer) {
        $typeIndexation = $GLOBALS['type_indexation'];

        // Build the backend-specific path once instead of twice.
        $fichierSpecifique = dirname(__FILE__) . '/' . $typeIndexation
            . '/Indexer_' . $typeIndexer . '_' . $typeIndexation . '.php';

        if (!file_exists($fichierSpecifique)) {
            include_once (dirname(__FILE__). '/common/Indexer_' . $typeIndexer . '_common.php');
            $classname = 'Indexer_' . $typeIndexer . '_common';
        }
        else {
            include_once ($fichierSpecifique);
            $classname = 'Indexer_' . $typeIndexer . "_" . $typeIndexation;
        }

        if (!class_exists($classname)) {
            // A function returning by reference must return a variable,
            // not the direct result of a call.
            $error = PEAR::raiseError("Cannot instanciate class $classname", null, null, null, null, null, false);
            return $error;
        }

        // "=& new" is a parse error since PHP 7; plain "new" works everywhere.
        $obj = new $classname();

        return $obj;
    }

    // }}}

    // {{{ constructor

    /**
    * Indexer constructor.
    *
    * @access public
    */

    function Indexer () { }

    // }}}

    // {{{ index($id_objet, $forcer_reset = true)

    /**
    * index an object
    *
    * Nothing is done when $id_objet is empty, or when $forcer_reset is
    * false and alreadyIndex($id_objet) returns a true value.
    *
    * @param $id_objet identifier of the object to index
    * @param $forcer_reset when true, index even if alreadyIndex() says the
    *                      object is already indexed
    * @access public
    */

    function index ($id_objet, $forcer_reset = true) {
        if (!$id_objet OR (!$forcer_reset AND $this->alreadyIndex($id_objet)))
            return;

        // The function name must be passed as a string: the bare word
        // spip_log is an undefined constant.
        if (function_exists('spip_log')) {
            spip_log ("indexation " . $this->_type . " $id_objet");
        }

        // Start from empty arrays for every object: _index is written with
        // array keys, and _mots must not carry over the words of a
        // previously indexed object.
        $this->_index = array();
        $this->_mots = array();

        $indexDicoMetier = &recuperer_instance_index_dico();

        $this->_indexData($id_objet);

        if ($this->_index) {
            $indexDicoMetier->addDico($this->_mots);

            reset ($this->_index);

            $this->_createIndex($id_objet);
        }
    }

    // }}}

    // {{{ _indexer_chaine($texte, $val = 1, $min_long = 3)

    /**
    * Split a text into words and add them to the current index.
    *
    * Every word strictly longer than $min_long characters gets $val added
    * to its weight in $this->_index (keyed by the first 16 hex characters of
    * the word's md5) and is appended to $this->_mots.
    *
    * @param $texte text to index (tags are replaced by spaces)
    * @param $val weight added for each occurrence; 0 disables indexing
    * @param $min_long words of this length or shorter are ignored, except
    *                  upper-case acronyms (see below)
    * @access private
    */
    function _indexer_chaine ($texte, $val = 1, $min_long = 3) {
        if ($val == 0) { // no indexing for a zero weight
            return;
        }

        // Make sure the buffers are arrays even if this method is called
        // before index() has initialised them.
        if (!is_array($this->_index)) {
            $this->_index = array();
        }
        if (!is_array($this->_mots)) {
            $this->_mots = array();
        }

        // Strip tags, HTML entities, diacritics...
        $texte = ' ' . preg_replace('/<[^>]*>/', ' ', $texte) . ' ';
        $texte = Indexer_Utils::nettoyer_chaine_indexation($texte);

        // Replace every separator character by a space
        $regs = Indexer_Utils::separateurs_indexation();
        $texte = strtr($texte, $regs, str_repeat(' ', strlen($regs)));

        // Special case: acronyms of at least two characters. They are
        // suffixed with "___" so that they pass the length filter below.
        // Skipped when $min_long <= 1, where the quantifier would be invalid.
        $maxSuite = (int) $min_long - 1;
        if ($maxSuite >= 1) {
            $texte = preg_replace('/ ([A-Z][0-9A-Z]{1,' . $maxSuite . '}) /', ' \\1___ ', $texte);
        }
        $texte = strtolower($texte);

        // Split into words
        $table = Indexer_Utils::spip_split(" +", $texte);

        foreach ($table as $mot) {
            if (strlen($mot) > $min_long) {
                $h = substr(md5($mot), 0, 16);

                if (!isset($this->_index[$h])) {
                    $this->_index[$h] = 0;
                }
                $this->_index[$h] += $val;
                $this->_mots[] = array("hash" => "0x$h", "dico" => $mot);
            }
        }
    }

    // }}}

    // {{{ alreadyIndex($id_objet)

    /**
    * check if an object is indexed.
    * Empty in this base class: it returns nothing, so index() never skips
    * an object on its account.
    * @param $id_objet
    * @access public
    */

    function alreadyIndex ($id_objet) { }

    // }}}

    // {{{ _indexData($id_objet)

    /**
    * this method initialises the attributes 'mots' and 'index'.
    * Empty in this base class.
    * @param $id_object
    * @access private
    */

    function _indexData ($id_object) { }

    // }}}

    // {{{ _createIndex($id_objet)

    /**
    * this method creates the indexation data.
    * Called by index() only when 'index' is not empty.
    * Empty in this base class.
    * @param $id_object
    * @access private
    */

    function _createIndex ($id_object) { }

    // }}}

}
?>
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Sat Feb 24 14:40:03 2007 | par Balluche grâce à PHPXref 0.7 |