[ Index ] |
|
Code source de PRADO 3.0.6 |
<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to version 1.0 of the Zend Framework
 * license, that is bundled with this package in the file LICENSE, and
 * is available through the world-wide-web at the following URL:
 * http://www.zend.com/license/framework/1_0.txt. If you did not receive
 * a copy of the Zend Framework license and are unable to obtain it
 * through the world-wide-web, please send a note to license@zend.com
 * so we can mail you a copy immediately.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */


/** Zend_Search_Lucene_Exception */
require_once 'Zend/Search/Lucene/Exception.php';


/**
 * Read-only view of a single index segment stored in a compound (.cfs) file.
 *
 * On construction the compound file's table of contents and the segment's
 * field list (.fnm) are read. The term dictionary index (.tii) is loaded
 * lazily on the first term lookup, and norm vectors (.fN) are loaded lazily
 * per field.
 *
 * @package    Zend_Search_Lucene
 * @subpackage Index
 * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
 */
class Zend_Search_Lucene_Index_SegmentInfo
{
    /**
     * Number of docs in a segment
     *
     * @var integer
     */
    private $_docCount;

    /**
     * Segment name
     *
     * @var string
     */
    private $_name;

    /**
     * Term Dictionary Index
     * Array of the Zend_Search_Lucene_Index_Term objects.
     * The corresponding Zend_Search_Lucene_Index_TermInfo object is stored
     * at the same position in $_termDictionaryInfos.
     * Null until _loadDictionary() has run.
     *
     * @var array|null
     */
    private $_termDictionary;

    /**
     * Term Dictionary Index TermInfos
     * Array of the Zend_Search_Lucene_Index_TermInfo objects
     *
     * @var array
     */
    private $_termDictionaryInfos;

    /**
     * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
     *
     * @var array
     */
    private $_fields;

    /**
     * Field positions in a dictionary.
     * (Term dictionary contains fields ordered by names)
     * Maps field number => rank of the field when fields are sorted by name.
     *
     * @var array
     */
    private $_fieldsDicPositions;


    /**
     * Associative array where the key is the file name and the value is data offset
     * in a compound segment file (.cfs).
     *
     * @var array
     */
    private $_segFiles;

    /**
     * File system adapter.
     *
     * @var Zend_Search_Lucene_Storage_Directory_Filesystem
     */
    private $_directory;

    /**
     * Normalization factors.
     * An array fieldNumber => normVector
     * normVector is a binary string.
     * Each byte corresponds to an indexed document in a segment and
     * encodes normalization factor (float value, encoded by
     * Zend_Search_Lucene_Search_Similarity::encodeNorm())
     *
     * @var array
     */
    private $_norms = array();

    /**
     * Zend_Search_Lucene_Index_SegmentInfo constructor needs Segmentname,
     * Documents count and Directory as a parameter.
     *
     * Reads the compound file directory (file name => data offset) and the
     * field infos (.fnm) of the segment.
     *
     * @param string $name
     * @param integer $docCount
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @throws Zend_Search_Lucene_Exception if the compound file has no .fnm entry
     */
    public function __construct($name, $docCount, $directory)
    {
        $this->_name           = $name;
        $this->_docCount       = $docCount;
        $this->_directory      = $directory;
        $this->_termDictionary = null;

        // Table of contents of the compound file: (offset, file name) pairs
        $this->_segFiles = array();
        $cfsFile         = $this->_directory->getFileObject($name . '.cfs');
        $segFilesCount   = $cfsFile->readVInt();

        for ($count = 0; $count < $segFilesCount; $count++) {
            $dataOffset = $cfsFile->readLong();
            $fileName   = $cfsFile->readString();
            $this->_segFiles[$fileName] = $dataOffset;
        }

        $fnmFile       = $this->openCompoundFile('.fnm');
        $fieldsCount   = $fnmFile->readVInt();
        $fieldNames    = array();
        $fieldNums     = array();
        $this->_fields = array();
        for ($count = 0; $count < $fieldsCount; $count++) {
            $fieldName = $fnmFile->readString();
            $fieldBits = $fnmFile->readByte();
            // Bit 0x01 and bit 0x02 of the flags byte are passed through as the
            // second and fourth FieldInfo constructor arguments.
            $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
                                                                            $fieldBits & 1,
                                                                            $count,
                                                                            $fieldBits & 2);
            if ($fieldBits & 0x10) {
                // norms are omitted for the indexed field:
                // synthesize a vector holding the encoded norm 1.0 for every document
                $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
            }

            $fieldNums[$count]  = $count;
            $fieldNames[$count] = $fieldName;
        }

        // Sort field numbers by field name, then invert to get
        // field number => position in the name-ordered dictionary.
        array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
        $this->_fieldsDicPositions = array_flip($fieldNums);
    }

    /**
     * Opens index file stored within compound index file
     *
     * The returned file object is positioned at the start of the requested
     * sub-file's data.
     *
     * @param string $extension  file extension including the leading dot (e.g. '.fnm')
     * @throws Zend_Search_Lucene_Exception if the compound file has no such entry
     * @return Zend_Search_Lucene_Storage_File
     */
    public function openCompoundFile($extension)
    {
        $filename = $this->_name . $extension;

        if (!isset($this->_segFiles[$filename])) {
            throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
                                                 . $filename . ' file.');
        }

        $file = $this->_directory->getFileObject($this->_name . ".cfs");
        $file->seek($this->_segFiles[$filename]);
        return $file;
    }

    /**
     * Returns field index or -1 if field is not found
     *
     * @param string $fieldName
     * @return integer
     */
    public function getFieldNum($fieldName)
    {
        foreach ($this->_fields as $field) {
            if ($field->name == $fieldName) {
                return $field->number;
            }
        }

        return -1;
    }

    /**
     * Returns field info for specified field
     *
     * @param integer $fieldNum
     * @return Zend_Search_Lucene_Index_FieldInfo
     */
    public function getField($fieldNum)
    {
        return $this->_fields[$fieldNum];
    }

    /**
     * Returns array of fields.
     * if $indexed parameter is true, then returns only indexed fields.
     *
     * @param boolean $indexed
     * @return array  field names, keyed by field name
     */
    public function getFields($indexed = false)
    {
        $result = array();
        foreach ($this->_fields as $field) {
            if ((!$indexed) || $field->isIndexed) {
                $result[$field->name] = $field->name;
            }
        }
        return $result;
    }

    /**
     * Returns the total number of documents in this segment.
     *
     * @return integer
     */
    public function count()
    {
        return $this->_docCount;
    }


    /**
     * Loads Term dictionary from TermInfoIndex file
     *
     * Does nothing when the dictionary has already been loaded.
     *
     * @throws Zend_Search_Lucene_Exception if the .tii version marker is unexpected
     */
    protected function _loadDictionary()
    {
        if ($this->_termDictionary !== null) {
            return;
        }

        $this->_termDictionary      = array();
        $this->_termDictionaryInfos = array();

        $tiiFile   = $this->openCompoundFile('.tii');
        $tiVersion = $tiiFile->readInt();
        if ($tiVersion != (int)0xFFFFFFFE) {
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
        }

        $indexTermCount = $tiiFile->readLong();
                          $tiiFile->readInt();  // IndexInterval (not needed here)
        $skipInterval   = $tiiFile->readInt();

        // Terms are prefix-compressed against the previous term and the
        // pointers are stored as deltas, so running values are accumulated.
        $prevTerm     = '';
        $freqPointer  = 0;
        $proxPointer  = 0;
        $indexPointer = 0;
        for ($count = 0; $count < $indexTermCount; $count++) {
            $termPrefixLength = $tiiFile->readVInt();
            $termSuffix       = $tiiFile->readString();
            $termValue        = substr($prevTerm, 0, $termPrefixLength) . $termSuffix;

            $termFieldNum = $tiiFile->readVInt();
            $docFreq      = $tiiFile->readVInt();
            $freqPointer += $tiiFile->readVInt();
            $proxPointer += $tiiFile->readVInt();
            // The skip value is only present when docFreq reaches skipInterval
            if ($docFreq >= $skipInterval) {
                $skipDelta = $tiiFile->readVInt();
            } else {
                $skipDelta = 0;
            }

            $indexPointer += $tiiFile->readVInt();

            $this->_termDictionary[]      = new Zend_Search_Lucene_Index_Term($termValue, $termFieldNum);
            $this->_termDictionaryInfos[] =
                 new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipDelta, $indexPointer);
            $prevTerm = $termValue;
        }
    }


    /**
     * Return segment name
     *
     * @return string
     */
    public function getName()
    {
        return $this->_name;
    }


    /**
     * Scans terms dictionary and returns term info
     *
     * A binary search over the in-memory dictionary index finds either the
     * term itself or the closest preceding index entry; from that entry the
     * .tis file is scanned sequentially until the term is reached or passed.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @throws Zend_Search_Lucene_Exception if a dictionary file has an unexpected version marker
     * @return Zend_Search_Lucene_Index_TermInfo|null  null if the field or the term is not in this segment
     */
    public function getTermInfo($term)
    {
        $this->_loadDictionary();

        $searchField = $this->getFieldNum($term->field);

        if ($searchField == -1) {
            return null;
        }
        $searchDicField = $this->_fieldsDicPositions[$searchField];

        // search for appropriate value in dictionary
        // (entries are ordered by field dictionary position, then by term text)
        $lowIndex  = 0;
        $highIndex = count($this->_termDictionary) - 1;
        while ($highIndex >= $lowIndex) {
            $mid     = ($highIndex + $lowIndex) >> 1;
            $midTerm = $this->_termDictionary[$mid];

            $delta = $searchDicField - $this->_fieldsDicPositions[$midTerm->field];
            if ($delta == 0) {
                $delta = strcmp($term->text, $midTerm->text);
            }

            if ($delta < 0) {
                $highIndex = $mid - 1;
            } elseif ($delta > 0) {
                $lowIndex  = $mid + 1;
            } else {
                return $this->_termDictionaryInfos[$mid]; // We got it!
            }
        }

        if ($highIndex == -1) {
            // Term is out of the dictionary range
            return null;
        }

        // $highIndex now points at the last index entry that sorts before the term
        $prevPosition = $highIndex;
        $prevTerm     = $this->_termDictionary[$prevPosition];
        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];

        $tisFile   = $this->openCompoundFile('.tis');
        $tiVersion = $tisFile->readInt();
        if ($tiVersion != (int)0xFFFFFFFE) {
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
        }

        $termCount     = $tisFile->readLong();
        $indexInterval = $tisFile->readInt();
        $skipInterval  = $tisFile->readInt();

        // indexPointer is applied relative to the current position after the
        // header reads above, hence the subtraction of the header size.
        $tisFile->seek($prevTermInfo->indexPointer - 20 /* header size */, SEEK_CUR);

        $termValue    = $prevTerm->text;
        $termFieldNum = $prevTerm->field;
        $freqPointer  = $prevTermInfo->freqPointer;
        $proxPointer  = $prevTermInfo->proxPointer;
        for ($count = $prevPosition * $indexInterval + 1;
             $count < $termCount &&
             ( $this->_fieldsDicPositions[$termFieldNum] < $searchDicField ||
              ($this->_fieldsDicPositions[$termFieldNum] == $searchDicField &&
               strcmp($termValue, $term->text) < 0) );
             $count++) {
            $termPrefixLength = $tisFile->readVInt();
            $termSuffix       = $tisFile->readString();
            $termFieldNum     = $tisFile->readVInt();
            $termValue        = substr($termValue, 0, $termPrefixLength) . $termSuffix;

            $docFreq      = $tisFile->readVInt();
            $freqPointer += $tisFile->readVInt();
            $proxPointer += $tisFile->readVInt();
            if ($docFreq >= $skipInterval) {
                $skipOffset = $tisFile->readVInt();
            } else {
                $skipOffset = 0;
            }
        }

        // Compare term text with strcmp(), exactly as the searches above do.
        // A loose "==" treats numeric-looking strings such as "1000" and "1e3"
        // as equal and could report a match for a different term (or for the
        // index entry itself, before any record was read).
        if ($termFieldNum == $searchField && strcmp($termValue, $term->text) == 0) {
            return new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
        } else {
            return null;
        }
    }

    /**
     * Returns normalization factor for specified documents
     *
     * The norm vector of a field is read from the segment on first use and
     * cached in $_norms.
     *
     * @param integer $id         document number within this segment
     * @param string  $fieldName
     * @throws Zend_Search_Lucene_Exception if the norm file of the field is missing
     * @return float|null  value produced by Zend_Search_Lucene_Search_Similarity::decodeNorm(),
     *                     or null if the field is unknown or not indexed
     */
    public function norm($id, $fieldName)
    {
        $fieldNum = $this->getFieldNum($fieldName);

        // getFieldNum() reports an unknown field as -1, which is not a valid
        // key of $_fields; answer null instead of dereferencing a missing entry.
        if (!isset($this->_fields[$fieldNum])) {
            return null;
        }

        if (!($this->_fields[$fieldNum]->isIndexed)) {
            return null;
        }

        if (!isset($this->_norms[$fieldNum])) {
            $fFile = $this->openCompoundFile('.f' . $fieldNum);
            $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
        }

        // One byte per document; square brackets are used because the
        // curly-brace string offset syntax no longer parses on PHP 8.
        return Zend_Search_Lucene_Search_Similarity::decodeNorm(ord($this->_norms[$fieldNum][$id]));
    }
}
titre
Description
Corps
titre
Description
Corps
titre
Description
Corps
titre
Corps
Généré le : Sun Feb 25 21:07:04 2007 | par Balluche grâce à PHPXref 0.7 |