[ Index ]
 

Code source de PRADO 3.0.6

Accédez au Source d'autres logiciels libres | Soutenez Angelica Josefina !

title

Body

[fermer]

/demos/quickstart/protected/index/Zend/Search/Lucene/Index/ -> SegmentInfo.php (source)

   1  <?php
   2  /**
   3   * Zend Framework
   4   *
   5   * LICENSE
   6   *
   7   * This source file is subject to version 1.0 of the Zend Framework
   8   * license, that is bundled with this package in the file LICENSE, and
   9   * is available through the world-wide-web at the following URL:
  10   * http://www.zend.com/license/framework/1_0.txt. If you did not receive
  11   * a copy of the Zend Framework license and are unable to obtain it
  12   * through the world-wide-web, please send a note to license@zend.com
  13   * so we can mail you a copy immediately.
  14   *
  15   * @package    Zend_Search_Lucene
  16   * @subpackage Index
  17   * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
  18   * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
  19   */
  20  
  21  
  22  /** Zend_Search_Lucene_Exception */
  23  require_once 'Zend/Search/Lucene/Exception.php';
  24  
  25  
  26  /**
  27   * @package    Zend_Search_Lucene
  28   * @subpackage Index
  29   * @copyright  Copyright (c) 2005-2006 Zend Technologies USA Inc. (http://www.zend.com)
  30   * @license    http://www.zend.com/license/framework/1_0.txt Zend Framework License version 1.0
  31   */
  32  class Zend_Search_Lucene_Index_SegmentInfo
  33  {
  34      /**
  35       * Number of docs in a segment
  36       *
  37       * @var integer
  38       */
  39      private $_docCount;
  40  
  41      /**
  42       * Segment name
  43       *
  44       * @var string
  45       */
  46      private $_name;
  47  
  48      /**
  49       * Term Dictionary Index
  50       * Array of the Zend_Search_Lucene_Index_Term objects
  51       * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
  52       *
  53       * @var array
  54       */
  55      private $_termDictionary;
  56  
  57      /**
  58       * Term Dictionary Index TermInfos
  59       * Array of the Zend_Search_Lucene_Index_TermInfo objects
  60       *
  61       * @var array
  62       */
  63      private $_termDictionaryInfos;
  64  
  65      /**
  66       * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
  67       *
  68       * @var array
  69       */
  70      private $_fields;
  71  
  72      /**
  73       * Field positions in a dictionary.
  74       * (Term dictionary contains filelds ordered by names)
  75       *
  76       * @var array
  77       */
  78      private $_fieldsDicPositions;
  79  
  80  
  81      /**
  82       * Associative array where the key is the file name and the value is data offset
  83       * in a compound segment file (.csf).
  84       *
  85       * @var array
  86       */
  87      private $_segFiles;
  88  
  89      /**
  90       * File system adapter.
  91       *
  92       * @var Zend_Search_Lucene_Storage_Directory_Filesystem
  93       */
  94      private $_directory;
  95  
  96      /**
  97       * Normalization factors.
  98       * An array fieldName => normVector
  99       * normVector is a binary string.
 100       * Each byte corresponds to an indexed document in a segment and
 101       * encodes normalization factor (float value, encoded by
 102       * Zend_Search_Lucene_Search_Similarity::encodeNorm())
 103       *
 104       * @var array
 105       */
 106      private $_norms = array();
 107  
 108      /**
 109       * Zend_Search_Lucene_Index_SegmentInfo constructor needs Segmentname,
 110       * Documents count and Directory as a parameter.
 111       *
 112       * @param string $name
 113       * @param integer $docCount
 114       * @param Zend_Search_Lucene_Storage_Directory $directory
 115       */
 116      public function __construct($name, $docCount, $directory)
 117      {
 118          $this->_name = $name;
 119          $this->_docCount = $docCount;
 120          $this->_directory = $directory;
 121          $this->_termDictionary = null;
 122  
 123          $this->_segFiles = array();
 124          $cfsFile = $this->_directory->getFileObject($name . '.cfs');
 125          $segFilesCount = $cfsFile->readVInt();
 126  
 127          for ($count = 0; $count < $segFilesCount; $count++) {
 128              $dataOffset = $cfsFile->readLong();
 129              $fileName = $cfsFile->readString();
 130              $this->_segFiles[$fileName] = $dataOffset;
 131          }
 132  
 133          $fnmFile = $this->openCompoundFile('.fnm');
 134          $fieldsCount = $fnmFile->readVInt();
 135          $fieldNames = array();
 136          $fieldNums  = array();
 137          $this->_fields = array();
 138          for ($count=0; $count < $fieldsCount; $count++) {
 139              $fieldName = $fnmFile->readString();
 140              $fieldBits = $fnmFile->readByte();
 141              $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
 142                                                                              $fieldBits & 1,
 143                                                                              $count,
 144                                                                              $fieldBits & 2 );
 145              if ($fieldBits & 0x10) {
 146                  // norms are omitted for the indexed field
 147                  $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
 148              }
 149  
 150              $fieldNums[$count]  = $count;
 151              $fieldNames[$count] = $fieldName;
 152          }
 153          array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
 154          $this->_fieldsDicPositions = array_flip($fieldNums);
 155      }
 156  
 157      /**
 158       * Opens index file stoted within compound index file
 159       *
 160       * @param string $extension
 161       * @throws Zend_Search_Lucene_Exception
 162       * @return Zend_Search_Lucene_Storage_File
 163       */
 164      public function openCompoundFile($extension)
 165      {
 166          $filename = $this->_name . $extension;
 167  
 168          if( !isset($this->_segFiles[ $filename ]) ) {
 169              throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
 170                                         . $filename . ' file.' );
 171          }
 172  
 173          $file = $this->_directory->getFileObject( $this->_name.".cfs" );
 174          $file->seek( $this->_segFiles[ $filename ] );
 175          return $file;
 176      }
 177  
 178      /**
 179       * Returns field index or -1 if field is not found
 180       *
 181       * @param string $fieldName
 182       * @return integer
 183       */
 184      public function getFieldNum($fieldName)
 185      {
 186          foreach( $this->_fields as $field ) {
 187              if( $field->name == $fieldName ) {
 188                  return $field->number;
 189              }
 190          }
 191  
 192          return -1;
 193      }
 194  
 195      /**
 196       * Returns field info for specified field
 197       *
 198       * @param integer $fieldNum
 199       * @return ZSearchFieldInfo
 200       */
 201      public function getField($fieldNum)
 202      {
 203          return $this->_fields[$fieldNum];
 204      }
 205  
 206      /**
 207       * Returns array of fields.
 208       * if $indexed parameter is true, then returns only indexed fields.
 209       *
 210       * @param boolean $indexed
 211       * @return array
 212       */
 213      public function getFields($indexed = false)
 214      {
 215          $result = array();
 216          foreach( $this->_fields as $field ) {
 217              if( (!$indexed) || $field->isIndexed ) {
 218                  $result[ $field->name ] = $field->name;
 219              }
 220          }
 221          return $result;
 222      }
 223  
 224      /**
 225       * Returns the total number of documents in this segment.
 226       *
 227       * @return integer
 228       */
 229      public function count()
 230      {
 231          return $this->_docCount;
 232      }
 233  
 234  
 235      /**
 236       * Loads Term dictionary from TermInfoIndex file
 237       */
 238      protected function _loadDictionary()
 239      {
 240          if ($this->_termDictionary !== null) {
 241              return;
 242          }
 243  
 244          $this->_termDictionary = array();
 245          $this->_termDictionaryInfos = array();
 246  
 247          $tiiFile = $this->openCompoundFile('.tii');
 248          $tiVersion = $tiiFile->readInt();
 249          if ($tiVersion != (int)0xFFFFFFFE) {
 250              throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
 251          }
 252  
 253          $indexTermCount = $tiiFile->readLong();
 254                            $tiiFile->readInt();  // IndexInterval
 255          $skipInterval   = $tiiFile->readInt();
 256  
 257          $prevTerm     = '';
 258          $freqPointer  =  0;
 259          $proxPointer  =  0;
 260          $indexPointer =  0;
 261          for ($count = 0; $count < $indexTermCount; $count++) {
 262              $termPrefixLength = $tiiFile->readVInt();
 263              $termSuffix       = $tiiFile->readString();
 264              $termValue        = substr( $prevTerm, 0, $termPrefixLength ) . $termSuffix;
 265  
 266              $termFieldNum     = $tiiFile->readVInt();
 267              $docFreq          = $tiiFile->readVInt();
 268              $freqPointer     += $tiiFile->readVInt();
 269              $proxPointer     += $tiiFile->readVInt();
 270              if( $docFreq >= $skipInterval ) {
 271                  $skipDelta = $tiiFile->readVInt();
 272              } else {
 273                  $skipDelta = 0;
 274              }
 275  
 276              $indexPointer += $tiiFile->readVInt();
 277  
 278              $this->_termDictionary[] =  new Zend_Search_Lucene_Index_Term($termValue,$termFieldNum);
 279              $this->_termDictionaryInfos[] =
 280                  new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipDelta, $indexPointer);
 281              $prevTerm = $termValue;
 282          }
 283      }
 284  
 285  
 286      /**
 287       * Return segment name
 288       *
 289       * @return string
 290       */
 291      public function getName()
 292      {
 293          return $this->_name;
 294      }
 295  
 296  
 297      /**
 298       * Scans terms dictionary and returns term info
 299       *
 300       * @param Zend_Search_Lucene_Index_Term $term
 301       * @return Zend_Search_Lucene_Index_TermInfo
 302       */
 303      public function getTermInfo($term)
 304      {
 305          $this->_loadDictionary();
 306  
 307          $searchField = $this->getFieldNum($term->field);
 308  
 309          if ($searchField == -1) {
 310              return null;
 311          }
 312          $searchDicField = $this->_fieldsDicPositions[$searchField];
 313  
 314          // search for appropriate value in dictionary
 315          $lowIndex = 0;
 316          $highIndex = count($this->_termDictionary)-1;
 317          while ($highIndex >= $lowIndex) {
 318              // $mid = ($highIndex - $lowIndex)/2;
 319              $mid = ($highIndex + $lowIndex) >> 1;
 320              $midTerm = $this->_termDictionary[$mid];
 321  
 322              $delta = $searchDicField - $this->_fieldsDicPositions[$midTerm->field];
 323              if ($delta == 0) {
 324                  $delta = strcmp($term->text, $midTerm->text);
 325              }
 326  
 327              if ($delta < 0) {
 328                  $highIndex = $mid-1;
 329              } elseif ($delta > 0) {
 330                  $lowIndex  = $mid+1;
 331              } else {
 332                  return $this->_termDictionaryInfos[$mid]; // We got it!
 333              }
 334          }
 335  
 336          if ($highIndex == -1) {
 337              // Term is out of the dictionary range
 338              return null;
 339          }
 340  
 341          $prevPosition = $highIndex;
 342          $prevTerm = $this->_termDictionary[$prevPosition];
 343          $prevTermInfo = $this->_termDictionaryInfos[ $prevPosition ];
 344  
 345          $tisFile = $this->openCompoundFile('.tis');
 346          $tiVersion = $tisFile->readInt();
 347          if ($tiVersion != (int)0xFFFFFFFE) {
 348              throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
 349          }
 350  
 351          $termCount     = $tisFile->readLong();
 352          $indexInterval = $tisFile->readInt();
 353          $skipInterval  = $tisFile->readInt();
 354  
 355          $tisFile->seek($prevTermInfo->indexPointer - 20 /* header size*/, SEEK_CUR);
 356  
 357          $termValue    = $prevTerm->text;
 358          $termFieldNum = $prevTerm->field;
 359          $freqPointer = $prevTermInfo->freqPointer;
 360          $proxPointer = $prevTermInfo->proxPointer;
 361          for ($count = $prevPosition*$indexInterval + 1;
 362               $count < $termCount &&
 363               ( $this->_fieldsDicPositions[ $termFieldNum ] < $searchDicField ||
 364                ($this->_fieldsDicPositions[ $termFieldNum ] == $searchDicField &&
 365                 strcmp($termValue, $term->text) < 0) );
 366               $count++) {
 367              $termPrefixLength = $tisFile->readVInt();
 368              $termSuffix       = $tisFile->readString();
 369              $termFieldNum     = $tisFile->readVInt();
 370              $termValue        = substr( $termValue, 0, $termPrefixLength ) . $termSuffix;
 371  
 372              $docFreq      = $tisFile->readVInt();
 373              $freqPointer += $tisFile->readVInt();
 374              $proxPointer += $tisFile->readVInt();
 375              if( $docFreq >= $skipInterval ) {
 376                  $skipOffset = $tisFile->readVInt();
 377              } else {
 378                  $skipOffset = 0;
 379              }
 380          }
 381  
 382          if ($termFieldNum == $searchField && $termValue == $term->text) {
 383              return new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
 384          } else {
 385              return null;
 386          }
 387      }
 388  
 389      /**
 390       * Returns normalization factor for specified documents
 391       *
 392       * @param integer $id
 393       * @param string $fieldName
 394       * @return string
 395       */
 396      public function norm($id, $fieldName)
 397      {
 398          $fieldNum = $this->getFieldNum($fieldName);
 399  
 400          if ( !($this->_fields[$fieldNum]->isIndexed) ) {
 401              return null;
 402          }
 403  
 404          if ( !isset( $this->_norms[$fieldNum] )) {
 405              $fFile = $this->openCompoundFile('.f' . $fieldNum);
 406              $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
 407          }
 408  
 409          return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum]{$id}) );
 410      }
 411  }
 412  


Généré le : Sun Feb 25 21:07:04 2007 par Balluche grâce à PHPXref 0.7