Initial commit
[moodle.git] / search / Zend / Search / Lucene / Index / Writer.php
1 <?php
2 /**
3  * Zend Framework
4  *
5  * LICENSE
6  *
7  * This source file is subject to the new BSD license that is bundled
8  * with this package in the file LICENSE.txt.
9  * It is also available through the world-wide-web at this URL:
10  * http://framework.zend.com/license/new-bsd
11  * If you did not receive a copy of the license and are unable to
12  * obtain it through the world-wide-web, please send an email
13  * to license@zend.com so we can send you a copy immediately.
14  *
15  * @category   Zend
16  * @package    Zend_Search_Lucene
17  * @subpackage Index
18  * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
20  */
23 /** Zend_Search_Lucene_Index_SegmentWriter */
24 require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';
26 /** Zend_Search_Lucene_Index_SegmentInfo */
27 require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
30 /**
31  * @category   Zend
32  * @package    Zend_Search_Lucene
33  * @subpackage Index
34  * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
35  * @license    http://framework.zend.com/license/new-bsd     New BSD License
36  */
37 class Zend_Search_Lucene_Index_Writer
38 {
39     /**
40      * @todo Implement segment merger
41      * @todo Implement mergeFactor, minMergeDocs, maxMergeDocs usage.
42      * @todo Implement Analyzer substitution
43      * @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM to use it for
44      *       temporary index files
45      * @todo Directory lock processing
46      */
48     /**
49      * File system adapter.
50      *
51      * @var Zend_Search_Lucene_Storage_Directory
52      */
53     private $_directory = null;
56     /**
57      * Index version
58      * Counts how often the index has been changed by adding or deleting docs
59      *
60      * @var integer
61      */
62     private $_version;
64     /**
65      * Segment name counter.
66      * Used to name new segments.
67      *
68      * @var integer
69      */
70     private $_segmentNameCounter;
72     /**
73      * Number of the segments in the index
74      *
75      * @var integer
76      */
77     private $_segments;
79     /**
80      * Determines how often segment indices
81      * are merged by addDocument().
82      *
83      * @var integer
84      */
85     public $mergeFactor;
87     /**
88      * Determines the minimal number of documents required before
89      * the buffered in-memory documents are merged and a new Segment
90      * is created.
91      *
92      * @var integer
93      */
94     public $minMergeDocs;
96     /**
97      * Determines the largest number of documents ever merged by addDocument().
98      *
99      * @var integer
100      */
101     public $maxMergeDocs;
103     /**
104      * List of the segments, created by index writer
105      * Array of Zend_Search_Lucene_Index_SegmentInfo objects
106      *
107      * @var array
108      */
109     private $_newSegments;
111     /**
112      * Current segment to add documents
113      *
114      * @var Zend_Search_Lucene_Index_SegmentWriter
115      */
116     private $_currentSegment;
118     /**
119      * List of indexfiles extensions
120      *
121      * @var array
122      */
123     private static $_indexExtensions = array('.cfs' => '.cfs',
124                                              '.fnm' => '.fnm',
125                                              '.fdx' => '.fdx',
126                                              '.fdt' => '.fdt',
127                                              '.tis' => '.tis',
128                                              '.tii' => '.tii',
129                                              '.frq' => '.frq',
130                                              '.prx' => '.prx',
131                                              '.tvx' => '.tvx',
132                                              '.tvd' => '.tvd',
133                                              '.tvf' => '.tvf',
134                                              '.del' => '.del'  );
136     /**
137      * Opens the index for writing
138      *
139      * IndexWriter constructor needs Directory as a parameter. It should be
140      * a string with a path to the index folder or a Directory object.
141      * Second constructor parameter create is optional - true to create the
142      * index or overwrite the existing one.
143      *
144      * @param Zend_Search_Lucene_Storage_Directory $directory
145      * @param boolean $create
146      */
147     public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $create = false)
148     {
149         $this->_directory = $directory;
151         if ($create) {
152             foreach ($this->_directory->fileList() as $file) {
153                 if ($file == 'deletable' ||
154                     $file == 'segments'  ||
155                     isset(self::$_indexExtensions[ substr($file, strlen($file)-4)]) ||
156                     preg_match('/\.f\d+$/i', $file) /* matches <segment_name>.f<decimal_nmber> file names */) {
157                         $this->_directory->deleteFile($file);
158                     }
159             }
160             $segmentsFile = $this->_directory->createFile('segments');
161             $segmentsFile->writeInt((int)0xFFFFFFFF);
162             // write version
163             $segmentsFile->writeLong(0);
164             // write name counter
165             $segmentsFile->writeInt(0);
166             // write segment counter
167             $segmentsFile->writeInt(0);
169             $deletableFile = $this->_directory->createFile('deletable');
170             // write counter
171             $deletableFile->writeInt(0);
173             $this->_version            = 0;
174             $this->_segmentNameCounter = 0;
175             $this->_segments           = 0;
176         } else {
177             $segmentsFile = $this->_directory->getFileObject('segments');
178             $format = $segmentsFile->readInt();
179             if ($format != (int)0xFFFFFFFF) {
180                 throw new Zend_Search_Lucene_Exception('Wrong segments file format');
181             }
183             // read version
184             $this->_version            = $segmentsFile->readLong();
185             // read counter
186             $this->_segmentNameCounter = $segmentsFile->readInt();
187             // read segment counter
188             $this->_segments           = $segmentsFile->readInt();
189         }
191         $this->_newSegments = array();
192         $this->_currentSegment = null;
193     }
195     /**
196      * Adds a document to this index.
197      *
198      * @param Zend_Search_Lucene_Document $document
199      */
200     public function addDocument(Zend_Search_Lucene_Document $document)
201     {
202         if ($this->_currentSegment === null) {
203             $this->_currentSegment =
204                 new Zend_Search_Lucene_Index_SegmentWriter($this->_directory, $this->_newSegmentName());
205         }
206         $this->_currentSegment->addDocument($document);
207         $this->_version++;
208     }
212     /**
213      * Update segments file by adding current segment to a list
214      * @todo !!!!!Finish the implementation
215      *
216      * @throws Zend_Search_Lucene_Exception
217      */
218     private function _updateSegments()
219     {
220         $segmentsFile   = $this->_directory->getFileObject('segments');
221         $newSegmentFile = $this->_directory->createFile('segments.new');
223         $newSegmentFile->writeInt((int)0xFFFFFFFF);
224         $newSegmentFile->writeLong($this->_version);
225         $newSegmentFile->writeInt($this->_segmentNameCounter);
227         $this->_segments += count($this->_newSegments);
228         $newSegmentFile->writeInt($this->_segments);
230         $segmentsFile->seek(20);
231         $newSegmentFile->writeBytes($segmentsFile->readBytes($this->_directory->fileLength('segments') - 20));
233         foreach ($this->_newSegments as $segmentName => $segmentInfo) {
234             $newSegmentFile->writeString($segmentName);
235             $newSegmentFile->writeInt($segmentInfo->count());
236         }
238         $this->_directory->renameFile('segments.new', 'segments');
239     }
242     /**
243      * Commit current changes
244      * returns array of new segments
245      *
246      * @return array
247      */
248     public function commit()
249     {
250         if ($this->_currentSegment !== null) {
251             $newSegment = $this->_currentSegment->close();
252             if ($newSegment !== null) {
253                 $this->_newSegments[$newSegment->getName()] = $newSegment;
254             }
255             $this->_currentSegment = null;
256         }
258         if (count($this->_newSegments) != 0) {
259             $this->_updateSegments();
260         }
262         $result = $this->_newSegments;
263         $this->_newSegments = array();
265         return $result;
266     }
269     /**
270      * Merges the provided indexes into this index.
271      *
272      * @param array $readers
273      * @return void
274      */
275     public function addIndexes($readers)
276     {
277         /**
278          * @todo implementation
279          */
280     }
283     /**
284      * Returns the number of documents currently in this index.
285      *
286      * @return integer
287      */
288     public function docCount($readers)
289     {
290         /**
291          * @todo implementation
292          */
293     }
296     /**
297      * Flushes all changes to an index and closes all associated files.
298      *
299      */
300     public function close()
301     {
302         /**
303          * @todo implementation
304          */
305     }
308     /**
309      * Merges all segments together into a single segment, optimizing
310      * an index for search.
311      *
312      * return void
313      */
314     public function optimize()
315     {
316         /**
317          * @todo implementation
318          */
319     }
321     /**
322      * Get name for new segment
323      *
324      * @return string
325      */
326     private function _newSegmentName()
327     {
328         return '_' . base_convert($this->_segmentNameCounter++, 10, 36);
329     }