Fixed couple bugs in query, and improved logic of querylib.
[moodle.git] / search / Zend / Search / Lucene / Storage / File.php
CommitLineData
682d4032 1<?php
2/**
3 * Zend Framework
4 *
5 * LICENSE
6 *
7 * This source file is subject to the new BSD license that is bundled
8 * with this package in the file LICENSE.txt.
9 * It is also available through the world-wide-web at this URL:
10 * http://framework.zend.com/license/new-bsd
11 * If you did not receive a copy of the license and are unable to
12 * obtain it through the world-wide-web, please send an email
13 * to license@zend.com so we can send you a copy immediately.
14 *
15 * @category Zend
16 * @package Zend_Search_Lucene
17 * @subpackage Storage
18 * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
19 * @license http://framework.zend.com/license/new-bsd New BSD License
20 */
21
22
0d46c846 23
24/** Zend_Search_Lucene_Exception */
25require_once 'Zend/Search/Lucene/Exception.php';
26
27
682d4032 28/**
29 * @category Zend
30 * @package Zend_Search_Lucene
31 * @subpackage Storage
32 * @copyright Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
33 * @license http://framework.zend.com/license/new-bsd New BSD License
34 */
abstract class Zend_Search_Lucene_Storage_File
{
    /**
     * Reads $length number of bytes at the current position in the
     * file and advances the file pointer.
     *
     * @param integer $length
     * @return string
     */
    abstract protected function _fread($length=1);


    /**
     * Sets the file position indicator and advances the file pointer.
     * The new position, measured in bytes from the beginning of the file,
     * is obtained by adding offset to the position specified by whence,
     * whose values are defined as follows:
     * SEEK_SET - Set position equal to offset bytes.
     * SEEK_CUR - Set position to current location plus offset.
     * SEEK_END - Set position to end-of-file plus offset. (To move to
     * a position before the end-of-file, you need to pass a negative value
     * in offset.)
     * Upon success, returns 0; otherwise, returns -1
     *
     * @param integer $offset
     * @param integer $whence
     * @return integer
     */
    abstract public function seek($offset, $whence=SEEK_SET);

    /**
     * Get file position.
     *
     * @return integer
     */
    abstract public function tell();

    /**
     * Writes $length number of bytes (all, if $length===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $length
     */
    abstract protected function _fwrite($data, $length=null);


    /**
     * Reads a byte from the current position in the file
     * and advances the file pointer.
     *
     * @return integer
     */
    public function readByte()
    {
        return ord($this->_fread(1));
    }

    /**
     * Writes a byte to the end of the file.
     *
     * Whatever _fwrite() returns is passed through unchanged.
     *
     * @param integer $byte
     */
    public function writeByte($byte)
    {
        return $this->_fwrite(chr($byte), 1);
    }

    /**
     * Read num bytes from the current position in the file
     * and advances the file pointer.
     *
     * @param integer $num
     * @return string
     */
    public function readBytes($num)
    {
        return $this->_fread($num);
    }

    /**
     * Writes num bytes of data (all, if $num===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $num
     */
    public function writeBytes($data, $num=null)
    {
        $this->_fwrite($data, $num);
    }


    /**
     * Reads a 4-byte big-endian integer from the current position in the
     * file and advances the file pointer.
     *
     * The four bytes are combined without sign extension, so where PHP
     * integers are wider than 32 bits a value with the top bit set is
     * returned as a positive number.
     *
     * @return integer
     */
    public function readInt()
    {
        $str = $this->_fread(4);

        return  ord($str[0]) << 24 |
                ord($str[1]) << 16 |
                ord($str[2]) << 8  |
                ord($str[3]);
    }


    /**
     * Writes a 4-byte big-endian integer to the end of file.
     *
     * @param integer $value
     */
    public function writeInt($value)
    {
        settype($value, 'integer');
        $this->_fwrite( chr($value>>24 & 0xFF) .
                        chr($value>>16 & 0xFF) .
                        chr($value>>8  & 0xFF) .
                        chr($value     & 0xFF),   4  );
    }


    /**
     * Returns an 8-byte big-endian long integer from the current position
     * in the file and advances the file pointer.
     *
     * @return integer
     * @throws Zend_Search_Lucene_Exception
     */
    public function readLong()
    {
        $str = $this->_fread(8);

        /**
         * Check, that we work in 64-bit mode.
         * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            return  ord($str[0]) << 56 |
                    ord($str[1]) << 48 |
                    ord($str[2]) << 40 |
                    ord($str[3]) << 32 |
                    ord($str[4]) << 24 |
                    ord($str[5]) << 16 |
                    ord($str[6]) << 8  |
                    ord($str[7]);
        }

        // 32-bit mode: the high word must be zero AND the top bit of the low
        // word (byte 4) must be clear, otherwise the value does not fit into
        // a positive 32-bit integer.
        if ((ord($str[0]) != 0) ||
            (ord($str[1]) != 0) ||
            (ord($str[2]) != 0) ||
            (ord($str[3]) != 0) ||
            ((ord($str[4]) & 0x80) != 0)) {
            throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
        }

        return  ord($str[4]) << 24 |
                ord($str[5]) << 16 |
                ord($str[6]) << 8  |
                ord($str[7]);
    }

    /**
     * Writes an 8-byte big-endian long integer to the end of file
     *
     * @param integer $value
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeLong($value)
    {
        /**
         * Check, that we work in 64-bit mode.
         * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            settype($value, 'integer');
            $this->_fwrite( chr($value>>56 & 0xFF) .
                            chr($value>>48 & 0xFF) .
                            chr($value>>40 & 0xFF) .
                            chr($value>>32 & 0xFF) .
                            chr($value>>24 & 0xFF) .
                            chr($value>>16 & 0xFF) .
                            chr($value>>8  & 0xFF) .
                            chr($value     & 0xFF),   8  );
        } else {
            if ($value > 0x7FFFFFFF) {
                throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
            }

            // High word is always zero in 32-bit mode.
            $this->_fwrite( "\x00\x00\x00\x00"     .
                            chr($value>>24 & 0xFF) .
                            chr($value>>16 & 0xFF) .
                            chr($value>>8  & 0xFF) .
                            chr($value     & 0xFF),   8  );
        }
    }



    /**
     * Returns a variable-length integer from the current
     * position in the file and advances the file pointer.
     *
     * Each byte carries 7 payload bits (least significant group first);
     * a set high bit means another byte follows.
     *
     * @return integer
     */
    public function readVInt()
    {
        $nextByte = ord($this->_fread(1));
        $val = $nextByte & 0x7F;

        for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
            $nextByte = ord($this->_fread(1));
            $val |= ($nextByte & 0x7F) << $shift;
        }
        return $val;
    }

    /**
     * Writes a variable-length integer to the end of file.
     *
     * Emits 7 bits per byte, least significant group first, setting the
     * high bit on every byte except the last.
     *
     * @param integer $value
     */
    public function writeVInt($value)
    {
        settype($value, 'integer');
        while ($value > 0x7F) {
            $this->_fwrite(chr( ($value & 0x7F)|0x80 ));
            $value >>= 7;
        }
        $this->_fwrite(chr($value));
    }


    /**
     * Reads a string from the current position in the file
     * and advances the file pointer.
     *
     * The stored length prefix counts characters, not bytes, so the
     * method first reads one byte per character and then pulls in the
     * continuation bytes announced by every multi-byte lead byte it meets.
     * The two-byte sequence \xC0\x80 is decoded to a single NUL byte.
     *
     * @return string
     */
    public function readString()
    {
        $charCount = $this->readVInt();
        if ($charCount == 0) {
            return '';
        }

        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        $str_val = $this->_fread($charCount);

        // $byteLen tracks how many bytes of $str_val belong to the string;
        // it grows as continuation bytes are discovered.
        $byteLen = $charCount;

        for ($count = 0; $count < $byteLen; $count++) {
            $lead = ord($str_val[$count]);
            if (($lead & 0xC0) != 0xC0) {
                // Single-byte character, nothing more to fetch.
                continue;
            }

            $addBytes = 1;
            if ($lead & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                if ($lead & 0x10) {
                    $addBytes++;
                }
            }
            $str_val .= $this->_fread($addBytes);
            $byteLen += $addBytes;

            // Check for null character. Java2 encodes null character
            // in two bytes.
            if ($lead == 0xC0 && ord($str_val[$count+1]) == 0x80) {
                // Collapse \xC0\x80 into one real NUL byte. The string is
                // now one byte shorter and the byte after the NUL is the
                // next character, so do not skip over it.
                $str_val = substr($str_val, 0, $count)
                         . "\x00"
                         . substr($str_val, $count+2);
                $byteLen--;
            } else {
                // Skip the continuation bytes of this character.
                $count += $addBytes;
            }
        }

        return $str_val;
    }

    /**
     * Writes a string to the end of file.
     *
     * The string is expected to be UTF-8 already; the method writes the
     * character count as a VInt followed by the bytes, with every NUL byte
     * replaced by the two-byte form \xC0\x80.
     *
     * @param string $str
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeString($str)
    {
        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // convert input to a string before iterating string characters
        settype($str, 'string');

        $chars = $strlen = strlen($str);

        for ($count = 0; $count < $strlen; $count++) {
            /**
             * String is already in Java 2 representation.
             * We should only calculate actual string length and replace
             * \x00 by \xC0\x80
             */
            $lead = ord($str[$count]);
            if (($lead & 0xC0) != 0xC0) {
                continue;
            }

            $addBytes = 1;
            if ($lead & 0x20) {
                $addBytes++;

                // Never used. Java2 doesn't encode strings in four bytes
                // and we don't support non-BMP characters
                if ($lead & 0x10) {
                    $addBytes++;
                }
            }

            // Continuation bytes do not count as characters.
            $chars -= $addBytes;
            $count += $addBytes;
        }

        if ($chars < 0) {
            throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
        }

        $this->writeVInt($chars);

        // A NUL byte is a single character in the count above but is stored
        // as \xC0\x80. str_replace() returns $str unchanged when there is
        // nothing to replace.
        $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
    }


    /**
     * Reads binary data from the current position in the file
     * and advances the file pointer.
     *
     * The data is prefixed by its byte length stored as a VInt.
     *
     * @return string
     */
    public function readBinary()
    {
        return $this->_fread($this->readVInt());
    }
}