Initial commit
authormchampan <mchampan>
Sun, 25 Jun 2006 23:07:36 +0000 (23:07 +0000)
committermchampan <mchampan>
Sun, 25 Jun 2006 23:07:36 +0000 (23:07 +0000)
55 files changed:
blocks/search/block_search.php [new file with mode: 0644]
blocks/search/config_global.html [new file with mode: 0644]
mod/wiki/lib.php
search/README.txt [new file with mode: 0644]
search/Zend/Exception.php [new file with mode: 0755]
search/Zend/IMPORTANT.txt [new file with mode: 0644]
search/Zend/LICENSE.txt [new file with mode: 0644]
search/Zend/Search/Exception.php [new file with mode: 0644]
search/Zend/Search/Lucene.php [new file with mode: 0644]
search/Zend/Search/Lucene/Analysis/Analyzer.php [new file with mode: 0644]
search/Zend/Search/Lucene/Analysis/Analyzer/Common.php [new file with mode: 0644]
search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php [new file with mode: 0644]
search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php [new file with mode: 0644]
search/Zend/Search/Lucene/Analysis/Token.php [new file with mode: 0644]
search/Zend/Search/Lucene/Analysis/TokenFilter.php [new file with mode: 0644]
search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php [new file with mode: 0644]
search/Zend/Search/Lucene/Document.php [new file with mode: 0644]
search/Zend/Search/Lucene/EncodingConverter.php [new file with mode: 0644]
search/Zend/Search/Lucene/Exception.php [new file with mode: 0644]
search/Zend/Search/Lucene/Field.php [new file with mode: 0644]
search/Zend/Search/Lucene/Index/FieldInfo.php [new file with mode: 0644]
search/Zend/Search/Lucene/Index/SegmentInfo.php [new file with mode: 0644]
search/Zend/Search/Lucene/Index/SegmentWriter.php [new file with mode: 0644]
search/Zend/Search/Lucene/Index/Term.php [new file with mode: 0644]
search/Zend/Search/Lucene/Index/TermInfo.php [new file with mode: 0644]
search/Zend/Search/Lucene/Index/Writer.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/Query.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/Query/MultiTerm.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/Query/Phrase.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/Query/Term.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/QueryHit.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/QueryParser.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/QueryToken.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/QueryTokenizer.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/Similarity.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/Similarity/Default.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/Weight.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/Weight/MultiTerm.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/Weight/Phrase.php [new file with mode: 0644]
search/Zend/Search/Lucene/Search/Weight/Term.php [new file with mode: 0644]
search/Zend/Search/Lucene/Storage/Directory.php [new file with mode: 0644]
search/Zend/Search/Lucene/Storage/Directory/Filesystem.php [new file with mode: 0644]
search/Zend/Search/Lucene/Storage/File.php [new file with mode: 0644]
search/Zend/Search/Lucene/Storage/File/Filesystem.php [new file with mode: 0644]
search/Zend/Search/TODO.txt [new file with mode: 0644]
search/db/mysql.sql [new file with mode: 0644]
search/db/postgres7.sql [new file with mode: 0644]
search/documents/document.php [new file with mode: 0644]
search/documents/wiki_document.php [new file with mode: 0644]
search/index.php [new file with mode: 0644]
search/indexer.php [new file with mode: 0644]
search/indexersplash.php [new file with mode: 0644]
search/lib.php [new file with mode: 0644]
search/query.php [new file with mode: 0644]
search/stats.php [new file with mode: 0644]

diff --git a/blocks/search/block_search.php b/blocks/search/block_search.php
new file mode 100644 (file)
index 0000000..3cafd10
--- /dev/null
@@ -0,0 +1,70 @@
+<?php
+  
+  /* This is the global search shortcut block - a single query can be entered, and
+     the user will be redirected to the query page where they can enter more
+     advanced queries, and view the results of their search. When searching from
+     this block, the broadest possible selection of documents is searched.
+     
+     Author:  Michael Champanis (mchampan)
+     Date:    2006 06 23
+  
+     Todo: make strings -> get_string()  
+  */
+     
+  // Global search shortcut block: renders a one-field form that posts the
+  // query string to search/query.php.
+  class block_search extends block_base {
+
+    // sets the block title and version stamp
+    function init() {
+      $this->title = "Global Search"; //get_string()
+      $this->version = 20060625;
+    } //init
+
+    // only one instance of this block is required
+    function instance_allow_multiple() {
+      return false;
+    } //instance_allow_multiple
+
+    // label and button values can be set in admin
+    function has_config() {
+      return true;
+    } //has_config
+
+    // builds (once) and returns the block content object with ->text and ->footer
+    function get_content() {
+      global $CFG;
+
+      //cache block contents
+      if ($this->content !== NULL) {
+        return $this->content;
+      } //if
+
+      $this->content = new stdClass;
+
+      //no footer, thanks
+      $this->content->footer = '';
+
+      //lazy check for the moment
+      if (!check_php_version("5.0.0")) {
+        $this->content->text = "Sorry folks, PHP 5 is needed for the new search module.";
+        return $this->content;
+      } //if
+
+      //fetch values if defined in admin, otherwise use defaults
+      $label  = (isset($CFG->block_search_text)) ? $CFG->block_search_text : "Search Moodle";
+      $button = (isset($CFG->block_search_button)) ? $CFG->block_search_button : "Go";
+
+      //both values are free text typed into the config form, so escape them
+      //before they land in element content / a double-quoted attribute
+      $label  = htmlspecialchars($label, ENT_QUOTES);
+      $button = htmlspecialchars($button, ENT_QUOTES);
+
+      //absolute action: a relative "search/query.php" is resolved by the
+      //browser against the page currently showing the block, which is only
+      //right for pages in the site root
+      $action = $CFG->wwwroot.'/search/query.php';
+
+      //basic search form
+      //NOTE(review): "length" is not an attribute of <input>; size/maxlength
+      //was probably intended - left untouched so the rendering does not change
+      $this->content->text =
+          '<form name="query" method="post" action="'.$action.'">'
+        . '<label for="block_search_query">'.$label.'</label>'
+        . '<input type="text" id="block_search_query" name="query_string" length="50" value=""/>'
+        . '<input type="submit" value="'.$button.'"/>'
+        . '</form>';
+
+      return $this->content;
+    } //get_content
+
+    function specialisation() {
+      //empty!
+    } //specialisation
+
+  } //block_search
+
+?>
\ No newline at end of file
diff --git a/blocks/search/config_global.html b/blocks/search/config_global.html
new file mode 100644 (file)
index 0000000..63e6a1b
--- /dev/null
@@ -0,0 +1,19 @@
+<?php
+  // Global settings form for the search block: two text fields whose names
+  // match the $CFG->block_search_text / $CFG->block_search_button settings
+  // read here. Each input carries the id its <label for="..."> points to.
+?>
+<div style="text-align:center;">
+  <label for="block_search_text">Search label</label>
+  <input type="text" id="block_search_text" name="block_search_text" value="<?php
+    p(isset($CFG->block_search_text) ? $CFG->block_search_text : "Search Moodle");
+  ?>"/><br>
+  
+  <label for="block_search_button">Button label</label>
+  <input type="text" id="block_search_button" name="block_search_button" value="<?php
+    p(isset($CFG->block_search_button) ? $CFG->block_search_button : "Go");
+  ?>"/><br><br>
+  
+  <input type="submit" value="<?php print_string('savechanges'); ?>" />
+</div>
\ No newline at end of file
index 54fe22d..487847c 100644 (file)
@@ -352,6 +352,126 @@ function wiki_get_entries(&$wiki, $byindex=NULL) {
     }
 }
 
+
+/*==== Global search modifications
+ * Author: Michael Champanis (mchampan)
+ * Last date: 2006 06 25
+ * These modifications allow wiki documents to be indexed in the new
+ * search engine module - they are probably not final, and as such
+ * shouldn't be used by other stuff for the time being
+ **/
+
+//rescued and converted from ewikimoodlelib.php
+//retrieves one version of a page of the given wiki entry
+//  $entry    - wiki entry record, only ->id is read
+//  $pagename - raw page name, escaped here before it goes into the query
+//  $version  - positive integer (or integer-like string) selects that
+//              version; anything else selects the highest version
+//returns the wiki_pages record with ->meta unserialized, or false if there
+//is no matching row
+function wiki_get_latest_page(&$entry, $pagename, $version=0) {
+  global $CFG;
+   
+  //need something like this in datalib.php?
+  switch ($CFG->dbtype) {
+    case 'mysql':
+      $f = 'mysql_real_escape_string';
+      break;
+    case 'postgres7':
+      $f = 'pg_escape_string';
+      break;
+    default:
+      $f = 'addslashes';
+  } //switch
+  
+  $pagename = "'".$f($pagename)."'";
+  
+  //accept integer-like strings as well as ints: the former is_int() test
+  //silently ignored a requested version of "3" and returned the latest page
+  if (is_numeric($version) and (int)$version == $version and (int)$version > 0) {
+    $version = 'AND (version='.(int)$version.')';
+  } else {
+    $version = '';
+  } //else
+
+  //the id is forced to an integer so nothing but a number reaches the query
+  $select = "(pagename=$pagename) AND wiki=".intval($entry->id)." $version ";
+  $sort   = 'version DESC';
+  
+  //change this to recordset_select, as per http://docs.moodle.org/en/Datalib_Notes
+  $result_arr = get_records_select('wiki_pages', $select, $sort, '*', 0, 1);
+  if (!$result_arr) {
+    return false;
+  } //if
+  
+  //sorted by version DESC with a limit of 1: the first row is the wanted one
+  $result_obj = reset($result_arr);
+  
+  //NOTE(review): unserialize() on stored data, errors suppressed on purpose
+  //by the original author - kept as is
+  $result_obj->meta = @unserialize($result_obj->meta);
+  return $result_obj;
+} //wiki_get_latest_page
+
+//returns every wiki_pages record whose wiki field matches the entry id,
+//i.e. all pages including their old versions
+function wiki_get_pages(&$entry) {
+  $wikiid = $entry->id;
+  return get_records('wiki_pages', 'wiki', $wikiid);
+} //wiki_get_pages
+
+//fetches all the latest versions of all the pages of a wiki entry
+//returns an array of page records (possibly empty), or false when the
+//page name query returns nothing usable
+function wiki_get_latest_pages(&$entry) {
+  //== (My)SQL for this
+  /* select * from wiki_pages
+     inner join
+    (select wiki_pages.pagename, max(wiki_pages.version) as ver
+    from wiki_pages group by pagename) as a
+    on ((wiki_pages.version = a.ver) and
+    (wiki_pages.pagename like a.pagename)) */
+  
+  $pages = array();
+  
+  //http://moodle.org/bugs/bug.php?op=show&bugid=5877&pos=0
+  //if ($ids = get_records('wiki_pages', 'wiki', $entry->id, '', 'distinct pagename')) { 
+  $rs = get_recordset('wiki_pages', 'wiki', $entry->id, '', 'distinct pagename');
+  if (!$rs) {
+    return false;
+  } //if
+  //--
+  
+  foreach ($rs->GetRows() as $row) {
+    //a single column is selected; reset() returns it whether the row is
+    //indexed numerically, by column name or both (the fetch mode is set
+    //outside this function, so $row[0] is not guaranteed to exist)
+    $pagename = reset($row);
+    
+    //the lookup returns false when no row matches; keep only real records
+    //so callers can dereference every element
+    $page = wiki_get_latest_page($entry, $pagename);
+    if ($page !== false) {
+      $pages[] = $page;
+    } //if
+  } //foreach
+    
+  return $pages;   
+} //wiki_get_latest_pages
+
+//returns the result of get_all_instances_in_courses() for the "wiki"
+//module over everything get_courses() yields
+function wiki_iterator() {
+  $courses = get_courses();
+  return get_all_instances_in_courses("wiki", $courses);
+} //wiki_iterator
+
+//builds the list of search documents for one wiki: a WikiSearchDocument for
+//the latest version of every page with non-empty content, across all
+//entries of the wiki
+//returns an array of WikiSearchDocument objects (empty if nothing to index)
+function wiki_get_content_for_index(&$wiki) {
+  $documents = array();
+  
+  //wiki_get_entries() is defined above this hunk; guard in case it hands
+  //back a non-array when the wiki has no entries - TODO confirm
+  $entries = wiki_get_entries($wiki);
+  if (!is_array($entries)) {
+    return $documents;
+  } //if
+  
+  foreach ($entries as $entry) {
+    //latest pages only; use wiki_get_pages($entry) to index old versions too
+    $pages = wiki_get_latest_pages($entry);
+    if (!is_array($pages)) {
+      //lookup failed for this entry, nothing to add
+      continue;
+    } //if
+    
+    foreach ($pages as $page) {
+      //the list may contain false for pages that could not be loaded
+      if (is_object($page) and strlen($page->content) > 0) {
+        $documents[] = new WikiSearchDocument($page, $entry->wikiid, $entry->course, $entry->userid, $entry->groupid);
+      } //if
+    } //foreach
+  } //foreach
+  
+  return $documents;
+} //wiki_get_content_for_index
+
+/*==== Global search modifications end */
+
+
 function wiki_get_default_entry(&$wiki, &$course, $userid=0, $groupid=0) {
 /// Returns the wiki entry according to the wiki type.
 /// Optionally, will return wiki entry for $userid student wiki, or
diff --git a/search/README.txt b/search/README.txt
new file mode 100644 (file)
index 0000000..c3d4ab1
--- /dev/null
@@ -0,0 +1,22 @@
+This is the initial release (prototype) of Moodle's new search module -
+so basically watch out for sharp edges.
+
+The structure has not been finalised, but this is what is working at the
+moment. When I start looking at other content to index, it will most likely
+change. I don't recommend trying to make your own content modules indexable,
+at least not until the whole flow is finalised. I will be implementing the
+functions needed to index all of the default content modules on Moodle, so
+expect that around mid-August.
+
+Wiki pages were my goal for this release, they can be indexed and searched,
+but not updated or deleted at this stage (was waiting for ZF 0.14 actually).
+
+I still need to check the PostgreSQL SQL file: I don't have a PG7 install lying
+around to test on, so the script is untested.
+
+To index for the first time, login as an admin user and browse to /search/index.php
+or /search/stats.php - there will be a message and a link telling you to go index.
+
+-- Michael Champanis (mchampan)
+   cynnical@gmail.com
+   Summer of Code 2006
\ No newline at end of file
diff --git a/search/Zend/Exception.php b/search/Zend/Exception.php
new file mode 100755 (executable)
index 0000000..c47fffb
--- /dev/null
@@ -0,0 +1,30 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * Root exception type of the bundled Zend code; adds nothing to the
+ * built-in Exception class.
+ *
+ * @category   Zend
+ * @package    Zend
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Exception extends Exception
+{
+}
+
diff --git a/search/Zend/IMPORTANT.txt b/search/Zend/IMPORTANT.txt
new file mode 100644 (file)
index 0000000..56ecab0
--- /dev/null
@@ -0,0 +1,15 @@
+We are running cutting-edge (i.e. HEAD) Zend Framework:
+  URL: http://framework.zend.com/svn/framework/trunk
+  Revision: 696
+  Last Changed Rev: 696
+  Last Changed Date: 2006-06-23 02:14:54 +0200 (Fri, 23 Jun 2006)
+
+The copy of Zend Framework in this directory contains only the minimum
+needed to run Zend_Search_Lucene - I don't foresee any problems, since the
+license is new BSD...
+
+To obtain a full Zend Framework package, please visit:
+  http://framework.zend.com/
+
+Or alternatively check it out from SVN:
+  svn checkout http://framework.zend.com/svn/framework/trunk
\ No newline at end of file
diff --git a/search/Zend/LICENSE.txt b/search/Zend/LICENSE.txt
new file mode 100644 (file)
index 0000000..60f72fe
--- /dev/null
@@ -0,0 +1,27 @@
+Copyright (c) 2006, Zend Technologies USA, Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+
+    * Neither the name of Zend Technologies USA, Inc. nor the names of its
+      contributors may be used to endorse or promote products derived from this
+      software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff --git a/search/Zend/Search/Exception.php b/search/Zend/Search/Exception.php
new file mode 100644 (file)
index 0000000..a111cf6
--- /dev/null
@@ -0,0 +1,36 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * Framework base exception
+ */
+require_once 'Zend/Exception.php';
+
+
+/**
+ * Exception type for the Zend_Search package; a plain subclass of
+ * Zend_Exception with no members of its own.
+ *
+ * @category   Zend
+ * @package    Zend_Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Exception extends Zend_Exception
+{
+}
+
diff --git a/search/Zend/Search/Lucene.php b/search/Zend/Search/Lucene.php
new file mode 100644 (file)
index 0000000..3e33b7c
--- /dev/null
@@ -0,0 +1,614 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Exception */
+require_once 'Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Document */
+require_once 'Zend/Search/Lucene/Document.php';
+
+/** Zend_Search_Lucene_Storage_Directory */
+require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';
+
+/** Zend_Search_Lucene_Index_Term */
+require_once 'Zend/Search/Lucene/Index/Term.php';
+
+/** Zend_Search_Lucene_Index_TermInfo */
+require_once 'Zend/Search/Lucene/Index/TermInfo.php';
+
+/** Zend_Search_Lucene_Index_SegmentInfo */
+require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
+
+/** Zend_Search_Lucene_Index_FieldInfo */
+require_once 'Zend/Search/Lucene/Index/FieldInfo.php';
+
+/** Zend_Search_Lucene_Index_Writer */
+require_once 'Zend/Search/Lucene/Index/Writer.php';
+
+/** Zend_Search_Lucene_Search_QueryParser */
+require_once 'Zend/Search/Lucene/Search/QueryParser.php';
+
+/** Zend_Search_Lucene_Search_QueryHit */
+require_once 'Zend/Search/Lucene/Search/QueryHit.php';
+
+/** Zend_Search_Lucene_Search_Similarity */
+require_once 'Zend/Search/Lucene/Search/Similarity.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene
+{
+    /**
+     * File system adapter.
+     *
+     * @var Zend_Search_Lucene_Storage_Directory
+     */
+    private $_directory = null;
+
+    /**
+     * File system adapter closing option
+     *
+     * @var boolean
+     */
+    private $_closeDirOnExit = true;
+
+    /**
+     * Writer for this index, not instantiated unless required.
+     *
+     * @var Zend_Search_Lucene_Index_Writer
+     */
+    private $_writer = null;
+
+    /**
+     * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
+     *
+     * @var array Zend_Search_Lucene_Index_SegmentInfo
+     */
+    private $_segmentInfos = array();
+
+    /**
+     * Number of documents in this index.
+     *
+     * @var integer
+     */
+    private $_docCount = 0;
+
+    /**
+     * Flag for index changes
+     *
+     * @var boolean
+     */
+    private $_hasChanges = false;
+
+    /**
+     * Opens the index.
+     *
+     * The constructor needs the index location as a parameter. It should be
+     * a string with a path to the index folder or a
+     * Zend_Search_Lucene_Storage_Directory_Filesystem object. A directory
+     * object passed in is not closed by the destructor; one created here is.
+     *
+     * After the optional writer is set up, the 'segments' file is read to
+     * build the list of segment infos and the total document count.
+     *
+     * @param mixed   $directory path string or filesystem directory object
+     * @param boolean $create    when true an index writer is instantiated
+     *                           right away with true as its second argument
+     * @throws Zend_Search_Lucene_Exception
+     */
+    public function __construct($directory = null, $create = false)
+    {
+        if ($directory === null) {
+            // Same exception class as the rest of this file and as the
+            // docblock promises; it is the one this file require_once's.
+            // NOTE(review): assumes Zend_Search_Lucene_Exception extends
+            // Zend_Search_Exception so existing catch blocks still match - confirm.
+            throw new Zend_Search_Lucene_Exception('No index directory specified');
+        }
+
+        if ($directory instanceof Zend_Search_Lucene_Storage_Directory_Filesystem) {
+            $this->_directory      = $directory;
+            $this->_closeDirOnExit = false;
+        } else {
+            $this->_directory      = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
+            $this->_closeDirOnExit = true;
+        }
+
+        if ($create) {
+            $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, true);
+        } else {
+            $this->_writer = null;
+        }
+
+        $this->_segmentInfos = array();
+
+        $segmentsFile = $this->_directory->getFileObject('segments');
+
+        $format = $segmentsFile->readInt();
+
+        if ($format != (int)0xFFFFFFFF) {
+            throw new Zend_Search_Lucene_Exception('Wrong segments file format');
+        }
+
+        // read version (value not used, the read only advances the file position)
+        $segmentsFile->readLong();
+
+        // read counter (value not used)
+        $segmentsFile->readInt();
+
+        $segments = $segmentsFile->readInt();
+
+        $this->_docCount = 0;
+
+        // read segmentInfos: one (name, size) pair per segment
+        for ($count = 0; $count < $segments; $count++) {
+            $segName = $segmentsFile->readString();
+            $segSize = $segmentsFile->readInt();
+            $this->_docCount += $segSize;
+
+            $this->_segmentInfos[$count] =
+                                new Zend_Search_Lucene_Index_SegmentInfo($segName,
+                                                                         $segSize,
+                                                                         $this->_directory);
+        }
+    }
+
+
+    /**
+     * Object destructor: commits pending changes, then closes the directory
+     * if this object was the one that opened it.
+     */
+    public function __destruct()
+    {
+        $this->commit();
+
+        if (!$this->_closeDirOnExit) {
+            return;
+        }
+
+        $this->_directory->close();
+    }
+
+    /**
+     * Returns the index writer, creating it on first use.
+     *
+     * @return Zend_Search_Lucene_Index_Writer
+     */
+    public function getIndexWriter()
+    {
+        if ($this->_writer instanceof Zend_Search_Lucene_Index_Writer) {
+            return $this->_writer;
+        }
+
+        $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory);
+
+        return $this->_writer;
+    }
+
+
+    /**
+     * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
+     *
+     * @return Zend_Search_Lucene_Storage_Directory
+     */
+    public function getDirectory()
+    {
+        return $this->_directory;
+    }
+
+
+    /**
+     * Returns the total number of documents in this index.
+     *
+     * @return integer
+     */
+    public function count()
+    {
+        return $this->_docCount;
+    }
+
+
+    /**
+     * Runs a query against the index.
+     *
+     * A string is first parsed by Zend_Search_Lucene_Search_QueryParser.
+     * Pending changes are committed, then every document id below count()
+     * is scored; documents with a non-zero score become hits, ordered by
+     * descending score.
+     *
+     * @param mixed $query string or Zend_Search_Lucene_Search_Query
+     * @return array of Zend_Search_Lucene_Search_QueryHit
+     * @throws Zend_Search_Lucene_Exception if $query is neither of the above
+     */
+    public function find($query)
+    {
+        $parsed = is_string($query)
+                ? Zend_Search_Lucene_Search_QueryParser::parse($query)
+                : $query;
+
+        if (!($parsed instanceof Zend_Search_Lucene_Search_Query)) {
+            throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');
+        }
+
+        $this->commit();
+
+        $hits   = array();
+        $scores = array();
+
+        $total = $this->count();
+        $docId = 0;
+        while ($docId < $total) {
+            $score = $parsed->score($docId, $this);
+
+            if ($score != 0) {
+                $hit        = new Zend_Search_Lucene_Search_QueryHit($this);
+                $hit->id    = $docId;
+                $hit->score = $score;
+
+                $hits[]   = $hit;
+                $scores[] = $score;
+            }
+
+            $docId++;
+        }
+
+        // $scores drives the ordering, $hits is rearranged alongside it
+        array_multisort($scores, SORT_DESC, SORT_REGULAR, $hits);
+
+        return $hits;
+    }
+
+
+    /**
+     * Collects the field names reported by every segment of this index.
+     *
+     * The per-segment results of getFields($indexed) are combined with
+     * array_merge().
+     *
+     * @param boolean $indexed passed through to each segment's getFields()
+     * @return array
+     */
+    public function getFieldNames($indexed = false)
+    {
+        $names = array();
+
+        foreach ($this->_segmentInfos as $info) {
+            $segmentFields = $info->getFields($indexed);
+            $names = array_merge($names, $segmentFields);
+        }
+
+        return $names;
+    }
+
+
+    /**
+     * Loads the stored fields of document number $id into a
+     * Zend_Search_Lucene_Document.
+     *
+     * @param integer|Zend_Search_Lucene_Search_QueryHit $id document id, or a hit carrying one
+     * @return Zend_Search_Lucene_Document
+     * @throws Zend_Search_Lucene_Exception when $id is not below the document count
+     */
+    public function getDocument($id)
+    {
+        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
+            /* @var $id Zend_Search_Lucene_Search_QueryHit */
+            $id = $id->id;
+        }
+
+        if ($id >= $this->_docCount) {
+            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
+        }
+
+        // Walk the segments until the one whose id range contains $id;
+        // $segStart is the first global id of that segment.
+        $segIndex = 0;
+        $segStart = 0;
+        $segEnd   = $this->_segmentInfos[0]->count();
+        while ($segEnd <= $id) {
+            $segIndex++;
+            $segStart = $segEnd;
+            $segEnd  += $this->_segmentInfos[$segIndex]->count();
+        }
+        $segment = $this->_segmentInfos[$segIndex];
+
+        // .fdx is read as 8 bytes per document: the position of its data in .fdt
+        $fdxFile = $segment->openCompoundFile('.fdx');
+        $fdxFile->seek(($id - $segStart) * 8, SEEK_CUR);
+        $fieldValuesPosition = $fdxFile->readLong();
+
+        $fdtFile = $segment->openCompoundFile('.fdt');
+        $fdtFile->seek($fieldValuesPosition, SEEK_CUR);
+        $fieldCount = $fdtFile->readVInt();
+
+        $doc = new Zend_Search_Lucene_Document();
+        for ($n = 0; $n < $fieldCount; $n++) {
+            $fieldNum  = $fdtFile->readVInt();
+            $bits      = $fdtFile->readByte();
+            $fieldInfo = $segment->getField($fieldNum);
+
+            // bit 2 set: binary value, otherwise text; bit 1 is handed on
+            // unchanged as the last constructor argument
+            if ($bits & 2) {
+                $value = $fdtFile->readBinary();
+            } else {
+                $value = $fdtFile->readString();
+            }
+
+            $doc->addField(new Zend_Search_Lucene_Field($fieldInfo->name,
+                                                        $value,
+                                                        true,
+                                                        $fieldInfo->isIndexed,
+                                                        $bits & 1));
+        }
+
+        return $doc;
+    }
+
+
+    /**
+     * Lists the global ids of all documents that contain $term.
+     *
+     * Segments are visited in order; ids read from a segment's .frq data are
+     * offset by the number of documents in the segments before it.
+     *
+     * @param Zend_Search_Lucene_Index_Term $term
+     * @return array
+     */
+    public function termDocs(Zend_Search_Lucene_Index_Term $term)
+    {
+        $docIds      = array();
+        $segmentBase = 0;
+
+        foreach ($this->_segmentInfos as $segment) {
+            $termInfo = $segment->getTermInfo($term);
+
+            if ($termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
+                $frqFile = $segment->openCompoundFile('.frq');
+                $frqFile->seek($termInfo->freqPointer, SEEK_CUR);
+
+                $docId = 0;
+                for ($n = 0; $n < $termInfo->docFreq; $n++) {
+                    $docDelta = $frqFile->readVInt();
+                    $isOdd    = ($docDelta % 2 == 1);
+
+                    if ($isOdd) {
+                        // odd delta: no separate frequency value follows
+                        $docId += ($docDelta - 1) / 2;
+                    } else {
+                        $docId += $docDelta / 2;
+                        // even delta: a frequency value follows, skip it
+                        $frqFile->readVInt();
+                    }
+
+                    $docIds[] = $segmentBase + $docId;
+                }
+            }
+
+            // next segment's ids start after this segment's documents
+            $segmentBase += $segment->count();
+        }
+
+        return $docIds;
+    }
+
+
+    /**
+     * Collects, per document, the positions at which $term occurs.
+     * Result structure: array( docId => array( pos1, pos2, ...), ...)
+     *
+     * For each segment the .frq data gives the documents and how many times
+     * the term occurs in each; the .prx data then supplies that many position
+     * deltas per document, which are accumulated into absolute positions.
+     *
+     * @param Zend_Search_Lucene_Index_Term $term
+     * @return array
+     */
+    public function termPositions(Zend_Search_Lucene_Index_Term $term)
+    {
+        $positionsByDoc = array();
+        $segmentBase    = 0;
+
+        foreach ($this->_segmentInfos as $segment) {
+            $termInfo = $segment->getTermInfo($term);
+
+            if ($termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
+                $frqFile = $segment->openCompoundFile('.frq');
+                $frqFile->seek($termInfo->freqPointer, SEEK_CUR);
+
+                // segment-local doc id => number of occurrences
+                $freqs = array();
+                $docId = 0;
+                for ($n = 0; $n < $termInfo->docFreq; $n++) {
+                    $docDelta = $frqFile->readVInt();
+
+                    if ($docDelta % 2 == 1) {
+                        $docId += ($docDelta - 1) / 2;
+                        $freqs[$docId] = 1;
+                    } else {
+                        $docId += $docDelta / 2;
+                        $freqs[$docId] = $frqFile->readVInt();
+                    }
+                }
+
+                $prxFile = $segment->openCompoundFile('.prx');
+                $prxFile->seek($termInfo->proxPointer, SEEK_CUR);
+
+                foreach ($freqs as $localId => $freq) {
+                    $position  = 0;
+                    $positions = array();
+
+                    for ($k = 0; $k < $freq; $k++) {
+                        // stored values are deltas from the previous position
+                        $position   += $prxFile->readVInt();
+                        $positions[] = $position;
+                    }
+
+                    $positionsByDoc[$segmentBase + $localId] = $positions;
+                }
+            }
+
+            $segmentBase += $segment->count();
+        }
+
+        return $positionsByDoc;
+    }
+
+
+    /**
+     * Returns the number of documents in this index containing the $term,
+     * i.e. the sum of the term's docFreq over all segments that know it.
+     *
+     * @param Zend_Search_Lucene_Index_Term $term
+     * @return integer
+     */
+    public function docFreq(Zend_Search_Lucene_Index_Term $term)
+    {
+        $result = 0;
+        foreach ($this->_segmentInfos as $segInfo) {
+            $termInfo = $segInfo->getTermInfo($term);
+
+            // Same "term present" test as termDocs() and termPositions():
+            // anything that is not a TermInfo (null, false, ...) is skipped
+            // instead of being dereferenced.
+            if ($termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
+                $result += $termInfo->docFreq;
+            }
+        }
+
+        return $result;
+    }
+
+
+    /**
+     * Retrieve the similarity used by the index reader
+     *
+     * @return Zend_Search_Lucene_Search_Similarity
+     */
+    public function getSimilarity()
+    {
+        return Zend_Search_Lucene_Search_Similarity::getDefault();
+    }
+
+
+    /**
+     * Returns a normalization factor for "field, document" pair.
+     *
+     * @param integer $id
+     * @param string $fieldName
+     * @return mixed null when $id is not below the document count, 0 when
+     *               the document is marked deleted, otherwise whatever the
+     *               owning segment's norm() returns
+     */
+    public function norm( $id, $fieldName )
+    {
+        if ($id >= $this->_docCount) {
+            return null;
+        }
+
+        // find the segment whose id range contains $id; the loop variable
+        // keeps pointing at it after the break
+        $segmentStartId = 0;
+        foreach ($this->_segmentInfos as $segInfo) {
+            $segmentEndId = $segmentStartId + $segInfo->count();
+            if ($segmentEndId > $id) {
+                break;
+            }
+
+            $segmentStartId = $segmentEndId;
+        }
+
+        $localId = $id - $segmentStartId;
+
+        if ($segInfo->isDeleted($localId)) {
+            return 0;
+        }
+
+        return $segInfo->norm($localId, $fieldName);
+    }
+
+    /**
+     * Tells whether at least one segment of this index reports deletions.
+     *
+     * @return boolean
+     */
+    public function hasDeletions()
+    {
+        $found = false;
+
+        foreach ($this->_segmentInfos as $info) {
+            if ($info->hasDeletions()) {
+                $found = true;
+                break;
+            }
+        }
+
+        return $found;
+    }
+
+
+    /**
+     * Deletes a document from the index.
+     * $id is an internal document id (or a hit carrying one). The change is
+     * flagged so that the next commit() writes it out.
+     *
+     * @param integer|Zend_Search_Lucene_Search_QueryHit $id
+     * @throws Zend_Search_Lucene_Exception when $id is not below the document count
+     */
+    public function delete($id)
+    {
+        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
+            /* @var $id Zend_Search_Lucene_Search_QueryHit */
+            $docId = $id->id;
+        } else {
+            $docId = $id;
+        }
+
+        if ($docId >= $this->_docCount) {
+            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
+        }
+
+        // locate the owning segment and the first global id it covers
+        $segIndex = 0;
+        $segStart = 0;
+        $segEnd   = $this->_segmentInfos[0]->count();
+        while ($segEnd <= $docId) {
+            $segIndex++;
+            $segStart = $segEnd;
+            $segEnd  += $this->_segmentInfos[$segIndex]->count();
+        }
+
+        $this->_hasChanges = true;
+        $this->_segmentInfos[$segIndex]->delete($docId - $segStart);
+    }
+
+
+
+    /**
+     * Adds a document to this index.
+     *
+     * The document is handed to the index writer, which is created on first
+     * use by getIndexWriter().
+     *
+     * @param Zend_Search_Lucene_Document $document
+     */
+    public function addDocument(Zend_Search_Lucene_Document $document)
+    {
+        // single place for the lazy writer set-up instead of a second copy here
+        $this->getIndexWriter()->addDocument($document);
+    }
+
+
+    /**
+     * Commits pending work: deletions recorded by delete() are written out
+     * per segment, and, if a writer exists, its commit() result is folded
+     * into the list of segments.
+     *
+     * The writer result is iterated as segment name => segment info; a null
+     * info makes this method drop the segment of that name, a non-null one
+     * is appended. Afterwards the segment list is re-indexed from 0 and the
+     * document count recomputed, because getDocument() and delete() address
+     * segments by consecutive position and rely on the count matching the
+     * sum of the segment sizes.
+     *
+     * @todo delete() and undeleteAll processing.
+     */
+    public function commit()
+    {
+        if ($this->_hasChanges) {
+            foreach ($this->_segmentInfos as $segInfo) {
+                $segInfo->writeChanges();
+            }
+
+            $this->_hasChanges = false;
+        }
+
+        if ($this->_writer === null) {
+            return;
+        }
+
+        foreach ($this->_writer->commit() as $segmentName => $segmentInfo) {
+            if ($segmentInfo !== null) {
+                $this->_segmentInfos[] = $segmentInfo;
+                continue;
+            }
+
+            foreach ($this->_segmentInfos as $segId => $segInfo) {
+                if ($segInfo->getName() == $segmentName) {
+                    unset($this->_segmentInfos[$segId]);
+                }
+            }
+        }
+
+        // unset() leaves holes in the keys; close them so positional access
+        // ($this->_segmentInfos[0], [1], ...) keeps working
+        $this->_segmentInfos = array_values($this->_segmentInfos);
+
+        // a dropped segment must no longer contribute to the total
+        $this->_docCount = 0;
+        foreach ($this->_segmentInfos as $segInfo) {
+            $this->_docCount += $segInfo->count();
+        }
+    }
+
+
+    /*************************************************************************
+    @todo UNIMPLEMENTED
+    *************************************************************************/
+
+    /**
+     * Returns an array of all terms in this index.
+     *
+     * @todo Implementation
+     * @return array
+     */
+    public function terms()
+    {
+        return array();
+    }
+
+
+    /**
+     * Undeletes all documents currently marked as deleted in this index.
+     *
+     * @todo Implementation
+     */
+    public function undeleteAll()
+    {}
+}
\ No newline at end of file
diff --git a/search/Zend/Search/Lucene/Analysis/Analyzer.php b/search/Zend/Search/Lucene/Analysis/Analyzer.php
new file mode 100644 (file)
index 0000000..febf88e
--- /dev/null
@@ -0,0 +1,96 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Analysis_Token */
+require_once 'Zend/Search/Lucene/Analysis/Token.php';
+
+/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */
+require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
+
+/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive */
+require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php';
+
+
+
+/**
+ * An Analyzer is used to analyze text.
+ * It thus represents a policy for extracting index terms from text.
+ *
+ * Note:
+ * The Java implementation of Lucene is stream-oriented, which lets it work
+ * efficiently with huge documents (more than 20Mb).
+ * This engine, however, is not designed for such documents.
+ * Thus the Zend_Search_Lucene analysis API works with data strings and sets (arrays).
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+abstract class Zend_Search_Lucene_Analysis_Analyzer
+{
+    /**
+     * Analyzer instance shared as the default; unset until setDefault()
+     * or getDefault() is called for the first time.
+     *
+     * @var Zend_Search_Lucene_Analysis_Analyzer
+     */
+    private static $_defaultImpl;
+
+    /**
+     * Split text into terms.
+     *
+     * @param string $data text to analyze
+     * @return array list of Zend_Search_Lucene_Analysis_Token objects
+     */
+    abstract public function tokenize($data);
+
+
+    /**
+     * Replace the default Analyzer implementation used by indexing code.
+     *
+     * @param Zend_Search_Lucene_Analysis_Analyzer $analyzer
+     */
+    public static function setDefault(Zend_Search_Lucene_Analysis_Analyzer $analyzer)
+    {
+        self::$_defaultImpl = $analyzer;
+    }
+
+
+    /**
+     * Return the default Analyzer implementation used by indexing code.
+     *
+     * When no default has been set yet, a case-insensitive text analyzer is
+     * created, remembered and returned.
+     *
+     * @return Zend_Search_Lucene_Analysis_Analyzer
+     */
+    public static function getDefault()
+    {
+        if (self::$_defaultImpl instanceof Zend_Search_Lucene_Analysis_Analyzer) {
+            return self::$_defaultImpl;
+        }
+
+        self::$_defaultImpl = new Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive();
+
+        return self::$_defaultImpl;
+    }
+
+}
+
diff --git a/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php b/search/Zend/Search/Lucene/Analysis/Analyzer/Common.php
new file mode 100644 (file)
index 0000000..2ad8a05
--- /dev/null
@@ -0,0 +1,75 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Analysis_Analyzer */
+require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
+
+
+/**
+ * Common implementation of the Zend_Search_Lucene_Analysis_Analyzer interface.
+ * There are several standard subclasses provided by the Zend_Search_Lucene/Analysis
+ * subpackage: Zend_Search_Lucene_Analysis_Analyzer_Common_Text, ZSearchHTMLAnalyzer, ZSearchXMLAnalyzer.
+ *
+ * @todo ZSearchHTMLAnalyzer and ZSearchXMLAnalyzer implementation
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+abstract class Zend_Search_Lucene_Analysis_Analyzer_Common extends Zend_Search_Lucene_Analysis_Analyzer
+{
+    /**
+     * The set of Token filters applied to the Token stream, in the order
+     * they were added.
+     * Array of Zend_Search_Lucene_Analysis_TokenFilter objects.
+     *
+     * @var array
+     */
+    private $_filters = array();
+
+    /**
+     * Append a Token filter to the Analyzer's filter chain.
+     *
+     * @param Zend_Search_Lucene_Analysis_TokenFilter $filter
+     */
+    public function addFilter(Zend_Search_Lucene_Analysis_TokenFilter $filter)
+    {
+        $this->_filters[] = $filter;
+    }
+
+    /**
+     * Run the token through every registered filter, in registration order.
+     *
+     * A filter may remove the token by returning null. In that case the
+     * remaining filters are skipped and null is returned.
+     *
+     * @param Zend_Search_Lucene_Analysis_Token $token
+     * @return Zend_Search_Lucene_Analysis_Token|null filtered token, or null if a filter removed it
+     */
+    public function normalize(Zend_Search_Lucene_Analysis_Token $token)
+    {
+        foreach ($this->_filters as $filter) {
+            $token = $filter->normalize($token);
+
+            // A removed token must not reach the next filter: its parameter is
+            // type-hinted as a Token, so passing null would be a fatal error.
+            if ($token === null) {
+                return null;
+            }
+        }
+
+        return $token;
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php b/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php
new file mode 100644 (file)
index 0000000..6f6f0dd
--- /dev/null
@@ -0,0 +1,78 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Analysis_Analyzer_Common */
+require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+class Zend_Search_Lucene_Analysis_Analyzer_Common_Text extends Zend_Search_Lucene_Analysis_Analyzer_Common
+{
+    /**
+     * Tokenize text into terms.
+     *
+     * A term is a maximal run of bytes accepted by ctype_alpha(); every other
+     * byte (white space, digits, punctuation, ...) acts as a separator.
+     * Each term is passed through normalize(); terms for which normalize()
+     * returns null are left out of the result.
+     *
+     * Token offsets are byte offsets into $data (start inclusive, end exclusive).
+     *
+     * @param string $data
+     * @return array list of Zend_Search_Lucene_Analysis_Token objects
+     */
+    public function tokenize($data)
+    {
+        $tokenStream = array();
+
+        // The input does not change while scanning, so measure it once.
+        $length   = strlen($data);
+        $position = 0;
+
+        while ($position < $length) {
+            // skip separators (everything that is not alphabetic)
+            while ($position < $length && !ctype_alpha($data[$position])) {
+                $position++;
+            }
+
+            $termStartPosition = $position;
+
+            // read token
+            while ($position < $length && ctype_alpha($data[$position])) {
+                $position++;
+            }
+
+            // Empty token, end of stream.
+            if ($position == $termStartPosition) {
+                break;
+            }
+
+            $token = new Zend_Search_Lucene_Analysis_Token(
+                substr($data, $termStartPosition, $position - $termStartPosition),
+                $termStartPosition,
+                $position
+            );
+
+            // normalize() may return null instead of a token; keep such
+            // results out of the stream so callers only ever see Token objects.
+            $token = $this->normalize($token);
+            if ($token !== null) {
+                $tokenStream[] = $token;
+            }
+        }
+
+        return $tokenStream;
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php b/search/Zend/Search/Lucene/Analysis/Analyzer/Common/Text/CaseInsensitive.php
new file mode 100644 (file)
index 0000000..e5fc372
--- /dev/null
@@ -0,0 +1,46 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Analysis_Analyzer_Common_Text */
+require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php';
+
+/** Zend_Search_Lucene_Analysis_TokenFilter_LowerCase */
+require_once 'Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+class Zend_Search_Lucene_Analysis_Analyzer_Common_Text_CaseInsensitive extends Zend_Search_Lucene_Analysis_Analyzer_Common_Text
+{
+    /**
+     * Create a text analyzer with a lower-case token filter registered.
+     */
+    public function __construct()
+    {
+        $lowerCaseFilter = new Zend_Search_Lucene_Analysis_TokenFilter_LowerCase();
+
+        $this->addFilter($lowerCaseFilter);
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Analysis/Token.php b/search/Zend/Search/Lucene/Analysis/Token.php
new file mode 100644 (file)
index 0000000..f2e9ee7
--- /dev/null
@@ -0,0 +1,171 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Analysis_Token
+{
+    /**
+     * Term text carried by this token.
+     *
+     * @var string
+     */
+    private $_termText;
+
+    /**
+     * Offset in the source text where the token starts.
+     *
+     * @var integer
+     */
+    private $_startOffset;
+
+    /**
+     * Offset in the source text where the token ends.
+     *
+     * @var integer
+     */
+    private $_endOffset;
+
+    /**
+     * Lexical type of the token.
+     *
+     * @var string
+     */
+    private $_type;
+
+    /**
+     * Position of this token relative to the previous one; one by default.
+     *
+     * Typical uses:
+     * - Zero places several terms at the same position, e.g. the different
+     *   stems of one word, so that phrase searches containing any of the
+     *   stems match. Only the first stem keeps an increment of one; all
+     *   others get zero. Repeating a token with a zero increment can also
+     *   boost the score of matches on that token.
+     * - A value above one inhibits exact phrase matches. A stop word filter,
+     *   for instance, can remove stop words and set the increment of the
+     *   following word to the number of words removed, so that exact phrase
+     *   queries only match when no stop word stood in between.
+     *
+     * @var integer
+     */
+    private $_positionIncrement;
+
+
+    /**
+     * Create a token with a position increment of one.
+     *
+     * @param string  $text  term text
+     * @param integer $start start offset in the source text
+     * @param integer $end   end offset in the source text
+     * @param string  $type  lexical type
+     */
+    public function __construct($text, $start, $end, $type = 'word' )
+    {
+        $this->_positionIncrement = 1;
+
+        $this->_type        = $type;
+        $this->_termText    = $text;
+        $this->_startOffset = $start;
+        $this->_endOffset   = $end;
+    }
+
+
+    /**
+     * Returns the Token's term text.
+     *
+     * @return string
+     */
+    public function getTermText()
+    {
+        return $this->_termText;
+    }
+
+    /**
+     * Returns this Token's starting offset, i.e. the position in the source
+     * text of the first character that belongs to the token.
+     *
+     * Note:
+     * getEndOffset() - getStartOffset() is not guaranteed to equal
+     * strlen(getTermText()), because a stemmer or another filter may have
+     * changed the term text.
+     *
+     * @return integer
+     */
+    public function getStartOffset()
+    {
+        return $this->_startOffset;
+    }
+
+    /**
+     * Returns this Token's ending offset, which is one greater than the
+     * position in the source text of the last character of the token.
+     *
+     * @return integer
+     */
+    public function getEndOffset()
+    {
+        return $this->_endOffset;
+    }
+
+    /**
+     * Returns this Token's lexical type ('word' unless another type was given).
+     *
+     * @return string
+     */
+    public function getType()
+    {
+        return $this->_type;
+    }
+
+    /**
+     * Set the position increment of this Token.
+     *
+     * @param integer $positionIncrement
+     */
+    public function setPositionIncrement($positionIncrement)
+    {
+        $this->_positionIncrement = $positionIncrement;
+    }
+
+    /**
+     * Returns the position increment of this Token.
+     *
+     * @return integer
+     */
+    public function getPositionIncrement()
+    {
+        return $this->_positionIncrement;
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Analysis/TokenFilter.php b/search/Zend/Search/Lucene/Analysis/TokenFilter.php
new file mode 100644 (file)
index 0000000..a363aa1
--- /dev/null
@@ -0,0 +1,47 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Analysis_Token */
+require_once 'Zend/Search/Lucene/Analysis/Token.php';
+
+
+/**
+ * A token filter converts (normalizes) a Token or removes it from a token stream.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+abstract class Zend_Search_Lucene_Analysis_TokenFilter
+{
+    /**
+     * Normalize a Token, or remove it from the stream by returning null.
+     *
+     * @param Zend_Search_Lucene_Analysis_Token $srcToken token to process
+     * @return Zend_Search_Lucene_Analysis_Token|null normalized token, or null to remove it
+     */
+    abstract public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken);
+}
+
diff --git a/search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php b/search/Zend/Search/Lucene/Analysis/TokenFilter/LowerCase.php
new file mode 100644 (file)
index 0000000..5ea1edf
--- /dev/null
@@ -0,0 +1,57 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Analysis_TokenFilter */
+require_once 'Zend/Search/Lucene/Analysis/TokenFilter.php';
+
+
+/**
+ * Lower case Token filter.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Analysis
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+class Zend_Search_Lucene_Analysis_TokenFilter_LowerCase extends Zend_Search_Lucene_Analysis_TokenFilter
+{
+    /**
+     * Build a copy of the token whose term text is lower-cased.
+     *
+     * Offsets, type and position increment are taken over unchanged from the
+     * source token. This filter never removes a token.
+     *
+     * @param Zend_Search_Lucene_Analysis_Token $srcToken
+     * @return Zend_Search_Lucene_Analysis_Token
+     */
+    public function normalize(Zend_Search_Lucene_Analysis_Token $srcToken)
+    {
+        $loweredText = strtolower($srcToken->getTermText());
+
+        $lowered = new Zend_Search_Lucene_Analysis_Token(
+            $loweredText,
+            $srcToken->getStartOffset(),
+            $srcToken->getEndOffset(),
+            $srcToken->getType()
+        );
+        $lowered->setPositionIncrement($srcToken->getPositionIncrement());
+
+        return $lowered;
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Document.php b/search/Zend/Search/Lucene/Document.php
new file mode 100644 (file)
index 0000000..48e48cf
--- /dev/null
@@ -0,0 +1,111 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Document
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Field */
+require_once 'Zend/Search/Lucene/Field.php';
+
+
+/**
+ * A Document is a set of fields. Each field has a name and a textual value.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Document
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Document
+{
+
+    /**
+     * Associative array of Zend_Search_Lucene_Field objects where the keys to
+     * the array are the names of the fields.
+     *
+     * @var array
+     */
+    protected $_fields = array();
+
+    /**
+     * Boost value of the document; 1.0 unless changed by the caller.
+     *
+     * @var float
+     */
+    public $boost = 1.0;
+
+
+    /**
+     * Proxy method for getFieldValue(), provides more convenient access to
+     * the string value of a field ($document->title).
+     *
+     * @param  string $offset field name
+     * @return string
+     * @throws Zend_Search_Lucene_Exception if the document has no such field
+     */
+    public function __get($offset)
+    {
+        return $this->getFieldValue($offset);
+    }
+
+
+    /**
+     * Add a field object to this document.
+     *
+     * A field added under an already used name replaces the earlier field.
+     *
+     * @param Zend_Search_Lucene_Field $field
+     */
+    public function addField(Zend_Search_Lucene_Field $field)
+    {
+        $this->_fields[$field->name] = $field;
+    }
+
+
+    /**
+     * Return an array with the names of the fields in this document.
+     *
+     * @return array
+     */
+    public function getFieldNames()
+    {
+        return array_keys($this->_fields);
+    }
+
+
+    /**
+     * Returns Zend_Search_Lucene_Field object for a named field in this document.
+     *
+     * @param string $fieldName
+     * @return Zend_Search_Lucene_Field
+     * @throws Zend_Search_Lucene_Exception if the document has no such field
+     */
+    public function getField($fieldName)
+    {
+        if (!array_key_exists($fieldName, $this->_fields)) {
+            // This file only requires Field.php, so load the exception class
+            // here to make sure it is defined before it is thrown.
+            require_once 'Zend/Search/Lucene/Exception.php';
+
+            throw new Zend_Search_Lucene_Exception("Field name \"$fieldName\" not found in document.");
+        }
+
+        return $this->_fields[$fieldName];
+    }
+
+
+    /**
+     * Returns the string value of a named field in this document.
+     *
+     * @see __get()
+     * @param string $fieldName
+     * @return string
+     * @throws Zend_Search_Lucene_Exception if the document has no such field
+     */
+    public function getFieldValue($fieldName)
+    {
+        return $this->getField($fieldName)->stringValue;
+    }
+
+}
diff --git a/search/Zend/Search/Lucene/EncodingConverter.php b/search/Zend/Search/Lucene/EncodingConverter.php
new file mode 100644 (file)
index 0000000..9c22e5c
--- /dev/null
@@ -0,0 +1,32 @@
+<?php
+
+class EncodingConverter {
+  // Message of the most recent error raised during convert(); FALSE when the
+  // last conversion raised none.
+  private $last_error;
+
+  // Source and target encoding names handed to iconv().
+  private $in_encoding;
+  private $out_encoding;
+
+  /**
+   * @param string $in_encoding  encoding of the strings passed to convert()
+   * @param string $out_encoding encoding of the strings returned by convert()
+   */
+  public function __construct($in_encoding, $out_encoding) {
+    $this->out_encoding = $out_encoding;
+    $this->in_encoding = $in_encoding;
+  } //constructor
+
+  /**
+   * Error handler installed for the duration of convert(); remembers the
+   * message of the error it receives (a later error overwrites an earlier one).
+   *
+   * @param integer $err error level (unused)
+   * @param string  $msg error message
+   */
+  public function handleError($err, $msg) {
+    $this->last_error = $msg;
+  } //handleError
+
+  /**
+   * Convert a string from the input encoding to the output encoding.
+   *
+   * Errors raised by iconv() are captured and made available through
+   * getLastError().
+   *
+   * @param string $str
+   * @return mixed whatever iconv() returns for the given string
+   */
+  public function convert($str) {
+    $this->last_error = FALSE;
+
+    set_error_handler(array($this, 'handleError'));
+    $converted = iconv($this->in_encoding, $this->out_encoding, $str);
+    restore_error_handler();
+
+    return $converted;
+  } //convert
+
+  /**
+   * @return mixed message recorded during the last convert() call, or FALSE if there was none
+   */
+  public function getLastError() {
+    return $this->last_error;
+  } //getLastError
+} //EncodingConverter
+
+?>
\ No newline at end of file
diff --git a/search/Zend/Search/Lucene/Exception.php b/search/Zend/Search/Lucene/Exception.php
new file mode 100644 (file)
index 0000000..5b73b29
--- /dev/null
@@ -0,0 +1,36 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * Framework base exception
+ */
+require_once 'Zend/Search/Exception.php';
+
+
+/**
+ * Exception class of the Zend_Search_Lucene package.
+ * It extends Zend_Search_Exception without declaring any members of its own.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Exception extends Zend_Search_Exception
+{}
+
diff --git a/search/Zend/Search/Lucene/Field.php b/search/Zend/Search/Lucene/Field.php
new file mode 100644 (file)
index 0000000..4612d59
--- /dev/null
@@ -0,0 +1,161 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Document
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * A field is a section of a Document.  Each field has two parts,
+ * a name and a value. Values may be free text or they may be atomic
+ * keywords, which are not further processed. Such keywords may
+ * be used to represent dates, urls, etc.  Fields are optionally
+ * stored in the index, so that they may be returned with hits
+ * on the document.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Document
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+//require_once('EncodingConverter.php');
+
+class Zend_Search_Lucene_Field
+{
+    // Declared but never assigned by this class.
+    public $kind;
+
+    // Field name and value, followed by the flags describing how the field is handled.
+    public $name        = 'body';
+    public $stringValue = null;
+    public $isStored    = false;
+    public $isIndexed   = true;
+    public $isTokenized = true;
+    public $isBinary    = false;
+
+    public $storeTermVector = false;
+
+    public $boost = 1.0;
+
+    /**
+     * Create a field.
+     *
+     * Non-binary values are passed through iconv() from ISO-8859-1 to
+     * ASCII//TRANSLIT; binary values are kept exactly as given.
+     * The term vector flag is always reset to false and the boost to 1.0.
+     *
+     * @todo Correct UTF-8 string should be required in future.
+     *       Until full UTF-8 support is completed, strings are normalized to ASCII encoding.
+     *
+     * @param string  $name
+     * @param string  $stringValue
+     * @param boolean $isStored
+     * @param boolean $isIndexed
+     * @param boolean $isTokenized
+     * @param boolean $isBinary
+     */
+    public function __construct($name, $stringValue, $isStored, $isIndexed, $isTokenized, $isBinary = false)
+    {
+        $this->name        = $name;
+        $this->isStored    = $isStored;
+        $this->isIndexed   = $isIndexed;
+        $this->isTokenized = $isTokenized;
+        $this->isBinary    = $isBinary;
+
+        $this->storeTermVector = false;
+        $this->boost           = 1.0;
+
+        if ($isBinary) {
+            $this->stringValue = $stringValue;
+        } else {
+            // The source encoding is fixed to ISO-8859-1; it is not detected from the value.
+            $this->stringValue = iconv('ISO-8859-1', 'ASCII//TRANSLIT', $stringValue);
+        }
+    }
+
+
+    /**
+     * Create a string field that is stored and indexed but not tokenized.
+     * Suited to non-text values such as a date or a url.
+     *
+     * @param string $name
+     * @param string $value
+     * @return Zend_Search_Lucene_Field
+     */
+    static public function Keyword($name, $value)
+    {
+        $field = new self($name, $value, true, true, false);
+
+        return $field;
+    }
+
+
+    /**
+     * Create a string field that is stored in the index, for return with
+     * hits, but is neither indexed nor tokenized.
+     *
+     * @param string $name
+     * @param string $value
+     * @return Zend_Search_Lucene_Field
+     */
+    static public function UnIndexed($name, $value)
+    {
+        $field = new self($name, $value, true, false, false);
+
+        return $field;
+    }
+
+
+    /**
+     * Create a binary field that is stored in the index, for return with
+     * hits, but is neither indexed nor tokenized. The value is kept as given.
+     *
+     * @param string $name
+     * @param string $value
+     * @return Zend_Search_Lucene_Field
+     */
+    static public function Binary($name, $value)
+    {
+        $field = new self($name, $value, true, false, false, true);
+
+        return $field;
+    }
+
+    /**
+     * Create a string field that is stored, indexed and tokenized.
+     * Suited to short text such as a "title" or "subject"; no term vector
+     * is stored for the field.
+     *
+     * @param string $name
+     * @param string $value
+     * @return Zend_Search_Lucene_Field
+     */
+    static public function Text($name, $value)
+    {
+        $field = new self($name, $value, true, true, true);
+
+        return $field;
+    }
+
+
+    /**
+     * Create a string field that is indexed and tokenized but not stored
+     * in the index.
+     *
+     * @param string $name
+     * @param string $value
+     * @return Zend_Search_Lucene_Field
+     */
+    static public function UnStored($name, $value)
+    {
+        $field = new self($name, $value, false, true, true);
+
+        return $field;
+    }
+
+}
+
diff --git a/search/Zend/Search/Lucene/Index/FieldInfo.php b/search/Zend/Search/Lucene/Index/FieldInfo.php
new file mode 100644 (file)
index 0000000..4c11aaa
--- /dev/null
@@ -0,0 +1,45 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Index_FieldInfo
+{
+    // Plain value holder: each property is set once from the constructor argument of the same name.
+    public $name;
+    public $isIndexed;
+    public $number;
+    public $storeTermVector;
+
+    /**
+     * Store the given field description values as public properties.
+     *
+     * @param mixed $name
+     * @param mixed $isIndexed
+     * @param mixed $number
+     * @param mixed $storeTermVector
+     */
+    public function __construct( $name, $isIndexed, $number, $storeTermVector )
+    {
+        $this->number          = $number;
+        $this->name            = $name;
+        $this->storeTermVector = $storeTermVector;
+        $this->isIndexed       = $isIndexed;
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Index/SegmentInfo.php b/search/Zend/Search/Lucene/Index/SegmentInfo.php
new file mode 100644 (file)
index 0000000..3defbed
--- /dev/null
@@ -0,0 +1,575 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Exception */
+require_once 'Zend/Search/Lucene/Exception.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Index_SegmentInfo
+{
+    /**
+     * Number of docs in a segment
+     *
+     * @var integer
+     */
+    private $_docCount;
+
+    /**
+     * Segment name
+     *
+     * @var string
+     */
+    private $_name;
+
+    /**
+     * Term Dictionary Index
+     * Array of the Zend_Search_Lucene_Index_Term objects
+     * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
+     *
+     * @var array
+     */
+    private $_termDictionary;
+
+    /**
+     * Term Dictionary Index TermInfos
+     * Array of the Zend_Search_Lucene_Index_TermInfo objects
+     *
+     * @var array
+     */
+    private $_termDictionaryInfos;
+
+    /**
+     * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
+     *
+     * @var array
+     */
+    private $_fields;
+
+    /**
+     * Field positions in a dictionary.
+     * (Term dictionary contains fields ordered by names)
+     *
+     * @var array
+     */
+    private $_fieldsDicPositions;
+
+
+    /**
+     * Associative array where the key is the file name and the value is data offset
+     * in a compound segment file (.cfs).
+     *
+     * @var array
+     */
+    private $_segFiles;
+
+    /**
+     * File system adapter.
+     *
+     * @var Zend_Search_Lucene_Storage_Directory_Filesystem
+     */
+    private $_directory;
+
+    /**
+     * Normalization factors.
+     * An array fieldName => normVector
+     * normVector is a binary string.
+     * Each byte corresponds to an indexed document in a segment and
+     * encodes normalization factor (float value, encoded by
+     * Zend_Search_Lucene_Search_Similarity::encodeNorm())
+     *
+     * @var array
+     */
+    private $_norms = array();
+
+    /**
+     * List of deleted documents.
+     * bitset if bitset extension is loaded or array otherwise.
+     *
+     * @var mixed
+     */
+    private $_deleted;
+
+    /**
+     * $this->_deleted update flag
+     *
+     * @var boolean
+     */
+    private $_deletedDirty = false;
+
+    /**
+     * Zend_Search_Lucene_Index_SegmentInfo constructor.
+     *
+     * Reads the compound file (.cfs) table of contents when the segment has one,
+     * loads the field infos from the .fnm file and, if a .del file exists,
+     * the list of deleted documents.
+     *
+     * @param string $name      Segment name
+     * @param integer $docCount Number of documents in the segment
+     * @param Zend_Search_Lucene_Storage_Directory $directory
+     * @throws Zend_Search_Lucene_Exception if the .fnm file can not be found
+     * @throws Zend_Search_Exception any error other than a missing .del file is rethrown
+     */
+    public function __construct($name, $docCount, $directory)
+    {
+        $this->_name = $name;
+        $this->_docCount = $docCount;
+        $this->_directory = $directory;
+        $this->_termDictionary = null;
+
+        // Compound file table of contents: file name => data offset
+        $this->_segFiles = array();
+        if ($this->_directory->fileExists($name . '.cfs')) {
+            $cfsFile = $this->_directory->getFileObject($name . '.cfs');
+            $segFilesCount = $cfsFile->readVInt();
+
+            for ($count = 0; $count < $segFilesCount; $count++) {
+                $dataOffset = $cfsFile->readLong();
+                $fileName = $cfsFile->readString();
+                $this->_segFiles[$fileName] = $dataOffset;
+            }
+        }
+
+        $fnmFile = $this->openCompoundFile('.fnm');
+        $fieldsCount = $fnmFile->readVInt();
+        $fieldNames = array();
+        $fieldNums  = array();
+        $this->_fields = array();
+        for ($count=0; $count < $fieldsCount; $count++) {
+            $fieldName = $fnmFile->readString();
+            $fieldBits = $fnmFile->readByte();
+            // Bit 0x01 - field is indexed, bit 0x02 - term vector is stored
+            $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
+                                                                            $fieldBits & 1,
+                                                                            $count,
+                                                                            $fieldBits & 2 );
+            if ($fieldBits & 0x10) {
+                // norms are omitted for the indexed field
+                $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
+            }
+
+            $fieldNums[$count]  = $count;
+            $fieldNames[$count] = $fieldName;
+        }
+        // Sort field numbers by field name, then invert: field number => position in name order
+        array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
+        $this->_fieldsDicPositions = array_flip($fieldNums);
+
+        try {
+            $delFile = $this->openCompoundFile('.del');
+
+            // First int is a size in bits, converted to a whole number of bytes
+            $byteCount = $delFile->readInt();
+            $byteCount = (int) ceil($byteCount/8);
+            $bitCount  = $delFile->readInt();
+
+            if ($bitCount == 0) {
+                $delBytes = '';
+            } else {
+                $delBytes = $delFile->readBytes($byteCount);
+            }
+
+            if (extension_loaded('bitset')) {
+                $this->_deleted = $delBytes;
+            } else {
+                $this->_deleted = array();
+                // Never read past the loaded data: $delBytes is empty when no bit is set
+                $delLength = min($byteCount, strlen($delBytes));
+                for ($count = 0; $count < $delLength; $count++) {
+                    // Square brackets: curly brace string offsets were removed in PHP 8
+                    $byte = ord($delBytes[$count]);
+                    for ($bit = 0; $bit < 8; $bit++) {
+                        if ($byte & (1<<$bit)) {
+                            $this->_deleted[$count*8 + $bit] = 1;
+                        }
+                    }
+                }
+
+            }
+        } catch(Zend_Search_Exception $e) {
+            // A missing .del file just means that the segment has no deletions
+            if (strpos($e->getMessage(), 'compound file doesn\'t contain') !== false ) {
+                $this->_deleted = null;
+            } else {
+                throw $e;
+            }
+        }
+    }
+
+    /**
+     * Opens an index file of this segment.
+     *
+     * A stand-alone file "<segment name><extension>" is preferred. Otherwise
+     * the compound file (.cfs) is opened and positioned at the offset
+     * registered for the requested file.
+     *
+     * @param string $extension
+     * @throws Zend_Search_Lucene_Exception
+     * @return Zend_Search_Lucene_Storage_File
+     */
+    public function openCompoundFile($extension)
+    {
+        $segmentFile = $this->_name . $extension;
+
+        if (!$this->_directory->fileExists($segmentFile)) {
+            // No stand-alone file: it has to be listed in the compound file
+            if (isset($this->_segFiles[$segmentFile])) {
+                $compound = $this->_directory->getFileObject($this->_name . '.cfs');
+                $compound->seek($this->_segFiles[$segmentFile]);
+
+                return $compound;
+            }
+
+            throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain ' . $segmentFile . ' file.');
+        }
+
+        return $this->_directory->getFileObject($segmentFile);
+    }
+
+    /**
+     * Looks up the number of the first field with the given name.
+     *
+     * @param string $fieldName
+     * @return integer field number, or -1 if the segment has no such field
+     */
+    public function getFieldNum($fieldName)
+    {
+        $fieldNumber = -1;
+
+        foreach ($this->_fields as $candidate) {
+            if ($candidate->name == $fieldName) {
+                $fieldNumber = $candidate->number;
+                break;
+            }
+        }
+
+        return $fieldNumber;
+    }
+
+    /**
+     * Returns the field info stored under the given field number.
+     *
+     * @param integer $fieldNum
+     * @return Zend_Search_Lucene_Index_FieldInfo
+     */
+    public function getField($fieldNum)
+    {
+        $fieldInfo = $this->_fields[$fieldNum];
+
+        return $fieldInfo;
+    }
+
+    /**
+     * Returns the field names of this segment as an array name => name.
+     * With $indexed set to true only indexed fields are listed.
+     *
+     * @param boolean $indexed
+     * @return array
+     */
+    public function getFields($indexed = false)
+    {
+        $names = array();
+
+        foreach ($this->_fields as $info) {
+            if ($indexed && !$info->isIndexed) {
+                // Caller asked for indexed fields only
+                continue;
+            }
+
+            $names[$info->name] = $info->name;
+        }
+
+        return $names;
+    }
+
+    /**
+     * Returns the document count this segment was constructed with.
+     *
+     * @return integer
+     */
+    public function count()
+    {
+        $total = $this->_docCount;
+
+        return $total;
+    }
+
+    /**
+     * Translates a field number into its position in the fields dictionary.
+     *
+     * @param integer $fieldNum
+     * @return integer
+     */
+    private function _getFieldPosition($fieldNum) {
+        if (isset($this->_fieldsDicPositions[$fieldNum])) {
+            return $this->_fieldsDicPositions[$fieldNum];
+        }
+
+        // Not in the translation table: the number itself is used as position
+        return $fieldNum;
+    }
+
+    /**
+     * Loads Term dictionary from TermInfoIndex file (.tii)
+     *
+     * Fills $_termDictionary and $_termDictionaryInfos with parallel entries.
+     * Does nothing if the dictionary has already been loaded.
+     *
+     * @throws Zend_Search_Lucene_Exception if the file version marker is unexpected
+     */
+    protected function _loadDictionary()
+    {
+        // Dictionary is loaded only once
+        if ($this->_termDictionary !== null) {
+            return;
+        }
+
+        $this->_termDictionary = array();
+        $this->_termDictionaryInfos = array();
+
+        $tiiFile = $this->openCompoundFile('.tii');
+        $tiVersion = $tiiFile->readInt();
+        if ($tiVersion != (int)0xFFFFFFFE) {
+            throw new Zend_Search_Lucene_Exception('Wrong TermInfoIndexFile file format');
+        }
+
+        $indexTermCount = $tiiFile->readLong();
+                          $tiiFile->readInt();  // IndexInterval (read to advance the file position, value is not used)
+        $skipInterval   = $tiiFile->readInt();
+
+        // Pointers are stored as deltas and accumulated over the entries
+        $prevTerm     = '';
+        $freqPointer  =  0;
+        $proxPointer  =  0;
+        $indexPointer =  0;
+        for ($count = 0; $count < $indexTermCount; $count++) {
+            // Term text is stored as a shared prefix length with the previous term plus a suffix
+            $termPrefixLength = $tiiFile->readVInt();
+            $termSuffix       = $tiiFile->readString();
+            $termValue        = substr( $prevTerm, 0, $termPrefixLength ) . $termSuffix;
+
+            $termFieldNum     = $tiiFile->readVInt();
+            $docFreq          = $tiiFile->readVInt();
+            $freqPointer     += $tiiFile->readVInt();
+            $proxPointer     += $tiiFile->readVInt();
+            // Skip delta is present in the file only for terms with docFreq >= skipInterval
+            if( $docFreq >= $skipInterval ) {
+                $skipDelta = $tiiFile->readVInt();
+            } else {
+                $skipDelta = 0;
+            }
+
+            $indexPointer += $tiiFile->readVInt();
+
+            // Note: the term object holds the field number here, not the field name
+            $this->_termDictionary[] =  new Zend_Search_Lucene_Index_Term($termValue,$termFieldNum);
+            $this->_termDictionaryInfos[] =
+                new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipDelta, $indexPointer);
+            $prevTerm = $termValue;
+        }
+    }
+
+
+    /**
+     * Returns the segment name given to the constructor.
+     *
+     * @return string
+     */
+    public function getName()
+    {
+        $segmentName = $this->_name;
+
+        return $segmentName;
+    }
+
+
+    /**
+     * Scans terms dictionary and returns term info
+     *
+     * The in-memory dictionary index is binary-searched first. If the term is
+     * not an index entry, the term info file (.tis) is scanned sequentially
+     * starting from the closest preceding index entry.
+     *
+     * @param Zend_Search_Lucene_Index_Term $term
+     * @return Zend_Search_Lucene_Index_TermInfo|null null if the field or the term is not found
+     * @throws Zend_Search_Lucene_Exception if the .tis version marker is unexpected
+     */
+    public function getTermInfo($term)
+    {
+        $this->_loadDictionary();
+
+        // $term->field is a field name here; dictionary terms carry field numbers
+        $searchField = $this->getFieldNum($term->field);
+
+        if ($searchField == -1) {
+            return null;
+        }
+        $searchDicField = $this->_getFieldPosition($searchField);
+
+        // search for appropriate value in dictionary
+        // (entries are compared by field dictionary position first, then by term text)
+        $lowIndex = 0;
+        $highIndex = count($this->_termDictionary)-1;
+        while ($highIndex >= $lowIndex) {
+            // $mid = ($highIndex - $lowIndex)/2;
+            $mid = ($highIndex + $lowIndex) >> 1;
+            $midTerm = $this->_termDictionary[$mid];
+
+            $fieldNum = $this->_getFieldPosition($midTerm->field);
+            $delta = $searchDicField - $fieldNum;
+            if ($delta == 0) {
+                $delta = strcmp($term->text, $midTerm->text);
+            }
+
+            if ($delta < 0) {
+                $highIndex = $mid-1;
+            } elseif ($delta > 0) {
+                $lowIndex  = $mid+1;
+            } else {
+                return $this->_termDictionaryInfos[$mid]; // We got it!
+            }
+        }
+
+        if ($highIndex == -1) {
+            // Term is out of the dictionary range
+            return null;
+        }
+
+        // $highIndex now points to the last index entry which precedes the searched term
+        $prevPosition = $highIndex;
+        $prevTerm = $this->_termDictionary[$prevPosition];
+        $prevTermInfo = $this->_termDictionaryInfos[ $prevPosition ];
+
+        $tisFile = $this->openCompoundFile('.tis');
+        $tiVersion = $tisFile->readInt();
+        if ($tiVersion != (int)0xFFFFFFFE) {
+            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
+        }
+
+        $termCount     = $tisFile->readLong();
+        $indexInterval = $tisFile->readInt();
+        $skipInterval  = $tisFile->readInt();
+
+        // The header has just been read, so the seek is relative to the current position
+        $tisFile->seek($prevTermInfo->indexPointer - 20 /* header size*/, SEEK_CUR);
+
+        // Sequential scan starts from the state of the preceding index entry
+        $termValue    = $prevTerm->text;
+        $termFieldNum = $prevTerm->field;
+        $freqPointer = $prevTermInfo->freqPointer;
+        $proxPointer = $prevTermInfo->proxPointer;
+        // Read entries while they sort before the searched (field position, text) pair
+        for ($count = $prevPosition*$indexInterval + 1;
+             $count < $termCount &&
+             ( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
+              ($this->_getFieldPosition($termFieldNum) == $searchDicField &&
+               strcmp($termValue, $term->text) < 0) );
+             $count++) {
+            $termPrefixLength = $tisFile->readVInt();
+            $termSuffix       = $tisFile->readString();
+            $termFieldNum     = $tisFile->readVInt();
+            $termValue        = substr( $termValue, 0, $termPrefixLength ) . $termSuffix;
+
+            $docFreq      = $tisFile->readVInt();
+            $freqPointer += $tisFile->readVInt();
+            $proxPointer += $tisFile->readVInt();
+            if( $docFreq >= $skipInterval ) {
+                $skipOffset = $tisFile->readVInt();
+            } else {
+                $skipOffset = 0;
+            }
+        }
+
+        // NOTE(review): $docFreq and $skipOffset are assigned only inside the loop above;
+        // a match here presumably implies at least one iteration - confirm.
+        if ($termFieldNum == $searchField && $termValue == $term->text) {
+            return new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
+        } else {
+            return null;
+        }
+    }
+
+    /**
+     * Returns normalization factor for the specified document and field.
+     *
+     * The norm vector of a field is read from the '.f<fieldNum>' file on first
+     * use and cached in $_norms.
+     *
+     * @param integer $id
+     * @param string $fieldName
+     * @return mixed decoded norm (result of Zend_Search_Lucene_Search_Similarity::decodeNorm()),
+     *               or null if the field is unknown or is not indexed
+     */
+    public function norm($id, $fieldName)
+    {
+        $fieldNum = $this->getFieldNum($fieldName);
+
+        // getFieldNum() reports an unknown field as -1, which is not a valid $_fields key
+        if ($fieldNum == -1 || !($this->_fields[$fieldNum]->isIndexed)) {
+            return null;
+        }
+
+        if ( !isset( $this->_norms[$fieldNum] )) {
+            $fFile = $this->openCompoundFile('.f' . $fieldNum);
+            $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
+        }
+
+        // One byte per document. Square brackets: curly brace string offsets were removed in PHP 8
+        return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum][$id]) );
+    }
+
+
+    /**
+     * Tells whether this segment holds a deletions list
+     * (loaded from a .del file or created by delete()).
+     *
+     * @return boolean
+     */
+    public function hasDeletions()
+    {
+        return !($this->_deleted === null);
+    }
+
+
+    /**
+     * Marks a document of this segment as deleted.
+     * The change is kept in memory until writeChanges() is called.
+     *
+     * @param integer $id internal document id
+     */
+    public function delete($id)
+    {
+        $this->_deletedDirty = true;
+
+        if (!extension_loaded('bitset')) {
+            // Plain PHP fallback: array keyed by document id
+            if ($this->_deleted === null) {
+                $this->_deleted = array();
+            }
+            $this->_deleted[$id] = 1;
+
+            return;
+        }
+
+        if ($this->_deleted === null) {
+            $this->_deleted = bitset_empty($id);
+        }
+        bitset_incl($this->_deleted, $id);
+    }
+
+    /**
+     * Checks whether the document is marked as deleted.
+     *
+     * @param integer $id internal document id
+     * @return boolean
+     */
+    public function isDeleted($id)
+    {
+        // No deletions list at all
+        if ($this->_deleted === null) {
+            return false;
+        }
+
+        return extension_loaded('bitset')
+             ? bitset_in($this->_deleted, $id)
+             : isset($this->_deleted[$id]);
+    }
+
+
+    /**
+     * Write changes if it's necessary.
+     *
+     * Creates the '<segment name>.del' file from the in-memory deletions list
+     * when it was modified by delete() since the last write.
+     */
+    public function writeChanges()
+    {
+        if (!$this->_deletedDirty) {
+            return;
+        }
+
+        if (extension_loaded('bitset')) {
+            $delBytes = $this->_deleted;
+            $bitCount = count(bitset_to_array($delBytes));
+        } else {
+            // floor() returns a float; the byte count has to be an integer
+            $byteCount = (int) floor($this->_docCount/8) + 1;
+            $delBytes = '';
+            for ($count = 0; $count < $byteCount; $count++) {
+                // Pack 8 document flags into one byte, lowest bit first
+                $byte = 0;
+                for ($bit = 0; $bit < 8; $bit++) {
+                    if (isset($this->_deleted[$count*8 + $bit])) {
+                        $byte |= (1<<$bit);
+                    }
+                }
+                // Appending avoids curly brace string offsets, which were removed in PHP 8
+                $delBytes .= chr($byte);
+            }
+            $bitCount = count($this->_deleted);
+        }
+
+
+        $delFile = $this->_directory->createFile($this->_name . '.del');
+        $delFile->writeInt($this->_docCount);
+        $delFile->writeInt($bitCount);
+        $delFile->writeBytes($delBytes);
+
+        $this->_deletedDirty = false;
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Index/SegmentWriter.php b/search/Zend/Search/Lucene/Index/SegmentWriter.php
new file mode 100644 (file)
index 0000000..6cb4477
--- /dev/null
@@ -0,0 +1,519 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Exception */
+require_once 'Zend/Search/Lucene/Exception.php';
+
+/** Zend_Search_Lucene_Analysis_Analyzer */
+require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
+
+/** Zend_Search_Lucene_Index_SegmentInfo */
+require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Index_SegmentWriter
+{
+    /**
+     * Expert: The fraction of terms in the "dictionary" which should be stored
+     * in RAM.  Smaller values use more memory, but make searching slightly
+     * faster, while larger values use less memory and make searching slightly
+     * slower.  Searching is typically not dominated by dictionary lookup, so
+     * tweaking this is rarely useful.
+     *
+     * @var integer
+     */
+    static public $indexInterval = 128;
+
+    /** Expert: The fraction of TermDocs entries stored in skip tables.
+     * Larger values result in smaller indexes, greater acceleration, but fewer
+     * accelerable cases, while smaller values result in bigger indexes,
+     * less acceleration and more
+     * accelerable cases. More detailed experiments would be useful here.
+     *
+     * 0x7FFFFFFF indicates that we don't use skip data
+     * Default value is 16
+     *
+     * @var integer
+     */
+    static public $skipInterval = 0x7FFFFFFF;
+
+    /**
+     * Number of docs in a segment
+     *
+     * @var integer
+     */
+    private $_docCount;
+
+    /**
+     * Segment name
+     *
+     * @var string
+     */
+    private $_name;
+
+    /**
+     * File system adapter.
+     *
+     * @var Zend_Search_Lucene_Storage_Directory
+     */
+    private $_directory;
+
+    /**
+     * List of the index files.
+     * Used for automatic compound file generation
+     *
+     * @var array
+     */
+    private $_files;
+
+    /**
+     * Term Dictionary
+     * Array of the Zend_Search_Lucene_Index_Term objects
+     * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
+     *
+     * @var array
+     */
+    private $_termDictionary;
+
+    /**
+     * Documents, which contain the term
+     *
+     * @var array
+     */
+    private $_termDocs;
+
+    /**
+     * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
+     *
+     * @var array
+     */
+    private $_fields;
+
+    /**
+     * Sizes of the indexed fields.
+     * Used for normalization factors calculation.
+     *
+     * @var array
+     */
+    private $_fieldLengths;
+
+    /**
+     * '.fdx'  file - Stored Fields, the field index.
+     *
+     * @var Zend_Search_Lucene_Storage_File
+     */
+    private $_fdxFile;
+
+    /**
+     * '.fdt'  file - Stored Fields, the field data.
+     *
+     * @var Zend_Search_Lucene_Storage_File
+     */
+    private $_fdtFile;
+
+
+    /**
+     * Object constructor.
+     * Creates an empty segment writer: no documents, fields, terms or files yet.
+     *
+     * @param Zend_Search_Lucene_Storage_Directory $directory
+     * @param string $name
+     */
+    public function __construct($directory, $name)
+    {
+        $this->_name      = $name;
+        $this->_directory = $directory;
+        $this->_docCount  = 0;
+
+        // Stored fields files are created on demand
+        $this->_fdxFile = null;
+        $this->_fdtFile = null;
+
+        $this->_files          = array();
+        $this->_fields         = array();
+        $this->_fieldLengths   = array();
+        $this->_termDictionary = array();
+        $this->_termDocs       = array();
+        // NOTE(review): $_norms is not among the declared properties visible here - confirm it is intended
+        $this->_norms          = array();
+    }
+
+
+    /**
+     * Registers a document field in the segment fields list.
+     *
+     * A new field gets the next free field number. For an already known field
+     * the 'indexed' and 'store term vector' flags are OR-ed with the new ones.
+     *
+     * @param Zend_Search_Lucene_Field $field
+     */
+    private function _addFieldInfo(Zend_Search_Lucene_Field $field)
+    {
+        $fieldName = $field->name;
+
+        if (isset($this->_fields[$fieldName])) {
+            $known = $this->_fields[$fieldName];
+            $known->isIndexed       |= $field->isIndexed;
+            $known->storeTermVector |= $field->storeTermVector;
+
+            return;
+        }
+
+        // Fields are numbered in order of their first appearance
+        $nextNumber = count($this->_fields);
+        $this->_fields[$fieldName] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
+                                                                            $field->isIndexed,
+                                                                            $nextNumber,
+                                                                            $field->storeTermVector);
+    }
+
+
+    /**
+     * Adds a document to this segment.
+     *
+     * Indexed fields are split into tokens (or taken as one token when the
+     * field is not tokenized) and term positions are collected in memory.
+     * Stored fields are written to the .fdt file, with a pointer to the
+     * document data written to the .fdx file.
+     *
+     * An .fdx/.fdt entry is written for every document, also for documents
+     * without stored fields, so that the entries stay aligned with the
+     * document numbers.
+     *
+     * @param Zend_Search_Lucene_Document $document
+     * @throws Zend_Search_Lucene_Exception if a field requests term vector storing
+     */
+    public function addDocument(Zend_Search_Lucene_Document $document)
+    {
+        $storedFields = array();
+
+        foreach ($document->getFieldNames() as $fieldName) {
+            $field = $document->getField($fieldName);
+            $this->_addFieldInfo($field);
+
+            if ($field->storeTermVector) {
+                /**
+                 * @todo term vector storing support
+                 */
+                throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.');
+            }
+
+            if ($field->isIndexed) {
+                if ($field->isTokenized) {
+                    $tokenList = Zend_Search_Lucene_Analysis_Analyzer::getDefault()->tokenize($field->stringValue);
+                } else {
+                    // Whole field value is indexed as a single token
+                    $tokenList = array();
+                    $tokenList[] = new Zend_Search_Lucene_Analysis_Token($field->stringValue, 0, strlen($field->stringValue));
+                }
+                // Field length in tokens is needed later for the norms calculation
+                $this->_fieldLengths[$field->name][$this->_docCount] = count($tokenList);
+
+                $position = 0;
+                foreach ($tokenList as $token) {
+                    $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name);
+                    $termKey = $term->key();
+
+                    if (!isset($this->_termDictionary[$termKey])) {
+                        // New term
+                        $this->_termDictionary[$termKey] = $term;
+                        $this->_termDocs[$termKey] = array();
+                        $this->_termDocs[$termKey][$this->_docCount] = array();
+                    } else if (!isset($this->_termDocs[$termKey][$this->_docCount])) {
+                        // Existing term, but new term entry
+                        $this->_termDocs[$termKey][$this->_docCount] = array();
+                    }
+                    $position += $token->getPositionIncrement();
+                    $this->_termDocs[$termKey][$this->_docCount][] = $position;
+                }
+            }
+
+            if ($field->isStored) {
+                $storedFields[] = $field;
+            }
+        }
+
+        // Stored fields files are created when the first document is added
+        if (!isset($this->_fdxFile)) {
+            $this->_fdxFile = $this->_directory->createFile($this->_name . '.fdx');
+            $this->_fdtFile = $this->_directory->createFile($this->_name . '.fdt');
+
+            $this->_files[] = $this->_name . '.fdx';
+            $this->_files[] = $this->_name . '.fdt';
+        }
+
+        // Written unconditionally: a document without stored fields gets an entry with zero fields
+        $this->_fdxFile->writeLong($this->_fdtFile->tell());
+        $this->_fdtFile->writeVInt(count($storedFields));
+        foreach ($storedFields as $field) {
+            $this->_fdtFile->writeVInt($this->_fields[$field->name]->number);
+            $fieldBits = ($field->isTokenized ? 0x01 : 0x00) |
+                         ($field->isBinary ?    0x02 : 0x00) |
+                         0x00; /* 0x04 - third bit, compressed (ZLIB) */
+            $this->_fdtFile->writeByte($fieldBits);
+            if ($field->isBinary) {
+                $this->_fdtFile->writeVInt(strlen($field->stringValue));
+                $this->_fdtFile->writeBytes($field->stringValue);
+            } else {
+                $this->_fdtFile->writeString($field->stringValue);
+            }
+        }
+
+        $this->_docCount++;
+    }
+
+
+    /**
+     * Dump Field Info (.fnm) segment file.
+     *
+     * For every indexed field a norms file ('.f<field number>') with one byte
+     * per document is written as well. All created files are registered
+     * in $_files.
+     */
+    private function _dumpFNM()
+    {
+        $fnmFile = $this->_directory->createFile($this->_name . '.fnm');
+        $fnmFile->writeVInt(count($this->_fields));
+
+        foreach ($this->_fields as $fieldInfo) {
+            // 0x01 - indexed, 0x02 - term vector stored
+            // (0x04 term positions and 0x08 term offsets in term vectors are not supported yet)
+            $fieldBits = 0x00;
+            if ($fieldInfo->isIndexed) {
+                $fieldBits |= 0x01;
+            }
+            if ($fieldInfo->storeTermVector) {
+                $fieldBits |= 0x02;
+            }
+
+            $fnmFile->writeString($fieldInfo->name);
+            $fnmFile->writeByte($fieldBits);
+
+            if (!$fieldInfo->isIndexed) {
+                // Norms exist only for indexed fields
+                continue;
+            }
+
+            $normFieldNum  = $this->_fields[$fieldInfo->name]->number;
+            $normFieldName = $fieldInfo->name;
+            $similarity    = Zend_Search_Lucene_Search_Similarity::getDefault();
+
+            $normBytes = '';
+            for ($docId = 0; $docId < $this->_docCount; $docId++) {
+                // Documents which don't contain the field count as zero tokens
+                if (isset($this->_fieldLengths[$normFieldName][$docId])) {
+                    $tokensNum = $this->_fieldLengths[$normFieldName][$docId];
+                } else {
+                    $tokensNum = 0;
+                }
+                $normBytes .= chr($similarity->encodeNorm($similarity->lengthNorm($normFieldName, $tokensNum)));
+            }
+
+            $normFile = $this->_name . '.f' . $normFieldNum;
+            $this->_directory->createFile($normFile)->writeBytes($normBytes);
+            $this->_files[] = $normFile;
+        }
+
+        $this->_files[] = $this->_name . '.fnm';
+    }
+
+
+    /**
+     * Dump Term Dictionary segment file entry.
+     * Used to write entry to .tis or .tii files
+     *
+     * The term text is written as the length of the prefix shared with the
+     * previous term (only if both belong to the same field) plus the remaining
+     * suffix. Freq and prox pointers are written as deltas to the previous entry.
+     *
+     * $prevTerm and $prevTermInfo are passed by reference and are set to
+     * $term and $termInfo, so the caller can hand them over to the next call.
+     *
+     * @param Zend_Search_Lucene_Storage_File $dicFile
+     * @param Zend_Search_Lucene_Index_Term $prevTerm
+     * @param Zend_Search_Lucene_Index_Term $term
+     * @param Zend_Search_Lucene_Index_TermInfo $prevTermInfo
+     * @param Zend_Search_Lucene_Index_TermInfo $termInfo
+     */
+    private function _dumpTermDictEntry(Zend_Search_Lucene_Storage_File $dicFile,
+                                        &$prevTerm,     Zend_Search_Lucene_Index_Term     $term,
+                                        &$prevTermInfo, Zend_Search_Lucene_Index_TermInfo $termInfo)
+    {
+        if (isset($prevTerm) && $prevTerm->field == $term->field) {
+            // Length of the common (byte-wise) prefix of the previous and the current term text.
+            // Square brackets: curly brace string offsets were removed in PHP 8
+            $prefixLength = 0;
+            while ($prefixLength < strlen($prevTerm->text) &&
+                   $prefixLength < strlen($term->text) &&
+                   $prevTerm->text[$prefixLength] == $term->text[$prefixLength]
+                  ) {
+                $prefixLength++;
+            }
+            // Write prefix length
+            $dicFile->writeVInt($prefixLength);
+            // Write suffix
+            $dicFile->writeString( substr($term->text, $prefixLength) );
+        } else {
+            // Write prefix length
+            $dicFile->writeVInt(0);
+            // Write suffix
+            $dicFile->writeString($term->text);
+        }
+        // Write field number
+        $dicFile->writeVInt($term->field);
+        // DocFreq (the count of documents which contain the term)
+        $dicFile->writeVInt($termInfo->docFreq);
+
+        $prevTerm = $term;
+
+        if (!isset($prevTermInfo)) {
+            // First entry: pointers are written as they are
+            // Write FreqDelta
+            $dicFile->writeVInt($termInfo->freqPointer);
+            // Write ProxDelta
+            $dicFile->writeVInt($termInfo->proxPointer);
+        } else {
+            // Write FreqDelta
+            $dicFile->writeVInt($termInfo->freqPointer - $prevTermInfo->freqPointer);
+            // Write ProxDelta
+            $dicFile->writeVInt($termInfo->proxPointer - $prevTermInfo->proxPointer);
+        }
+        // Write SkipOffset - it's not 0 when $termInfo->docFreq >= self::$skipInterval
+        if ($termInfo->skipOffset != 0) {
+            $dicFile->writeVInt($termInfo->skipOffset);
+        }
+
+        $prevTermInfo = $termInfo;
+    }
+
+    /**
+     * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files
+     */
+    private function _dumpDictionary()
+    {
+        // Terms are written in string-sorted order of their dictionary keys
+        $termKeys = array_keys($this->_termDictionary);
+        sort($termKeys, SORT_STRING);
+
+        // .tis header: format marker, number of terms, index interval, skip interval
+        $tisFile = $this->_directory->createFile($this->_name . '.tis');
+        $tisFile->writeInt((int)0xFFFFFFFE);
+        $tisFile->writeLong(count($termKeys));
+        $tisFile->writeInt(self::$indexInterval);
+        $tisFile->writeInt(self::$skipInterval);
+
+        // .tii header. The entry count must equal what is really written below:
+        // the header entry plus one entry for every self::$indexInterval-th term.
+        // The previous ceil((count + 2)/interval) formula announced one entry too
+        // many whenever count % interval == interval - 1.
+        $tiiFile = $this->_directory->createFile($this->_name . '.tii');
+        $tiiFile->writeInt((int)0xFFFFFFFE);
+        $tiiFile->writeLong((int)floor(count($termKeys)/self::$indexInterval) + 1);
+        $tiiFile->writeInt(self::$indexInterval);
+        $tiiFile->writeInt(self::$skipInterval);
+
+        /** Dump dictionary header */
+        $tiiFile->writeVInt(0);                    // prefix length
+        $tiiFile->writeString('');                 // suffix
+        $tiiFile->writeInt((int)0xFFFFFFFF);       // field number
+        $tiiFile->writeByte((int)0x0F);
+        $tiiFile->writeVInt(0);                    // DocFreq
+        $tiiFile->writeVInt(0);                    // FreqDelta
+        $tiiFile->writeVInt(0);                    // ProxDelta
+        $tiiFile->writeVInt(20);                   // IndexDelta (matches the initial $prevIndexPosition below)
+
+        $frqFile = $this->_directory->createFile($this->_name . '.frq');
+        $prxFile = $this->_directory->createFile($this->_name . '.prx');
+
+        // 1-based counter, so the first index entry is produced by term number self::$indexInterval
+        $termCount = 1;
+
+        $prevTerm     = null;
+        $prevTermInfo = null;
+        $prevIndexTerm     = null;
+        $prevIndexTermInfo = null;
+        $prevIndexPosition = 20;
+
+        foreach ($termKeys as $termId) {
+            // Remember where this term's postings start in both files
+            $freqPointer = $frqFile->tell();
+            $proxPointer = $prxFile->tell();
+
+            $prevDoc = 0;
+            foreach ($this->_termDocs[$termId] as $docId => $termPositions) {
+                // Document ids are delta-encoded and shifted left by one bit;
+                // the low bit is set when the frequency is not stored separately
+                $docDelta = ($docId - $prevDoc)*2;
+                $prevDoc = $docId;
+                if (count($termPositions) > 1) {
+                    $frqFile->writeVInt($docDelta);
+                    $frqFile->writeVInt(count($termPositions));
+                } else {
+                    $frqFile->writeVInt($docDelta + 1);
+                }
+
+                // Positions are delta-encoded per document
+                $prevPosition = 0;
+                foreach ($termPositions as $position) {
+                    $prxFile->writeVInt($position - $prevPosition);
+                    $prevPosition = $position;
+                }
+            }
+
+            if (count($this->_termDocs[$termId]) >= self::$skipInterval) {
+                /**
+                 * @todo Write Skip Data to a freq file.
+                 * It's not used now, but make index more optimal
+                 */
+                $skipOffset = $frqFile->tell() - $freqPointer;
+            } else {
+                $skipOffset = 0;
+            }
+
+            $term = new Zend_Search_Lucene_Index_Term($this->_termDictionary[$termId]->text,
+                                                      $this->_fields[$this->_termDictionary[$termId]->field]->number);
+            $termInfo = new Zend_Search_Lucene_Index_TermInfo(count($this->_termDocs[$termId]),
+                                            $freqPointer, $proxPointer, $skipOffset);
+
+            $this->_dumpTermDictEntry($tisFile, $prevTerm, $term, $prevTermInfo, $termInfo);
+
+            // Every self::$indexInterval-th term is repeated in the .tii file, followed by
+            // the distance between the current .tis position and the previous indexed one
+            if ($termCount % self::$indexInterval == 0) {
+                $this->_dumpTermDictEntry($tiiFile, $prevIndexTerm, $term, $prevIndexTermInfo, $termInfo);
+
+                $indexPosition = $tisFile->tell();
+                $tiiFile->writeVInt($indexPosition - $prevIndexPosition);
+                $prevIndexPosition = $indexPosition;
+            }
+            $termCount++;
+        }
+
+        // Register the produced files so that _generateCFS() can pack them
+        $this->_files[] = $this->_name . '.tis';
+        $this->_files[] = $this->_name . '.tii';
+        $this->_files[] = $this->_name . '.frq';
+        $this->_files[] = $this->_name . '.prx';
+    }
+
+
+    /**
+     * Generate compound index file
+     */
+    private function _generateCFS()
+    {
+        $compound = $this->_directory->createFile($this->_name . '.cfs');
+        $compound->writeVInt(count($this->_files));
+
+        // First pass: write the directory of the compound file. The real data
+        // offsets are not known yet, so a zero placeholder is stored and its
+        // position remembered per file name.
+        $placeholderAt = array();
+        foreach ($this->_files as $name) {
+            $placeholderAt[$name] = $compound->tell();
+            $compound->writeLong(0);
+            $compound->writeString($name);
+        }
+
+        // Second pass: append each file's content and patch its placeholder
+        foreach ($this->_files as $name) {
+            $payloadStart = $compound->tell();
+
+            // Jump back, store the now known offset, return to the end
+            $compound->seek($placeholderAt[$name]);
+            $compound->writeLong($payloadStart);
+            $compound->seek($payloadStart);
+
+            $source = $this->_directory->getFileObject($name);
+            $length = $this->_directory->fileLength($name);
+            $compound->writeBytes($source->readBytes($length));
+
+            // The standalone file is removed once it has been copied
+            $this->_directory->deleteFile($name);
+        }
+    }
+
+
+    /**
+     * Close segment, write it to disk and return segment info
+     *
+     * @return Zend_Search_Lucene_Index_SegmentInfo
+     */
+    public function close()
+    {
+        // Segment files are only produced when at least one document was added
+        if ($this->_docCount != 0) {
+            $this->_dumpFNM();
+            $this->_dumpDictionary();
+
+            // Pack the files registered so far into the compound file
+            $this->_generateCFS();
+
+            $segmentInfo = new Zend_Search_Lucene_Index_SegmentInfo($this->_name,
+                                                                    $this->_docCount,
+                                                                    $this->_directory);
+            return $segmentInfo;
+        }
+
+        // Empty segment: nothing written, nothing to describe
+        return null;
+    }
+
+}
+
diff --git a/search/Zend/Search/Lucene/Index/Term.php b/search/Zend/Search/Lucene/Index/Term.php
new file mode 100644 (file)
index 0000000..3deffa9
--- /dev/null
@@ -0,0 +1,72 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * A Term represents a word from text.  This is the unit of search.  It is
+ * composed of two elements, the text of the word, as a string, and the name of
+ * the field that the text occurred in, an interned string.
+ *
+ * Note that terms may represent more than words from text fields, but also
+ * things like dates, email addresses, urls, etc.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Index_Term
+{
+    /**
+     * Field name or field number (depending on context)
+     *
+     * @var mixed
+     */
+    public $field;
+
+    /**
+     * Term value
+     *
+     * @var string
+     */
+    public $text;
+
+
+    /**
+     * Creates a term from its text and the field it belongs to.
+     *
+     * @param string $text   Term value
+     * @param mixed  $field  Field name or number; 'contents' when omitted
+     */
+    public function __construct( $text, $field = 'contents' )
+    {
+        $this->text  = $text;
+        $this->field = $field;
+    }
+
+
+    /**
+     * Returns the field and the text joined by a single NUL byte.
+     *
+     * @return string
+     */
+    public function key()
+    {
+        $separator = "\0";
+
+        return $this->field . $separator . $this->text;
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Index/TermInfo.php b/search/Zend/Search/Lucene/Index/TermInfo.php
new file mode 100644 (file)
index 0000000..7dcfcc8
--- /dev/null
@@ -0,0 +1,79 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * A Zend_Search_Lucene_Index_TermInfo represents a record of information stored for a term.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Index_TermInfo
+{
+    /**
+     * Number of documents containing the term.
+     *
+     * @var integer
+     */
+    public $docFreq;
+
+    /**
+     * Offset of the term's data in a Frequencies file.
+     *
+     * @var integer
+     */
+    public $freqPointer;
+
+    /**
+     * Offset of the term's data in a Positions file.
+     *
+     * @var integer
+     */
+    public $proxPointer;
+
+    /**
+     * SkipData offset in a Frequencies file.
+     *
+     * @var integer
+     */
+    public $skipOffset;
+
+    /**
+     * Offset of the _next_ term in a TermDictionary file.
+     * Only meaningful for Term Index entries; null otherwise.
+     *
+     * @var integer
+     */
+    public $indexPointer;
+
+    /**
+     * Stores the given values in the public properties of the same name.
+     *
+     * @param integer $docFreq
+     * @param integer $freqPointer
+     * @param integer $proxPointer
+     * @param integer $skipOffset
+     * @param integer $indexPointer  optional, defaults to null
+     */
+    public function __construct($docFreq, $freqPointer, $proxPointer, $skipOffset, $indexPointer = null)
+    {
+        $this->indexPointer = $indexPointer;
+        $this->skipOffset   = $skipOffset;
+        $this->proxPointer  = $proxPointer;
+        $this->freqPointer  = $freqPointer;
+        $this->docFreq      = $docFreq;
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Index/Writer.php b/search/Zend/Search/Lucene/Index/Writer.php
new file mode 100644 (file)
index 0000000..ef6c655
--- /dev/null
@@ -0,0 +1,331 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Index_SegmentWriter */
+require_once 'Zend/Search/Lucene/Index/SegmentWriter.php';
+
+/** Zend_Search_Lucene_Index_SegmentInfo */
+require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Index
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Index_Writer
+{
+    /**
+     * @todo Implement segment merger
+     * @todo Implement mergeFactor, minMergeDocs, maxMergeDocs usage.
+     * @todo Implement Analyzer substitution
+     * @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM to use it for
+     *       temporary index files
+     * @todo Directory lock processing
+     */
+
+    /**
+     * File system adapter.
+     *
+     * @var Zend_Search_Lucene_Storage_Directory
+     */
+    private $_directory = null;
+
+
+    /**
+     * Index version
+     * Counts how often the index has been changed by adding or deleting docs
+     *
+     * @var integer
+     */
+    private $_version;
+
+    /**
+     * Segment name counter.
+     * Used to name new segments.
+     *
+     * @var integer
+     */
+    private $_segmentNameCounter;
+
+    /**
+     * Number of the segments in the index
+     *
+     * @var integer
+     */
+    private $_segments;
+
+    /**
+     * Determines how often segment indices
+     * are merged by addDocument().
+     *
+     * @var integer
+     */
+    public $mergeFactor;
+
+    /**
+     * Determines the minimal number of documents required before
+     * the buffered in-memory documents are merged and a new Segment
+     * is created.
+     *
+     * @var integer
+     */
+    public $minMergeDocs;
+
+    /**
+     * Determines the largest number of documents ever merged by addDocument().
+     *
+     * @var integer
+     */
+    public $maxMergeDocs;
+
+    /**
+     * List of the segments, created by index writer
+     * Array of Zend_Search_Lucene_Index_SegmentInfo objects
+     *
+     * @var array
+     */
+    private $_newSegments;
+
+    /**
+     * Current segment to add documents
+     *
+     * @var Zend_Search_Lucene_Index_SegmentWriter
+     */
+    private $_currentSegment;
+
+    /**
+     * List of indexfiles extensions
+     *
+     * @var array
+     */
+    private static $_indexExtensions = array('.cfs' => '.cfs',
+                                             '.fnm' => '.fnm',
+                                             '.fdx' => '.fdx',
+                                             '.fdt' => '.fdt',
+                                             '.tis' => '.tis',
+                                             '.tii' => '.tii',
+                                             '.frq' => '.frq',
+                                             '.prx' => '.prx',
+                                             '.tvx' => '.tvx',
+                                             '.tvd' => '.tvd',
+                                             '.tvf' => '.tvf',
+                                             '.del' => '.del'  );
+
+    /**
+     * Opens the index for writing
+     *
+     * IndexWriter constructor needs Directory as a parameter. It should be
+     * a string with a path to the index folder or a Directory object.
+     * Second constructor parameter create is optional - true to create the
+     * index or overwrite the existing one.
+     *
+     * @param Zend_Search_Lucene_Storage_Directory $directory
+     * @param boolean $create
+     */
+    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $create = false)
+    {
+        $this->_directory = $directory;
+
+        if (!$create) {
+            // Open an existing index: take the counters from the 'segments' file
+            $segmentsFile = $this->_directory->getFileObject('segments');
+            if ($segmentsFile->readInt() != (int)0xFFFFFFFF) {
+                throw new Zend_Search_Lucene_Exception('Wrong segments file format');
+            }
+
+            // Same order as written in the create branch: version, name counter, segment count
+            $this->_version            = $segmentsFile->readLong();
+            $this->_segmentNameCounter = $segmentsFile->readInt();
+            $this->_segments           = $segmentsFile->readInt();
+        } else {
+            // Create (or overwrite) the index: first drop everything that looks like an index file
+            foreach ($this->_directory->fileList() as $fileName) {
+                $isBookkeepingFile = ($fileName == 'segments' || $fileName == 'deletable');
+
+                if ($isBookkeepingFile ||
+                    isset(self::$_indexExtensions[ substr($fileName, strlen($fileName)-4)]) ||
+                    preg_match('/\.f\d+$/i', $fileName) /* matches <segment_name>.f<decimal_number> file names */) {
+                    $this->_directory->deleteFile($fileName);
+                }
+            }
+
+            // Empty 'segments' file: format marker, version, name counter, segment count
+            $segmentsFile = $this->_directory->createFile('segments');
+            $segmentsFile->writeInt((int)0xFFFFFFFF);
+            $segmentsFile->writeLong(0);
+            $segmentsFile->writeInt(0);
+            $segmentsFile->writeInt(0);
+
+            // Empty 'deletable' file: just a zero counter
+            $deletableFile = $this->_directory->createFile('deletable');
+            $deletableFile->writeInt(0);
+
+            $this->_segments           = 0;
+            $this->_segmentNameCounter = 0;
+            $this->_version            = 0;
+        }
+
+        // No segment is open and nothing is pending right after construction
+        $this->_currentSegment = null;
+        $this->_newSegments    = array();
+    }
+
+    /**
+     * Adds a document to this index.
+     *
+     * @param Zend_Search_Lucene_Document $document
+     */
+    public function addDocument(Zend_Search_Lucene_Document $document)
+    {
+        // A segment writer is opened lazily, when no segment is currently open
+        if (null === $this->_currentSegment) {
+            $segmentName = $this->_newSegmentName();
+            $this->_currentSegment =
+                new Zend_Search_Lucene_Index_SegmentWriter($this->_directory, $segmentName);
+        }
+
+        $this->_currentSegment->addDocument($document);
+
+        // Every added document counts as one index change
+        ++$this->_version;
+    }
+
+
+
+    /**
+     * Update segments file by adding current segment to a list
+     * @todo !!!!!Finish the implementation
+     *
+     * @throws Zend_Search_Lucene_Exception
+     */
+    private function _updateSegments()
+    {
+        // Size of the fixed fields at the start of the segments file.
+        // NOTE(review): presumably int + long + int + int as written below - confirm
+        // the byte widths against the file class.
+        $headerLength = 20;
+
+        $currentList = $this->_directory->getFileObject('segments');
+        $rewritten   = $this->_directory->createFile('segments.new');
+
+        // Fresh header with the current version and name counter
+        $rewritten->writeInt((int)0xFFFFFFFF);
+        $rewritten->writeLong($this->_version);
+        $rewritten->writeInt($this->_segmentNameCounter);
+
+        // The segment count grows by the number of segments created since the last update
+        $this->_segments = $this->_segments + count($this->_newSegments);
+        $rewritten->writeInt($this->_segments);
+
+        // Everything after the old header is copied over unchanged
+        $currentList->seek($headerLength);
+        $existingEntries = $currentList->readBytes($this->_directory->fileLength('segments') - $headerLength);
+        $rewritten->writeBytes($existingEntries);
+
+        // Append one (name, document count) record per new segment
+        foreach ($this->_newSegments as $name => $info) {
+            $rewritten->writeString($name);
+            $rewritten->writeInt($info->count());
+        }
+
+        // Replace the old list with the rewritten one
+        $this->_directory->renameFile('segments.new', 'segments');
+    }
+
+
+    /**
+     * Commit current changes
+     * returns array of new segments
+     *
+     * @return array
+     */
+    public function commit()
+    {
+        $openSegment = $this->_currentSegment;
+
+        if ($openSegment !== null) {
+            // close() returns null when the segment received no documents
+            $closedSegment = $openSegment->close();
+            if ($closedSegment !== null) {
+                $this->_newSegments[$closedSegment->getName()] = $closedSegment;
+            }
+            $this->_currentSegment = null;
+        }
+
+        // The segments file is only rewritten when there is something to add
+        if (count($this->_newSegments) != 0) {
+            $this->_updateSegments();
+        }
+
+        // Hand the pending list to the caller and start over with an empty one
+        $committed = $this->_newSegments;
+        $this->_newSegments = array();
+
+        return $committed;
+    }
+
+
+    /**
+     * Merges the provided indexes into this index.
+     *
+     * @param array $readers
+     * @return void
+     */
+    public function addIndexes($readers)
+    {
+        /**
+         * @todo implementation
+         *
+         * Placeholder: the body is empty, so $readers is ignored and
+         * no index data is merged.
+         */
+    }
+
+
+    /**
+     * Returns the number of documents currently in this index.
+     *
+     * @return integer
+     */
+    public function docCount($readers)
+    {
+        /**
+         * @todo implementation
+         *
+         * Placeholder: nothing is counted and no value is returned yet
+         * (callers receive NULL); $readers is unused.
+         */
+    }
+
+
+    /**
+     * Flushes all changes to an index and closes all associated files.
+     *
+     */
+    public function close()
+    {
+        /**
+         * @todo implementation
+         *
+         * Placeholder: the body is empty, so nothing is flushed or closed here.
+         * In this class pending documents are written out by commit().
+         */
+    }
+
+
+    /**
+     * Merges all segments together into a single segment, optimizing
+     * an index for search.
+     *
+     * @return void
+     */
+    public function optimize()
+    {
+        /**
+         * @todo implementation
+         *
+         * Placeholder: the body is empty, so no segments are merged.
+         */
+    }
+
+    /**
+     * Get name for new segment
+     *
+     * @return string
+     */
+    private function _newSegmentName()
+    {
+        // Take the current counter value for this name, then advance the counter
+        $sequenceNumber = $this->_segmentNameCounter++;
+
+        // Names are an underscore followed by the number in base 36
+        return '_' . base_convert($sequenceNumber, 10, 36);
+    }
+
+}
diff --git a/search/Zend/Search/Lucene/Search/Query.php b/search/Zend/Search/Lucene/Search/Query.php
new file mode 100644 (file)
index 0000000..bf28497
--- /dev/null
@@ -0,0 +1,100 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+abstract class Zend_Search_Lucene_Search_Query
+{
+
+    /**
+     * Boost factor of this query; 1.0 unless changed through setBoost().
+     *
+     * @var float
+     */
+    private $_boost = 1.0;
+
+    /**
+     * Weight object of this query, assigned by _initWeight().
+     *
+     * @var Zend_Search_Lucene_Search_Weight
+     */
+    protected $_weight;
+
+
+    /**
+     * Returns the boost of this clause.  Documents matching the clause
+     * have their score multiplied by the boost (on top of the normal
+     * weightings).  The default is 1.0.
+     *
+     * @return float
+     */
+    public function getBoost()
+    {
+        $boost = $this->_boost;
+
+        return $boost;
+    }
+
+    /**
+     * Replaces the boost of this query clause with $boost.
+     *
+     * @param float $boost
+     */
+    public function setBoost($boost)
+    {
+        $this->_boost = $boost;
+    }
+
+    /**
+     * Computes the score of the given document for this query.
+     *
+     * @param integer $docId
+     * @param Zend_Search_Lucene $reader
+     * @return float
+     */
+    abstract public function score($docId, $reader);
+
+    /**
+     * Builds the Weight implementation that belongs to the concrete query type.
+     *
+     * @param Zend_Search_Lucene $reader
+     * @return Zend_Search_Lucene_Search_Weight
+     */
+    abstract protected function _createWeight($reader);
+
+    /**
+     * Constructs and initializes the Weight for this query: the weight is
+     * created, stored, and normalized with the query norm derived from its
+     * sum of squared weights.
+     *
+     * @param Zend_Search_Lucene $reader
+     */
+    protected function _initWeight($reader)
+    {
+        $weight = $this->_createWeight($reader);
+        $this->_weight = $weight;
+
+        $squaredSum = $this->_weight->sumOfSquaredWeights();
+        $similarity = $reader->getSimilarity();
+
+        $this->_weight->normalize($similarity->queryNorm($squaredSum));
+    }
+
+}
\ No newline at end of file
diff --git a/search/Zend/Search/Lucene/Search/Query/MultiTerm.php b/search/Zend/Search/Lucene/Search/Query/MultiTerm.php
new file mode 100644 (file)
index 0000000..d3ec761
--- /dev/null
@@ -0,0 +1,439 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Search_Query */
+require_once 'Zend/Search/Lucene/Search/Query.php';
+
+/** Zend_Search_Lucene_Search_Weight_MultiTerm */
+require_once 'Zend/Search/Lucene/Search/Weight/MultiTerm.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_Query_MultiTerm extends Zend_Search_Lucene_Search_Query
+{
+
+    /**
+     * Terms to find.
+     * Array of Zend_Search_Lucene_Index_Term
+     *
+     * @var array
+     */
+    private $_terms = array();
+
+    /**
+     * Term signs.
+     * If true then term is required.
+     * If false then term is prohibited.
+     * If null then term is neither prohibited, nor required
+     *
+     * If array is null then all terms are required
+     *
+     * @var array
+     */
+
+    private $_signs = array();
+
+    /**
+     * Result vector.
+     * Bitset or array of document IDs
+     * (depending from Bitset extension availability).
+     *
+     * @var mixed
+     */
+    private $_resVector = null;
+
+    /**
+     * Terms positions vectors.
+     * Array of Arrays:
+     * term1Id => (docId => array( pos1, pos2, ... ), ...)
+     * term2Id => (docId => array( pos1, pos2, ... ), ...)
+     *
+     * @var array
+     */
+    private $_termsPositions = array();
+
+
+    /**
+     * A score factor based on the fraction of all query terms
+     * that a document contains.
+     * float for conjunction queries
+     * array of float for non conjunction queries
+     *
+     * @var mixed
+     */
+    private $_coord = null;
+
+
+    /**
+     * Terms weights
+     * array of Zend_Search_Lucene_Search_Weight
+     *
+     * @var array
+     */
+    private $_weights = array();
+
+
+    /**
+     * Class constructor.  Create a new multi-term query object.
+     *
+     * @param array $terms    Array of Zend_Search_Lucene_Index_Term objects
+     * @param array $signs    Array of signs.  Sign is boolean|null.
+     * @return void
+     */
+    public function __construct($terms = null, $signs = null)
+    {
+        /**
+         * @todo Check contents of $terms and $signs before adding them.
+         */
+        if (is_array($terms)) {
+            $this->_terms = $terms;
+
+            // A null sign list means "all terms are required" (pure conjunction)
+            $this->_signs = null;
+
+            // Keep the caller's signs as soon as one of them is not "required"
+            if (is_array($signs)) {
+                foreach ($signs as $sign ) {
+                    if ($sign !== true) {
+                        $this->_signs = $signs;
+                        // One such sign is enough; the original 'continue' here was
+                        // a no-op that kept scanning (and re-assigning) to the end
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+
+    /**
+     * Add a $term (Zend_Search_Lucene_Index_Term) to this query.
+     *
+     * The sign is specified as:
+     *     TRUE  - term is required
+     *     FALSE - term is prohibited
+     *     NULL  - term is neither prohibited, nor required
+     *
+     * @param  Zend_Search_Lucene_Index_Term $term
+     * @param  boolean|null $sign
+     * @return void
+     */
+    public function addTerm(Zend_Search_Lucene_Index_Term $term, $sign=null) {
+        /**
+         * @todo This is not good.  Sometimes $this->_signs is an array, sometimes
+         * it is null, even when there are terms.  It will be changed so that
+         * it is always an array.
+         */
+        if ($this->_signs === null) {
+            // A null sign list means every term stored so far is required.
+            // It can stay null only while the new term is required as well.
+            if ($sign !== true) {
+                // Materialize the implicit "required" signs under the same keys
+                // as the existing terms, so signs and terms stay aligned
+                $this->_signs = array();
+                foreach ($this->_terms as $termId => $existingTerm) {
+                    $this->_signs[$termId] = true;
+                }
+                $this->_signs[] = $sign;
+            }
+        } else {
+            $this->_signs[] = $sign;
+        }
+
+        // The term is appended last so that the back-fill above only covers
+        // previously added terms and the new sign lands on the new term's index
+        $this->_terms[] = $term;
+    }
+
+
+    /**
+     * Returns query terms
+     *
+     * @return array
+     */
+    public function getTerms()
+    {
+        // The stored term list is handed back as is
+        $terms = $this->_terms;
+
+        return $terms;
+    }
+
+
+    /**
+     * Return terms signs
+     *
+     * @return array
+     */
+    public function getSigns()
+    {
+        // May be null instead of an array: the constructor uses null for "all terms required"
+        $signs = $this->_signs;
+
+        return $signs;
+    }
+
+
+    /**
+     * Set weight for specified term
+     *
+     * @param integer $num
+     * @param Zend_Search_Lucene_Search_Weight_Term $weight
+     */
+    public function setWeight($num, $weight)
+    {
+        // $num is the key into $this->_weights; the scoring methods of this
+        // class look weights up by term id
+        $this->_weights[$num] = $weight;
+    }
+
+
+    /**
+     * Constructs an appropriate Weight implementation for this query.
+     *
+     * @param Zend_Search_Lucene $reader
+     * @return Zend_Search_Lucene_Search_Weight
+     */
+    protected function _createWeight($reader)
+    {
+        // The weight receives this query and the reader it is evaluated against
+        $weight = new Zend_Search_Lucene_Search_Weight_MultiTerm($this, $reader);
+
+        return $weight;
+    }
+
+
+    /**
+     * Calculate result vector for Conjunction query
+     * (like '+something +another')
+     *
+     * @param Zend_Search_Lucene $reader
+     */
+    private function _calculateConjunctionResult($reader)
+    {
+        // Decide once which representation the result vector uses
+        $bitsetAvailable = extension_loaded('bitset');
+
+        foreach ($this->_terms as $termId => $term) {
+            $docIds = $reader->termDocs($term);
+
+            if ($bitsetAvailable) {
+                $docSet = bitset_from_array($docIds);
+
+                // The first term seeds the result, every further term narrows it
+                $this->_resVector = ($this->_resVector === null)
+                                  ? $docSet
+                                  : bitset_intersection($this->_resVector, $docSet);
+            } else {
+                // Document ids become array keys so that isset() can test membership
+                $docSet = array_flip($docIds);
+
+                if ($this->_resVector === null) {
+                    $this->_resVector = $docSet;
+                } else {
+                    // Manual intersection: drop every document this term does not contain
+                    foreach (array_keys($this->_resVector) as $candidate) {
+                        if (!isset($docSet[$candidate])) {
+                            unset($this->_resVector[$candidate]);
+                        }
+                    }
+                }
+            }
+
+            // Positions are kept per term for the scoring step
+            $this->_termsPositions[$termId] = $reader->termPositions($term);
+        }
+    }
+
+
+    /**
+     * Calculate result vector for non Conjunction query
+     * (like '+something -another')
+     *
+     * @param Zend_Search_Lucene $reader
+     */
+    private function _calculateNonConjunctionResult($reader)
+    {
+        if (!extension_loaded('bitset')) {
+            // Fallback without the bitset extension: sets are arrays keyed by document id
+            $mustMatch = null;
+            $mayMatch  = array();
+            $mustNot   = array();
+
+            foreach ($this->_terms as $termId => $term) {
+                $docSet = array_flip($reader->termDocs($term));
+
+                if ($this->_signs[$termId] === false) {
+                    // prohibited term: union with the prohibited set
+                    foreach ($docSet as $doc => $flag) {
+                        $mustNot[$doc] = $flag;
+                    }
+                } elseif ($this->_signs[$termId] === true) {
+                    // required term: the first one seeds the set, later ones intersect
+                    if ($mustMatch === null) {
+                        $mustMatch = $docSet;
+                    } else {
+                        foreach ($mustMatch as $doc => $flag) {
+                            if (!isset( $docSet[$doc] )) {
+                                unset($mustMatch[$doc]);
+                            }
+                        }
+                    }
+                } else {
+                    // neither required nor prohibited: union with the optional set
+                    foreach ($docSet as $doc => $flag) {
+                        $mayMatch[$doc] = $flag;
+                    }
+                }
+
+                $this->_termsPositions[$termId] = $reader->termPositions($term);
+            }
+
+            // Without any required term the optional matches form the base set
+            if ($mustMatch === null) {
+                $mustMatch = $mayMatch;
+            }
+
+            // Remove every document that contains a prohibited term
+            foreach ($mustMatch as $doc => $flag) {
+                if (isset( $mustNot[$doc] )) {
+                    unset($mustMatch[$doc]);
+                }
+            }
+
+            $this->_resVector = $mustMatch;
+            return;
+        }
+
+        // Same algorithm expressed with bitset operations
+        $mustMatch = null;
+        $mayMatch  = bitset_empty();
+        $mustNot   = bitset_empty();
+
+        foreach ($this->_terms as $termId => $term) {
+            $docSet = bitset_from_array($reader->termDocs($term));
+
+            if ($this->_signs[$termId] === false) {
+                // prohibited
+                $mustNot = bitset_union($mustNot, $docSet);
+            } elseif ($this->_signs[$termId] === true) {
+                // required
+                $mustMatch = ($mustMatch !== null)
+                           ? bitset_intersection($mustMatch, $docSet)
+                           : $docSet;
+            } else {
+                // neither required, nor prohibited
+                $mayMatch = bitset_union($mayMatch, $docSet);
+            }
+
+            $this->_termsPositions[$termId] = $reader->termPositions($term);
+        }
+
+        if ($mustMatch === null) {
+            $mustMatch = $mayMatch;
+        }
+
+        // base set AND NOT prohibited
+        $allowed = bitset_invert($mustNot, $reader->count());
+        $this->_resVector = bitset_intersection( $mustMatch, $allowed );
+    }
+
+
+    /**
+     * Score calculator for conjunction queries (all terms are required)
+     *
+     * @param integer $docId
+     * @param Zend_Search_Lucene $reader
+     * @return float
+     */
+    public function _conjunctionScore($docId, $reader)
+    {
+        // All terms are required, so the coord factor is computed once
+        // with "matched" equal to "total"
+        if ($this->_coord === null) {
+            $termCount = count($this->_terms);
+            $this->_coord = $reader->getSimilarity()->coord($termCount, $termCount);
+        }
+
+        $total = 0.0;
+
+        foreach ($this->_terms as $termId => $term) {
+            // Term frequency is the number of stored positions in this document
+            $frequency  = count($this->_termsPositions[$termId][$docId]);
+
+            $tfFactor   = $reader->getSimilarity()->tf($frequency);
+            $termWeight = $this->_weights[$termId]->getValue();
+            $fieldNorm  = $reader->norm($docId, $term->field);
+
+            $total += $tfFactor * $termWeight * $fieldNorm;
+        }
+
+        return $total * $this->_coord;
+    }
+
+
+    /**
+     * Score calculator for non conjunction queries (not all terms are required)
+     *
+     * @param integer $docId
+     * @param Zend_Search_Lucene $reader
+     * @return float
+     */
+    public function _nonConjunctionScore($docId, $reader)
+    {
+        if ($this->_coord === null) {
+            $this->_coord = array();
+
+            // Only terms that are not prohibited can contribute to a match
+            $scorableTerms = 0;
+            foreach ($this->_signs as $sign) {
+                if ($sign === false) {
+                    continue;
+                }
+                $scorableTerms++;
+            }
+
+            // Precompute the coord factor for every possible number of matched terms
+            for ($matched = 0; $matched <= $scorableTerms; $matched++) {
+                $this->_coord[$matched] = $reader->getSimilarity()->coord($matched, $scorableTerms);
+            }
+        }
+
+        $total = 0.0;
+        $hits  = 0;
+
+        foreach ($this->_terms as $termId => $term) {
+            // Skip prohibited terms and terms that do not occur in this document
+            if ($this->_signs[$termId] === false ||
+                !isset($this->_termsPositions[$termId][$docId])) {
+                continue;
+            }
+
+            $hits++;
+
+            $tfFactor   = $reader->getSimilarity()->tf(count($this->_termsPositions[$termId][$docId]) );
+            $termWeight = $this->_weights[$termId]->getValue();
+            $fieldNorm  = $reader->norm($docId, $term->field);
+
+            $total += $tfFactor * $termWeight * $fieldNorm;
+        }
+
+        return $total * $this->_coord[$hits];
+    }
+
+    /**
+     * Score specified document
+     *
+     * @param integer $docId
+     * @param Zend_Search_Lucene $reader
+     * @return float
+     */
+    public function score($docId, $reader)
+    {
+        // The result vector and the weights are built on first use
+        if ($this->_resVector === null) {
+            if ($this->_signs !== null) {
+                $this->_calculateNonConjunctionResult($reader);
+            } else {
+                $this->_calculateConjunctionResult($reader);
+            }
+
+            $this->_initWeight($reader);
+        }
+
+        // Membership test depends on how the result vector is represented
+        if (extension_loaded('bitset')) {
+            $isHit = bitset_in($this->_resVector, $docId);
+        } else {
+            $isHit = isset($this->_resVector[$docId]);
+        }
+
+        // Documents outside the result vector score zero
+        if (!$isHit) {
+            return 0;
+        }
+
+        if ($this->_signs !== null) {
+            return $this->_nonConjunctionScore($docId, $reader);
+        }
+
+        return $this->_conjunctionScore($docId, $reader);
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Search/Query/Phrase.php b/search/Zend/Search/Lucene/Search/Query/Phrase.php
new file mode 100644 (file)
index 0000000..b1d40b4
--- /dev/null
@@ -0,0 +1,426 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * Zend_Search_Lucene_Search_Query
+ */
+require_once 'Zend/Search/Lucene/Search/Query.php';
+
+/**
+ * Zend_Search_Lucene_Search_Weight_Phrase
+ */
+require_once 'Zend/Search/Lucene/Search/Weight/Phrase.php';
+
+
+/**
+ * A Query that matches documents containing a particular sequence of terms.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Query
+{
+    /**
+     * Terms to find.
+     * Array of Zend_Search_Lucene_Index_Term objects.
+     *
+     * @var array
+     */
+    private $_terms;
+
+    /**
+     * Term positions (relative positions of terms within the phrase).
+     * Array of integers
+     *
+     * @var array
+     */
+    private $_offsets;
+
+    /**
+     * Sets the number of other words permitted between words in query phrase.
+     * If zero, then this is an exact phrase search.  For larger values this works
+     * like a WITHIN or NEAR operator.
+     *
+     * The slop is in fact an edit-distance, where the units correspond to
+     * moves of terms in the query phrase out of position.  For example, to switch
+     * the order of two words requires two moves (the first move places the words
+     * atop one another), so to permit re-orderings of phrases, the slop must be
+     * at least two.
+     * More exact matches are scored higher than sloppier matches, thus search
+     * results are sorted by exactness.
+     *
+     * The slop is zero by default, requiring exact matches.
+     *
+     * @var integer
+     */
+    private $_slop;
+
+    /**
+     * Result vector.
+     * Bitset or array of document IDs
+     * (depending on Bitset extension availability).
+     *
+     * @var mixed
+     */
+    private $_resVector = null;
+
+    /**
+     * Terms positions vectors.
+     * Array of Arrays:
+     * term1Id => (docId => array( pos1, pos2, ... ), ...)
+     * term2Id => (docId => array( pos1, pos2, ... ), ...)
+     *
+     * @var array
+     */
+    private $_termsPositions = array();
+
+    /**
+     * Class constructor.  Create a new phrase query.
+     *
+     * @param array  $terms    Terms to search. Array of strings, or null for an empty phrase.
+     * @param array  $offsets  Relative term positions. Array of integers (one per term),
+     *                         or null to assign consecutive positions starting at 0.
+     * @param string $field    Field to search. When null, the terms are created
+     *                         without a field argument.
+     * @throws Zend_Search_Lucene_Exception if $terms or $offsets has the wrong type,
+     *                                      or if their sizes differ.
+     */
+    public function __construct($terms = null, $offsets = null, $field = null)
+    {
+        $this->_slop = 0;
+
+        if (is_array($terms)) {
+            $this->_terms = array();
+            foreach ($terms as $termId => $termText) {
+                $this->_terms[$termId] = ($field !== null)? new Zend_Search_Lucene_Index_Term($termText, $field):
+                                                            new Zend_Search_Lucene_Index_Term($termText);
+            }
+        } else if ($terms === null) {
+            $this->_terms = array();
+        } else {
+            throw new Zend_Search_Lucene_Exception('terms argument must be array of strings or null');
+        }
+
+        if (is_array($offsets)) {
+            if (count($this->_terms) != count($offsets)) {
+                throw new Zend_Search_Lucene_Exception('terms and offsets arguments must have the same size.');
+            }
+            $this->_offsets = $offsets;
+        } else if ($offsets === null) {
+            // No explicit offsets: number the terms 0, 1, 2, ... in iteration
+            // order, keyed by the same ids as $_terms.
+            $this->_offsets = array();
+            foreach ($this->_terms as $termId => $term) {
+                $position = count($this->_offsets);
+                $this->_offsets[$termId] = $position;
+            }
+        } else {
+            // Offsets are integer positions, so the message says integers.
+            throw new Zend_Search_Lucene_Exception('offsets argument must be array of integers or null');
+        }
+    }
+
+    /**
+     * Set slop
+     *
+     * The value is stored as given, without validation.  score() treats a slop
+     * equal to 0 as an exact phrase search and any other value as a sloppy one.
+     *
+     * @param integer $slop
+     */
+    public function setSlop($slop)
+    {
+        $this->_slop = $slop;
+    }
+
+
+    /**
+     * Get slop
+     *
+     * Returns the value last passed to setSlop(), or 0 (the value assigned by
+     * the constructor) if it was never changed.
+     *
+     * @return integer
+     */
+    public function getSlop()
+    {
+        return $this->_slop;
+    }
+
+
+    /**
+     * Append a term to the query phrase.
+     *
+     * When $position is null the term is placed directly after the last
+     * recorded offset (or at 0 if no offsets exist yet); otherwise the given
+     * position is used as is.
+     *
+     * @param Zend_Search_Lucene_Index_Term $term
+     * @param integer $position
+     * @throws Zend_Search_Lucene_Exception if the term's field differs from
+     *                                      the field of the last term added.
+     */
+    public function addTerm(Zend_Search_Lucene_Index_Term $term, $position = null) {
+        // A phrase lives in exactly one field: compare against the last term.
+        if (count($this->_terms) != 0) {
+            $lastTerm = end($this->_terms);
+            if ($lastTerm->field != $term->field) {
+                throw new Zend_Search_Lucene_Exception('All phrase terms must be in the same field: ' .
+                                                       $term->field . ':' . $term->text);
+            }
+        }
+
+        $this->_terms[] = $term;
+
+        if ($position === null) {
+            $position = (count($this->_offsets) != 0) ? end($this->_offsets) + 1 : 0;
+        }
+        $this->_offsets[] = $position;
+    }
+
+
+    /**
+     * Returns the query terms.
+     *
+     * @return array Array of Zend_Search_Lucene_Index_Term objects, keyed by term id.
+     */
+    public function getTerms()
+    {
+        return $this->_terms;
+    }
+
+
+    /**
+     * Set weight for specified term
+     *
+     * Stores $weight in the $_weights array under index $num.
+     * NOTE(review): $_weights is not among the properties declared in this
+     * class - confirm whether it is meant to be provided by the parent class.
+     *
+     * @param integer $num
+     * @param Zend_Search_Lucene_Search_Weight_Term $weight
+     */
+    public function setWeight($num, $weight)
+    {
+        $this->_weights[$num] = $weight;
+    }
+
+
+    /**
+     * Constructs an appropriate Weight implementation for this query.
+     *
+     * @param Zend_Search_Lucene $reader
+     * @return Zend_Search_Lucene_Search_Weight_Phrase
+     */
+    protected function _createWeight($reader)
+    {
+        return new Zend_Search_Lucene_Search_Weight_Phrase($this, $reader);
+    }
+
+
+    /**
+     * Build the result vector and collect the term positions.
+     *
+     * The result vector is the intersection, over all phrase terms, of the
+     * document ids returned by termDocs().  It is a bitset when the bitset
+     * extension is loaded, otherwise an array keyed by document id.  The
+     * positions of every term are stored in $_termsPositions.
+     *
+     * @param Zend_Search_Lucene $reader
+     */
+    private function _calculateResult($reader)
+    {
+        $useBitset = extension_loaded('bitset');
+
+        foreach ($this->_terms as $termId => $term) {
+            if ($useBitset) {
+                $docSet = bitset_from_array($reader->termDocs($term));
+                $this->_resVector = ($this->_resVector === null)
+                                  ? $docSet
+                                  : bitset_intersection($this->_resVector, $docSet);
+            } else {
+                $docSet = array_flip($reader->termDocs($term));
+                if ($this->_resVector === null) {
+                    $this->_resVector = $docSet;
+                } else {
+                    // Drop every document that does not contain the current term.
+                    foreach ($this->_resVector as $candidateId => $unused) {
+                        if (!isset($docSet[$candidateId])) {
+                            unset($this->_resVector[$candidateId]);
+                        }
+                    }
+                }
+            }
+
+            $this->_termsPositions[$termId] = $reader->termPositions($term);
+        }
+    }
+
+
+    /**
+     * Count exact occurrences of the phrase in a document.
+     *
+     * The term with the fewest positions in the document is used as a pivot;
+     * for each of its positions every other term must occur at the position
+     * implied by the difference of the term offsets.
+     *
+     * @param integer $docId
+     * @return integer Number of pivot positions at which the whole phrase matches.
+     */
+    public function _exactPhraseFreq($docId)
+    {
+        // Select the pivot: the term with the lowest number of positions.
+        $pivotId = null;
+        foreach ($this->_terms as $termId => $term) {
+            if ($pivotId === null ||
+                count($this->_termsPositions[$termId][$docId]) <
+                count($this->_termsPositions[$pivotId][$docId]) ) {
+                $pivotId = $termId;
+            }
+        }
+
+        $matches = 0;
+
+        foreach ($this->_termsPositions[$pivotId][$docId] as $pivotPos) {
+            $phraseFound = true;
+
+            // Every other term has to sit at its expected position.
+            foreach ($this->_terms as $termId => $term) {
+                if ($termId == $pivotId) {
+                    continue;
+                }
+
+                $wantedPos = $pivotPos +
+                             ($this->_offsets[$termId] - $this->_offsets[$pivotId]);
+
+                if (!in_array($wantedPos, $this->_termsPositions[$termId][$docId])) {
+                    $phraseFound = false;
+                    break;
+                }
+            }
+
+            if ($phraseFound) {
+                $matches++;
+            }
+        }
+
+        return $matches;
+    }
+
+    /**
+     * Score calculator for sloppy phrase queries (terms may be displaced by up to the slop)
+     *
+     * Works in two stages:
+     *  1. Builds a queue of candidate phrases, each an array termId => one of that
+     *     term's positions in the document.  The first position of a term is written
+     *     into every candidate already queued; each further position creates new
+     *     candidates, but only from those whose previous-term position is within
+     *     the slop of the distance expected from the term offsets.
+     *  2. For every candidate, tries each start shift in [-slop, slop], sums the
+     *     absolute displacement of all terms from their expected positions and keeps
+     *     the smallest sum.  Candidates whose smallest sum does not exceed the slop
+     *     add sloppyFreq(minDistance) of the reader's similarity to the result.
+     *
+     * @param integer $docId
+     * @param Zend_Search_Lucene $reader
+     * @return float
+     */
+    public function _sloppyPhraseFreq($docId, Zend_Search_Lucene $reader)
+    {
+        $freq = 0;
+
+        $phraseQueue = array();
+        $phraseQueue[0] = array(); // empty phrase
+        $lastTerm = null;
+
+        // Walk through the terms to create candidate phrases.
+        foreach ($this->_terms as $termId => $term) {
+            $queueSize = count($phraseQueue);
+            $firstPass = true;
+
+            // Walk through the term positions.
+            // Each term position produces a set of candidate phrases.
+            foreach ($this->_termsPositions[$termId][$docId] as $termPosition ) {
+                if ($firstPass) {
+                    for ($count = 0; $count < $queueSize; $count++) {
+                        $phraseQueue[$count][$termId] = $termPosition;
+                    }
+                } else {
+                    for ($count = 0; $count < $queueSize; $count++) {
+                        if ($lastTerm !== null &&
+                            abs( $termPosition - $phraseQueue[$count][$lastTerm] -
+                                 ($this->_offsets[$termId] - $this->_offsets[$lastTerm])) > $this->_slop) {
+                            continue;
+                        }
+
+                        $newPhraseId = count($phraseQueue);
+                        $phraseQueue[$newPhraseId]          = $phraseQueue[$count];
+                        $phraseQueue[$newPhraseId][$termId] = $termPosition;
+                    }
+
+                }
+
+                $firstPass = false;
+            }
+            $lastTerm = $termId;
+        }
+
+
+        foreach ($phraseQueue as $phrasePos) {
+            $minDistance = null;
+
+            for ($shift = -$this->_slop; $shift <= $this->_slop; $shift++) {
+                $distance = 0;
+                $start = reset($phrasePos) - reset($this->_offsets) + $shift;
+
+                foreach ($this->_terms as $termId => $term) {
+                    $distance += abs($phrasePos[$termId] - $this->_offsets[$termId] - $start);
+
+                    if($distance > $this->_slop) {
+                        break;
+                    }
+                }
+
+                if ($minDistance === null || $distance < $minDistance) {
+                    $minDistance = $distance;
+                }
+            }
+
+            if ($minDistance <= $this->_slop) {
+                $freq += $reader->getSimilarity()->sloppyFreq($minDistance);
+            }
+        }
+
+        return $freq;
+    }
+
+
+    /**
+     * Score specified document
+     *
+     * The result vector and the weight are initialised lazily on the first
+     * call.  A document scores 0 when the query has no terms, when the
+     * document is not in the result vector, or when the phrase frequency in
+     * the document is 0; otherwise the score is tf(freq) * weight * norm.
+     *
+     * @param integer $docId
+     * @param Zend_Search_Lucene $reader
+     * @return float
+     */
+    public function score($docId, $reader)
+    {
+        // optimize zero-term case
+        if (count($this->_terms) == 0) {
+            return 0;
+        }
+
+        if ($this->_resVector === null) {
+            $this->_calculateResult($reader);
+            $this->_initWeight($reader);
+        }
+
+        // The result vector only says that every term occurs in the document;
+        // it is a bitset or an array keyed by document id.
+        $isCandidate = extension_loaded('bitset')
+                     ? bitset_in($this->_resVector, $docId)
+                     : isset($this->_resVector[$docId]);
+        if (!$isCandidate) {
+            return 0;
+        }
+
+        if ($this->_slop == 0) {
+            $freq = $this->_exactPhraseFreq($docId);
+        } else {
+            $freq = $this->_sloppyPhraseFreq($docId, $reader);
+        }
+
+        // All terms are present but never as a phrase: return an explicit 0
+        // instead of falling off the end of the function (which yields NULL).
+        if ($freq == 0) {
+            return 0;
+        }
+
+        $tf     = $reader->getSimilarity()->tf($freq);
+        $weight = $this->_weight->getValue();
+        $norm   = $reader->norm($docId, reset($this->_terms)->field);
+
+        return $tf * $weight * $norm;
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Search/Query/Term.php b/search/Zend/Search/Lucene/Search/Query/Term.php
new file mode 100644 (file)
index 0000000..b0baf0f
--- /dev/null
@@ -0,0 +1,128 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Search_Query */
+require_once 'Zend/Search/Lucene/Search/Query.php';
+
+/** Zend_Search_Lucene_Search_Weight_Term */
+require_once 'Zend/Search/Lucene/Search/Weight/Term.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_Query_Term extends Zend_Search_Lucene_Search_Query
+{
+    /**
+     * The term this query looks for.
+     *
+     * @var Zend_Search_Lucene_Index_Term
+     */
+    private $_term;
+
+    /**
+     * Term sign.
+     * True means the term is required, false means it is prohibited;
+     * score() returns 0 for every document when the sign is false.
+     *
+     * @var bool
+     */
+    private $_sign;
+
+    /**
+     * Documents containing the term.
+     * Bitset or array keyed by document id
+     * (depending on Bitset extension availability).
+     * Stays null until score() is called for the first time.
+     *
+     * @var mixed
+     */
+    private $_docVector = null;
+
+    /**
+     * Positions of the term.
+     * Array: docId => array( pos1, pos2, ... )
+     *
+     * @var array
+     */
+    private $_termPositions;
+
+
+    /**
+     * Create a query for a single term.
+     *
+     * @param Zend_Search_Lucene_Index_Term $term
+     * @param boolean $sign
+     */
+    public function __construct( $term, $sign = true )
+    {
+        $this->_term = $term;
+        $this->_sign = $sign;
+    }
+
+
+    /**
+     * Build the Weight object that belongs to this query.
+     *
+     * @param Zend_Search_Lucene $reader
+     * @return Zend_Search_Lucene_Search_Weight
+     */
+    protected function _createWeight($reader)
+    {
+        return new Zend_Search_Lucene_Search_Weight_Term($this->_term, $this, $reader);
+    }
+
+    /**
+     * Compute the score of a single document.
+     *
+     * The document vector, the term positions and the weight are loaded on
+     * the first call.  The score is tf(number of positions) * weight * norm
+     * for a matching document of a non-prohibited term, and 0 otherwise.
+     *
+     * @param integer $docId
+     * @param Zend_Search_Lucene $reader
+     * @return float
+     */
+    public function score( $docId, $reader )
+    {
+        $useBitset = extension_loaded('bitset');
+
+        // Lazy initialisation on the first scored document.
+        if ($this->_docVector === null) {
+            $termDocs = $reader->termDocs($this->_term);
+            $this->_docVector = $useBitset ? bitset_from_array($termDocs)
+                                           : array_flip($termDocs);
+
+            $this->_termPositions = $reader->termPositions($this->_term);
+            $this->_initWeight($reader);
+        }
+
+        if ($useBitset) {
+            $match = bitset_in($this->_docVector, $docId);
+        } else {
+            $match = isset($this->_docVector[$docId]);
+        }
+
+        if (!($this->_sign && $match)) {
+            return 0;
+        }
+
+        $tf = $reader->getSimilarity()->tf(count($this->_termPositions[$docId]));
+
+        return $tf *
+               $this->_weight->getValue() *
+               $reader->norm($docId, $this->_term->field);
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Search/QueryHit.php b/search/Zend/Search/Lucene/Search/QueryHit.php
new file mode 100644 (file)
index 0000000..19ab381
--- /dev/null
@@ -0,0 +1,108 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryHit
+{
+    /**
+     * Index that produced this hit
+     * @var Zend_Search_Lucene
+     */
+    protected $_index = null;
+
+    /**
+     * Document belonging to this hit; loaded on demand by getDocument()
+     * @var Zend_Search_Lucene_Document
+     */
+    protected $_document = null;
+
+    /**
+     * Number of the document in the index
+     * @var integer
+     */
+    public $id;
+
+    /**
+     * Score of the hit
+     * @var float
+     */
+    public $score;
+
+
+    /**
+     * Constructor - keeps a handle to the Zend_Search_Lucene index that
+     * produced the hit, so the document can be fetched from it later.
+     *
+     * @param Zend_Search_Lucene $index
+     */
+    public function __construct(Zend_Search_Lucene $index)
+    {
+        $this->_index = $index;
+    }
+
+
+    /**
+     * Magic accessor: reads a field value from the document of this hit.
+     *
+     * @param string $offset  Name of the document field
+     * @return string
+     */
+    public function __get($offset)
+    {
+        $document = $this->getDocument();
+
+        return $document->getFieldValue($offset);
+    }
+
+
+    /**
+     * Return the document object for this hit.
+     * The document is fetched from the index on first access and then reused.
+     *
+     * @return Zend_Search_Lucene_Document
+     */
+    public function getDocument()
+    {
+        if ($this->_document instanceof Zend_Search_Lucene_Document) {
+            return $this->_document;
+        }
+
+        $this->_document = $this->_index->getDocument($this->id);
+
+        return $this->_document;
+    }
+
+
+    /**
+     * Return the index object for this hit
+     *
+     * @return Zend_Search_Lucene
+     */
+    public function getIndex()
+    {
+        return $this->_index;
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Search/QueryParser.php b/search/Zend/Search/Lucene/Search/QueryParser.php
new file mode 100644 (file)
index 0000000..63b6497
--- /dev/null
@@ -0,0 +1,142 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Search_QueryTokenizer */
+require_once 'Zend/Search/Lucene/Search/QueryTokenizer.php';
+
+/** Zend_Search_Lucene_Index_Term */
+require_once 'Zend/Search/Lucene/Index/Term.php';
+
+/** Zend_Search_Lucene_Search_Query_Term */
+require_once 'Zend/Search/Lucene/Search/Query/Term.php';
+
+/** Zend_Search_Lucene_Search_Query_MultiTerm */
+require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
+
+/** Zend_Search_Lucene_Search_Query_Phrase */
+require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
+
+
+/** Zend_Search_Lucene_Exception */
+require_once 'Zend/Search/Lucene/Exception.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryParser
+{
+
+    /**
+     * Parses a query string, returning a Zend_Search_Lucene_Search_Query
+     *
+     * A query consisting of a single word token yields a Term query on the
+     * 'contents' field.  Longer queries are scanned token by token:
+     *  - a word becomes a term in the field named by a directly preceding
+     *    field token, or in 'contents' when there is none;
+     *  - a '+' sign before a word marks the term as required, any other sign
+     *    marks it as prohibited, no sign leaves it undetermined (null);
+     *  - brackets are only checked for balance.
+     * One resulting term gives a Term query, several give a MultiTerm query.
+     *
+     * @param string $strQuery
+     * @return Zend_Search_Lucene_Search_Query
+     * @throws Zend_Search_Lucene_Exception on a syntax error in the query string
+     */
+    static public function parse($strQuery)
+    {
+        $tokens = new Zend_Search_Lucene_Search_QueryTokenizer($strQuery);
+
+        // Empty query
+        if (!$tokens->count()) {
+            throw new Zend_Search_Lucene_Exception('Syntax error: query string cannot be empty.');
+        }
+
+        // Term query
+        if ($tokens->count() == 1) {
+            if ($tokens->current()->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD) {
+                return new Zend_Search_Lucene_Search_Query_Term(new Zend_Search_Lucene_Index_Term($tokens->current()->text, 'contents'));
+            } else {
+                throw new Zend_Search_Lucene_Exception('Syntax error: query string must contain at least one word.');
+            }
+        }
+
+
+        /**
+         * MultiTerm Query
+         *
+         * Process each token that was returned by the tokenizer.
+         */
+        $terms = array();
+        $signs = array();
+        $prevToken = null;
+        $openBrackets = 0;
+        $field = 'contents';
+        foreach ($tokens as $token) {
+            switch ($token->type) {
+                case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD:
+                    $terms[] = new Zend_Search_Lucene_Index_Term($token->text, $field);
+                    // A field token applies to the next word only.
+                    $field = 'contents';
+                    if ($prevToken !== null &&
+                        $prevToken->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN) {
+                            if ($prevToken->text == "+") {
+                                $signs[] = true;
+                            } else {
+                                $signs[] = false;
+                            }
+                    } else {
+                        $signs[] = null;
+                    }
+                    break;
+                case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN:
+                    if ($prevToken !== null &&
+                        $prevToken->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN) {
+                            throw new Zend_Search_Lucene_Exception('Syntax error: sign operator must be followed by a word.');
+                    }
+                    break;
+                case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_FIELD:
+                    $field = $token->text;
+                    // Keep the previous token as $prevToken, so that a sign in
+                    // front of 'field:word' still applies to the word.
+                    $token = $prevToken;
+                    break;
+                case Zend_Search_Lucene_Search_QueryToken::TOKTYPE_BRACKET:
+                    if ($token->text == '(') {
+                        $openBrackets++;
+                    } else {
+                        $openBrackets--;
+                    }
+                    // A negative depth means a closing bracket came before its
+                    // opening one (e.g. ")("), which a final count of 0 would hide.
+                    if ($openBrackets < 0) {
+                        throw new Zend_Search_Lucene_Exception('Syntax Error: mismatched parentheses, closing bracket has no matching opening bracket.');
+                    }
+                    break;
+            }
+            $prevToken = $token;
+        }
+
+        // Finish up parsing: check the last token in the query for an opening sign or parenthesis.
+        // $prevToken is still null when the query held nothing but field tokens.
+        if ($prevToken !== null &&
+            $prevToken->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN) {
+            throw new Zend_Search_Lucene_Exception('Syntax Error: sign operator must be followed by a word.');
+        }
+
+        // Finish up parsing: check that every opening bracket has a matching closing bracket.
+        if ($openBrackets != 0) {
+            throw new Zend_Search_Lucene_Exception('Syntax Error: mismatched parentheses, every opening must have closing.');
+        }
+
+        switch (count($terms)) {
+            case 0:
+                throw new Zend_Search_Lucene_Exception('Syntax error: bad term count.');
+            case 1:
+                return new Zend_Search_Lucene_Search_Query_Term($terms[0],$signs[0] !== false);
+            default:
+                return new Zend_Search_Lucene_Search_Query_MultiTerm($terms,$signs);
+        }
+    }
+
+}
+
diff --git a/search/Zend/Search/Lucene/Search/QueryToken.php b/search/Zend/Search/Lucene/Search/QueryToken.php
new file mode 100644 (file)
index 0000000..56d3522
--- /dev/null
@@ -0,0 +1,104 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Exception */
+require_once 'Zend/Search/Lucene/Exception.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryToken
+{
+    /**
+     * Token type Word.
+     */
+    const TOKTYPE_WORD = 0;
+
+    /**
+     * Token type Field.
+     * Field indicator in 'field:word' pair
+     */
+    const TOKTYPE_FIELD = 1;
+
+    /**
+     * Token type Sign.
+     * '+' (required) or '-' (absentee) sign
+     */
+    const TOKTYPE_SIGN = 2;
+
+    /**
+     * Token type Bracket.
+     * '(' or ')'
+     */
+    const TOKTYPE_BRACKET = 3;
+
+
+    /**
+     * Token type (one of the TOKTYPE_* constants).
+     *
+     * @var integer
+     */
+    public $type;
+
+    /**
+     * Token text.
+     *
+     * @var string
+     */
+    public $text;
+
+
+    /**
+     * QueryToken constructor needs token type and token text as parameters.
+     *
+     * @param $tokType integer  One of the TOKTYPE_* constants
+     * @param $tokText string   Non-empty token text
+     * @throws Zend_Search_Lucene_Exception if the type is not recognized or the text is empty
+     */
+    public function __construct($tokType, $tokText)
+    {
+        // Loose comparison on purpose, in the same order the types were
+        // previously tested.
+        $knownTypes = array(self::TOKTYPE_BRACKET,
+                            self::TOKTYPE_FIELD,
+                            self::TOKTYPE_SIGN,
+                            self::TOKTYPE_WORD);
+
+        if (!in_array($tokType, $knownTypes)) {
+            throw new Zend_Search_Lucene_Exception("Unrecognized token type \"$tokType\".");
+        }
+
+        if (strlen($tokText) == 0) {
+            throw new Zend_Search_Lucene_Exception('Token text must be supplied.');
+        }
+
+        $this->type = $tokType;
+        $this->text = $tokText;
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Search/QueryTokenizer.php b/search/Zend/Search/Lucene/Search/QueryTokenizer.php
new file mode 100644 (file)
index 0000000..a59f8a8
--- /dev/null
@@ -0,0 +1,164 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Search_QueryToken */
+require_once 'Zend/Search/Lucene/Search/QueryToken.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once 'Zend/Search/Lucene/Exception.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_QueryTokenizer implements Iterator
+{
+    /**
+     * Tokens extracted from the query string, in input order.
+     *
+     * @var array
+     */
+    protected $_tokens = array();
+
+    /**
+     * Index into $_tokens of the token the iterator currently points at.
+     *
+     * @var integer
+     */
+    protected $_currToken = 0;
+
+
+    /**
+     * Splits a query string into tokens.
+     *
+     * Runs of alphanumeric bytes (as judged by ctype_alnum()) become WORD
+     * tokens, '+' and '-' become SIGN tokens, '(' and ')' become BRACKET
+     * tokens.  A ':' produces no token of its own; it re-tags the most
+     * recent token as FIELD when that token is a WORD.  Any other character
+     * only terminates the word in progress.
+     *
+     * @param string $inputString
+     * @throws Zend_Search_Lucene_Exception if $inputString is empty
+     */
+    public function __construct($inputString)
+    {
+        // Measured once: the string is never modified while it is scanned.
+        $length = strlen($inputString);
+        if (!$length) {
+            throw new Zend_Search_Lucene_Exception('Cannot tokenize empty query string.');
+        }
+
+        $currentToken = '';
+        for ($count = 0; $count < $length; $count++) {
+            // Square-bracket offsets: the curly-brace form ($str{$i}) is
+            // deprecated since PHP 7.4 and is a parse error in PHP 8.
+            $char = $inputString[$count];
+
+            if (ctype_alnum($char)) {
+                $currentToken .= $char;
+                continue;
+            }
+
+            // Any non-alphanumeric character finishes the word in progress.
+            if (strlen($currentToken)) {
+                $this->_tokens[] = new Zend_Search_Lucene_Search_QueryToken(
+                    Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD,
+                    $currentToken
+                );
+                $currentToken = '';
+            }
+
+            if ($char === '+' || $char === '-') {
+                $this->_tokens[] = new Zend_Search_Lucene_Search_QueryToken(
+                    Zend_Search_Lucene_Search_QueryToken::TOKTYPE_SIGN,
+                    $char
+                );
+            } elseif ($char === '(' || $char === ')') {
+                $this->_tokens[] = new Zend_Search_Lucene_Search_QueryToken(
+                    Zend_Search_Lucene_Search_QueryToken::TOKTYPE_BRACKET,
+                    $char
+                );
+            } elseif ($char === ':' && $this->count()) {
+                // 'field:word' - the word just emitted is really a field name.
+                $last = $this->_tokens[count($this->_tokens) - 1];
+                if ($last->type == Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD) {
+                    $last->type = Zend_Search_Lucene_Search_QueryToken::TOKTYPE_FIELD;
+                }
+            }
+        }
+
+        // Flush a word that runs up to the end of the input.
+        if (strlen($currentToken)) {
+            $this->_tokens[] = new Zend_Search_Lucene_Search_QueryToken(
+                Zend_Search_Lucene_Search_QueryToken::TOKTYPE_WORD,
+                $currentToken
+            );
+        }
+    }
+
+
+    /**
+     * Returns number of tokens
+     *
+     * @return integer
+     */
+    public function count()
+    {
+        return count($this->_tokens);
+    }
+
+
+    /**
+     * Returns TRUE if a token exists at the current position.
+     *
+     * @return boolean
+     */
+    public function valid()
+    {
+        return $this->_currToken < $this->count();
+    }
+
+
+    /**
+     * Resets the token stream to its first token.
+     *
+     * @return void
+     */
+    public function rewind()
+    {
+        $this->_currToken = 0;
+    }
+
+
+    /**
+     * Returns the token at the current position or FALSE if
+     * the position does not contain a valid token.
+     *
+     * @return mixed
+     */
+    public function current()
+    {
+        return $this->valid() ? $this->_tokens[$this->_currToken] : false;
+    }
+
+
+    /**
+     * Advances the stream to the next position.
+     *
+     * The new position is returned (not a token); it may be one past the
+     * last token, in which case valid() reports FALSE.
+     *
+     * @return integer
+     */
+    public function next()
+    {
+        return ++$this->_currToken;
+    }
+
+
+    /**
+     * Return the position of the current token.
+     *
+     * @return integer
+     */
+    public function key()
+    {
+        return $this->_currToken;
+    }
+
+}
+
diff --git a/search/Zend/Search/Lucene/Search/Similarity.php b/search/Zend/Search/Lucene/Search/Similarity.php
new file mode 100644 (file)
index 0000000..74ecb1d
--- /dev/null
@@ -0,0 +1,553 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Search_Similarity_Default */
+require_once 'Zend/Search/Lucene/Search/Similarity/Default.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+abstract class Zend_Search_Lucene_Search_Similarity
+{
+    /**
+     * The Similarity implementation used by default.
+     *
+     * @var Zend_Search_Lucene_Search_Similarity
+     */
+    static private $_defaultImpl;
+
+    /**
+     * Lookup table of decoded norm values: maps each encoded byte (0-255)
+     * to the float it represents.
+     *
+     * @var array
+     */
+    static private $_normTable = array( 0   => 0.0,
+                                        1   => 5.820766E-10,
+                                        2   => 6.9849193E-10,
+                                        3   => 8.1490725E-10,
+                                        4   => 9.313226E-10,
+                                        5   => 1.1641532E-9,
+                                        6   => 1.3969839E-9,
+                                        7   => 1.6298145E-9,
+                                        8   => 1.8626451E-9,
+                                        9   => 2.3283064E-9,
+                                        10  => 2.7939677E-9,
+                                        11  => 3.259629E-9,
+                                        12  => 3.7252903E-9,
+                                        13  => 4.656613E-9,
+                                        14  => 5.5879354E-9,
+                                        15  => 6.519258E-9,
+                                        16  => 7.4505806E-9,
+                                        17  => 9.313226E-9,
+                                        18  => 1.1175871E-8,
+                                        19  => 1.3038516E-8,
+                                        20  => 1.4901161E-8,
+                                        21  => 1.8626451E-8,
+                                        22  => 2.2351742E-8,
+                                        23  => 2.6077032E-8,
+                                        24  => 2.9802322E-8,
+                                        25  => 3.7252903E-8,
+                                        26  => 4.4703484E-8,
+                                        27  => 5.2154064E-8,
+                                        28  => 5.9604645E-8,
+                                        29  => 7.4505806E-8,
+                                        30  => 8.940697E-8,
+                                        31  => 1.0430813E-7,
+                                        32  => 1.1920929E-7,
+                                        33  => 1.4901161E-7,
+                                        34  => 1.7881393E-7,
+                                        35  => 2.0861626E-7,
+                                        36  => 2.3841858E-7,
+                                        37  => 2.9802322E-7,
+                                        38  => 3.5762787E-7,
+                                        39  => 4.172325E-7,
+                                        40  => 4.7683716E-7,
+                                        41  => 5.9604645E-7,
+                                        42  => 7.1525574E-7,
+                                        43  => 8.34465E-7,
+                                        44  => 9.536743E-7,
+                                        45  => 1.1920929E-6,
+                                        46  => 1.4305115E-6,
+                                        47  => 1.66893E-6,
+                                        48  => 1.9073486E-6,
+                                        49  => 2.3841858E-6,
+                                        50  => 2.861023E-6,
+                                        51  => 3.33786E-6,
+                                        52  => 3.8146973E-6,
+                                        53  => 4.7683716E-6,
+                                        54  => 5.722046E-6,
+                                        55  => 6.67572E-6,
+                                        56  => 7.6293945E-6,
+                                        57  => 9.536743E-6,
+                                        58  => 1.1444092E-5,
+                                        59  => 1.335144E-5,
+                                        60  => 1.5258789E-5,
+                                        61  => 1.9073486E-5,
+                                        62  => 2.2888184E-5,
+                                        63  => 2.670288E-5,
+                                        64  => 3.0517578E-5,
+                                        65  => 3.8146973E-5,
+                                        66  => 4.5776367E-5,
+                                        67  => 5.340576E-5,
+                                        68  => 6.1035156E-5,
+                                        69  => 7.6293945E-5,
+                                        70  => 9.1552734E-5,
+                                        71  => 1.0681152E-4,
+                                        72  => 1.2207031E-4,
+                                        73  => 1.5258789E-4,
+                                        74  => 1.8310547E-4,
+                                        75  => 2.1362305E-4,
+                                        76  => 2.4414062E-4,
+                                        77  => 3.0517578E-4,
+                                        78  => 3.6621094E-4,
+                                        79  => 4.272461E-4,
+                                        80  => 4.8828125E-4,
+                                        81  => 6.1035156E-4,
+                                        82  => 7.324219E-4,
+                                        83  => 8.544922E-4,
+                                        84  => 9.765625E-4,
+                                        85  => 0.0012207031,
+                                        86  => 0.0014648438,
+                                        87  => 0.0017089844,
+                                        88  => 0.001953125,
+                                        89  => 0.0024414062,
+                                        90  => 0.0029296875,
+                                        91  => 0.0034179688,
+                                        92  => 0.00390625,
+                                        93  => 0.0048828125,
+                                        94  => 0.005859375,
+                                        95  => 0.0068359375,
+                                        96  => 0.0078125,
+                                        97  => 0.009765625,
+                                        98  => 0.01171875,
+                                        99  => 0.013671875,
+                                        100 => 0.015625,
+                                        101 => 0.01953125,
+                                        102 => 0.0234375,
+                                        103 => 0.02734375,
+                                        104 => 0.03125,
+                                        105 => 0.0390625,
+                                        106 => 0.046875,
+                                        107 => 0.0546875,
+                                        108 => 0.0625,
+                                        109 => 0.078125,
+                                        110 => 0.09375,
+                                        111 => 0.109375,
+                                        112 => 0.125,
+                                        113 => 0.15625,
+                                        114 => 0.1875,
+                                        115 => 0.21875,
+                                        116 => 0.25,
+                                        117 => 0.3125,
+                                        118 => 0.375,
+                                        119 => 0.4375,
+                                        120 => 0.5,
+                                        121 => 0.625,
+                                        122 => 0.75,
+                                        123 => 0.875,
+                                        124 => 1.0,
+                                        125 => 1.25,
+                                        126 => 1.5,
+                                        127 => 1.75,
+                                        128 => 2.0,
+                                        129 => 2.5,
+                                        130 => 3.0,
+                                        131 => 3.5,
+                                        132 => 4.0,
+                                        133 => 5.0,
+                                        134 => 6.0,
+                                        135 => 7.0,
+                                        136 => 8.0,
+                                        137 => 10.0,
+                                        138 => 12.0,
+                                        139 => 14.0,
+                                        140 => 16.0,
+                                        141 => 20.0,
+                                        142 => 24.0,
+                                        143 => 28.0,
+                                        144 => 32.0,
+                                        145 => 40.0,
+                                        146 => 48.0,
+                                        147 => 56.0,
+                                        148 => 64.0,
+                                        149 => 80.0,
+                                        150 => 96.0,
+                                        151 => 112.0,
+                                        152 => 128.0,
+                                        153 => 160.0,
+                                        154 => 192.0,
+                                        155 => 224.0,
+                                        156 => 256.0,
+                                        157 => 320.0,
+                                        158 => 384.0,
+                                        159 => 448.0,
+                                        160 => 512.0,
+                                        161 => 640.0,
+                                        162 => 768.0,
+                                        163 => 896.0,
+                                        164 => 1024.0,
+                                        165 => 1280.0,
+                                        166 => 1536.0,
+                                        167 => 1792.0,
+                                        168 => 2048.0,
+                                        169 => 2560.0,
+                                        170 => 3072.0,
+                                        171 => 3584.0,
+                                        172 => 4096.0,
+                                        173 => 5120.0,
+                                        174 => 6144.0,
+                                        175 => 7168.0,
+                                        176 => 8192.0,
+                                        177 => 10240.0,
+                                        178 => 12288.0,
+                                        179 => 14336.0,
+                                        180 => 16384.0,
+                                        181 => 20480.0,
+                                        182 => 24576.0,
+                                        183 => 28672.0,
+                                        184 => 32768.0,
+                                        185 => 40960.0,
+                                        186 => 49152.0,
+                                        187 => 57344.0,
+                                        188 => 65536.0,
+                                        189 => 81920.0,
+                                        190 => 98304.0,
+                                        191 => 114688.0,
+                                        192 => 131072.0,
+                                        193 => 163840.0,
+                                        194 => 196608.0,
+                                        195 => 229376.0,
+                                        196 => 262144.0,
+                                        197 => 327680.0,
+                                        198 => 393216.0,
+                                        199 => 458752.0,
+                                        200 => 524288.0,
+                                        201 => 655360.0,
+                                        202 => 786432.0,
+                                        203 => 917504.0,
+                                        204 => 1048576.0,
+                                        205 => 1310720.0,
+                                        206 => 1572864.0,
+                                        207 => 1835008.0,
+                                        208 => 2097152.0,
+                                        209 => 2621440.0,
+                                        210 => 3145728.0,
+                                        211 => 3670016.0,
+                                        212 => 4194304.0,
+                                        213 => 5242880.0,
+                                        214 => 6291456.0,
+                                        215 => 7340032.0,
+                                        216 => 8388608.0,
+                                        217 => 1.048576E7,
+                                        218 => 1.2582912E7,
+                                        219 => 1.4680064E7,
+                                        220 => 1.6777216E7,
+                                        221 => 2.097152E7,
+                                        222 => 2.5165824E7,
+                                        223 => 2.9360128E7,
+                                        224 => 3.3554432E7,
+                                        225 => 4.194304E7,
+                                        226 => 5.0331648E7,
+                                        227 => 5.8720256E7,
+                                        228 => 6.7108864E7,
+                                        229 => 8.388608E7,
+                                        230 => 1.00663296E8,
+                                        231 => 1.17440512E8,
+                                        232 => 1.34217728E8,
+                                        233 => 1.6777216E8,
+                                        234 => 2.01326592E8,
+                                        235 => 2.34881024E8,
+                                        236 => 2.68435456E8,
+                                        237 => 3.3554432E8,
+                                        238 => 4.02653184E8,
+                                        239 => 4.69762048E8,
+                                        240 => 5.3687091E8,
+                                        241 => 6.7108864E8,
+                                        242 => 8.0530637E8,
+                                        243 => 9.395241E8,
+                                        244 => 1.07374182E9,
+                                        245 => 1.34217728E9,
+                                        246 => 1.61061274E9,
+                                        247 => 1.87904819E9,
+                                        248 => 2.14748365E9,
+                                        249 => 2.68435456E9,
+                                        250 => 3.22122547E9,
+                                        251 => 3.75809638E9,
+                                        252 => 4.2949673E9,
+                                        253 => 5.3687091E9,
+                                        254 => 6.4424509E9,
+                                        255 => 7.5161928E9 );
+
+
+    /**
+     * Replaces the Similarity implementation that getDefault() hands out.
+     *
+     * @param Zend_Search_Lucene_Search_Similarity $similarity
+     * @return void
+     */
+    public static function setDefault(Zend_Search_Lucene_Search_Similarity $similarity)
+    {
+        self::$_defaultImpl = $similarity;
+    }
+
+
+    /**
+     * Returns the default Similarity implementation.
+     *
+     * When none has been stored yet, a Zend_Search_Lucene_Search_Similarity_Default
+     * instance is created, remembered and returned.
+     *
+     * @return Zend_Search_Lucene_Search_Similarity
+     */
+    public static function getDefault()
+    {
+        $isConfigured = (self::$_defaultImpl instanceof Zend_Search_Lucene_Search_Similarity);
+        if (!$isConfigured) {
+            self::$_defaultImpl = new Zend_Search_Lucene_Search_Similarity_Default();
+        }
+
+        return self::$_defaultImpl;
+    }
+
+
+    /**
+     * Computes the normalization value for a field given the total number of
+     * terms contained in a field.  These values, together with field boosts, are
+     * stored in an index and multiplied into scores for hits on each field by the
+     * search code.
+     *
+     * Matches in longer fields are less precise, so implementations of this
+     * method usually return smaller values when 'numTokens' is large,
+     * and larger values when 'numTokens' is small.
+     *
+     * Note that these values are computed under
+     * IndexWriter::addDocument(Document) and then stored using
+     * encodeNorm(float).  Thus they have limited precision, and documents
+     * must be re-indexed if this method is altered.
+     *
+     * fieldName - name of field
+     * numTokens - the total number of tokens contained in fields named
+     *             'fieldName' of 'doc'.
+     * Returns a normalization factor for hits on this field of this document
+     *
+     * @param string $fieldName
+     * @param integer $numTokens
+     * @return float
+     */
+    abstract public function lengthNorm($fieldName, $numTokens);
+
+    /**
+     * Computes the normalization value for a query given the sum of the squared
+     * weights of each of the query terms.  This value is then multiplied into the
+     * weight of each query term.
+     *
+     * This does not affect ranking, but rather just attempts to make scores
+     * from different queries comparable.
+     *
+     * sumOfSquaredWeights - the sum of the squares of query term weights
+     * Returns a normalization factor for query weights
+     *
+     * @param float $sumOfSquaredWeights
+     * @return float
+     */
+    abstract public function queryNorm($sumOfSquaredWeights);
+
+
+    /**
+     * Decodes a normalization factor stored in an index.
+     *
+     * Only the low eight bits of $byte are used to select the table entry.
+     *
+     * @param integer $byte
+     * @return float
+     */
+    public static function decodeNorm($byte)
+    {
+        $index = $byte & 0xFF;
+
+        return self::$_normTable[$index];
+    }
+
+
+    /**
+     * Encodes a normalization factor for storage in an index.
+     *
+     * The result is the index (0-255) of an entry in the norm table, whose
+     * positive entries range from about 6x10^-10 to about 7.5x10^9.  Zero is
+     * also represented.  Zero and negative numbers encode to 0.  Values too
+     * large to represent are rounded down to the largest representable
+     * value.  Positive values too small to represent are rounded up to the
+     * smallest positive representable value.  Everything else is rounded to
+     * the closest table entry.
+     *
+     * @param float $f
+     * @return integer
+     */
+    public static function encodeNorm($f)
+    {
+        return self::_floatToByte($f);
+    }
+
+    /**
+     * Float to byte conversion: finds the norm-table index that best
+     * represents $f.
+     *
+     * @param float $f
+     * @return integer index in the range 0-255
+     */
+    private static function _floatToByte($f)
+    {
+        // round negatives up to zero
+        if ($f <= 0.0) {
+            return 0;
+        }
+
+        // Binary search over the ascending table.  When no exact match is
+        // found the loop ends with $highIndex on the largest entry below $f.
+        $lowIndex  = 0;
+        $highIndex = 255;
+        while ($highIndex >= $lowIndex) {
+            $mid   = ($highIndex + $lowIndex) >> 1;
+            $delta = $f - self::$_normTable[$mid];
+
+            if ($delta < 0) {
+                $highIndex = $mid - 1;
+            } elseif ($delta > 0) {
+                $lowIndex  = $mid + 1;
+            } else {
+                return $mid; // exact match
+            }
+        }
+
+        // Positive underflow: $f lies strictly between zero and the smallest
+        // positive entry.  Rounding to the closest entry could yield 0 here,
+        // which would turn a positive norm into zero, so always use entry 1.
+        if ($highIndex == 0) {
+            return 1;
+        }
+
+        // Overflow: nothing above the last entry to round towards.
+        if ($highIndex == 255) {
+            return 255;
+        }
+
+        // round to closest value (ties go to the lower entry)
+        $distanceBelow = $f - self::$_normTable[$highIndex];
+        $distanceAbove = self::$_normTable[$highIndex + 1] - $f;
+
+        return ($distanceBelow > $distanceAbove) ? $highIndex + 1 : $highIndex;
+    }
+
+
+    /**
+     * Computes a score factor based on a term or phrase's frequency in a
+     * document.  This value is multiplied by the idf(Term, Searcher)
+     * factor for each term in the query and these products are then summed to
+     * form the initial score for a document.
+     *
+     * Terms and phrases repeated in a document indicate the topic of the
+     * document, so implementations of this method usually return larger values
+     * when 'freq' is large, and smaller values when 'freq' is small.
+     *
+     * freq - the frequency of a term within a document
+     * Returns a score factor based on a term's within-document frequency
+     *
+     * @param float $freq
+     * @return float
+     */
+    abstract public function tf($freq);
+
+    /**
+     * Computes the amount of a sloppy phrase match, based on an edit distance.
+     * This value is summed for each sloppy phrase match in a document to form
+     * the frequency that is passed to tf(float).
+     *
+     * A phrase match with a small edit distance to a document passage more
+     * closely matches the document, so implementations of this method usually
+     * return larger values when the edit distance is small and smaller values
+     * when it is large.
+     *
+     * distance - the edit distance of this sloppy phrase match
+     * Returns the frequency increment for this match
+     *
+     * @param integer $distance
+     * @return float
+     */
+    abstract public function sloppyFreq($distance);
+
+
+    /**
+     * Computes a score factor for a simple term or a phrase.
+     *
+     * For a single term this is
+     *   idfFreq($reader->docFreq($term), $reader->count());
+     * for an array of terms it is the sum of that expression over all
+     * terms in the array.
+     *
+     * input - the term in question or array of terms
+     * reader - the document collection being searched
+     *
+     * @param mixed $input
+     * @param Zend_Search_Lucene $reader
+     * @return float a score factor for the term(s)
+     */
+    public function idf($input, $reader)
+    {
+        if (is_array($input)) {
+            $total = 0.0;
+            foreach ($input as $term) {
+                $total += $this->idfFreq($reader->docFreq($term), $reader->count());
+            }
+
+            return $total;
+        }
+
+        return $this->idfFreq($reader->docFreq($input), $reader->count());
+    }
+
+    /**
+     * Computes a score factor based on a term's document frequency (the number
+     * of documents which contain the term).  This value is multiplied by the
+     * tf(int) factor for each term in the query and these products are
+     * then summed to form the initial score for a document.
+     *
+     * Terms that occur in fewer documents are better indicators of topic, so
+     * implementations of this method usually return larger values for rare terms,
+     * and smaller values for common terms.
+     *
+     * docFreq - the number of documents which contain the term
+     * numDocs - the total number of documents in the collection
+     * Returns a score factor based on the term's document frequency
+     *
+     * @param integer $docFreq
+     * @param integer $numDocs
+     * @return float
+     */
+    abstract public function idfFreq($docFreq, $numDocs);
+
+    /**
+     * Computes a score factor based on the fraction of all query terms that a
+     * document contains.  This value is multiplied into scores.
+     *
+     * The presence of a large portion of the query terms indicates a better
+     * match with the query, so implementations of this method usually return
+     * larger values when the ratio between these parameters is large and smaller
+     * values when the ratio between them is small.
+     *
+     * overlap - the number of query terms matched in the document
+     * maxOverlap - the total number of terms in the query
+     * Returns a score factor based on term overlap with the query
+     *
+     * @param integer $overlap
+     * @param integer $maxOverlap
+     * @return float
+     */
+    abstract public function coord($overlap, $maxOverlap);
+}
+
diff --git a/search/Zend/Search/Lucene/Search/Similarity/Default.php b/search/Zend/Search/Lucene/Search/Similarity/Default.php
new file mode 100644 (file)
index 0000000..6cafb59
--- /dev/null
@@ -0,0 +1,105 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_Similarity_Default extends Zend_Search_Lucene_Search_Similarity
+{
+
+    /**
+     * Implemented as '1/sqrt(numTerms)'.
+     *
+     * A field without terms yields 1E10 (a large finite stand-in) so that
+     * no division by zero takes place. $fieldName is not consulted by this
+     * implementation.
+     *
+     * @param string $fieldName
+     * @param integer $numTerms
+     * @return float
+     */
+    public function lengthNorm($fieldName, $numTerms)
+    {
+        if ($numTerms == 0) {
+            return 1E10;
+        }
+
+        return 1.0/sqrt($numTerms);
+    }
+
+    /**
+     * Implemented as '1/sqrt(sumOfSquaredWeights)'.
+     *
+     * A zero sum would divide by zero (a warning on older PHP versions, a
+     * DivisionByZeroError on PHP 8), so the neutral factor 1.0 is returned
+     * for it instead.
+     *
+     * @param float $sumOfSquaredWeights
+     * @return float
+     */
+    public function queryNorm($sumOfSquaredWeights)
+    {
+        if ($sumOfSquaredWeights == 0) {
+            return 1.0;
+        }
+
+        return 1.0/sqrt($sumOfSquaredWeights);
+    }
+
+    /**
+     * Implemented as 'sqrt(freq)'.
+     *
+     * @param float $freq
+     * @return float
+     */
+    public function tf($freq)
+    {
+        return sqrt($freq);
+    }
+
+    /**
+     * Implemented as '1/(distance + 1)'.
+     *
+     * @param integer $distance
+     * @return float
+     */
+    public function sloppyFreq($distance)
+    {
+        return 1.0/($distance + 1);
+    }
+
+    /**
+     * Implemented as 'log(numDocs/(docFreq+1)) + 1'.
+     *
+     * @param integer $docFreq
+     * @param integer $numDocs
+     * @return float
+     */
+    public function idfFreq($docFreq, $numDocs)
+    {
+        return log($numDocs/(float)($docFreq+1)) + 1.0;
+    }
+
+    /**
+     * Implemented as 'overlap/maxOverlap'.
+     *
+     * When $maxOverlap is zero there is nothing to divide by, so 0.0 is
+     * returned rather than triggering a division by zero.
+     *
+     * @param integer $overlap
+     * @param integer $maxOverlap
+     * @return float
+     */
+    public function coord($overlap, $maxOverlap)
+    {
+        if ($maxOverlap == 0) {
+            return 0.0;
+        }
+
+        return $overlap/(float)$maxOverlap;
+    }
+}
diff --git a/search/Zend/Search/Lucene/Search/Weight.php b/search/Zend/Search/Lucene/Search/Weight.php
new file mode 100644 (file)
index 0000000..248f5cb
--- /dev/null
@@ -0,0 +1,61 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * Calculate query weights and build query scorers.
+ *
+ * A Weight is constructed by a query Query->createWeight().
+ * The sumOfSquaredWeights() method is then called on the top-level
+ * query to compute the query normalization factor Similarity->queryNorm(float).
+ * This factor is then passed to normalize(float).  At this point the weighting
+ * is complete.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+abstract class Zend_Search_Lucene_Search_Weight
+{
+    /**
+     * The weight for this query.
+     *
+     * @return float
+     */
+    abstract public function getValue();
+
+    /**
+     * The sum of squared weights of contained query clauses.
+     *
+     * @return float
+     */
+    abstract public function sumOfSquaredWeights();
+
+    /**
+     * Assigns the query normalization factor to this weight.
+     *
+     * @param float $norm query normalization factor
+     */
+    abstract public function normalize($norm);
+}
+
diff --git a/search/Zend/Search/Lucene/Search/Weight/MultiTerm.php b/search/Zend/Search/Lucene/Search/Weight/MultiTerm.php
new file mode 100644 (file)
index 0000000..448bb06
--- /dev/null
@@ -0,0 +1,135 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Search_Weight */
+require_once 'Zend/Search/Lucene/Search/Weight.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_Weight_MultiTerm extends Zend_Search_Lucene_Search_Weight
+{
+    /**
+     * Index reader the weight was created for.
+     *
+     * @var Zend_Search_Lucene
+     */
+    private $_reader;
+
+    /**
+     * The multi-term query this weight belongs to.
+     *
+     * @var Zend_Search_Lucene_Search_Query_MultiTerm
+     */
+    private $_query;
+
+    /**
+     * Weights of the individual query terms
+     * (Zend_Search_Lucene_Search_Weight_Term objects), keyed by the
+     * term's number within the query.
+     *
+     * @var array
+     */
+    private $_weights;
+
+
+    /**
+     * Zend_Search_Lucene_Search_Weight_MultiTerm constructor
+     *
+     * Creates a term weight for every query term whose sign is null or
+     * true (or for every term, if the query has no signs at all) and
+     * hands it to the query through setWeight(). Terms with a false sign
+     * get no weight.
+     *
+     * @param Zend_Search_Lucene_Search_Query_MultiTerm $query  the query that this concerns
+     * @param Zend_Search_Lucene $reader  index reader
+     */
+    public function __construct($query, $reader)
+    {
+        $this->_query   = $query;
+        $this->_reader  = $reader;
+        $this->_weights = array();
+
+        $signs = $query->getSigns();
+
+        foreach ($query->getTerms() as $num => $term) {
+            // skip terms that carry an explicit, false sign
+            if ($signs !== null && $signs[$num] !== null && !$signs[$num]) {
+                continue;
+            }
+
+            $termWeight = new Zend_Search_Lucene_Search_Weight_Term($term, $query, $reader);
+            $this->_weights[$num] = $termWeight;
+            $query->setWeight($num, $termWeight);
+        }
+    }
+
+
+    /**
+     * The weight for this query, which is simply the query boost.
+     *
+     * @return float
+     */
+    public function getValue()
+    {
+        return $this->_query->getBoost();
+    }
+
+
+    /**
+     * The sum of squared weights of contained query clauses,
+     * multiplied by the squared query boost.
+     *
+     * Returns 1.0 instead of zero, which covers queries without any
+     * weighted term (like '-something -another').
+     *
+     * @return float
+     */
+    public function sumOfSquaredWeights()
+    {
+        $total = 0;
+        foreach ($this->_weights as $termWeight) {
+            $total += $termWeight->sumOfSquaredWeights();
+        }
+
+        // apply the query boost, squared, to the accumulated sum
+        $squaredBoost = $this->_query->getBoost() * $this->_query->getBoost();
+        $total *= $squaredBoost;
+
+        return ($total == 0) ? 1.0 : $total;
+    }
+
+
+    /**
+     * Assigns the query normalization factor to this.
+     *
+     * The factor is multiplied by the query boost and then passed on to
+     * every term weight.
+     *
+     * @param float $queryNorm
+     */
+    public function normalize($queryNorm)
+    {
+        $boostedNorm = $queryNorm * $this->_query->getBoost();
+
+        foreach ($this->_weights as $termWeight) {
+            $termWeight->normalize($boostedNorm);
+        }
+    }
+}
+
+
diff --git a/search/Zend/Search/Lucene/Search/Weight/Phrase.php b/search/Zend/Search/Lucene/Search/Weight/Phrase.php
new file mode 100644 (file)
index 0000000..5366596
--- /dev/null
@@ -0,0 +1,141 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * Zend_Search_Lucene_Search_Weight
+ */
+require_once 'Zend/Search/Lucene/Search/Weight.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_Weight_Phrase extends Zend_Search_Lucene_Search_Weight
+{
+    /**
+     * Index reader the weight was created for.
+     *
+     * @var Zend_Search_Lucene
+     */
+    private $_reader;
+
+    /**
+     * The phrase query this weight belongs to.
+     *
+     * @var Zend_Search_Lucene_Search_Query_Phrase
+     */
+    private $_query;
+
+    /**
+     * Weight value; assigned by normalize().
+     *
+     * @var float
+     */
+    private $_value;
+
+    /**
+     * Score factor (idf); assigned by sumOfSquaredWeights().
+     *
+     * @var float
+     */
+    private $_idf;
+
+    /**
+     * Normalization factor; assigned by normalize().
+     *
+     * @var float
+     */
+    private $_queryNorm;
+
+
+    /**
+     * Query weight; assigned by sumOfSquaredWeights() and scaled by
+     * normalize().
+     *
+     * @var float
+     */
+    private $_queryWeight;
+
+
+    /**
+     * Zend_Search_Lucene_Search_Weight_Phrase constructor
+     *
+     * @param Zend_Search_Lucene_Search_Query_Phrase $query  the query that this concerns
+     * @param Zend_Search_Lucene $reader  index reader
+     */
+    public function __construct(Zend_Search_Lucene_Search_Query_Phrase $query, Zend_Search_Lucene $reader)
+    {
+        $this->_reader = $reader;
+        $this->_query  = $query;
+    }
+
+
+    /**
+     * The weight for this query, as computed by normalize().
+     *
+     * @return float
+     */
+    public function getValue()
+    {
+        return $this->_value;
+    }
+
+
+    /**
+     * The sum of squared weights of contained query clauses.
+     *
+     * Stores the idf of the phrase terms and the boosted query weight as
+     * a side effect; normalize() relies on both.
+     *
+     * @return float
+     */
+    public function sumOfSquaredWeights()
+    {
+        // idf over all terms of the phrase
+        $similarity = $this->_reader->getSimilarity();
+        $this->_idf = $similarity->idf($this->_query->getTerms(), $this->_reader);
+
+        // query weight = idf * boost
+        $weight = $this->_idf * $this->_query->getBoost();
+        $this->_queryWeight = $weight;
+
+        return $weight * $weight;
+    }
+
+
+    /**
+     * Assigns the query normalization factor to this.
+     *
+     * Scales the query weight by $queryNorm and derives the final weight
+     * value from it.
+     *
+     * @param float $queryNorm
+     */
+    public function normalize($queryNorm)
+    {
+        $this->_queryNorm = $queryNorm;
+
+        // scale the query weight by the normalization factor
+        $this->_queryWeight = $this->_queryWeight * $queryNorm;
+
+        // multiply in the idf once more for the document side
+        $this->_value = $this->_queryWeight * $this->_idf;
+    }
+}
+
+
diff --git a/search/Zend/Search/Lucene/Search/Weight/Term.php b/search/Zend/Search/Lucene/Search/Weight/Term.php
new file mode 100644 (file)
index 0000000..d502896
--- /dev/null
@@ -0,0 +1,146 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Search_Weight */
+require_once 'Zend/Search/Lucene/Search/Weight.php';
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Search
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Search_Weight_Term extends Zend_Search_Lucene_Search_Weight
+{
+    /**
+     * Index reader the weight was created for.
+     *
+     * @var Zend_Search_Lucene
+     */
+    private $_reader;
+
+    /**
+     * The term being weighted.
+     *
+     * @var Zend_Search_Lucene_Index_Term
+     */
+    private $_term;
+
+    /**
+     * The query the term belongs to.
+     *
+     * @var Zend_Search_Lucene_Search_Query
+     */
+    private $_query;
+
+    /**
+     * Weight value; assigned by normalize().
+     *
+     * @var float
+     */
+    private $_value;
+
+    /**
+     * Score factor (idf); assigned by sumOfSquaredWeights().
+     *
+     * @var float
+     */
+    private $_idf;
+
+    /**
+     * Normalization factor; assigned by normalize().
+     *
+     * @var float
+     */
+    private $_queryNorm;
+
+
+    /**
+     * Query weight; assigned by sumOfSquaredWeights() and scaled by
+     * normalize().
+     *
+     * @var float
+     */
+    private $_queryWeight;
+
+
+    /**
+     * Zend_Search_Lucene_Search_Weight_Term constructor
+     *
+     * @param Zend_Search_Lucene_Index_Term $term  the term to weight
+     * @param Zend_Search_Lucene_Search_Query $query  the query that this concerns
+     * @param Zend_Search_Lucene $reader  index reader
+     */
+    public function __construct($term, $query, $reader)
+    {
+        $this->_reader = $reader;
+        $this->_query  = $query;
+        $this->_term   = $term;
+    }
+
+
+    /**
+     * The weight for this query, as computed by normalize().
+     *
+     * @return float
+     */
+    public function getValue()
+    {
+        return $this->_value;
+    }
+
+
+    /**
+     * The sum of squared weights of contained query clauses.
+     *
+     * Stores the idf of the term and the boosted query weight as a side
+     * effect; normalize() relies on both.
+     *
+     * @return float
+     */
+    public function sumOfSquaredWeights()
+    {
+        // idf of the single term
+        $similarity = $this->_reader->getSimilarity();
+        $this->_idf = $similarity->idf($this->_term, $this->_reader);
+
+        // query weight = idf * boost
+        $weight = $this->_idf * $this->_query->getBoost();
+        $this->_queryWeight = $weight;
+
+        return $weight * $weight;
+    }
+
+
+    /**
+     * Assigns the query normalization factor to this.
+     *
+     * Scales the query weight by $queryNorm and derives the final weight
+     * value from it.
+     *
+     * @param float $queryNorm
+     */
+    public function normalize($queryNorm)
+    {
+        $this->_queryNorm = $queryNorm;
+
+        // scale the query weight by the normalization factor
+        $this->_queryWeight = $this->_queryWeight * $queryNorm;
+
+        // multiply in the idf once more for the document side
+        $this->_value = $this->_queryWeight * $this->_idf;
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Storage/Directory.php b/search/Zend/Search/Lucene/Storage/Directory.php
new file mode 100644 (file)
index 0000000..01ea380
--- /dev/null
@@ -0,0 +1,120 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Storage
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Storage
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+abstract class Zend_Search_Lucene_Storage_Directory
+{
+
+    /**
+     * Closes the store.
+     *
+     * @return void
+     */
+    abstract public function close();
+
+    /**
+     * Returns an array of strings, one for each file in the directory.
+     *
+     * @return array
+     */
+    abstract public function fileList();
+
+    /**
+     * Creates a new, empty file in the directory with the given $filename.
+     *
+     * @param string $filename
+     * @return Zend_Search_Lucene_Storage_File
+     */
+    abstract public function createFile($filename);
+
+
+    /**
+     * Removes an existing $filename in the directory.
+     *
+     * @param string $filename
+     * @return void
+     */
+    abstract public function deleteFile($filename);
+
+
+    /**
+     * Returns true if a file with the given $filename exists.
+     *
+     * @param string $filename
+     * @return boolean
+     */
+    abstract public function fileExists($filename);
+
+
+    /**
+     * Returns the length of a $filename in the directory.
+     *
+     * @param string $filename
+     * @return integer
+     */
+    abstract public function fileLength($filename);
+
+
+    /**
+     * Returns the UNIX timestamp $filename was last modified.
+     *
+     * @param string $filename
+     * @return integer
+     */
+    abstract public function fileModified($filename);
+
+
+    /**
+     * Renames an existing file in the directory.
+     *
+     * @param string $from  current file name
+     * @param string $to    new file name
+     * @return void
+     */
+    abstract public function renameFile($from, $to);
+
+
+    /**
+     * Sets the modified time of $filename to now.
+     *
+     * @param string $filename
+     * @return void
+     */
+    abstract public function touchFile($filename);
+
+
+    /**
+     * Returns a Zend_Search_Lucene_Storage_File object for a given $filename in the directory.
+     *
+     * @param string $filename
+     * @return Zend_Search_Lucene_Storage_File
+     */
+    abstract public function getFileObject($filename);
+
+}
+
diff --git a/search/Zend/Search/Lucene/Storage/Directory/Filesystem.php b/search/Zend/Search/Lucene/Storage/Directory/Filesystem.php
new file mode 100644 (file)
index 0000000..8d675c3
--- /dev/null
@@ -0,0 +1,272 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Storage
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Storage_Directory */
+require_once 'Zend/Search/Lucene/Storage/Directory.php';
+
+/** Zend_Search_Lucene_Storage_File_Filesystem */
+require_once 'Zend/Search/Lucene/Storage/File/Filesystem.php';
+
+
+/**
+ * FileSystem implementation of Directory abstraction.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Storage
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Storage_Directory_Filesystem extends Zend_Search_Lucene_Storage_Directory
+{
+    /**
+     * Filesystem path to the directory
+     *
+     * @var string
+     */
+    private $_dirPath = null;
+
+    /**
+     * Cache for Zend_Search_Lucene_Storage_File_Filesystem objects
+     * Array: filename => Zend_Search_Lucene_Storage_File object
+     *
+     * @var array
+     */
+    private $_fileHandlers;
+
+
+    /**
+     * Utility function to recursive directory creation
+     *
+     * Creates $dir together with any missing parent directories.
+     * $recursive is accepted but not consulted: parents are always created.
+     *
+     * @param string $dir
+     * @param integer $mode
+     * @param boolean $recursive
+     * @return boolean  true if the directory exists afterwards or is '/',
+     *                  false for a null/empty $dir or when creation fails
+     */
+
+    static public function mkdirs($dir, $mode = 0777, $recursive = true)
+    {
+        if (is_null($dir) || $dir === '') {
+            return false;
+        }
+        if (is_dir($dir) || $dir === '/') {
+            return true;
+        }
+        if (self::mkdirs(dirname($dir), $mode, $recursive)) {
+            return mkdir($dir, $mode);
+        }
+        return false;
+    }
+
+
+    /**
+     * Object constructor
+     * Checks if $path is a directory or tries to create it.
+     *
+     * @param string $path
+     * @throws Zend_Search_Lucene_Exception  if $path exists but is not a
+     *         directory, or if the directory cannot be created
+     */
+    public function __construct($path)
+    {
+        if (!is_dir($path)) {
+            if (file_exists($path)) {
+                throw new Zend_Search_Lucene_Exception('Path exists, but it\'s not a directory');
+            } else {
+                if (!self::mkdirs($path)) {
+                    throw new Zend_Search_Lucene_Exception("Can't create directory '$path'.");
+                }
+            }
+        }
+        $this->_dirPath = $path;
+        $this->_fileHandlers = array();
+    }
+
+
+    /**
+     * Closes the store.
+     *
+     * Closes every cached file object and empties the cache. The cache is
+     * reset to an empty array (rather than unset) so that calling close()
+     * again, or using the directory afterwards, does not touch an
+     * undefined property.
+     *
+     * @return void
+     */
+    public function close()
+    {
+        foreach ($this->_fileHandlers as $fileObject) {
+            $fileObject->close();
+        }
+
+        $this->_fileHandlers = array();
+    }
+
+
+    /**
+     * Returns an array of strings, one for each file in the directory.
+     *
+     * Sub-directories (including '.' and '..') are left out. If the
+     * directory cannot be opened, an empty array is returned.
+     *
+     * @return array
+     */
+    public function fileList()
+    {
+        $result = array();
+
+        $dirContent = opendir( $this->_dirPath );
+        if ($dirContent === false) {
+            return $result;
+        }
+
+        // Compare against false explicitly: an entry named "0" is falsy
+        // and would otherwise end the listing early.
+        while (($file = readdir($dirContent)) !== false) {
+            if (($file === '..')||($file === '.'))   continue;
+
+            $fullName = $this->_dirPath . '/' . $file;
+
+            if( !is_dir($fullName) ) {
+                $result[] = $file;
+            }
+        }
+
+        // release the directory handle
+        closedir($dirContent);
+
+        return $result;
+    }
+
+    /**
+     * Creates a new, empty file in the directory with the given $filename.
+     *
+     * A cached file object for the same name is closed and replaced. The
+     * file is opened in 'w+b' mode.
+     *
+     * @param string $filename
+     * @return Zend_Search_Lucene_Storage_File
+     */
+    public function createFile($filename)
+    {
+        if (isset($this->_fileHandlers[$filename])) {
+            $this->_fileHandlers[$filename]->close();
+        }
+        unset($this->_fileHandlers[$filename]);
+        $this->_fileHandlers[$filename] = new Zend_Search_Lucene_Storage_File_Filesystem($this->_dirPath . '/' . $filename, 'w+b');
+        return $this->_fileHandlers[$filename];
+    }
+
+
+    /**
+     * Removes an existing $filename in the directory.
+     *
+     * A cached file object for the name is closed and dropped first.
+     *
+     * @param string $filename
+     * @return void
+     */
+    public function deleteFile($filename)
+    {
+        if (isset($this->_fileHandlers[$filename])) {
+            $this->_fileHandlers[$filename]->close();
+        }
+        unset($this->_fileHandlers[$filename]);
+        unlink($this->_dirPath .'/'. $filename);
+    }
+
+
+    /**
+     * Returns true if a file with the given $filename exists.
+     *
+     * A file also counts as existing while a file object for it is cached.
+     *
+     * @param string $filename
+     * @return boolean
+     */
+    public function fileExists($filename)
+    {
+        return isset($this->_fileHandlers[$filename]) ||
+               file_exists($this->_dirPath . '/' . $filename);
+    }
+
+
+    /**
+     * Returns the length of a $filename in the directory.
+     *
+     * Uses the cached file object's size() when there is one, otherwise
+     * filesize().
+     *
+     * @param string $filename
+     * @return integer
+     */
+    public function fileLength($filename)
+    {
+        if (isset( $this->_fileHandlers[$filename] )) {
+            return $this->_fileHandlers[$filename]->size();
+        }
+        return filesize($this->_dirPath .'/'. $filename);
+    }
+
+
+    /**
+     * Returns the UNIX timestamp $filename was last modified.
+     *
+     * @param string $filename
+     * @return integer
+     */
+    public function fileModified($filename)
+    {
+        return filemtime($this->_dirPath .'/'. $filename);
+    }
+
+
+    /**
+     * Renames an existing file in the directory.
+     *
+     * Cached file objects for both names are closed and dropped, and an
+     * existing target file is deleted before the rename. Warnings raised
+     * by rename() are suppressed.
+     *
+     * @param string $from
+     * @param string $to
+     * @return boolean  result of rename()
+     */
+    public function renameFile($from, $to)
+    {
+        // isset() avoids reading undefined array keys when nothing is cached
+        if (isset($this->_fileHandlers[$from])) {
+            $this->_fileHandlers[$from]->close();
+        }
+        unset($this->_fileHandlers[$from]);
+
+        if (isset($this->_fileHandlers[$to])) {
+            $this->_fileHandlers[$to]->close();
+        }
+        unset($this->_fileHandlers[$to]);
+
+        if (file_exists($this->_dirPath . '/' . $to)) {
+            unlink($this->_dirPath . '/' . $to);
+        }
+
+        return @rename($this->_dirPath . '/' . $from, $this->_dirPath . '/' . $to);
+    }
+
+
+    /**
+     * Sets the modified time of $filename to now.
+     *
+     * @param string $filename
+     * @return boolean  result of touch()
+     */
+    public function touchFile($filename)
+    {
+        return touch($this->_dirPath .'/'. $filename);
+    }
+
+
+    /**
+     * Returns a Zend_Search_Lucene_Storage_File object for a given $filename in the directory.
+     *
+     * A cached object is rewound to offset 0 and reused; otherwise a new
+     * file object is created and cached.
+     *
+     * @param string $filename
+     * @return Zend_Search_Lucene_Storage_File
+     */
+    public function getFileObject($filename)
+    {
+        if (isset( $this->_fileHandlers[$filename] )) {
+            $this->_fileHandlers[$filename]->seek(0);
+            return $this->_fileHandlers[$filename];
+        }
+
+        $this->_fileHandlers[$filename] = new Zend_Search_Lucene_Storage_File_Filesystem($this->_dirPath . '/' . $filename);
+        return $this->_fileHandlers[$filename];
+    }
+}
+
diff --git a/search/Zend/Search/Lucene/Storage/File.php b/search/Zend/Search/Lucene/Storage/File.php
new file mode 100644 (file)
index 0000000..a53c75b
--- /dev/null
@@ -0,0 +1,371 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Storage
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/**
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Storage
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+abstract class Zend_Search_Lucene_Storage_File
+{
+    /**
+     * Reads $length number of bytes at the current position in the
+     * file and advances the file pointer.
+     *
+     * @param integer $length  number of bytes to read (defaults to 1)
+     * @return string
+     */
+    abstract protected function _fread($length=1);
+
+
+    /**
+     * Sets the file position indicator and advances the file pointer.
+     * The new position, measured in bytes from the beginning of the file,
+     * is obtained by adding offset to the position specified by whence,
+     * whose values are defined as follows:
+     * SEEK_SET - Set position equal to offset bytes.
+     * SEEK_CUR - Set position to current location plus offset.
+     * SEEK_END - Set position to end-of-file plus offset. (To move to
+     * a position before the end-of-file, you need to pass a negative value
+     * in offset.)
+     * Upon success, returns 0; otherwise, returns -1
+     *
+     * @param integer $offset
+     * @param integer $whence  SEEK_SET (the default), SEEK_CUR or SEEK_END
+     * @return integer
+     */
+    abstract public function seek($offset, $whence=SEEK_SET);
+
+    /**
+     * Returns the current file position.
+     *
+     * @return integer
+     */
+    abstract public function tell();
+
+    /**
+     * Writes $length number of bytes (all, if $length===null) to the end
+     * of the file.
+     *
+     * @param string $data     bytes to write
+     * @param integer $length  number of bytes to write; null (the default) writes all of $data
+     */
+    abstract protected function _fwrite($data, $length=null);
+
+
+    /**
+     * Reads one byte at the current file position, advances the file
+     * pointer and returns the byte's ordinal value.
+     *
+     * @return integer
+     */
+    public function readByte()
+    {
+        $char = $this->_fread(1);
+
+        return ord($char);
+    }
+
+    /**
+     * Writes a single byte, given by its ordinal value, to the end of the file.
+     *
+     * @param integer $byte
+     * @return mixed  whatever _fwrite() returns
+     */
+    public function writeByte($byte)
+    {
+        $char = chr($byte);
+
+        return $this->_fwrite($char, 1);
+    }
+
+    /**
+     * Reads $num bytes starting at the current file position
+     * and advances the file pointer.
+     *
+     * @param integer $num
+     * @return string
+     */
+    public function readBytes($num)
+    {
+        $bytes = $this->_fread($num);
+
+        return $bytes;
+    }
+
+    /**
+     * Writes num bytes of data (all, if $num===null) to the end
+     * of the file.
+     *
+     * @param string $data
+     * @param integer $num
+     */
+    public function writeBytes($data, $num=null)
+    {
+        $this->_fwrite($data, $num);
+    }
+
+
+    /**
+     * Reads an integer from the current position in the file
+     * and advances the file pointer.
+     *
+     * Four bytes are read and combined with the most significant byte first.
+     *
+     * @return integer
+     */
+    public function readInt()
+    {
+        $str = $this->_fread(4);
+
+        // Square-bracket offsets: the curly-brace form ($str{0}) is
+        // deprecated since PHP 7.4 and no longer parses on PHP 8.
+        return  ord($str[0]) << 24 |
+                ord($str[1]) << 16 |
+                ord($str[2]) << 8  |
+                ord($str[3]);
+    }
+
+
+    /**
+     * Writes an integer to the end of file as four bytes,
+     * most significant byte first.
+     *
+     * @param integer $value
+     */
+    public function writeInt($value)
+    {
+        $value = (int) $value;
+
+        $bytes = '';
+        foreach (array(24, 16, 8, 0) as $shift) {
+            $bytes .= chr($value >> $shift & 0xFF);
+        }
+
+        $this->_fwrite($bytes, 4);
+    }
+
+
+    /**
+     * Returns a long integer from the current position in the file
+     * and advances the file pointer.
+     *
+     * Eight bytes are read, but only the last four contribute to the result.
+     *
+     * @return integer
+     */
+    public function readLong()
+    {
+        $str = $this->_fread(8);
+
+        /**
+         * PHP uses long as largest integer. fseek() uses long for offset.
+         * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
+         * conversion to float.
+         * So, largest index segment file is 2Gb
+         *
+         * Square-bracket offsets are used because the curly-brace form
+         * ($str{4}) is deprecated since PHP 7.4 and no longer parses on PHP 8.
+         */
+        return  /* ord($str[0]) << 56  | */
+                /* ord($str[1]) << 48  | */
+                /* ord($str[2]) << 40  | */
+                /* ord($str[3]) << 32  | */
+                ord($str[4]) << 24  |
+                ord($str[5]) << 16  |
+                ord($str[6]) << 8   |
+                ord($str[7]);
+    }
+
+    /**
+     * Writes long integer to the end of file
+     *
+     * Eight bytes are written: four zero bytes followed by the low four
+     * bytes of $value, most significant byte first.
+     *
+     * @param integer $value
+     */
+    public function writeLong($value)
+    {
+        /**
+         * PHP uses long as largest integer. fseek() uses long for offset.
+         * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
+         * conversion to float.
+         * So, largest index segment file is 2Gb
+         */
+        $value = (int) $value;
+
+        $lowBytes = '';
+        foreach (array(24, 16, 8, 0) as $shift) {
+            $lowBytes .= chr($value >> $shift & 0xFF);
+        }
+
+        $this->_fwrite("\x00\x00\x00\x00" . $lowBytes, 8);
+    }
+
+
+
+    /**
+     * Reads a variable-length integer from the current position in the
+     * file and advances the file pointer.
+     *
+     * Every byte carries seven payload bits, least significant group first;
+     * a set high bit (0x80) means that another byte follows.
+     *
+     * @return integer
+     */
+    public function readVInt()
+    {
+        $byte   = ord($this->_fread(1));
+        $result = $byte & 0x7F;
+        $shift  = 7;
+
+        // Keep consuming bytes while the continuation bit is set.
+        while (($byte & 0x80) != 0) {
+            $byte    = ord($this->_fread(1));
+            $result |= ($byte & 0x7F) << $shift;
+            $shift  += 7;
+        }
+
+        return $result;
+    }
+
+    /**
+     * Writes an integer in variable-length form.
+     *
+     * The value is emitted seven bits at a time, lowest group first; every
+     * byte except the last one has its high bit (0x80) set.
+     *
+     * @param integer $value
+     */
+    public function writeVInt($value)
+    {
+        // Each pass writes one continuation byte and drops the 7 bits it held.
+        for ($rest = (int) $value; $rest > 0x7F; $rest >>= 7) {
+            $this->_fwrite(chr(0x80 | ($rest & 0x7F)));
+        }
+
+        // Final byte: whatever is left, high bit clear.
+        $this->_fwrite(chr($rest));
+    }
+
+
+    /**
+     * Reads a string from the current position in the file
+     * and advances the file pointer.
+     *
+     * The length prefix (a VInt) counts characters, not bytes. The method
+     * therefore reads one byte per character first and then fetches the
+     * additional bytes announced by every multi-byte lead byte it meets.
+     * The two-byte sequence 0xC0 0x80 is decoded to a single NUL byte.
+     *
+     * @return string
+     */
+    public function readString()
+    {
+        $strlen = $this->readVInt();
+        if ($strlen == 0) {
+            return '';
+        }
+
+        /**
+         * This implementation supports only Basic Multilingual Plane
+         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
+         * "supplementary characters" (characters whose code points are
+         * greater than 0xFFFF)
+         * Java 2 represents these characters as a pair of char (16-bit)
+         * values, the first from the high-surrogates range (0xD800-0xDBFF),
+         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
+         * they are encoded as usual UTF-8 characters in six bytes.
+         * Standard UTF-8 representation uses four bytes for supplementary
+         * characters.
+         */
+
+        $str_val = $this->_fread($strlen);
+
+        // From here on $strlen is the number of bytes the decoded string is
+        // expected to hold; it grows as continuation bytes are pulled in.
+        for ($count = 0; $count < $strlen; $count++) {
+            $lead = ord($str_val[$count]);
+            if (($lead & 0xC0) != 0xC0) {
+                // Single-byte character, nothing more to fetch.
+                continue;
+            }
+
+            $addBytes = 1;
+            if ($lead & 0x20) {
+                $addBytes++;
+
+                // Never used. Java2 doesn't encode strings in four bytes
+                if ($lead & 0x10) {
+                    $addBytes++;
+                }
+            }
+            $str_val .= $this->_fread($addBytes);
+            $strlen  += $addBytes;
+
+            // Check for null character. Java2 encodes null character
+            // in two bytes.
+            if ($lead == 0xC0 && ord($str_val[$count + 1]) == 0x80) {
+                // Collapse the pair into a real NUL byte. (Assigning the
+                // integer 0 to a string offset would store the character
+                // "0" instead.)
+                $str_val = substr($str_val, 0, $count)
+                         . "\x00"
+                         . substr($str_val, $count + 2);
+
+                // The string is one byte shorter again and the next
+                // character now directly follows $count, so the position
+                // must not be advanced past the removed byte.
+                $strlen--;
+            } else {
+                // Skip the continuation bytes of this character.
+                $count += $addBytes;
+            }
+        }
+
+        return $str_val;
+    }
+
+    /**
+     * Writes a string, prefixed with its length in characters (as a VInt).
+     *
+     * The input is expected to be UTF-8 already. Multi-byte lead bytes are
+     * only inspected to derive the character count, and NUL bytes are
+     * written as the two-byte sequence 0xC0 0x80.
+     *
+     * @param string $str
+     * @throws Zend_Search_Lucene_Exception if the byte sequence announces
+     *         more continuation bytes than the string contains
+     */
+    public function writeString($str)
+    {
+        /**
+         * This implementation supports only Basic Multilingual Plane
+         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
+         * "supplementary characters" (characters whose code points are
+         * greater than 0xFFFF)
+         * Java 2 represents these characters as a pair of char (16-bit)
+         * values, the first from the high-surrogates range (0xD800-0xDBFF),
+         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
+         * they are encoded as usual UTF-8 characters in six bytes.
+         * Standard UTF-8 representation uses four bytes for supplementary
+         * characters.
+         */
+
+        // convert input to a string before iterating string characters
+        settype($str, 'string');
+
+        $chars = $strlen = strlen($str);
+        $containNullChars = false;
+
+        for ($count = 0; $count < $strlen; $count++) {
+            /**
+             * String is already in Java 2 representation.
+             * We should only calculate actual string length and replace
+             * \x00 by \xC0\x80
+             */
+            $byte = ord($str[$count]);
+
+            if (($byte & 0xC0) == 0xC0) {
+                $addBytes = 1;
+                if ($byte & 0x20) {
+                    $addBytes++;
+
+                    // Never used. Java2 doesn't encode strings in four bytes
+                    // and we don't support non-BMP characters
+                    if ($byte & 0x10) {
+                        $addBytes++;
+                    }
+                }
+
+                // Continuation bytes belong to this character: they do not
+                // count as characters and are skipped by the scan.
+                $chars -= $addBytes;
+                $count += $addBytes;
+            } elseif ($byte == 0) {
+                // A NUL can only be a single-byte character, so the check
+                // has to live outside the multi-byte branch.
+                $containNullChars = true;
+            }
+        }
+
+        if ($chars < 0) {
+            throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
+        }
+
+        $this->writeVInt($chars);
+        if ($containNullChars) {
+            // str_replace(search, replace, subject)
+            $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
+        } else {
+            $this->_fwrite($str);
+        }
+    }
+
+
+    /**
+     * Reads a length-prefixed chunk of binary data from the current
+     * position in the file and advances the file pointer.
+     *
+     * The length is stored in front of the data as a VInt.
+     *
+     * @return string
+     */
+    public function readBinary()
+    {
+        $byteCount = $this->readVInt();
+
+        return $this->_fread($byteCount);
+    }
+}
\ No newline at end of file
diff --git a/search/Zend/Search/Lucene/Storage/File/Filesystem.php b/search/Zend/Search/Lucene/Storage/File/Filesystem.php
new file mode 100644 (file)
index 0000000..7c33543
--- /dev/null
@@ -0,0 +1,171 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Storage
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+
+
+/** Zend_Search_Lucene_Storage_File */
+require_once 'Zend/Search/Lucene/Storage/File.php';
+
+/** Zend_Search_Lucene_Exception */
+require_once 'Zend/Search/Lucene/Exception.php';
+
+
+/**
+ * Storage file implementation that works on a stream opened with fopen().
+ *
+ * @category   Zend
+ * @package    Zend_Search_Lucene
+ * @subpackage Storage
+ * @copyright  Copyright (c) 2006 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Search_Lucene_Storage_File_Filesystem extends Zend_Search_Lucene_Storage_File
+{
+    /**
+     * Resource of the open file
+     *
+     * @var resource
+     */
+    private $_fileHandle;
+
+    /**
+     * Class constructor.  Open the file.
+     *
+     * @param string $filename
+     * @param string $mode      fopen() mode string
+     * @throws Zend_Search_Lucene_Exception if the file cannot be opened
+     */
+    public function __construct($filename, $mode='r+b')
+    {
+        global $php_errormsg;
+
+        // Temporarily enable track_errors so that the message of the
+        // suppressed fopen() warning can be reported in the exception.
+        $trackErrors = ini_get( "track_errors");
+        ini_set('track_errors', '1');
+
+        $this->_fileHandle = @fopen($filename, $mode);
+
+        if ($this->_fileHandle===false) {
+            ini_set('track_errors', $trackErrors);
+
+            // $php_errormsg is not populated on every PHP version; never
+            // throw an exception without a message.
+            $message = empty($php_errormsg)
+                     ? "Unable to open file '$filename'"
+                     : $php_errormsg;
+            throw new Zend_Search_Lucene_Exception($message);
+        }
+
+        ini_set('track_errors', $trackErrors);
+    }
+
+    /**
+     * Sets the file position indicator and advances the file pointer.
+     * The new position, measured in bytes from the beginning of the file,
+     * is obtained by adding offset to the position specified by whence,
+     * whose values are defined as follows:
+     * SEEK_SET - Set position equal to offset bytes.
+     * SEEK_CUR - Set position to current location plus offset.
+     * SEEK_END - Set position to end-of-file plus offset. (To move to
+     * a position before the end-of-file, you need to pass a negative value
+     * in offset.)
+     * SEEK_CUR is the only supported offset type for compound files
+     *
+     * Upon success, returns 0; otherwise, returns -1
+     *
+     * @param integer $offset
+     * @param integer $whence
+     * @return integer
+     */
+    public function seek($offset, $whence=SEEK_SET)
+    {
+        return fseek($this->_fileHandle, $offset, $whence);
+    }
+
+
+    /**
+     * Get file position.
+     *
+     * @return integer
+     */
+    public function tell()
+    {
+        return ftell($this->_fileHandle);
+    }
+
+
+    /**
+     * Close File object
+     *
+     * Calling it again after the handle has been released is a no-op.
+     */
+    public function close()
+    {
+        if ($this->_fileHandle !== null ) {
+            @fclose($this->_fileHandle);
+            $this->_fileHandle = null;
+        }
+    }
+
+    /**
+     * Get the size of the already opened file
+     *
+     * The size is found by seeking to the end of the stream; the previous
+     * position is restored afterwards.
+     *
+     * @return integer
+     */
+    public function size()
+    {
+        $position = ftell($this->_fileHandle);
+        fseek($this->_fileHandle, 0, SEEK_END);
+        $size = ftell($this->_fileHandle);
+        fseek($this->_fileHandle,$position);
+
+        return $size;
+    }
+
+    /**
+     * Read a $length bytes from the file and advance the file pointer.
+     *
+     * Requests of 1024 bytes or more are read in a loop, because a single
+     * fread() call may return less data than was asked for.
+     *
+     * @param integer $length
+     * @return string
+     */
+    protected function _fread($length=1)
+    {
+        if ($length == 0) {
+            return '';
+        }
+
+        if ($length < 1024) {
+            return fread($this->_fileHandle, $length);
+        }
+
+        $data = '';
+        while ($length > 0) {
+            $nextBlock = fread($this->_fileHandle, $length);
+
+            // Stop on error or end of file only. A loose "!= false" test
+            // would also stop on a chunk that consists of the single
+            // character "0" and silently drop it.
+            if ($nextBlock === false || $nextBlock === '') {
+                break;
+            }
+
+            $data   .= $nextBlock;
+            $length -= strlen($nextBlock);
+        }
+        return $data;
+    }
+
+
+    /**
+     * Writes $length number of bytes (all, if $length===null) at the
+     * current position of the file handle.
+     *
+     * @param string $data
+     * @param integer $length
+     */
+    protected function _fwrite($data, $length=null)
+    {
+        if ($length === null ) {
+            fwrite($this->_fileHandle, $data);
+        } else {
+            fwrite($this->_fileHandle, $data, $length);
+        }
+    }
+}
+
diff --git a/search/Zend/Search/TODO.txt b/search/Zend/Search/TODO.txt
new file mode 100644 (file)
index 0000000..06f7b48
--- /dev/null
@@ -0,0 +1,14 @@
+@todo
+
+- Improve API: fix ZSearchMultiTermQuery($terms, $signs);
+
+- Analysis and indexing engine
+
+- Additional queries: phrase, wildcard, proximity, and range
+
+- Better class-level docblocks (most functions okay)
+
+- Some Windows issues(?) during indexing
+
+- Finish renaming classes to PEAR-like conventions
+
diff --git a/search/db/mysql.sql b/search/db/mysql.sql
new file mode 100644 (file)
index 0000000..867b575
--- /dev/null
@@ -0,0 +1,15 @@
+-- MySQL schema for the global search summary table.
+-- Columns: document type, title and url, last update time, and the
+-- course / user / group the document belongs to.
+-- NOTE(review): the table name carries no prefix placeholder while the PHP
+-- callers pass a prefixed table name - confirm which one is intended.
+CREATE TABLE IF NOT EXISTS `search_documents` (
+  `id` int(11) NOT NULL auto_increment,
+  `type` varchar(12) NOT NULL default 'none',
+  `title` varchar(100) NOT NULL default '',
+  `url` varchar(100) NOT NULL default '',
+  `updated` timestamp NOT NULL default CURRENT_TIMESTAMP,
+  `courseid` int(11) NOT NULL default '0',
+  `userid` int(11) NOT NULL default '0',
+  `groupid` int(11) NOT NULL default '0',
+  PRIMARY KEY  (`id`)
+) ENGINE=MyISAM AUTO_INCREMENT=1;
+
+-- The table may already exist from an earlier run: remove all rows and
+-- restart the id numbering.
+DELETE FROM `search_documents` WHERE 1;
+ALTER TABLE `search_documents` AUTO_INCREMENT =1;
+
diff --git a/search/db/postgres7.sql b/search/db/postgres7.sql
new file mode 100644 (file)
index 0000000..19e5fb3
--- /dev/null
@@ -0,0 +1,21 @@
+-- PostgreSQL schema for the global search summary table.
+-- Open questions from the original author:
+--  * this is probably a bit suspect: the id sequence may need to be created
+--    explicitly instead of relying on the default sequence name
+--  * the table owner is not settled either
+-- NOTE(review): unlike the MySQL schema there is no IF NOT EXISTS here, and
+-- courseid/userid/groupid have neither NOT NULL nor a default - confirm
+-- whether the two schemas are meant to match.
+
+CREATE TABLE search_documents
+(
+   id serial, 
+   "type" varchar(12) NOT NULL DEFAULT 'none', 
+   title varchar(100) NOT NULL default '', 
+   url varchar(100) NOT NULL default '', 
+   updated timestamp NOT NULL DEFAULT NOW(), 
+   courseid int4, 
+   userid int4, 
+   groupid int4, 
+   CONSTRAINT id_pkey PRIMARY KEY (id)
+) WITHOUT OIDS;
+
+--ALTER TABLE search_documents OWNER TO postgres;
+
+-- Remove all rows and reset the id sequence.
+-- NOTE(review): with the two-argument form of setval() the next generated
+-- id is presumably 2, not 1 - verify against the PostgreSQL documentation.
+DELETE FROM search_documents;
+SELECT setval('public.search_documents_id_seq', 1);
diff --git a/search/documents/document.php b/search/documents/document.php
new file mode 100644 (file)
index 0000000..f5d4697
--- /dev/null
@@ -0,0 +1,12 @@
+<?php
+  
+  /**
+   * Base class for documents handed to the search index.
+   *
+   * Adds the four keyword fields every document carries: type, courseid,
+   * userid and groupid.
+   */
+  class SearchDocument extends Zend_Search_Lucene_Document {
+    public function __construct($document_type, $cid, $uid, $gid) {
+      // field name => value, added in this order
+      $keywords = array(
+        'type'     => $document_type,
+        'courseid' => $cid,
+        'userid'   => $uid,
+        'groupid'  => $gid
+      );
+
+      foreach ($keywords as $name => $value) {
+        $this->addField(Zend_Search_Lucene_Field::Keyword($name, $value));
+      } //foreach
+    } //constructor
+  } //SearchDocument
+    
+?>
\ No newline at end of file
diff --git a/search/documents/wiki_document.php b/search/documents/wiki_document.php
new file mode 100644 (file)
index 0000000..a6d75ae
--- /dev/null
@@ -0,0 +1,28 @@
+<?php
+
+  require_once("$CFG->dirroot/search/documents/document.php");
+  
+  /**
+   * Search document for a single wiki page.
+   *
+   * Title and author are added as Text fields, the page content as an
+   * UnStored field, and page id, version and wiki id as Keyword fields.
+   * The common fields are added by the parent constructor afterwards.
+   */
+  class WikiSearchDocument extends SearchDocument {
+    public function __construct(&$page, $wiki_id, $cid, $uid, $gid) {
+      $text_fields = array(
+        'title'  => $page->pagename,
+        'author' => $page->author
+      );
+      foreach ($text_fields as $name => $value) {
+        $this->addField(Zend_Search_Lucene_Field::Text($name, $value));
+      } //foreach
+
+      $this->addField(Zend_Search_Lucene_Field::UnStored('contents', $page->content));
+
+      $keyword_fields = array(
+        'id'      => $page->id,
+        'version' => $page->version,
+        'wiki'    => $wiki_id
+      );
+      foreach ($keyword_fields as $name => $value) {
+        $this->addField(Zend_Search_Lucene_Field::Keyword($name, $value));
+      } //foreach
+
+      parent::__construct(SEARCH_WIKI_TYPE, $cid, $uid, $gid);
+    } //constructor
+  } //WikiSearchDocument
+  
+  /**
+   * Encodes a wiki page name for use as a URL query-string value.
+   *
+   * Spaces still become '+', as before. Reserved characters such as
+   * '&', '#', '%' and a literal '+' are now percent-encoded too, so they
+   * can no longer break or alter the generated link.
+   *
+   * @param string $str page name
+   * @return string encoded page name
+   */
+  function wiki_name_convert($str) {
+    return urlencode($str);
+  } //wiki_name_convert
+  
+  /**
+   * Builds the URL of the wiki page behind a search hit.
+   *
+   * Reads the wiki, title and version properties of $doc.
+   */
+  function wiki_make_link(&$doc) {
+    global $CFG;
+
+    $url  = $CFG->wwwroot.'/mod/wiki/view.php';
+    $url .= '?wid='.$doc->wiki;
+    $url .= '&page='.wiki_name_convert($doc->title);
+    $url .= '&version='.$doc->version;
+
+    return $url;
+  } //wiki_make_link
+  
+?>
\ No newline at end of file
diff --git a/search/index.php b/search/index.php
new file mode 100644 (file)
index 0000000..8c4db65
--- /dev/null
@@ -0,0 +1,10 @@
+<?php  
+  /*
+  Course-scoped entry checks, currently disabled:
+
+  $id = required_param('id', PARAM_INT);   // course
+  if (! $course = get_record("course", "id", $id)) {
+    error("Course ID is incorrect");
+  }
+  require_course_login($course);
+  add_to_log($course->id, "wiki", "view all", "index.php?id=$course->id", "");
+  */
+
+  // Nothing is rendered here; visitors are forwarded to the query page.
+  header('Location: query.php');
+?>
\ No newline at end of file
diff --git a/search/indexer.php b/search/indexer.php
new file mode 100644 (file)
index 0000000..b91b23a
--- /dev/null
@@ -0,0 +1,152 @@
+<?php
+  /*
+   * Global search indexer (admin only).
+   *
+   * Rebuilds the Lucene index in $CFG->dataroot/search from scratch and
+   * stores a summary row for every indexed document in the search_documents
+   * table. A module is indexed when its lib.php defines both
+   * <module>_iterator() and <module>_get_content_for_index().
+   *
+   * Must be requested with areyousure=yes.
+   */
+
+  //this'll take some time, set up the environment
+  @set_time_limit(0);
+  @ob_implicit_flush(true);
+  @ob_end_flush();
+
+  require_once('../config.php');
+  require_once("$CFG->dirroot/search/lib.php");
+
+  require_login();
+
+  if (!isadmin()) {
+    error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php");
+  } //if
+
+  //explicit confirmation, since the existing index is replaced
+  $sure = strtolower(optional_param('areyousure', '', PARAM_ALPHA));
+
+  if ($sure != 'yes') {
+    mtrace("Sorry, you weren't sure enough (<a href='index.php'>back to query page</a>).");
+    exit(0);
+  } //if
+
+  //check for php5 (lib.php)
+  if (!search_check_php5()) {
+    $phpversion = phpversion();
+    mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)");
+    exit(0);
+  } //if
+
+  require_once("$CFG->dirroot/search/Zend/Search/Lucene.php");
+
+  //begin timer
+  search_stopwatch();
+  mtrace('<pre>Server Time: '.date('r',time())."\n");
+
+  //paths
+  $index_path = $CFG->dataroot.'/search';
+  $index_db_file = "$CFG->dirroot/search/db/$CFG->dbtype.sql";
+
+  if (!file_exists($index_path)) {
+    mtrace("Data directory ($index_path) does not exist, attempting to create.");
+    if (!mkdir($index_path)) {
+      search_pexit("Error creating data directory at: $index_path. Please correct.");
+    } else {
+      mtrace("Directory successfully created.");
+    } //else
+  } else {
+    mtrace("Using $index_path as data directory.");
+  } //else
+
+  //stop accidental re-indexing (zzz)
+  //search_pexit("Not indexing at this time.");
+
+  //second argument true: create a new index
+  $index = new Zend_Search_Lucene($index_path, true);
+
+  //create the database tables
+  ob_start(); //turn output buffering on - to hide modify_database() output
+  modify_database($index_db_file, '', false);
+  ob_end_clean(); //chuck the buffer and resume normal operation
+
+  //empty database table goes here
+  // delete * from search_documents;
+  // set auto_increment back to 1
+
+  //-------- debug stuff
+  /*
+  include_once("$CFG->dirroot/mod/wiki/lib.php");
+
+  $wikis = get_all_instances_in_courses("wiki", get_courses());
+  #search_pexit($wikis[1]);
+  $entries = wiki_get_entries($wikis[1]);
+  #search_pexit($entries);
+
+  #$r = wiki_get_pages($entries[134]);
+  $r = wiki_get_latest_pages($entries[95]);
+
+  search_pexit($r);
+  //ignore me --------*/
+
+  mtrace('Starting activity modules');
+  if ($mods = get_records_select('modules' /*'index this module?' where statement*/)) {
+    foreach ($mods as $mod) {
+      $libfile = "$CFG->dirroot/mod/$mod->name/lib.php";
+      if (file_exists($libfile)) {
+        include_once($libfile);
+
+        $iter_function = $mod->name.'_iterator';
+        $index_function = $mod->name.'_get_content_for_index';
+        $include_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
+        $c = 0; //documents indexed for this module
+        $doc = new stdClass;
+
+        //only modules that provide both hooks take part in indexing
+        if (function_exists($index_function) && function_exists($iter_function)) {
+          include_once($include_file);
+
+          mtrace("Processing module function $index_function ...");
+
+          foreach ($iter_function() as $i) {
+            $documents = $index_function($i);
+
+            //begin transaction
+
+            foreach($documents as $document) {
+              $c++;
+
+              //db sync increases indexing time from 55 sec to 73 (64 on Saturday?), so ~30%
+              //therefore, let us make a custom insert function for this search module
+
+              //data object for db
+              //NOTE(review): 'update' and 'permissions' have no matching column in
+              //search/db/*.sql (the column is called 'updated') - confirm intent
+              $doc->type = $document->type;
+              //addslashes() instead of mysql_real_escape_string(): the latter only
+              //works with a MySQL connection and is gone from current PHP versions,
+              //while this script also ships a PostgreSQL schema
+              $doc->title = addslashes($document->title);
+              $doc->update = time();
+              $doc->permissions = 0;
+              $doc->url = 'none';
+              $doc->courseid = $document->courseid;
+              $doc->userid = $document->userid;
+              $doc->groupid = $document->groupid;
+
+              //insert summary into db
+              //NOTE(review): insert_record() presumably prepends $CFG->prefix itself,
+              //which would double the prefix here - verify against datalib
+              $id = insert_record($CFG->prefix.'search_documents', $doc);
+
+              //synchronise db with index
+              $document->addField(Zend_Search_Lucene_Field::Keyword('dbid', $id));
+              $index->addDocument($document);
+
+              //commit every 100 new documents, and print a status message
+              if (($c%100) == 0) {
+                $index->commit();
+                mtrace(".. $c");
+              } //if
+            } //foreach
+
+            //end transaction
+
+          } //foreach
+
+          //commit left over documents, and finish up
+          $index->commit();
+          mtrace("-- $c documents indexed");
+          mtrace('done.');
+        } //if
+      } //if
+    } //foreach
+  } //if
+
+  //done modules
+  mtrace('Finished activity modules');
+  search_stopwatch();
+  mtrace(".<br><a href='index.php'>Back to query page</a>.");
+  mtrace('</pre>');
+
+?>
\ No newline at end of file
diff --git a/search/indexersplash.php b/search/indexersplash.php
new file mode 100644 (file)
index 0000000..c10df92
--- /dev/null
@@ -0,0 +1,44 @@
+<?php
+  /*
+   * Confirmation page shown before (re)indexing (admin only).
+   *
+   * If the index directory or the search_documents table already holds
+   * data the admin is asked to confirm; otherwise the request is forwarded
+   * straight to the indexer.
+   */
+  require_once('../config.php');
+  require_once("$CFG->dirroot/search/lib.php");
+
+  require_login();
+
+  if (!isadmin()) {
+    error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php");
+  } //if
+
+  //check for php5 (lib.php)
+  if (!search_check_php5()) {
+    $phpversion = phpversion();
+    mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)");
+    exit(0);
+  } //if
+
+  $index_path = "$CFG->dataroot/search";
+  $index_dir  = get_directory_list($index_path, '', false, false);
+  $index_filecount = count($index_dir);
+
+  //check if the table exists in the db
+  $tables = $db->MetaTables();
+
+  //in_array() rather than array_search(): the latter returns the key of the
+  //match, and key 0 (first table in the list) would be read as "not found"
+  if (is_array($tables) and in_array('search_documents', $tables)) {
+    $db_count = count_records($CFG->prefix.'search_documents');
+  } else {
+    $db_count = 0;
+  } //else
+
+  //elaborate on error messages, when db!=0 and index=0 -> corrupt, etc.
+  if ($index_filecount != 0 or $db_count != 0) {
+    mtrace("<pre>The data directory ($index_path) contains $index_filecount files, and "
+          ."there are $db_count records in the <em>search_documents</em> table.");
+    mtrace('');
+    mtrace("This indicates that you have already indexed this site - click the following "
+          ."link if you're sure you want to continue: <a href='indexer.php?areyousure=yes'>Go!</a>");
+    mtrace('');
+    mtrace("<a href='index.php'>Back to query page</a>.");
+    mtrace("</pre>");
+  } else {
+    //nothing indexed yet, no confirmation needed
+    header('Location: indexer.php?areyousure=yes');
+  } //else
+?>
\ No newline at end of file
diff --git a/search/lib.php b/search/lib.php
new file mode 100644 (file)
index 0000000..081d9ef
--- /dev/null
@@ -0,0 +1,59 @@
+<?php
+
+  //Move this stuff to lib/searchlib.php?
+  // Author: Michael Champanis
+
+  //document types that can be searched; the value is the string stored in
+  //the 'type' field of a document ('none' is the fallback for untyped ones)
+  define('SEARCH_NO_TYPE', 'none');
+  define('SEARCH_WIKI_TYPE', 'wiki');
+  
+  //lists every known document type constant (wiki first, then 'none')
+  function search_get_document_types() {
+    return array(SEARCH_WIKI_TYPE, SEARCH_NO_TYPE);
+  } //search_get_document_types
+  
+  //shortens a url so it can fit on the results page: urls longer than
+  //$length are cut to $length characters and marked with "...", shorter
+  //ones are returned unchanged (no ellipsis when nothing was cut off)
+  function search_shorten_url($url, $length=30) {
+    $url = (string)$url;
+
+    if (strlen($url) <= $length) {
+      return $url;
+    } //if
+
+    return substr($url, 0, $length)."...";
+  } //search_shorten_url
+
+  //tells whether the running php is at least 5.0.0 (chosen arbitrarily as
+  //"a real php 5"); with $feedback set, a heading explaining the
+  //requirement is printed when the check fails
+  function search_check_php5($feedback=false) {
+    if (check_php_version("5.0.0")) {
+      return true;
+    } //if
+
+    if ($feedback) {
+      $phpversion = phpversion();
+      print_heading("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)");
+    } //if
+
+    return false;
+  } //search_check_php5
+  
+  //simple timer: the first call stores the start time in $GLOBALS, the
+  //second call prints the elapsed seconds (wrapped in <em> unless $cli is
+  //set) and clears the stored time again
+  function search_stopwatch($cli = false) {
+    $key = 'search_script_start_time';
+
+    if (empty($GLOBALS[$key])) {
+      //not running yet: start it
+      $GLOBALS[$key] = microtime(true);
+      return;
+    } //if
+
+    $elapsed = round(microtime(true) - $GLOBALS[$key], 6).' seconds';
+    print $cli ? $elapsed : '<em>'.$elapsed.'</em>';
+
+    unset($GLOBALS[$key]);
+  } //search_stopwatch
+  
+  //debugging aid: dumps arrays/objects with print_r(), prints any other
+  //non-empty value followed by <br>, then ends the script
+  function search_pexit($str = "") {
+    $is_structure = is_array($str) || is_object($str);
+
+    if ($is_structure) {
+      print_r($str);
+    } elseif ($str) {
+      print $str."<br>";
+    } //elseif
+
+    exit(0);
+  } //search_pexit
+
+?>
\ No newline at end of file
diff --git a/search/query.php b/search/query.php
new file mode 100644 (file)
index 0000000..59169b5
--- /dev/null
@@ -0,0 +1,116 @@
+<?php
+
+  /*
+   * Global search query page: shows the search form and, when a query was
+   * submitted and an index exists, the list of hits.
+   */
+  require_once('../config.php');
+  require_once("$CFG->dirroot/search/lib.php");
+
+  //check for php5, but don't die yet (the page header is printed first)
+  if ($check = search_check_php5()) {
+    require_once("$CFG->dirroot/search/Zend/Search/Lucene.php");
+    require_once("$CFG->dirroot/search/documents/wiki_document.php");
+
+    $query_string = optional_param('query_string', '', PARAM_CLEAN);
+    $index_path = "$CFG->dataroot/search";
+    $no_index = false; //optimism!
+
+    //the query string ends up in html text and in an attribute value
+    $query_html = htmlspecialchars($query_string, ENT_QUOTES);
+
+    try {
+      $index = new Zend_Search_Lucene($index_path, false);
+    } catch(Exception $e) {
+      //print $e;
+      $no_index = true;
+    } //catch
+  } //if
+
+  if (!$site = get_site()) {
+    redirect("index.php");
+  } //if
+
+  $strsearch = "Search"; //get_string();
+  $strquery  = "Enter your search query"; //get_string();
+
+  print_header("$site->shortname: $strsearch: $strquery", "$site->fullname",
+               "<a href=\"index.php\">$strsearch</a> -> $strquery");
+
+  //keep things pretty, even if php5 isn't available
+  if (!$check) {
+    //search_check_php5(true) prints the explanatory heading itself
+    search_check_php5(true);
+    print_footer();
+    exit(0);
+  } //if
+
+  print_simple_box_start('center', '100%', '', 20);
+  print_heading($strquery);
+
+  print_simple_box_start('center', '', '', 20);
+?>
+
+<form name="query" method="get" action="query.php">
+  <input type="text" name="query_string" size="50" value="<?php print $query_html ?>"/>
+  &nbsp;<input type="submit" value="Search"/>&nbsp;&nbsp;<a href="query.php?advanced=yes">Advanced search</a>
+  <a href="stats.php">Statistics</a>
+</form>
+
+<br>
+
+<div align="center">
+<?php
+echo 'Searching: ';
+
+if ($no_index) {
+  print "0";
+} else {
+  print $index->count();
+} //else
+
+print ' documents.';
+
+if ($no_index and isadmin()) {
+  print "<br><br>Admin: There appears to be no index, click <a href='indexersplash.php'>here</a> to create one.";
+} //if
+?>
+</div>
+
+<?php
+  print_simple_box_end();
+
+  if (!empty($query_string) and !$no_index) {
+    print_simple_box_start('center', '50%', 'white', 10);
+
+    search_stopwatch();
+    $hits = $index->find(strtolower($query_string));
+
+    print "<br>";
+
+    print count($hits)." results returned for '".$query_html."'.";
+    print "<br><br>";
+
+    print "<ol>";
+
+    foreach ($hits as $listing) {
+      //every document type has its own <type>_make_link() function, so
+      //resolve it per hit instead of reusing the one of the first hit
+      $link_function = $listing->type.'_make_link';
+      $link = $link_function($listing);
+
+      //index content and urls are escaped before they reach the page
+      print "<li><a href='".htmlspecialchars($link, ENT_QUOTES)."'>"
+           .htmlspecialchars($listing->title, ENT_QUOTES)."</a><br>\n"
+           ."<em>".htmlspecialchars(search_shorten_url($link, 70), ENT_QUOTES)."</em><br>\n"
+           ."Type: ".htmlspecialchars($listing->type, ENT_QUOTES).", score: ".round($listing->score, 3)."<br>\n"
+           ."<br></li>\n";
+    } //foreach
+
+    print "</ol>";
+
+    print_simple_box_end();
+  } //if
+
+  if (!empty($query_string) and !$no_index) {
+?>
+
+<div align="center">
+  It took <?php search_stopwatch(); ?> to fetch these results.
+</div>
+
+<?php
+  } //if
+
+  print_simple_box_end();
+  print_footer();
+?>
\ No newline at end of file
diff --git a/search/stats.php b/search/stats.php
new file mode 100644 (file)
index 0000000..caf23e7
--- /dev/null
@@ -0,0 +1,91 @@
+<?php
+  /*
+   * Global search statistics page: index directory figures (admins only)
+   * and the number of rows in search_documents, in total and per
+   * document type.
+   */
+  require_once('../config.php');
+  require_once("$CFG->dirroot/search/lib.php");
+
+  //check for php5, but don't die yet
+  if ($check = search_check_php5()) {
+    //filesystem stats
+    $index_path = "$CFG->dataroot/search";
+    $index_size = display_size(get_directory_size($index_path));
+    $index_dir  = get_directory_list($index_path, '', false, false);
+    $index_filecount = count($index_dir);
+
+    //indexed documents stats
+    $tables = $db->MetaTables();
+    $type_counts = array();
+
+    //in_array() rather than array_search(): the latter returns the key of
+    //the match, and key 0 (first table in the list) would read as "not found"
+    if (is_array($tables) and in_array('search_documents', $tables)) {
+      $types = search_get_document_types();
+      sort($types);
+
+      //total documents
+      $type_counts['Total'] = count_records($CFG->prefix.'search_documents');
+
+      foreach($types as $type) {
+        $c = count_records($CFG->prefix.'search_documents', 'type', $type);
+        $type_counts[$type] = (int)$c;
+      } //foreach
+    } else {
+      $type_counts['Total'] = 0;
+    } //else
+  } //if
+
+  if (!$site = get_site()) {
+    redirect("index.php");
+  } //if
+
+  $strsearch = "Search"; //get_string();
+  $strquery  = "Search statistics"; //get_string();
+
+  print_header("$site->shortname: $strsearch: $strquery", "$site->fullname",
+               "<a href=\"index.php\">$strsearch</a> -> $strquery");
+
+  //keep things pretty, even if php5 isn't available
+  if (!$check) {
+    //search_check_php5(true) prints the explanatory heading itself
+    search_check_php5(true);
+    print_footer();
+    exit(0);
+  } //if
+
+  print_simple_box_start('center', '100%', '', 20);
+  print_heading($strquery);
+
+  print_simple_box_start('center', '', '', 20);
+
+  //index directory table (shown to admins only, see below);
+  //the objects are created explicitly instead of relying on php to
+  //conjure them up on first property assignment
+  $table = new stdClass;
+  $table->tablealign = "center";
+  $table->align = array ("right", "left");
+  $table->wrap = array ("nowrap", "nowrap");
+  $table->cellpadding = 5;
+  $table->cellspacing = 0;
+  $table->width = '500';
+
+  $table->data[] = array('<strong>Data directory</strong>', '<em><strong>'.$index_path.'</strong></em>');
+  $table->data[] = array('Files in index directory', $index_filecount);
+  $table->data[] = array('Total size', $index_size);
+
+  if ($index_filecount == 0) {
+    $table->data[] = array('Click to create index', "<a href='indexersplash.php'>Indexer</a>");
+  } //if
+
+  //database table: one row per entry of $type_counts
+  $return_of_table = new stdClass;
+  $return_of_table->tablealign = "center";
+  $return_of_table->align = array ("right", "left");
+  $return_of_table->wrap = array ("nowrap", "nowrap");
+  $return_of_table->cellpadding = 5;
+  $return_of_table->cellspacing = 0;
+  $return_of_table->width = '500';
+
+  $return_of_table->data[] = array('<strong>Database</strong>', '<em><strong>search_documents</strong></em>');
+  foreach($type_counts as $key => $value) {
+    $return_of_table->data[] = array($key, $value);
+  } //foreach
+
+  if (isadmin()) {
+    print_table($table);
+    print_spacer(20);
+  } //if
+
+  print_table($return_of_table);
+
+  print_simple_box_end();
+  print_simple_box_end();
+  print_footer();
+?>
\ No newline at end of file