Fixed a couple of bugs in query, and improved the logic of querylib.
authormchampan <mchampan>
Fri, 8 Sep 2006 01:18:48 +0000 (01:18 +0000)
committermchampan <mchampan>
Fri, 8 Sep 2006 01:18:48 +0000 (01:18 +0000)
search/README.txt
search/Zend/IMPORTANT.txt
search/Zend/Search/Lucene/Storage/File.php
search/query.php
search/querylib.php

index 379a272..14cc4f7 100644 (file)
@@ -1,3 +1,12 @@
+2006/09/08
+----------
+Google Summer of Code is finished; I spent a couple of weeks away from
+the project to think about it and also to take a break. Working on it
+now, I discovered bugs in the query parser (now fixed), and I also
+un-convoluted the querylib logic (well, slightly).
+
+Updated ZFS files to latest SVN.
+
 2006/08/21
 ----------
 Fixed index document count, and created new config variable to store
index d8b80df..c20ea57 100644 (file)
@@ -1,9 +1,8 @@
 We are running cutting-edge (i.e. HEAD) Zend Framework:
   URL: http://framework.zend.com/svn/framework/trunk
-  Revision: 924
-  Last Changed Rev: 924
-  Last Changed Date: 2006-07-27 10:23:04 +0200 (Thu, 27 Jul 2006)
-
+  Revision: 1042 
+  Last Changed Rev: 1042  
+  Last Changed Date: 2006-09-07 23:14:50 +0200 (Thu, 07 Sep 2006)
 
 The copy of Zend Framework present in this directory contains only the minimum
 to run Zend_Search_Lucene - I don't foresee any problems, since the license
index a53c75b..5a195ae 100644 (file)
  */
 
 
+
+/** Zend_Search_Lucene_Exception */
+require_once 'Zend/Search/Lucene/Exception.php';
+
+
 /**
  * @category   Zend
  * @package    Zend_Search_Lucene
@@ -157,46 +162,74 @@ abstract class Zend_Search_Lucene_Storage_File
      * and advances the file pointer.
      *
      * @return integer
+     * @throws Zend_Search_Lucene_Exception
      */
     public function readLong()
     {
         $str = $this->_fread(8);
 
         /**
-         * PHP uses long as largest integer. fseek() uses long for offset.
-         * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
-         * conversion to float.
-         * So, largest index segment file is 2Gb
+         * Check that we are running in 64-bit mode.
+         * fseek() uses a long for the offset, so the largest index segment file size in 32-bit mode is 2 GB.
          */
-        return  /* ord($str{0}) << 56  | */
-                /* ord($str{1}) << 48  | */
-                /* ord($str{2}) << 40  | */
-                /* ord($str{3}) << 32  | */
-                ord($str{4}) << 24  |
-                ord($str{5}) << 16  |
-                ord($str{6}) << 8   |
-                ord($str{7});
+        if (PHP_INT_SIZE > 4) {
+            return  ord($str{0}) << 56  |
+                    ord($str{1}) << 48  |
+                    ord($str{2}) << 40  |
+                    ord($str{3}) << 32  |
+                    ord($str{4}) << 24  |
+                    ord($str{5}) << 16  |
+                    ord($str{6}) << 8   |
+                    ord($str{7});
+        } else {
+            if ((ord($str{0})          != 0) ||
+                (ord($str{1})          != 0) ||
+                (ord($str{2})          != 0) ||
+                (ord($str{3})          != 0) ||
+                ((ord($str{0}) & 0x80) != 0)) {
+                     throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
+                 }
+
+            return  ord($str{4}) << 24  |
+                    ord($str{5}) << 16  |
+                    ord($str{6}) << 8   |
+                    ord($str{7});
+        }
     }
 
     /**
      * Writes long integer to the end of file
      *
      * @param integer $value
+     * @throws Zend_Search_Lucene_Exception
      */
     public function writeLong($value)
     {
         /**
-         * PHP uses long as largest integer. fseek() uses long for offset.
-         * long has 4 bytes in a lot of systems. 4 bytes are discarded to prevent
-         * conversion to float.
-         * So, largest index segment file is 2Gb
+         * Check that we are running in 64-bit mode.
+         * fseek() and ftell() use a long for the offset, so the largest index segment file size in 32-bit mode is 2 GB.
          */
-        settype($value, 'integer');
-        $this->_fwrite( "\x00\x00\x00\x00"     .
-                        chr($value>>24 & 0xFF) .
-                        chr($value>>16 & 0xFF) .
-                        chr($value>>8  & 0xFF) .
-                        chr($value     & 0xFF),   8  );
+        if (PHP_INT_SIZE > 4) {
+            settype($value, 'integer');
+            $this->_fwrite( chr($value>>56 & 0xFF) .
+                            chr($value>>48 & 0xFF) .
+                            chr($value>>40 & 0xFF) .
+                            chr($value>>32 & 0xFF) .
+                            chr($value>>24 & 0xFF) .
+                            chr($value>>16 & 0xFF) .
+                            chr($value>>8  & 0xFF) .
+                            chr($value     & 0xFF),   8  );
+        } else {
+            if ($value > 0x7FFFFFFF) {
+                throw new Zend_Search_Lucene_Exception('Largest supported segment size (for 32-bit mode) is 2Gb');
+            }
+
+            $this->_fwrite( "\x00\x00\x00\x00"     .
+                            chr($value>>24 & 0xFF) .
+                            chr($value>>16 & 0xFF) .
+                            chr($value>>8  & 0xFF) .
+                            chr($value     & 0xFF),   8  );
+        }
     }
 
 
index 2bd6f5b..0d2b90e 100644 (file)
@@ -41,6 +41,9 @@
       //otherwise we are dealing with a new advanced query
       unset($_SESSION['search_advanced_query']);
       session_unregister('search_advanced_query');
+
+      //chars to strip from strings (whitespace, plus comma, minus and plus signs)
+      $chars = " \t\n\r\0\x0B,-+";
             
       //retrieve advanced query variables
       $adv->mustappear  = trim(optional_param('mustappear', '', PARAM_CLEAN), $chars);
@@ -55,8 +58,6 @@
       //parse the advanced variables into a query string
       //TODO: move out to external query class (QueryParse?)
                   
-      //chars to strip from strings (whitespace)
-      $chars = ' \t\n\r\0\x0B,;';
       $query_string = '';      
       
       //get all available module types
     } //if    
     
     //run the query against the index
-    $sq = new SearchQuery($query_string, $page_number, 10, true);  
+    $sq = new SearchQuery($query_string, $page_number, 10, false);  
   } //if
   
   if (!$site = get_site()) {
index 87007ff..e435b0d 100644 (file)
@@ -93,7 +93,8 @@
             $validquery,
             $validindex,            
             $results,
-            $results_per_page;
+            $results_per_page,
+            $total_results;
     
     public function __construct($term='', $page=1, $results_per_page=10, $cache=false) {
       global $CFG;
     } //set_query
     
     public function results() {
-      if ($this->validquery and $this->validindex) {
-        return $this->get_subset_results();
-      } else {
-        return array();
-      } //else
+      return $this->results;
     } //results
-    
-    private function get_subset_results() {
-      if ($this->count() < $this->results_per_page) {
-        $this->pagenumber = 1;
-      } else if ($this->pagenumber > $this->total_pages()) {
-        $this->pagenumber = $this->total_pages();
-      } //if
-    
-      $start  = ($this->pagenumber - 1) * $this->results_per_page;
-                     
-      return array_slice($this->results, $start, $this->results_per_page);    
-    } //get_results
-    
-    private function get_all_results() {
+        
+    private function process_results($all=false) {
       global $USER;
+
+      $term = strtolower($this->term);         
       
-      $resultdoc  = new SearchResult();
-      $resultdocs = array();
-      $i = 0;
+      //experimental - return more results
+      $strip_arr = array('author:', 'title:', '+', '-', 'doctype:');   
+      $stripped_term = str_replace($strip_arr, '', $term);
       
-      $term = strtolower($this->term);
+      $hits = $this->index->find($term." title:".$stripped_term." author:".$stripped_term);
+      //--
       
-      $hits = $this->index->find($term." title:".$term." author:".$term);
+      $hitcount = count($hits);
+      $this->total_results = $hitcount; 
             
-      foreach ($hits as $hit) {            
+      if ($hitcount == 0) return array();
+      
+      $totalpages = ceil($hitcount/$this->results_per_page);
+      
+      if (!$all) {
+        if ($hitcount < $this->results_per_page) {
+          $this->pagenumber = 1;
+        } else if ($this->pagenumber > $totalpages) {
+          $this->pagenumber  =$totalpages;
+        } //if      
+        
+        $start = ($this->pagenumber - 1) * $this->results_per_page;
+        $end = $start + $this->results_per_page;
+        
+        if ($end > $hitcount) {
+          $end = $hitcount;
+        } //if        
+      } else {
+        $start = 0;
+        $end = $hitcount;
+      } //else
+      
+      $resultdoc  = new SearchResult();
+      $resultdocs = array();      
+                  
+      for ($i = $start; $i < $end; $i++) {
+        $hit = $hits[$i];
+                           
         //check permissions on each result
         if ($this->can_display($USER, $hit->id, $hit->doctype, $hit->course_id, $hit->group_id)) {
           $resultdoc->number  = $i;
           $resultdoc->author  = $hit->author;
         
           //and store it
-          $resultdocs[] = clone($resultdoc);
-          
-          $i++;
+          $resultdocs[] = clone($resultdoc);          
         } //if
       } //foreach
-      
+            
       return $resultdocs;
-    } //get_all_results
+    } //process_results
               
     private function get_results() {
       $cache = new SearchCache();
       
       if ($this->cache and $cache->can_cache()) {        
         if (!($resultdocs = $cache->cache($this->term))) {
-          $resultdocs = $this->get_all_results();
+          $resultdocs = $this->process_results();
           //cache the results so we don't have to compute this on every page-load
           $cache->cache($this->term, $resultdocs);          
           //print "Using new results.";
       } else {
         //no caching :(
         //print "Caching disabled!";
-        $resultdocs = $this->get_all_results();
+        $resultdocs = $this->process_results();
       } //else
 
       return $resultdocs;
     } //can_display
     
     public function count() {
-      return count($this->results);
+      return $this->total_results;
     } //count
     
-    //this shouldn't be in this class
-    //public function index_count() {
-    //  return $this->index->count();
-    //} //index_count    
-    
     public function is_valid() {
       return ($this->validquery and $this->validindex);
     } //is_valid