Add/delete/update cron functions added.
authormchampan <mchampan>
Wed, 16 Aug 2006 13:34:07 +0000 (13:34 +0000)
committermchampan <mchampan>
Wed, 16 Aug 2006 13:34:07 +0000 (13:34 +0000)
IndexDBControl class added to abstract some index functionality out.

search/.cvsignore [new file with mode: 0644]
search/README.txt
search/add.php [new file with mode: 0644]
search/cron.php
search/delete.php
search/indexer.php
search/indexlib.php
search/querylib.php
search/update.php [new file with mode: 0644]

diff --git a/search/.cvsignore b/search/.cvsignore
new file mode 100644 (file)
index 0000000..04fd92e
--- /dev/null
@@ -0,0 +1 @@
+delete_log.php
index 0737770..4841d3e 100644 (file)
@@ -1,3 +1,19 @@
+2006/08/16
+----------
+Add/delete/update cron functions finished - can be called separately
+or all at once via cron.php.
+
+Document date field added to index and database summary.
+
+Some index db functionality abstracted out to indexlib.php - can
+use IndexDBControl class to add/del documents from database, and
+to make sure the db table is functioning.
+
+DB sql files changed to add some extra fields.
+
+Default 'simple' query modified to search title and author, as well
+as contents of document, to provide better results for users.
+
 2006/08/14
 ----------
 First revision of the advanced search page completed. Functional,
@@ -101,5 +117,6 @@ To index for the first time, login as an admin user and browse to /search/index.
 or /search/stats.php - there will be a message and a link telling you to go index.
 
 -- Michael Champanis (mchampan)
-   cynnical@gmail.com
+   email: cynnical@gmail.com
+   skype: mchampan
    Summer of Code 2006
\ No newline at end of file
diff --git a/search/add.php b/search/add.php
new file mode 100644 (file)
index 0000000..b862fdd
--- /dev/null
@@ -0,0 +1,83 @@
+<?php
+
+  /* Index update: additions.
+   *
+   * Admin-only script. For every installed module that ships a
+   * search/documents/<name>_document.php file defining <name>_db_names() and
+   * <name>_single_document(), it selects the records that have no matching
+   * row in the search documents table, stores a summary row for each through
+   * IndexDBControl, and adds the document to the Lucene index.
+   * */
+
+  require_once('../config.php');
+  require_once("$CFG->dirroot/search/lib.php");
+
+  require_login();
+
+  if (!isadmin()) {
+    error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php");
+  } //if
+
+  //check for php5 (lib.php)
+  if (!search_check_php5()) {
+    $phpversion = phpversion();
+    mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)");
+    exit(0);
+  } //if
+
+  //php5 found, continue including php5-only files
+  require_once("$CFG->dirroot/search/indexlib.php");
+
+  $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH);
+  $dbcontrol = new IndexDBControl();
+  $addition_count = 0;
+
+  mtrace('<pre>Starting index update (additions)...');
+  mtrace('Index size before: '.$index->count()."\n");
+
+  if ($mods = get_records_select('modules')) {
+  foreach ($mods as $mod) {
+    $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
+    $db_names_function = $mod->name.'_db_names';
+    $get_document_function = $mod->name.'_single_document';
+    $additions = array();
+
+    //only modules that provide a search document file take part
+    if (file_exists($class_file)) {
+      require_once($class_file);
+
+      if (function_exists($db_names_function) and function_exists($get_document_function)) {
+        mtrace("Checking $mod->name module for additions.");
+
+        //$values[0] is used as the id column and $values[1] as the table name
+        $values = $db_names_function();
+
+        //records of this module that are not yet listed in the search table
+        $sql = "select id, ".$values[0]." as docid from ".$values[1]."
+                where id not in
+                (select docid from ".SEARCH_DATABASE_TABLE." where doctype like '$mod->name')";
+
+        $records = get_records_sql($sql);
+
+        if (is_array($records)) {
+          foreach($records as $record) {
+            $additions[] = $get_document_function($record->id);
+          } //foreach
+        } //if
+
+        foreach ($additions as $add) {
+          //skip anything the module could not turn into a document object;
+          //calling addField() on a non-object would abort the whole run
+          if (!is_object($add)) {
+            continue;
+          } //if
+
+          ++$addition_count;
+
+          //object to insert into db
+          $dbid = $dbcontrol->addDocument($add);
+
+          //synchronise db with index
+          $add->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid));
+
+          mtrace("  Add: $add->title (database id = $add->dbid, moodle instance id = $add->docid)");
+
+          $index->addDocument($add);
+        } //foreach
+
+        mtrace("Finished $mod->name.\n");
+      } //if
+    } //if
+  } //foreach
+  } //if
+
+  //commit changes
+  $index->commit();
+
+  //update index date
+  set_config("search_indexer_run_date", time());
+
+  mtrace("Added $addition_count documents.");
+  mtrace('Index size after: '.$index->count().'</pre>');
+
+?>
\ No newline at end of file
index 9bc2c20..68e8677 100644 (file)
@@ -4,7 +4,15 @@
   require_once("$CFG->dirroot/search/lib.php");
   
   mtrace("<pre>Starting cron...\n");
+  
+  mtrace("--DELETE----");
+  require_once("$CFG->dirroot/search/delete.php");
+  mtrace("--UPDATE----");
+  require_once("$CFG->dirroot/search/update.php");
+  mtrace("--ADD-------");
+  require_once("$CFG->dirroot/search/add.php");
+  mtrace("------------");
 
-  mtrace("</pre>");
+  mtrace("cron finished.</pre>");
 
 ?>
\ No newline at end of file
index 56c7122..a7aae71 100644 (file)
@@ -2,23 +2,82 @@
 
   require_once('../config.php');
   require_once("$CFG->dirroot/search/lib.php");
+    
+  require_login();
+
+  if (!isadmin()) {
+    error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php");
+  } //if
+    
+  //check for php5 (lib.php)
+  if (!search_check_php5()) {
+    $phpversion = phpversion();
+    mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)");
+    exit(0);
+  } //if  
+  
+  require_once("$CFG->dirroot/search/indexlib.php");  
   
-  mtrace("<pre>Starting clean-up...\n");
+  $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH);
+  $dbcontrol = new IndexDBControl();
+  $deletion_count = 0;   
+  
+  mtrace('<pre>Starting clean-up of removed records...');
+  mtrace('Index size before: '.$index->count()."\n");
   
   if ($mods = get_records_select('modules')) {
   foreach ($mods as $mod) {
     $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
+    $delete_function = $mod->name.'_delete';
+    $db_names_function = $mod->name.'_db_names';
+    $deletions = array();    
     
     if (file_exists($class_file)) {
-      mtrace("Checking $mod->name module for deletions.\n");
-      
-      $records = get_records_sql("select * from ".$CFG->prefix."log where module = '$mod->name' and action like '%delete%'");
-      
-      print_r($records);
-    } //if    
+      require_once($class_file);
+    
+      if (function_exists($delete_function) and function_exists($db_names_function)) {
+        mtrace("Checking $mod->name module for deletions.");
+        $values = $db_names_function();
+        
+        $sql = "select id, docid from ".SEARCH_DATABASE_TABLE."
+                where doctype like '$mod->name'
+                and docid not in
+                (select ".$values[0]." from ".$values[1].")";
+
+        $records = get_records_sql($sql);     
+        
+        if (is_array($records)) {       
+          foreach($records as $record) {
+            $deletions[] = $delete_function($record->docid);
+          } //foreach
+        } //if    
+          
+        foreach ($deletions as $delete) {        
+          $doc = $index->find("+docid:$delete +doctype:$mod->name");            
+          
+          //get the record, should only be one
+          foreach ($doc as $thisdoc) {
+            ++$deletion_count;
+            mtrace("  Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)");
+            
+            $dbcontrol->delDocument($thisdoc);
+            $index->delete($thisdoc->id);              
+          } //foreach
+        } //foreach
+                    
+        mtrace("Finished $mod->name.\n");
+      } //if              
+    } //if
   } //foreach
   } //if
+  
+  //commit changes
+  $index->commit();
+  
+  //update index date
+  set_config("search_indexer_run_date", time());
 
-  mtrace("</pre>");
+  mtrace("Finished $deletion_count removals.");
+  mtrace('Index size after: '.$index->count().'</pre>');
 
 ?>
\ No newline at end of file
index c7d8df8..874775e 100644 (file)
@@ -47,7 +47,8 @@
   } //if
     
   //php5 found, continue including php5-only files
-  require_once("$CFG->dirroot/search/Zend/Search/Lucene.php");
+  //require_once("$CFG->dirroot/search/Zend/Search/Lucene.php");
+  require_once("$CFG->dirroot/search/indexlib.php");  
     
   mtrace('<pre>Server Time: '.date('r',time())."\n");
 
@@ -61,7 +62,8 @@
   
   //paths
   $index_path = SEARCH_INDEX_PATH;
-  $index_db_file = "$CFG->dirroot/search/db/$CFG->dbtype.sql";  
+  $index_db_file = "$CFG->dirroot/search/db/$CFG->dbtype.sql";
+  $dbcontrol = new IndexDBControl();  
   
   //setup directory in data root
   if (!file_exists($index_path)) {
   
   $index = new Zend_Search_Lucene($index_path, true);
   
-  //create the database tables
-  $tables = $db->MetaTables();
-    
-  if (in_array($CFG->prefix.'search_documents', $tables)) {
-    //delete_records('search_documents');    
-    //temporary measure - db doesn't have update scripts and I realised that cvs 1.1 db
-    //is incompatible with cvs 1.2! Must fix ASAP.    
-    execute_sql('drop table '.$CFG->prefix.'search_documents', false);
-    
-    ob_start(); //turn output buffering on - to hide modify_database() output
-    modify_database($index_db_file, '', false);
-    ob_end_clean(); //chuck the buffer and resume normal operation
-  } else {        
-    ob_start(); //turn output buffering on - to hide modify_database() output
-    modify_database($index_db_file, '', false);
-    ob_end_clean(); //chuck the buffer and resume normal operation
-  } //else
+  if (!$dbcontrol->checkDB()) {
+    search_pexit("Database error. Please check settings/files.");
+  } //if     
 
   //begin timer
   search_stopwatch();
             foreach($documents as $document) {
               $counter++;
                             
-              //object to insert into db                            
-              $doc->doctype   = $document->doctype;
-              $doc->title     = search_escape_string($document->title);
-              $doc->url       = search_escape_string($document->url);              
-              $doc->update    = time();                            
-              $doc->courseid  = $document->course_id;              
-              $doc->groupid   = $document->group_id;              
-              
-              //insert summary into db
-              $id = insert_record('search_documents', $doc);
+              //object to insert into db
+              $dbid = $dbcontrol->addDocument($document);
               
               //synchronise db with index
-              $document->addField(Zend_Search_Lucene_Field::Keyword('dbid', $id));
+              $document->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid));
               
               //add document to index
               $index->addDocument($document);                  
index de1cf20..305768c 100644 (file)
@@ -6,7 +6,8 @@
    * and the index itself.
    * */
 
-  require_once("$CFG->dirroot/search/Zend/Search/Lucene.php");
+  require_once("$CFG->dirroot/search/lib.php");
+  require_once("$CFG->dirroot/search/Zend/Search/Lucene.php");   
 
   class IndexInfo {
     private $path,        //index data directory
             
       $admin_tables = $db->MetaTables();
       
-      if (in_array($CFG->prefix.'search_documents', $admin_tables)) {
+      if (in_array($CFG->prefix.SEARCH_DATABASE_TABLE, $admin_tables)) {
         $db_exists = true;
         
         //total documents
-        $this->dbcount = count_records('search_documents');
+        $this->dbcount = count_records(SEARCH_DATABASE_TABLE);
 
         //individual document types
         $types = search_get_document_types();
         sort($types);
   
         foreach($types as $type) {
-          $c = count_records('search_documents', 'doctype', $type);
+          $c = count_records(SEARCH_DATABASE_TABLE, 'doctype', $type);
           $this->types[$type] = (int)$c;
         } //foreach
       } else {
       } //if
     } //__get        
   } //IndexInfo
+  
+  
+  /* DB Index control class
+   *
+   * Wraps the database side of the search index: checks for and (re)creates
+   * the SEARCH_DATABASE_TABLE table, and adds or removes the per-document
+   * summary rows that accompany documents stored in the Lucene index.
+   * */
+
+  class IndexDBControl {
+    /* Reports whether the search documents table is present.
+     *
+     * @return bool true when $CFG->prefix.SEARCH_DATABASE_TABLE is among the
+     *              tables listed by $db->MetaTables()
+     * */
+    public function checkTableExists() {
+      global $CFG, $db;
+
+      $table = SEARCH_DATABASE_TABLE;
+      $tables = $db->MetaTables();
+
+      return in_array($CFG->prefix.$table, $tables);
+    } //checkTableExists
+
+    /* (Re)creates the search documents table from the dbtype-specific sql file.
+     *
+     * NOTE: despite the name this is destructive - an existing table is
+     * dropped before the sql file is run again.
+     *
+     * @return mixed whatever modify_database() returns for the sql file
+     * */
+    public function checkDB() {
+      global $CFG;
+
+      $sqlfile = "$CFG->dirroot/search/db/$CFG->dbtype.sql";
+
+      if ($this->checkTableExists()) {
+        execute_sql('drop table '.$CFG->prefix.SEARCH_DATABASE_TABLE, false);
+      } //if
+
+      ob_start(); //turn output buffering on - to hide modify_database() output
+      $ret = modify_database($sqlfile, '', false);
+      ob_end_clean(); //chuck the buffer and resume normal operation
+
+      return $ret;
+    } //checkDB
+
+    /* Stores the summary row for a search document.
+     *
+     * @param object $document search document; doctype, docid, title, url,
+     *                         date, course_id and group_id are read from it
+     * @return mixed false when no document was given, otherwise the result
+     *               of insert_record() for the new row
+     * */
+    public function addDocument($document=null) {
+      if ($document == null) {
+        return false;
+      } //if
+
+      //object to insert into db - created explicitly instead of relying on
+      //PHP to conjure an object on the first property assignment
+      $doc = new stdClass();
+      $doc->doctype   = $document->doctype;
+      $doc->docid     = $document->docid;
+      $doc->title     = search_escape_string($document->title);
+      $doc->url       = search_escape_string($document->url);
+      $doc->update    = time();
+      $doc->docdate   = $document->date;
+      $doc->courseid  = $document->course_id;
+      $doc->groupid   = $document->group_id;
+
+      //insert summary into db
+      $id = insert_record(SEARCH_DATABASE_TABLE, $doc);
+
+      return $id;
+    } //addDocument
+
+    /* Removes the summary row belonging to a search document.
+     *
+     * @param object $document document whose dbid is the id of the row to delete
+     * */
+    public function delDocument($document) {
+      delete_records(SEARCH_DATABASE_TABLE, 'id', $document->dbid);
+    } //delDocument
+  } //IndexDBControl
       
 ?>
\ No newline at end of file
index 83705ce..c31a0df 100644 (file)
       $resultdocs = array();
       $i = 0;
       
-      $hits = $this->index->find(strtolower($this->term));
+      $term = strtolower($this->term);
       
+      $hits = $this->index->find($term." title:".$term." author:".$term);
+            
       foreach ($hits as $hit) {            
         //check permissions on each result
         if ($this->can_display($USER, $hit->id, $hit->doctype, $hit->course_id, $hit->group_id)) {
diff --git a/search/update.php b/search/update.php
new file mode 100644 (file)
index 0000000..6b11e4d
--- /dev/null
@@ -0,0 +1,97 @@
+<?php
+
+  /* Index update: modified documents.
+   *
+   * Admin-only script. For every installed module that ships a
+   * search/documents/<name>_document.php file defining <name>_delete(),
+   * <name>_db_names() and <name>_single_document(), it selects the records
+   * whose $values[2] column is greater than the last indexer run date,
+   * removes their old entries from the index and the search documents table,
+   * and adds freshly built documents back in.
+   * */
+
+  require_once('../config.php');
+  require_once("$CFG->dirroot/search/lib.php");
+
+  require_login();
+
+  if (!isadmin()) {
+    error("You need to be an admin user to use this page.", "$CFG->wwwroot/login/index.php");
+  } //if
+
+  //check for php5 (lib.php)
+  if (!search_check_php5()) {
+    $phpversion = phpversion();
+    mtrace("Sorry, global search requires PHP 5.0.0 or later (currently using version $phpversion)");
+    exit(0);
+  } //if
+
+  //php5 found, continue including php5-only files
+  require_once("$CFG->dirroot/search/indexlib.php");
+
+  $index = new Zend_Search_Lucene(SEARCH_INDEX_PATH);
+  $dbcontrol = new IndexDBControl();
+  $update_count = 0;
+
+  //date of the last indexer run; fall back to 0 (everything counts as
+  //modified) when the setting was never stored, and force an integer so the
+  //query below is always valid SQL
+  //NOTE(review): cron.php runs delete.php first, which calls set_config() on
+  //this same setting; if set_config() also refreshes $CFG in memory, changes
+  //made since the previous run would be missed here - confirm.
+  $indexdate = isset($CFG->search_indexer_run_date) ? (int)$CFG->search_indexer_run_date : 0;
+
+  mtrace("<pre>Starting index update (updates)...\n");
+
+  if ($mods = get_records_select('modules')) {
+  foreach ($mods as $mod) {
+    $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';
+    $get_document_function = $mod->name.'_single_document';
+    $delete_function = $mod->name.'_delete';
+    $db_names_function = $mod->name.'_db_names';
+    $updates = array();
+
+    //only modules that provide a search document file take part
+    if (file_exists($class_file)) {
+      require_once($class_file);
+
+      if (function_exists($delete_function) and function_exists($db_names_function) and function_exists($get_document_function)) {
+        mtrace("Checking $mod->name module for updates.");
+
+        //$values[0] is used as the id column, $values[1] as the table name
+        //and $values[2] as the column compared against the index date
+        $values = $db_names_function();
+
+        $sql = "select id, ".$values[0]." as docid from ".$values[1]."
+                where ".$values[2]." > $indexdate";
+
+        $records = get_records_sql($sql);
+
+        if (is_array($records)) {
+          foreach($records as $record) {
+            $updates[] = $delete_function($record->docid);
+          } //foreach
+        } //if
+
+        foreach ($updates as $update) {
+          ++$update_count;
+
+          //delete old document
+          $doc = $index->find("+docid:$update +doctype:$mod->name");
+
+          //get the record, should only be one
+          foreach ($doc as $thisdoc) {
+            mtrace("  Delete: $thisdoc->title (database id = $thisdoc->dbid, index id = $thisdoc->id, moodle instance id = $thisdoc->docid)");
+
+            $dbcontrol->delDocument($thisdoc);
+            $index->delete($thisdoc->id);
+          } //foreach
+
+          //add new modified document back into index
+          $add = $get_document_function($update);
+
+          //the module may fail to rebuild the document; calling addField()
+          //on a non-object would abort the whole run, so report and move on
+          if (!is_object($add)) {
+            mtrace("  Skipped: could not rebuild document (moodle instance id = $update)");
+            continue;
+          } //if
+
+          //object to insert into db
+          $dbid = $dbcontrol->addDocument($add);
+
+          //synchronise db with index
+          $add->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid));
+
+          mtrace("  Add: $add->title (database id = $add->dbid, moodle instance id = $add->docid)");
+
+          $index->addDocument($add);
+        } //foreach
+
+        mtrace("Finished $mod->name.\n");
+      } //if
+    } //if
+  } //foreach
+  } //if
+
+  //commit changes
+  $index->commit();
+
+  //update index date
+  set_config("search_indexer_run_date", time());
+
+  mtrace("Finished $update_count updates.</pre>");
+
+?>
\ No newline at end of file