Initial commit
[moodle.git] / search / indexer.php
CommitLineData
682d4032 1<?php
abb4ea20 2 /* The indexer logic -
a25a3912 3 * Look through each installed module's search document class file (/search/documents)
4 * for necessary search functions, and if they're present add the content to the index.
5 * Repeat this for blocks.
6 *
7 * Because the iterator/retrieval functions are now stored in /search/documents/mod_document.php,
8 * /mod/mod/lib.php doesn't have to be modified - and thus the search module becomes quite
9 * self-sufficient. URL's are now stored in the index, stopping us from needing to require
10 * the class files to generate a results page.
abb4ea20 11 *
12 * Along with the index data, each document's summary gets stored in the database
13 * and synchronised to the index (flat file) via the primary key ('id') which is mapped
a25a3912 14 * to the 'db_id' field in the index
abb4ea20 15 * */
16
//indexing is a long-running job: lift the execution time limit and push
//output to the browser as it is produced (failures here are deliberately silenced)
@set_time_limit(0);
@ob_implicit_flush(true);
@ob_end_flush();

//moodle bootstrap, then the search library helpers
require_once('../config.php');
require_once($CFG->dirroot.'/search/lib.php');
24
//indexing needs access to every page, so only a logged-in administrator may run it
require_login();

if (!isadmin()) {
    $login_page = $CFG->wwwroot.'/login/index.php';
    error("You need to be an admin user to use this page.", $login_page);
} //if
31
//guard against accidental reindexing: the request must carry areyousure=yes
//(indexersplash.php is the correct entry point and supplies it)
$sure = strtolower(optional_param('areyousure', '', PARAM_ALPHA));

if ($sure !== 'yes') {
    $confirm_message = "<pre>Sorry, you need to confirm indexing via <a href='indexersplash.php'>indexersplash.php</a>";
    $confirm_message .= ". (<a href='index.php'>Back to query page</a>).</pre>";
    mtrace($confirm_message);

    exit(0);
} //if
41
//global search depends on php5-only code; stop here on older interpreters
//(search_check_php5() comes from lib.php)
if (!search_check_php5()) {
    mtrace('Sorry, global search requires PHP 5.0.0 or later (currently using version '.phpversion().')');
    exit(0);
} //if

//safe to pull in the php5-only files from this point on
require_once($CFG->dirroot.'/search/Zend/Search/Lucene.php');
51
//Remember whether the busy flag was already raised before we raise it again.
//The flag is only lowered on the last line of this script, so finding it set
//means either another indexing run is in progress or an earlier run never
//reached the end.
//The flag is written below with set_config() and no plugin argument, and is read
//back here from $CFG.
//NOTE(review): the previous one-argument get_config() call appears to interpret
//its argument as a plugin name rather than a setting name - confirm against the
//get_config()/set_config() signatures of this Moodle version.
$indexer_was_busy = !empty($CFG->search_indexer_busy);

//turn on busy flag
set_config("search_indexer_busy", 1);
mtrace('<pre>Server Time: '.date('r',time())."\n");

//Warn rather than abort: nothing else clears a stale flag, so refusing to run
//would lock the indexer out permanently after a single failed run.
if ($indexer_was_busy) {
    mtrace("Warning: the indexer busy flag was already set - either another indexing run is in progress "
          ."or the previous run did not finish. Restarting.\n");
} //if
58
//where the index files live, and the sql script that creates the summary table
$index_path    = $CFG->dataroot.'/search';
$index_db_file = $CFG->dirroot.'/search/db/'.$CFG->dbtype.'.sql';

//make sure the index directory exists inside the data root
if (file_exists($index_path)) {
    mtrace("Using $index_path as data directory.");
} else {
    mtrace("Data directory ($index_path) does not exist, attempting to create.");

    if (mkdir($index_path)) {
        mtrace("Directory successfully created.");
    } else {
        search_pexit("Error creating data directory at: $index_path. Please correct.");
    } //else
} //else
a25a3912 74
//open the index in the data directory (second argument true)
$index = new Zend_Search_Lucene($index_path, true);

//(re)create the database tables
$tables = $db->MetaTables();

//An existing search_documents table is dropped outright instead of being emptied.
//temporary measure - db doesn't have update scripts and the cvs 1.1 db layout
//is incompatible with cvs 1.2! Must fix ASAP.
if (in_array($CFG->prefix.'search_documents', $tables)) {
    execute_sql('drop table '.$CFG->prefix.'search_documents', false);
} //if

//in both cases the table is then built from the sql file; the output of
//modify_database() is captured in a buffer and thrown away
ob_start();
modify_database($index_db_file, '', false);
ob_end_clean();
a25a3912 94
//begin timer
search_stopwatch();
mtrace("Starting activity modules\n");

//A module is included in the index only when its search document file
//(/search/documents/<mod>_document.php) exists and defines both
// * <mod>_iterator
// * <mod>_get_content_for_index
//Modules lacking the file or either function are skipped silently.

if ($mods = get_records_select('modules' /*'index this module?' where statement*/)) {
    foreach ($mods as $mod) {
        $class_file = $CFG->dirroot.'/search/documents/'.$mod->name.'_document.php';

        if (!file_exists($class_file)) {
            continue;
        } //if

        include_once($class_file);

        $iter_function = $mod->name.'_iterator';
        $index_function = $mod->name.'_get_content_for_index';

        $counter = 0;
        $doc = new stdClass;

        if (!function_exists($index_function) || !function_exists($iter_function)) {
            continue;
        } //if

        mtrace("Processing module function $index_function ...");

        //an empty/false result (e.g. a module without any instances) must not be
        //handed to foreach, which would raise an "Invalid argument" warning
        $sources = $iter_function();

        if (!empty($sources)) {
            foreach ($sources as $i) {
                $documents = $index_function($i);

                //same guard for a source that yields nothing to index
                if (empty($documents)) {
                    continue;
                } //if

                //begin transaction

                foreach ($documents as $document) {
                    $counter++;

                    //object to insert into db
                    $doc->doctype = $document->doctype;
                    $doc->title = search_escape_string($document->title);
                    $doc->url = search_escape_string($document->url);
                    $doc->update = time();
                    $doc->courseid = $document->course_id;
                    $doc->groupid = $document->group_id;

                    //insert summary into db
                    $id = insert_record('search_documents', $doc);

                    //synchronise db with index: the db row id is stored as 'db_id'
                    $document->addField(Zend_Search_Lucene_Field::Keyword('db_id', $id));

                    //add document to index
                    $index->addDocument($document);

                    //commit every 2000 new documents, and print a status message
                    if (($counter%2000) == 0) {
                        $index->commit();
                        mtrace(".. $counter");
                    } //if
                } //foreach

                //end transaction

            } //foreach
        } //if

        //commit left over documents, and finish up
        $index->commit();

        mtrace("-- $counter documents indexed");
        mtrace("done.\n");
    } //foreach
} //if
165
//activity modules are done - report and print the timer
mtrace('Finished activity modules');
search_stopwatch();

//now blocks...
//(not implemented yet)

//close the report with a link back to the query page
$back_link = ".<br><a href='index.php'>Back to query page</a>.";
mtrace($back_link);
mtrace('</pre>');

//indexing completed, lower the busy flag again
set_config('search_indexer_busy', 0);
682d4032 178
179?>