global search MDL-25009 indexing failures in each module are now handled so that...
[moodle.git] / search / indexer.php
CommitLineData
682d4032 1<?php
/**
 * Global Search Engine for Moodle
 *
 * @package search
 * @category core
 * @subpackage search_engine
 * @author Michael Champanis (mchampan) [cynnical@gmail.com], Valery Fremaux [valery.fremaux@club-internet.fr] > 1.8
 * @date 2008/03/31
 * @version prepared for Moodle 2.0
 * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
 *
 * The indexer logic -
 *
 * Look through each installed module's or block's search document class file (/search/documents)
 * for necessary search functions, and if they're present add the content to the index.
 * Repeat this for blocks.
 *
 * Because the iterator/retrieval functions are now stored in /search/documents/<mod>_document.php,
 * /mod/mod/lib.php doesn't have to be modified - and thus the search module becomes quite
 * self-sufficient. URLs are now stored in the index, stopping us from needing to require
 * the class files to generate a results page.
 *
 * Along with the index data, each document's summary gets stored in the database
 * and synchronised to the index (flat file) via the primary key ('id') which is mapped
 * to the 'dbid' field in the index.
 */
28
2f338ab5 29
/**
 * Bootstrap: load the Moodle configuration and the global search library.
 */
require_once('../config.php');
require_once($CFG->dirroot . '/search/lib.php');

// Indexing takes a while: lift the execution time limit and flush output as it is
// produced. The @ operator silences any warning raised by these three calls.
@set_time_limit(0);
@ob_implicit_flush(true);
@ob_end_flush();

// Prepend the search directory to the include path.
set_include_path($CFG->dirroot . DIRECTORY_SEPARATOR . 'search' . PATH_SEPARATOR . get_include_path());

/// Access control: indexing needs access to every page, so it is restricted to
/// logged-in users holding the site configuration capability.

    require_login();

    // Global search must be switched on.
    if (empty($CFG->enableglobalsearch)) {
        print_error('globalsearchdisabled', 'search');
    }

    // Only users allowed to configure the site may run the indexer.
    if (!has_capability('moodle/site:config', get_context_instance(CONTEXT_SYSTEM))) {
        print_error('beadmin', 'search', get_login_url());
    }

/// Confirmation flag guarding against accidental reindexing
/// (indexersplash.php is the correct entry point).

    $sure = strtolower(optional_param('areyousure', '', PARAM_ALPHA));

    if ('yes' != $sure) {
        // No confirmation supplied: point the user at the splash page and stop.
        mtrace("<pre>Sorry, you need to confirm indexing via <a href='indexersplash.php'>indexersplash.php</a>. (<a href='index.php'>Back to query page</a>).</pre>");

        exit(0);
    }

/// Load the index library and open the HTML report.

    // The direct Zend Lucene include stays disabled, as in the original:
    //require_once("$CFG->dirroot/search/Zend/Search/Lucene.php");
    require_once($CFG->dirroot . '/search/indexlib.php');

    mtrace('<html><head><meta http-equiv="content-type" content="text/html; charset=utf-8" /></head><body>');
    mtrace('<pre>Server Time: ' . date('r') . "\n");

    // A busy flag still set to '1' means the previous run never reached its end.
    if (isset($CFG->search_indexer_busy) && $CFG->search_indexer_busy == '1') {
        mtrace("Warning: Indexing was not successfully completed last time, restarting.\n");
    }

/// Raise the busy flag for the duration of this run.

    set_config('search_indexer_busy', '1');

    // Paths and the database controller used while indexing.
    $index_path = SEARCH_INDEX_PATH;
    $index_db_file = $CFG->dirroot . '/search/db/' . $CFG->dbtype . '.sql';
    $dbcontrol = new IndexDBControl();

/// Make sure the index directory exists, then create a fresh Lucene index in it.

    if (file_exists($index_path)) {
        mtrace("Using {$index_path} as data directory.");
    } else {
        mtrace("Data directory ($index_path) does not exist, attempting to create.");
        if (mkdir($index_path, $CFG->directorypermissions)) {
            mtrace('Directory successfully created.');
        } else {
            search_pexit("Error creating data directory at: $index_path. Please correct.");
        }
    }

    // Install the case-insensitive UTF-8 analyzer as the default before opening the index.
    Zend_Search_Lucene_Analysis_Analyzer::setDefault(
        new Zend_Search_Lucene_Analysis_Analyzer_Common_Utf8_CaseInsensitive()
    );
    // The second constructor argument is true, as in the original call
    // (presumably "create a new index" - confirm against the Zend_Search_Lucene API).
    $index = new Zend_Search_Lucene($index_path, true);

/// New regeneration: remove every existing database entry before rebuilding.

    mtrace('Deleting old index entries.');
    $DB->delete_records(SEARCH_DATABASE_TABLE);

/// begin timer

    search_stopwatch();
    mtrace("Starting activity modules\n");

    // The presence of the required search functions -
    //   * <name>_iterator
    //   * <name>_get_content_for_index
    // is the sole basis for including a plugin in the index at the moment.

    $searchables = search_collect_searchables();

/// start indexation

    if ($searchables) {
        foreach ($searchables as $mod) {

            // Mark the last update and run times for this plugin as "now".
            // This happens before the disabled check below, so it applies to skipped plugins too.
            set_config('search_indexer_update_date_' . $mod->name, time());
            set_config('search_indexer_run_date_' . $mod->name, time());

            mtrace("starting indexing {$mod->name}\n");

            // Skip plugins whose search_in_<name> setting exists and is switched off.
            $key = 'search_in_' . $mod->name;
            if (isset($CFG->$key) && !$CFG->$key) {
                mtrace("module $key has been administratively disabled. Skipping...\n");
                continue;
            }

            // Locate the search document file: bundled with search, or shipped by the plugin.
            if ($mod->location == 'internal') {
                $class_file = $CFG->dirroot . '/search/documents/' . $mod->name . '_document.php';
            } else {
                $class_file = $CFG->dirroot . '/' . $mod->location . '/' . $mod->name . '/search_document.php';
            }

            if (!file_exists($class_file)) {
                mtrace ("No search document found for plugin {$mod->name}. Ignoring.");
                continue;
            }
            include_once($class_file);

            // Build the function names; both must exist or the plugin is silently skipped.
            $iter_function = $mod->name . '_iterator';
            $index_function = $mod->name . '_get_content_for_index';
            if (!function_exists($index_function) || !function_exists($iter_function)) {
                continue;
            }

            mtrace("Processing module function $index_function ...");
            $counter = 0;
            $sources = $iter_function();
            if ($sources) {
                foreach ($sources as $i) {
                    $documents = $index_function($i);
                    if (!$documents) {
                        continue;
                    }

                    foreach ($documents as $document) {
                        $counter++;

                        // Temporary fix until MDL-24822 is resolved.
                        // This must be a comparison: the previous "$mod->name ='forum'" was an
                        // assignment, which made the test always true (so the rewrite applied to
                        // every plugin) and overwrote $mod->name with 'forum'.
                        if ($document->group_id == -1 && $mod->name == 'forum') {
                            $document->group_id = 0;
                        }

                        // Store the document in the database ...
                        $dbid = $dbcontrol->addDocument($document);

                        // ... tie the index entry to that database row ...
                        $document->addField(Zend_Search_Lucene_Field::Keyword('dbid', $dbid));

                        // ... and add the document to the index.
                        $index->addDocument($document);

                        // Commit every 2000 new documents and print a status message.
                        if (($counter % 2000) == 0) {
                            $index->commit();
                            mtrace(".. $counter");
                        }
                    }
                }
            }

            // Commit left-over documents and finish up for this plugin.
            $index->commit();

            mtrace("-- $counter documents indexed");
            mtrace("done.\n");
        }
    }

/// All plugins processed: report the elapsed time and close the report.

    mtrace('Finished activity modules');
    search_stopwatch();

    mtrace(".<br/><a href='index.php'>Back to query page</a>.");
    mtrace('</pre>');

/// The run reached its end, so lower the busy flag.

    set_config('search_indexer_busy', '0');

/// Record when the indexer last ran.

    set_config('search_indexer_run_date', time());

/// Record the index size as reported by the index.

    set_config('search_index_size', (int) $index->count());

?>