MDL-61102 core_search: Improve performance by reducing schema checks
[moodle.git] / search / classes / engine.php
CommitLineData
db48207e
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
18 * Base class for search engines.
19 *
20 * All search engines must extend this class.
21 *
22 * @package core_search
23 * @copyright 2015 Daniel Neis
24 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25 */
26
27namespace core_search;
28
29defined('MOODLE_INTERNAL') || die();
30
/**
 * Base class for search engines.
 *
 * All search engines must extend this class.
 *
 * @package   core_search
 * @copyright 2015 Daniel Neis
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
abstract class engine {

    /**
     * The search engine configuration.
     *
     * @var \stdClass
     */
    protected $config = null;

    /**
     * Last executed query error, if there was any.
     * @var string
     */
    protected $queryerror = null;

    /**
     * @var array Internal cache of search areas, indexed by area id. A value of false marks an
     *            area that was already looked up and found missing or disabled.
     */
    protected $cachedareas = array();

    /**
     * @var array Internal cache of course records, indexed by course id.
     */
    protected $cachedcourses = array();

    /**
     * User data required to show their fullnames. Indexed by userid.
     *
     * @var \stdClass[]
     */
    protected static $cachedusers = array();

    /**
     * @var string Frankenstyle plugin name.
     */
    protected $pluginname = null;

    /**
     * @var bool If true, should skip schema validity check when checking the search engine is ready
     */
    protected $skipschemacheck = false;

    /**
     * Initialises the search engine configuration.
     *
     * The plugin name is derived from the namespace of the concrete class, so the class must
     * live in its frankenstyle namespace (e.g. \search_xxxx\engine).
     *
     * Search engine availability should be checked separately.
     *
     * @throws \coding_exception If the concrete class is not namespaced with a frankenstyle name
     * @return void
     */
    public function __construct() {

        $classname = get_class($this);
        if (strpos($classname, '\\') === false) {
            throw new \coding_exception('"' . $classname . '" class should specify its component namespace and it should be named engine.');
        } else if (strpos($classname, '_') === false) {
            throw new \coding_exception('"' . $classname . '" class namespace should be its frankenstyle name');
        }

        // This is search_xxxx config.
        $this->pluginname = substr($classname, 0, strpos($classname, '\\'));
        if ($config = get_config($this->pluginname)) {
            $this->config = $config;
        } else {
            // Must be fully qualified: this file is namespaced, so a bare "stdClass" would be
            // resolved as \core_search\stdClass and fail with a fatal error.
            $this->config = new \stdClass();
        }
    }

    /**
     * Returns a course instance checking internal caching.
     *
     * @param int $courseid
     * @return \stdClass
     */
    protected function get_course($courseid) {
        if (!empty($this->cachedcourses[$courseid])) {
            return $this->cachedcourses[$courseid];
        }

        // No need to clone, only read.
        $this->cachedcourses[$courseid] = get_course($courseid, false);

        return $this->cachedcourses[$courseid];
    }

    /**
     * Returns user data checking the internal static cache.
     *
     * Including here the minimum required user information as this may grow big.
     *
     * @param int $userid
     * @return \stdClass
     */
    public function get_user($userid) {
        global $DB;

        if (empty(self::$cachedusers[$userid])) {
            // Only the id plus the name fields are loaded, to keep the static cache small.
            $fields = get_all_user_name_fields(true);
            self::$cachedusers[$userid] = $DB->get_record('user', array('id' => $userid), 'id, ' . $fields);
        }
        return self::$cachedusers[$userid];
    }

    /**
     * Returns a search instance of the specified area checking internal caching.
     *
     * @param string $areaid Area id
     * @return \core_search\base|bool The search area, or false if it does not exist or is not enabled
     */
    protected function get_search_area($areaid) {

        if (isset($this->cachedareas[$areaid])) {
            // Already resolved: either a usable area, or false if we checked it before and it
            // was not available.
            return $this->cachedareas[$areaid];
        }

        // First result that matches this area.
        $area = \core_search\manager::get_search_area($areaid);
        if ($area === false || !$area->is_enabled()) {
            // The area does not exist, is not available any more or is not enabled. Marking it
            // as false so next time we don't need to check it again.
            $area = false;
        }

        $this->cachedareas[$areaid] = $area;

        return $area;
    }

    /**
     * Returns a document instance prepared to be rendered.
     *
     * @param \core_search\base $searcharea
     * @param array $docdata
     * @return \core_search\document
     */
    protected function to_document(\core_search\base $searcharea, $docdata) {

        list($componentname, $areaname) = \core_search\manager::extract_areaid_parts($docdata['areaid']);
        $doc = \core_search\document_factory::instance($docdata['itemid'], $componentname, $areaname, $this);
        $doc->set_data_from_engine($docdata);
        $doc->set_doc_url($searcharea->get_doc_url($doc));
        $doc->set_context_url($searcharea->get_context_url($doc));

        // Uses the internal caches to get required data needed to render the document later.
        $course = $this->get_course($doc->get('courseid'));
        $doc->set_extra('coursefullname', $course->fullname);

        if ($doc->is_set('userid')) {
            $user = $this->get_user($doc->get('userid'));
            $doc->set_extra('userfullname', fullname($user));
        }

        return $doc;
    }

    /**
     * Loop through given iterator of search documents
     * and have the search engine back end add them
     * to the index.
     *
     * Recognised $options keys, as read by this method:
     * - indexfiles: if not empty, files are attached to each document and file indexing is requested
     * - lastindexedtime: if set and equal to 0, every document is flagged as new
     * - stopat: if not empty, time at which indexing should stop (partial indexing)
     * - progress: if set, object whose output() method receives periodic progress messages
     *
     * @param \Iterator $iterator the iterator of documents to index
     * @param \core_search\base $searcharea the area for the documents to index
     * @param array $options document indexing options
     * @return array Processed document counts: number of records, number of documents added,
     *               number of documents ignored, modified time of the last document processed,
     *               and whether indexing stopped early (partial)
     */
    public function add_documents($iterator, $searcharea, $options) {
        $numrecords = 0;
        $numdocs = 0;
        $numdocsignored = 0;
        $lastindexeddoc = 0;
        $firstindexeddoc = 0;
        $partial = false;
        $lastprogress = manager::get_current_time();

        // Loop invariant. Read once, and tolerate callers that leave the option out.
        $fileindexing = !empty($options['indexfiles']);

        foreach ($iterator as $document) {
            // Stop if we have exceeded the time limit (and there are still more items). Always
            // do at least one second's worth of documents otherwise it will never make progress.
            if ($lastindexeddoc !== $firstindexeddoc &&
                    !empty($options['stopat']) && manager::get_current_time() >= $options['stopat']) {
                $partial = true;
                break;
            }

            if (!$document instanceof \core_search\document) {
                continue;
            }

            if (isset($options['lastindexedtime']) && $options['lastindexedtime'] == 0) {
                // If we have never indexed this area before, it must be new.
                $document->set_is_new(true);
            }

            if ($fileindexing) {
                // Attach files if we are indexing.
                $searcharea->attach_files($document);
            }

            if ($this->add_document($document, $fileindexing)) {
                $numdocs++;
            } else {
                $numdocsignored++;
            }

            $lastindexeddoc = $document->get('modified');
            if (!$firstindexeddoc) {
                $firstindexeddoc = $lastindexeddoc;
            }
            $numrecords++;

            // If indexing the area takes a long time, periodically output progress information.
            if (isset($options['progress'])) {
                $now = manager::get_current_time();
                if ($now - $lastprogress >= manager::DISPLAY_INDEXING_PROGRESS_EVERY) {
                    $lastprogress = $now;
                    // The first date format is the same used in cron_trace_time_and_memory().
                    $options['progress']->output(date('H:i:s', $now) . ': Done to ' . userdate(
                            $lastindexeddoc, get_string('strftimedatetimeshort', 'langconfig')), 1);
                }
            }
        }

        return array($numrecords, $numdocs, $numdocsignored, $lastindexeddoc, $partial);
    }

    /**
     * Returns the plugin name.
     *
     * @return string Frankenstyle plugin name.
     */
    public function get_plugin_name() {
        return $this->pluginname;
    }

    /**
     * Gets the document class used by this search engine.
     *
     * Search engines can overwrite \core_search\document with \search_ENGINENAME\document class.
     *
     * Looks for a document class in the current search engine namespace, falling back to \core_search\document.
     *
     * Publicly available because search areas do not have access to the engine details,
     * \core_search\document_factory accesses this function.
     *
     * @return string
     */
    public function get_document_classname() {
        $classname = $this->pluginname . '\\document';
        if (!class_exists($classname)) {
            $classname = '\\core_search\\document';
        }
        return $classname;
    }

    /**
     * Run any pre-indexing operations.
     *
     * Should be overwritten if the search engine needs to do any pre index preparation.
     *
     * @param bool $fullindex True if a full index will be performed
     * @return void
     */
    public function index_starting($fullindex = false) {
        // Nothing by default.
    }

    /**
     * Run any post indexing operations.
     *
     * Should be overwritten if the search engine needs to do any post index cleanup.
     *
     * @param int $numdocs The number of documents that were added to the index
     * @param bool $fullindex True if a full index was performed
     * @return void
     */
    public function index_complete($numdocs = 0, $fullindex = false) {
        // Nothing by default.
    }

    /**
     * Do anything that may need to be done before an area is indexed.
     *
     * @param \core_search\base $searcharea The search area that is about to be indexed
     * @param bool $fullindex True if a full index is being performed
     * @return void
     */
    public function area_index_starting($searcharea, $fullindex = false) {
        // Nothing by default.
    }

    /**
     * Do any area cleanup needed, and do anything to confirm contents.
     *
     * Return false to prevent the search area completed time and stats from being updated.
     *
     * @param \core_search\base $searcharea The search area that was complete
     * @param int $numdocs The number of documents that were added to the index
     * @param bool $fullindex True if a full index is being performed
     * @return bool True means that data is considered indexed
     */
    public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
        return true;
    }

    /**
     * Optimizes the search engine.
     *
     * Should be overwritten if the search engine can optimize its contents.
     *
     * @return void
     */
    public function optimize() {
        // Nothing by default.
    }

    /**
     * Does the system satisfy all the requirements.
     *
     * Should be overwritten if the search engine has any system dependencies
     * that needs to be checked.
     *
     * @return bool
     */
    public function is_installed() {
        return true;
    }

    /**
     * Returns any error reported by the search engine when executing the provided query.
     *
     * It should be called from static::execute_query when an exception is triggered.
     *
     * @return string
     */
    public function get_query_error() {
        return $this->queryerror;
    }

    /**
     * Returns the total number of documents available for the most recent call to execute_query.
     *
     * This can be an estimate, but should get more accurate the higher the limit passed to execute_query is.
     * To do that, the engine can use (actual result returned count + count of unchecked documents), or
     * (total possible docs - docs that have been checked and rejected).
     *
     * Engine can limit to manager::MAX_RESULTS if there is cost to determining more.
     * If this cannot be computed in a reasonable way, manager::MAX_RESULTS may be returned.
     *
     * @return int
     */
    abstract public function get_query_total_count();

    /**
     * Return true if file indexing is supported and enabled. False otherwise.
     *
     * @return bool
     */
    public function file_indexing_enabled() {
        return false;
    }

    /**
     * Clears the current query error value.
     *
     * @return void
     */
    public function clear_query_error() {
        $this->queryerror = null;
    }

    /**
     * Is the server ready to use?
     *
     * This should also check that the search engine configuration is ok.
     *
     * If the function $this->should_skip_schema_check() returns true, then this function may leave
     * out time-consuming checks that the schema is valid. (This allows for improved performance on
     * critical pages such as the main search form.)
     *
     * @return true|string Returns true if all good or an error string.
     */
    abstract public function is_server_ready();

    /**
     * Tells the search engine to skip any time-consuming checks that it might do as part of the
     * is_server_ready function, and only carry out a basic check that it can contact the server.
     *
     * This setting is not remembered and applies only to the current request.
     *
     * @since Moodle 3.5
     * @param bool $skip True to skip the checks, false to start checking again
     */
    public function skip_schema_check($skip = true) {
        $this->skipschemacheck = $skip;
    }

    /**
     * For use by subclasses. The engine can call this inside is_server_ready to check whether it
     * should skip time-consuming schema checks.
     *
     * @since Moodle 3.5
     * @return bool True if schema checks should be skipped
     */
    protected function should_skip_schema_check() {
        return $this->skipschemacheck;
    }

    /**
     * Adds a document to the search engine.
     *
     * @param document $document
     * @param bool $fileindexing True if file indexing is to be used
     * @return bool False if the file was skipped or failed, true on success
     */
    abstract public function add_document($document, $fileindexing = false);

    /**
     * Executes the query on the engine.
     *
     * Implementations of this function should check user context array to limit the results to contexts where the
     * user has access. They should also limit the owneruserid field to manager::NO_OWNER_ID or the current user's id.
     * Engines must use area->check_access() to confirm user access.
     *
     * Engines should reasonably attempt to fill up to limit with valid results if they are available.
     *
     * The $filters object may include the following fields (optional except q):
     * - q: value of main search field; results should include this text
     * - title: if included, title must match this search
     * - areaids: array of search area id strings (only these areas will be searched)
     * - courseids: array of course ids (only these courses will be searched)
     * - groupids: array of group ids (only results specifically from these groupids will be
     *   searched) - this option will be ignored if the search engine doesn't support groups
     *
     * The $accessinfo parameter has two different values (for historical compatibility). If the
     * engine returns false to supports_group_filtering then it is an array of user contexts, or
     * true if the user can access all contexts. (This parameter used to be called $usercontexts.)
     * If the engine returns true to supports_group_filtering then it will be an object containing
     * these fields:
     * - everything (true if admin is searching with no restrictions)
     * - usercontexts (same as above)
     * - separategroupscontexts (array of context ids where separate groups are used)
     * - visiblegroupscontextsareas (array of subset of those where some areas use visible groups)
     * - usergroups (array of relevant group ids that user belongs to)
     *
     * The engine should apply group restrictions to those contexts listed in the
     * 'separategroupscontexts' array. In these contexts, it should only include results if the
     * groupid is not set, or if the groupid matches one of the values in the 'usergroups' array, or
     * if the search area is one of those listed in 'visiblegroupscontextsareas' for that context.
     *
     * @param \stdClass $filters Query and filters to apply.
     * @param \stdClass|array|bool $accessinfo Information about the contexts the user can access
     * @param int $limit The maximum number of results to return. If empty, limit to manager::MAX_RESULTS.
     * @return \core_search\document[]|bool Results or false if no results
     */
    abstract public function execute_query($filters, $accessinfo, $limit = 0);

    /**
     * Delete all documents.
     *
     * @param string $areaid To filter by area
     * @return void
     */
    abstract public function delete($areaid = null);

    /**
     * Checks that the schema is the latest version. If the version stored in config does not match
     * the current, this function will attempt to upgrade the schema.
     *
     * @return bool|string True if schema is OK, a string if user needs to take action
     */
    public function check_latest_schema() {
        // A missing or empty stored version means no schema version has been recorded yet.
        if (empty($this->config->schemaversion)) {
            $currentversion = 0;
        } else {
            $currentversion = $this->config->schemaversion;
        }

        if ($currentversion < document::SCHEMA_VERSION) {
            return $this->update_schema((int)$currentversion, (int)document::SCHEMA_VERSION);
        }

        return true;
    }

    /**
     * Usually called by the engine; marks that the schema has been updated.
     *
     * @param int $version Records the schema version now applied
     */
    public function record_applied_schema_version($version) {
        set_config('schemaversion', $version, $this->pluginname);
    }

    /**
     * Requests the search engine to upgrade the schema. The engine should update the schema if
     * possible/necessary, and should ensure that record_applied_schema_version is called as a
     * result.
     *
     * If it is not possible to upgrade the schema at the moment, it can do nothing and return; the
     * function will be called again next time search is initialised.
     *
     * The default implementation just returns, with a DEBUG_DEVELOPER warning.
     *
     * @param int $oldversion Old schema version
     * @param int $newversion New schema version
     * @return bool|string True if schema is updated successfully, a string if it needs updating manually
     */
    protected function update_schema($oldversion, $newversion) {
        debugging('Unable to update search engine schema: ' . $this->pluginname, DEBUG_DEVELOPER);
        return get_string('schemanotupdated', 'search');
    }

    /**
     * Checks if this search engine supports groups.
     *
     * Note that returning true to this function causes the parameters to execute_query to be
     * passed differently!
     *
     * In order to implement groups and return true to this function, the search engine should:
     *
     * 1. Handle the fields ->separategroupscontexts and ->usergroups in the $accessinfo parameter
     *    to execute_query (ideally, using these to automatically restrict search results).
     * 2. Support the optional groupids parameter in the $filter parameter for execute_query to
     *    restrict results to only those where the stored groupid matches the given value.
     *
     * @return bool True if this engine supports searching by group id field
     */
    public function supports_group_filtering() {
        return false;
    }

    /**
     * Obtain a list of results orders (and names for them) that are supported by this
     * search engine in the given context.
     *
     * By default, engines sort by relevance only.
     *
     * @param \context $context Context that the user requested search from
     * @return array Array from order name => display text
     */
    public function get_supported_orders(\context $context) {
        return ['relevance' => get_string('order_relevance', 'search')];
    }
}