Merge branch 'MDL-62899-search-icons-master' of https://github.com/dmitriim/moodle
[moodle.git] / search / classes / engine.php
CommitLineData
db48207e
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
18 * Base class for search engines.
19 *
20 * All search engines must extend this class.
21 *
22 * @package core_search
23 * @copyright 2015 Daniel Neis
24 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25 */
26
27namespace core_search;
28
29defined('MOODLE_INTERNAL') || die();
30
31/**
32 * Base class for search engines.
33 *
34 * All search engines must extend this class.
35 *
36 * @package core_search
37 * @copyright 2015 Daniel Neis
38 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
39 */
40abstract class engine {
41
42 /**
43 * The search engine configuration.
44 *
4359ef18 45 * @var \stdClass
db48207e
DM
46 */
47 protected $config = null;
48
49 /**
50 * Last executed query error, if there was any.
51 * @var string
52 */
53 protected $queryerror = null;
54
55 /**
56 * @var array Internal cache.
57 */
58 protected $cachedareas = array();
59
60 /**
61 * @var array Internal cache.
62 */
63 protected $cachedcourses = array();
64
65 /**
66 * User data required to show their fullnames. Indexed by userid.
67 *
4359ef18 68 * @var \stdClass[]
db48207e
DM
69 */
70 protected static $cachedusers = array();
71
72 /**
73 * @var string Frankenstyle plugin name.
74 */
75 protected $pluginname = null;
76
73fd5666 77 /**
78 * @var bool If true, should skip schema validity check when checking the search engine is ready
79 */
80 protected $skipschemacheck = false;
81
db48207e
DM
82 /**
83 * Initialises the search engine configuration.
84 *
85 * Search engine availability should be checked separately.
86 *
db48207e
DM
87 * @return void
88 */
89 public function __construct() {
90
91 $classname = get_class($this);
92 if (strpos($classname, '\\') === false) {
93 throw new \coding_exception('"' . $classname . '" class should specify its component namespace and it should be named engine.');
94 } else if (strpos($classname, '_') === false) {
95 throw new \coding_exception('"' . $classname . '" class namespace should be its frankenstyle name');
96 }
97
98 // This is search_xxxx config.
99 $this->pluginname = substr($classname, 0, strpos($classname, '\\'));
100 if ($config = get_config($this->pluginname)) {
101 $this->config = $config;
102 } else {
103 $this->config = new stdClass();
104 }
105 }
106
107 /**
108 * Returns a course instance checking internal caching.
109 *
110 * @param int $courseid
111 * @return stdClass
112 */
113 protected function get_course($courseid) {
114 if (!empty($this->cachedcourses[$courseid])) {
115 return $this->cachedcourses[$courseid];
116 }
117
118 // No need to clone, only read.
119 $this->cachedcourses[$courseid] = get_course($courseid, false);
120
121 return $this->cachedcourses[$courseid];
122 }
123
124 /**
125 * Returns user data checking the internal static cache.
126 *
127 * Including here the minimum required user information as this may grow big.
128 *
129 * @param int $userid
130 * @return stdClass
131 */
132 public function get_user($userid) {
133 global $DB;
134
135 if (empty(self::$cachedusers[$userid])) {
136 $fields = get_all_user_name_fields(true);
137 self::$cachedusers[$userid] = $DB->get_record('user', array('id' => $userid), 'id, ' . $fields);
138 }
139 return self::$cachedusers[$userid];
140 }
141
26b86f31
DM
142 /**
143 * Clears the users cache.
144 *
145 * @return null
146 */
147 public static function clear_users_cache() {
148 self::$cachedusers = [];
149 }
150
db48207e
DM
151 /**
152 * Returns a search instance of the specified area checking internal caching.
153 *
154 * @param string $areaid Area id
0bd8383a 155 * @return \core_search\base
db48207e
DM
156 */
157 protected function get_search_area($areaid) {
158
159 if (isset($this->cachedareas[$areaid]) && $this->cachedareas[$areaid] === false) {
160 // We already checked that area and it is not available.
161 return false;
162 }
163
164 if (!isset($this->cachedareas[$areaid])) {
165 // First result that matches this area.
166
167 $this->cachedareas[$areaid] = \core_search\manager::get_search_area($areaid);
168 if ($this->cachedareas[$areaid] === false) {
169 // The area does not exist or it is not available any more.
170
171 $this->cachedareas[$areaid] = false;
172 return false;
173 }
174
175 if (!$this->cachedareas[$areaid]->is_enabled()) {
176 // We skip the area if it is not enabled.
177
178 // Marking it as false so next time we don' need to check it again.
179 $this->cachedareas[$areaid] = false;
180
181 return false;
182 }
183 }
184
185 return $this->cachedareas[$areaid];
186 }
187
188 /**
189 * Returns a document instance prepared to be rendered.
190 *
0bd8383a 191 * @param \core_search\base $searcharea
db48207e
DM
192 * @param array $docdata
193 * @return \core_search\document
194 */
0bd8383a 195 protected function to_document(\core_search\base $searcharea, $docdata) {
db48207e
DM
196
197 list($componentname, $areaname) = \core_search\manager::extract_areaid_parts($docdata['areaid']);
198 $doc = \core_search\document_factory::instance($docdata['itemid'], $componentname, $areaname, $this);
199 $doc->set_data_from_engine($docdata);
200 $doc->set_doc_url($searcharea->get_doc_url($doc));
201 $doc->set_context_url($searcharea->get_context_url($doc));
66f145ef 202 $doc->set_doc_icon($searcharea->get_doc_icon($doc));
db48207e
DM
203
204 // Uses the internal caches to get required data needed to render the document later.
205 $course = $this->get_course($doc->get('courseid'));
206 $doc->set_extra('coursefullname', $course->fullname);
207
208 if ($doc->is_set('userid')) {
209 $user = $this->get_user($doc->get('userid'));
210 $doc->set_extra('userfullname', fullname($user));
211 }
212
213 return $doc;
214 }
215
0a9a10f0
MP
216 /**
217 * Loop through given iterator of search documents
218 * and and have the search engine back end add them
219 * to the index.
220 *
221 * @param iterator $iterator the iterator of documents to index
222 * @param searcharea $searcharea the area for the documents to index
223 * @param array $options document indexing options
224 * @return array Processed document counts
225 */
226 public function add_documents($iterator, $searcharea, $options) {
227 $numrecords = 0;
228 $numdocs = 0;
229 $numdocsignored = 0;
230 $lastindexeddoc = 0;
67d64795 231 $firstindexeddoc = 0;
232 $partial = false;
1b8cf12a 233 $lastprogress = manager::get_current_time();
0a9a10f0
MP
234
235 foreach ($iterator as $document) {
67d64795 236 // Stop if we have exceeded the time limit (and there are still more items). Always
237 // do at least one second's worth of documents otherwise it will never make progress.
238 if ($lastindexeddoc !== $firstindexeddoc &&
82735dec 239 !empty($options['stopat']) && manager::get_current_time() >= $options['stopat']) {
67d64795 240 $partial = true;
241 break;
242 }
243
0a9a10f0
MP
244 if (!$document instanceof \core_search\document) {
245 continue;
246 }
247
4ba11aa9 248 if (isset($options['lastindexedtime']) && $options['lastindexedtime'] == 0) {
0a9a10f0
MP
249 // If we have never indexed this area before, it must be new.
250 $document->set_is_new(true);
251 }
252
253 if ($options['indexfiles']) {
254 // Attach files if we are indexing.
255 $searcharea->attach_files($document);
256 }
257
258 if ($this->add_document($document, $options['indexfiles'])) {
259 $numdocs++;
260 } else {
261 $numdocsignored++;
262 }
263
264 $lastindexeddoc = $document->get('modified');
67d64795 265 if (!$firstindexeddoc) {
266 $firstindexeddoc = $lastindexeddoc;
267 }
0a9a10f0 268 $numrecords++;
1b8cf12a 269
270 // If indexing the area takes a long time, periodically output progress information.
271 if (isset($options['progress'])) {
272 $now = manager::get_current_time();
273 if ($now - $lastprogress >= manager::DISPLAY_INDEXING_PROGRESS_EVERY) {
274 $lastprogress = $now;
275 // The first date format is the same used in cron_trace_time_and_memory().
276 $options['progress']->output(date('H:i:s', $now) . ': Done to ' . userdate(
277 $lastindexeddoc, get_string('strftimedatetimeshort', 'langconfig')), 1);
278 }
279 }
0a9a10f0
MP
280 }
281
67d64795 282 return array($numrecords, $numdocs, $numdocsignored, $lastindexeddoc, $partial);
0a9a10f0
MP
283 }
284
db48207e
DM
285 /**
286 * Returns the plugin name.
287 *
288 * @return string Frankenstyle plugin name.
289 */
290 public function get_plugin_name() {
291 return $this->pluginname;
292 }
293
294 /**
295 * Gets the document class used by this search engine.
296 *
297 * Search engines can overwrite \core_search\document with \search_ENGINENAME\document class.
298 *
299 * Looks for a document class in the current search engine namespace, falling back to \core_search\document.
300
301 * Publicly available because search areas do not have access to the engine details,
302 * \core_search\document_factory accesses this function.
303 *
304 * @return string
305 */
306 public function get_document_classname() {
307 $classname = $this->pluginname . '\\document';
308 if (!class_exists($classname)) {
309 $classname = '\\core_search\\document';
310 }
311 return $classname;
312 }
313
075fa912
EM
314 /**
315 * Run any pre-indexing operations.
316 *
317 * Should be overwritten if the search engine needs to do any pre index preparation.
318 *
319 * @param bool $fullindex True if a full index will be performed
320 * @return void
321 */
322 public function index_starting($fullindex = false) {
323 // Nothing by default.
324 }
325
bf2235bb
EM
326 /**
327 * Run any post indexing operations.
328 *
329 * Should be overwritten if the search engine needs to do any post index cleanup.
330 *
331 * @param int $numdocs The number of documents that were added to the index
332 * @param bool $fullindex True if a full index was performed
333 * @return void
334 */
335 public function index_complete($numdocs = 0, $fullindex = false) {
336 // Nothing by default.
337 }
338
075fa912
EM
339 /**
340 * Do anything that may need to be done before an area is indexed.
341 *
0bd8383a 342 * @param \core_search\base $searcharea The search area that was complete
075fa912
EM
343 * @param bool $fullindex True if a full index is being performed
344 * @return void
345 */
346 public function area_index_starting($searcharea, $fullindex = false) {
347 // Nothing by default.
348 }
349
350 /**
351 * Do any area cleanup needed, and do anything to confirm contents.
352 *
353 * Return false to prevent the search area completed time and stats from being updated.
354 *
0bd8383a 355 * @param \core_search\base $searcharea The search area that was complete
075fa912
EM
356 * @param int $numdocs The number of documents that were added to the index
357 * @param bool $fullindex True if a full index is being performed
358 * @return bool True means that data is considered indexed
359 */
360 public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
361 return true;
362 }
363
db48207e
DM
364 /**
365 * Optimizes the search engine.
366 *
367 * Should be overwritten if the search engine can optimize its contents.
368 *
369 * @return void
370 */
371 public function optimize() {
372 // Nothing by default.
373 }
374
375 /**
376 * Does the system satisfy all the requirements.
377 *
378 * Should be overwritten if the search engine has any system dependencies
379 * that needs to be checked.
380 *
381 * @return bool
382 */
383 public function is_installed() {
384 return true;
385 }
386
387 /**
388 * Returns any error reported by the search engine when executing the provided query.
389 *
390 * It should be called from static::execute_query when an exception is triggered.
391 *
392 * @return string
393 */
394 public function get_query_error() {
395 return $this->queryerror;
396 }
397
053118a1
EM
398 /**
399 * Returns the total number of documents available for the most recent call to execute_query.
400 *
401 * This can be an estimate, but should get more accurate the higher the limited passed to execute_query is.
402 * To do that, the engine can use (actual result returned count + count of unchecked documents), or
403 * (total possible docs - docs that have been checked and rejected).
404 *
405 * Engine can limit to manager::MAX_RESULTS if there is cost to determining more.
406 * If this cannot be computed in a reasonable way, manager::MAX_RESULTS may be returned.
407 *
408 * @return int
409 */
410 abstract public function get_query_total_count();
411
091973db
EM
412 /**
413 * Return true if file indexing is supported and enabled. False otherwise.
414 *
415 * @return bool
416 */
417 public function file_indexing_enabled() {
418 return false;
419 }
420
db48207e
DM
421 /**
422 * Clears the current query error value.
423 *
424 * @return void
425 */
426 public function clear_query_error() {
427 $this->queryerror = null;
428 }
429
430 /**
431 * Is the server ready to use?
432 *
433 * This should also check that the search engine configuration is ok.
434 *
73fd5666 435 * If the function $this->should_skip_schema_check() returns true, then this function may leave
436 * out time-consuming checks that the schema is valid. (This allows for improved performance on
437 * critical pages such as the main search form.)
438 *
db48207e
DM
439 * @return true|string Returns true if all good or an error string.
440 */
441 abstract function is_server_ready();
442
73fd5666 443 /**
444 * Tells the search engine to skip any time-consuming checks that it might do as part of the
445 * is_server_ready function, and only carry out a basic check that it can contact the server.
446 *
447 * This setting is not remembered and applies only to the current request.
448 *
449 * @since Moodle 3.5
450 * @param bool $skip True to skip the checks, false to start checking again
451 */
452 public function skip_schema_check($skip = true) {
453 $this->skipschemacheck = $skip;
454 }
455
456 /**
457 * For use by subclasses. The engine can call this inside is_server_ready to check whether it
458 * should skip time-consuming schema checks.
459 *
460 * @since Moodle 3.5
461 * @return bool True if schema checks should be skipped
462 */
463 protected function should_skip_schema_check() {
464 return $this->skipschemacheck;
465 }
466
db48207e
DM
467 /**
468 * Adds a document to the search engine.
469 *
091973db
EM
470 * @param document $document
471 * @param bool $fileindexing True if file indexing is to be used
472 * @return bool False if the file was skipped or failed, true on success
db48207e 473 */
091973db 474 abstract function add_document($document, $fileindexing = false);
db48207e 475
db48207e
DM
476 /**
477 * Executes the query on the engine.
478 *
69d66020 479 * Implementations of this function should check user context array to limit the results to contexts where the
f6b425e2 480 * user have access. They should also limit the owneruserid field to manger::NO_OWNER_ID or the current user's id.
053118a1
EM
481 * Engines must use area->check_access() to confirm user access.
482 *
483 * Engines should reasonably attempt to fill up to limit with valid results if they are available.
69d66020 484 *
4359ef18 485 * The $filters object may include the following fields (optional except q):
486 * - q: value of main search field; results should include this text
487 * - title: if included, title must match this search
488 * - areaids: array of search area id strings (only these areas will be searched)
489 * - courseids: array of course ids (only these courses will be searched)
490 * - groupids: array of group ids (only results specifically from these groupids will be
491 * searched) - this option will be ignored if the search engine doesn't support groups
492 *
493 * The $accessinfo parameter has two different values (for historical compatibility). If the
494 * engine returns false to supports_group_filtering then it is an array of user contexts, or
495 * true if the user can access all contexts. (This parameter used to be called $usercontexts.)
496 * If the engine returns true to supports_group_filtering then it will be an object containing
497 * these fields:
498 * - everything (true if admin is searching with no restrictions)
499 * - usercontexts (same as above)
500 * - separategroupscontexts (array of context ids where separate groups are used)
501 * - visiblegroupscontextsareas (array of subset of those where some areas use visible groups)
502 * - usergroups (array of relevant group ids that user belongs to)
503 *
504 * The engine should apply group restrictions to those contexts listed in the
505 * 'separategroupscontexts' array. In these contexts, it shouled only include results if the
506 * groupid is not set, or if the groupid matches one of the values in USER_GROUPS array, or
507 * if the search area is one of those listed in 'visiblegroupscontextsareas' for that context.
508 *
509 * @param \stdClass $filters Query and filters to apply.
510 * @param \stdClass $accessinfo Information about the contexts the user can access
053118a1 511 * @param int $limit The maximum number of results to return. If empty, limit to manager::MAX_RESULTS.
db48207e
DM
512 * @return \core_search\document[] Results or false if no results
513 */
4359ef18 514 public abstract function execute_query($filters, $accessinfo, $limit = 0);
db48207e
DM
515
516 /**
517 * Delete all documents.
518 *
519 * @param string $areaid To filter by area
520 * @return void
521 */
522 abstract function delete($areaid = null);
4359ef18 523
524 /**
525 * Checks that the schema is the latest version. If the version stored in config does not match
526 * the current, this function will attempt to upgrade the schema.
527 *
528 * @return bool|string True if schema is OK, a string if user needs to take action
529 */
530 public function check_latest_schema() {
531 if (empty($this->config->schemaversion)) {
532 $currentversion = 0;
533 } else {
534 $currentversion = $this->config->schemaversion;
535 }
536 if ($currentversion < document::SCHEMA_VERSION) {
537 return $this->update_schema((int)$currentversion, (int)document::SCHEMA_VERSION);
538 } else {
539 return true;
540 }
541 }
542
543 /**
544 * Usually called by the engine; marks that the schema has been updated.
545 *
546 * @param int $version Records the schema version now applied
547 */
548 public function record_applied_schema_version($version) {
549 set_config('schemaversion', $version, $this->pluginname);
550 }
551
552 /**
553 * Requests the search engine to upgrade the schema. The engine should update the schema if
554 * possible/necessary, and should ensure that record_applied_schema_version is called as a
555 * result.
556 *
557 * If it is not possible to upgrade the schema at the moment, it can do nothing and return; the
558 * function will be called again next time search is initialised.
559 *
560 * The default implementation just returns, with a DEBUG_DEVELOPER warning.
561 *
562 * @param int $oldversion Old schema version
563 * @param int $newversion New schema version
564 * @return bool|string True if schema is updated successfully, a string if it needs updating manually
565 */
566 protected function update_schema($oldversion, $newversion) {
567 debugging('Unable to update search engine schema: ' . $this->pluginname, DEBUG_DEVELOPER);
568 return get_string('schemanotupdated', 'search');
569 }
570
571 /**
572 * Checks if this search engine supports groups.
573 *
574 * Note that returning true to this function causes the parameters to execute_query to be
575 * passed differently!
576 *
577 * In order to implement groups and return true to this function, the search engine should:
578 *
579 * 1. Handle the fields ->separategroupscontexts and ->usergroups in the $accessinfo parameter
580 * to execute_query (ideally, using these to automatically restrict search results).
581 * 2. Support the optional groupids parameter in the $filter parameter for execute_query to
582 * restrict results to only those where the stored groupid matches the given value.
583 *
584 * @return bool True if this engine supports searching by group id field
585 */
586 public function supports_group_filtering() {
587 return false;
588 }
fc440796 589
590 /**
591 * Obtain a list of results orders (and names for them) that are supported by this
592 * search engine in the given context.
593 *
594 * By default, engines sort by relevance only.
595 *
596 * @param \context $context Context that the user requested search from
597 * @return array Array from order name => display text
598 */
599 public function get_supported_orders(\context $context) {
600 return ['relevance' => get_string('order_relevance', 'search')];
601 }
222a97ce 602
603 /**
604 * Checks if the search engine supports searching by user.
605 *
606 * If it returns true to this function, the search engine should support the 'userids' option
607 * in the $filters value passed to execute_query(), returning only items where the userid in
608 * the search document matches one of those user ids.
609 *
610 * @return bool True if the search engine supports searching by user
611 */
612 public function supports_users() {
613 return false;
614 }
db48207e 615}