MDL-68729 Search: Allow query on one server while indexing another
[moodle.git] / search / engine / solr / classes / engine.php
CommitLineData
95c6aeaf
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
18 * Solr engine.
19 *
20 * @package search_solr
21 * @copyright 2015 Daniel Neis Araujo
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace search_solr;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
30 * Solr engine.
31 *
32 * @package search_solr
33 * @copyright 2015 Daniel Neis Araujo
34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36class engine extends \core_search\engine {
37
38 /**
39 * @var string The date format used by solr.
40 */
41 const DATE_FORMAT = 'Y-m-d\TH:i:s\Z';
42
43 /**
44 * @var int Commit documents interval (number of milliseconds).
45 */
46 const AUTOCOMMIT_WITHIN = 15000;
47
053118a1
EM
48 /**
49 * The maximum number of results to fetch at a time.
50 */
51 const QUERY_SIZE = 120;
52
95c6aeaf 53 /**
4894840d 54 * Highlighting fragsize. Slightly larger than output size (500) to allow for ... appending.
95c6aeaf 55 */
4894840d
EM
56 const FRAG_SIZE = 510;
57
58 /**
59 * Marker for the start of a highlight.
60 */
61 const HIGHLIGHT_START = '@@HI_S@@';
62
63 /**
64 * Marker for the end of a highlight.
65 */
66 const HIGHLIGHT_END = '@@HI_E@@';
95c6aeaf 67
fc440796 68 /** @var float Boost value for matching course in location-ordered searches */
69 const COURSE_BOOST = 1;
70
71 /** @var float Boost value for matching context (in addition to course boost) */
72 const CONTEXT_BOOST = 0.5;
73
95c6aeaf
DM
74 /**
75 * @var \SolrClient
76 */
77 protected $client = null;
78
7a4a0bc8
EM
79 /**
80 * @var bool True if we should reuse SolrClients, false if not.
81 */
82 protected $cacheclient = true;
83
5dc4624c
EM
84 /**
85 * @var \curl Direct curl object.
86 */
87 protected $curl = null;
88
95c6aeaf
DM
89 /**
90 * @var array Fields that can be highlighted.
91 */
4894840d 92 protected $highlightfields = array('title', 'content', 'description1', 'description2');
95c6aeaf 93
053118a1
EM
94 /**
95 * @var int Number of total docs reported by Solr for the last query.
96 */
97 protected $totalenginedocs = 0;
98
99 /**
100 * @var int Number of docs we have processed for the last query.
101 */
102 protected $processeddocs = 0;
103
104 /**
105 * @var int Number of docs that have been skipped while processing the last query.
106 */
107 protected $skippeddocs = 0;
108
895e0059
DM
109 /**
110 * Solr server major version.
111 *
112 * @var int
113 */
114 protected $solrmajorversion = null;
115
7a4a0bc8
EM
/**
 * Initialises the search engine configuration.
 *
 * Also inspects the installed curl version and turns off Solr client reuse
 * when it is a 7.35.x release.
 *
 * @param bool $alternateconfiguration If true, use alternate configuration settings
 * @return void
 */
public function __construct(bool $alternateconfiguration = false) {
    parent::__construct($alternateconfiguration);

    $curlinfo = curl_version();
    if (!isset($curlinfo['version'])) {
        // No version information available, keep the default (client reuse enabled).
        return;
    }

    // There is a flaw with curl 7.35.0 that causes problems with client reuse.
    if (stripos($curlinfo['version'], '7.35.') === 0) {
        $this->cacheclient = false;
    }
}
131
95c6aeaf
DM
/**
 * Prepares a Solr query, applies filters and executes it returning its results.
 *
 * The first request asks for a small page of rows; further pages of QUERY_SIZE rows
 * are requested until $limit documents have been collected or Solr has no more
 * documents left. The counters behind get_query_total_count() are reset and updated
 * as part of this call.
 *
 * @throws \core_search\engine_exception
 * @param \stdClass $filters Containing query and filters.
 * @param \stdClass $accessinfo Information about areas user can access.
 * @param int $limit The maximum number of results to return. Empty means \core_search\manager::MAX_RESULTS.
 * @return \core_search\document[] Results, or an empty array if there are none
 */
public function execute_query($filters, $accessinfo, $limit = 0) {
    if (empty($limit)) {
        $limit = \core_search\manager::MAX_RESULTS;
    }

    // If there is any problem we trigger the exception as soon as possible.
    // The returned client is not needed here; later calls fetch it again.
    $this->get_search_client();

    // Create the query object.
    $query = $this->create_user_query($filters, $accessinfo);

    // If the query cannot have results, return none.
    if (!$query) {
        return [];
    }

    // We expect good match rates, so for our first get, we will get a small number of records.
    // This significantly speeds solr response time for first few pages.
    $query->setRows(min($limit * 3, static::QUERY_SIZE));
    $response = $this->get_query_response($query);

    // Get count data out of the response, and reset our counters.
    list($included, $found) = $this->get_response_counts($response);
    $this->totalenginedocs = $found;
    $this->processeddocs = 0;
    $this->skippeddocs = 0;
    if ($included == 0 || $this->totalenginedocs == 0) {
        // No results.
        return array();
    }

    // Get valid documents out of the response.
    $results = $this->process_response($response, $limit);

    // We have processed all the docs in the response at this point.
    $this->processeddocs += $included;

    // If we haven't reached the limit, and there are more docs left in Solr, lets keep trying.
    while (count($results) < $limit && ($this->totalenginedocs - $this->processeddocs) > 0) {
        // Offset the start of the query, and since we are making another call, get more per call.
        $query->setStart($this->processeddocs);
        $query->setRows(static::QUERY_SIZE);

        $response = $this->get_query_response($query);
        list($included, $found) = $this->get_response_counts($response);
        if ($included == 0 || $found == 0) {
            // No new results were found. Found being empty would be weird, so we will just return.
            return $results;
        }
        $this->totalenginedocs = $found;

        // Get the new response docs, limiting to remaining we need, then add it to the end of the results array.
        $newdocs = $this->process_response($response, $limit - count($results));
        $results = array_merge($results, $newdocs);

        // Add to our processed docs count.
        $this->processeddocs += $included;
    }

    return $results;
}
204
/**
 * Runs a query against the search client and returns the response object.
 *
 * On a Solr client or server exception the message is reported through debugging()
 * and stored in $this->queryerror.
 *
 * @param \SolrQuery $query Solr query object.
 * @return \SolrObject|false Response document or false on error.
 */
protected function get_query_response($query) {
    try {
        $queryresponse = $this->get_search_client()->query($query);
        return $queryresponse->getResponse();
    } catch (\SolrClientException $ex) {
        $message = $ex->getMessage();
    } catch (\SolrServerException $ex) {
        $message = $ex->getMessage();
    }

    // Only reached when one of the exceptions above was caught.
    debugging('Error executing the provided query: ' . $message, DEBUG_DEVELOPER);
    $this->queryerror = $message;

    return false;
}
224
/**
 * Returns the total number of documents available for the most recent call to execute_query.
 *
 * @return int
 */
public function get_query_total_count() {
    // Documents we determined to be bad (skipped) do not count towards the total.
    $skipped = $this->skippeddocs;

    return $this->totalenginedocs - $skipped;
}
234
/**
 * Returns count information for a provided response. Will return 0, 0 for invalid or empty responses.
 *
 * @param \SolrObject|false $response The response from Solr, as returned by get_query_response().
 * @return array A two part array. First how many response docs are in the response.
 *               Second, how many results are available in the engine.
 */
protected function get_response_counts($response) {
    $found = 0;
    $included = 0;

    if (isset($response->grouped->solr_filegroupingid->ngroups)) {
        // Get the number of results for file grouped queries.
        $grouping = $response->grouped->solr_filegroupingid;
        $found = $grouping->ngroups;
        // Only count the groups when they really are a list, mirroring the guard used for
        // standard queries below; count() on a non-countable value is an error in recent PHP.
        if (isset($grouping->groups) && is_array($grouping->groups)) {
            $included = count($grouping->groups);
        }
    } else if (isset($response->response->numFound)) {
        // Get the number of results for standard queries.
        $found = $response->response->numFound;
        if ($found > 0 && is_array($response->response->docs)) {
            $included = count($response->response->docs);
        }
    }

    return array($included, $found);
}
260
/**
 * Prepares a new query object with needed limits, filters, etc.
 *
 * Filters are added in this order: user-supplied filters (title, ids, time range),
 * document owner, accessible contexts, separate-groups restrictions, then either file
 * grouping or a text-only restriction, and finally optional location boosts.
 *
 * @param \stdClass $filters Containing query and filters.
 * @param \stdClass $accessinfo Information about contexts the user can access
 * @return \SolrDisMaxQuery|null Query object or null if they can't get any results
 */
protected function create_user_query($filters, $accessinfo) {
    global $USER;

    // Let's keep these changes internal.
    $data = clone $filters;

    $query = new \SolrDisMaxQuery();
    $this->set_query($query, self::replace_underlines($data->q));
    $this->add_fields($query);

    // Search filters applied, we don't cache these filters as we don't want to pollute the cache with tmp filters
    // we are really interested in caching contexts filters instead.
    if (!empty($data->title)) {
        $query->addFilterQuery('{!field cache=false f=title}' . $data->title);
    }

    // Simple "any of these ids" filters: filter property => start of the Solr filter query.
    $idfilters = array(
        'areaids' => '{!cache=false}areaid:(',
        'courseids' => '{!cache=false}courseid:(',
        'groupids' => '{!cache=false}groupid:(',
        'userids' => '{!cache=false}userid:(',
    );
    foreach ($idfilters as $property => $prefix) {
        if (!empty($data->$property)) {
            $query->addFilterQuery($prefix . implode(' OR ', $data->$property) . ')');
        }
    }

    // Modification time range; an open end of the range is expressed as '*'.
    $hasstart = !empty($data->timestart);
    $hasend = !empty($data->timeend);
    if ($hasstart || $hasend) {
        $rangestart = '*';
        if ($hasstart) {
            $rangestart = \search_solr\document::format_time_for_engine($data->timestart);
        }
        $rangeend = '*';
        if ($hasend) {
            $rangeend = \search_solr\document::format_time_for_engine($data->timeend);
        }

        // No cache.
        $query->addFilterQuery('{!cache=false}modified:[' . $rangestart . ' TO ' . $rangeend . ']');
    }

    // Restrict to users who are supposed to be able to see a particular result.
    $query->addFilterQuery('owneruserid:(' . \core_search\manager::NO_OWNER_ID . ' OR ' . $USER->id . ')');

    if (!$accessinfo->everything) {
        // And finally restrict it to the context where the user can access, we want this one cached.
        // If the user can access all contexts no context filter is needed at all.
        if (is_array($accessinfo->usercontexts)) {
            // Join all area contexts into a single array and implode.
            $allcontexts = array();
            foreach ($accessinfo->usercontexts as $areaid => $areacontexts) {
                $areawanted = empty($data->areaids) || in_array($areaid, $data->areaids);
                if (!$areawanted) {
                    // Skip unused areas.
                    continue;
                }
                foreach ($areacontexts as $contextid) {
                    // Keyed by id to ensure they are unique.
                    $allcontexts[$contextid] = $contextid;
                }
            }
            if (empty($allcontexts)) {
                // This means there are no valid contexts for them, so they get no results.
                return null;
            }
            $query->addFilterQuery('contextid:(' . implode(' OR ', $allcontexts) . ')');
        }

        if ($accessinfo->separategroupscontexts) {
            // Add another restriction to handle group ids. If there are any contexts using separate
            // groups, then results in that context will not show unless you belong to the group.
            // (Note: Access all groups is taken care of earlier, when computing these arrays.)

            // This special exceptions list allows for multiple search areas within the same module,
            // where one of them uses separate groups and the other uses visible groups. It is a
            // little inefficient, but this should be rare.
            $exceptions = '';
            if ($accessinfo->visiblegroupscontextsareas) {
                foreach ($accessinfo->visiblegroupscontextsareas as $contextid => $areaids) {
                    $exceptions .= ' OR (contextid:' . $contextid . ' AND areaid:(' .
                            implode(' OR ', $areaids) . '))';
                }
            }

            // A document passes if it has no groupid ...
            $alternatives = array('(*:* -groupid:[* TO *])');
            if ($accessinfo->usergroups) {
                // ... or its groupid is one that the user belongs to ...
                $alternatives[] = 'groupid:(' . implode(' OR ', $accessinfo->usergroups) . ')';
            }
            // ... or its context is not one of the separate groups contexts.
            $alternatives[] = '(*:* -contextid:(' . implode(' OR ', $accessinfo->separategroupscontexts) . '))';

            $query->addFilterQuery(implode(' OR ', $alternatives) . $exceptions);
        }
    }

    if ($this->file_indexing_enabled()) {
        // Now group records by solr_filegroupingid. Limit to 3 results per group.
        $query->setGroup(true);
        $query->setGroupLimit(3);
        $query->setGroupNGroups(true);
        $query->addGroupField('solr_filegroupingid');
    } else {
        // Make sure we only get text files, in case the index has pre-existing files.
        $query->addFilterQuery('type:'.\core_search\manager::TYPE_TEXT);
    }

    // If ordering by location, add in boost for the relevant course or context ids.
    if (!empty($filters->order) && $filters->order === 'location') {
        $context = $filters->context;
        $coursecontext = $context->get_course_context();
        $query->addBoostQuery('courseid', $coursecontext->instanceid, self::COURSE_BOOST);
        if ($context->contextlevel !== CONTEXT_COURSE) {
            // If it's a block or activity, also add a boost for the specific context id.
            $query->addBoostQuery('contextid', $context->id, self::CONTEXT_BOOST);
        }
    }

    return $query;
}
395
/**
 * Prepares a new query by setting highlighting options, the query text and rows to return.
 *
 * @param \SolrQuery $query Query object to configure.
 * @param string $q The query text to search for.
 */
protected function set_query($query, $q) {
    // Set highlighting: enable it, then register every highlightable field.
    $query->setHighlight(true);
    foreach ($this->highlightfields as $highlightfield) {
        $query->addHighlightField($highlightfield);
    }

    // Fragment size and the markers that wrap each highlighted match.
    $query->setHighlightFragsize(static::FRAG_SIZE);
    $query->setHighlightSimplePre(self::HIGHLIGHT_START);
    $query->setHighlightSimplePost(self::HIGHLIGHT_END);
    $query->setHighlightMergeContiguous(true);

    $query->setQuery($q);

    // A reasonable max.
    $query->setRows(static::QUERY_SIZE);
}
418
/**
 * Sets fields to be returned in the result.
 *
 * For dismax queries, fields flagged as 'mainquery' are also registered as fields the
 * main query is run against.
 *
 * @param \SolrDisMaxQuery|\SolrQuery $query object.
 */
public function add_fields($query) {
    $documentclass = $this->get_document_classname();
    $definitions = $documentclass::get_default_fields_definition();

    $isdismax = ($query instanceof \SolrDisMaxQuery);

    foreach ($definitions as $fieldname => $definition) {
        $query->addField($fieldname);

        if (!$isdismax || empty($definition['mainquery'])) {
            continue;
        }

        // Add fields the main query should be run against.
        // Due to a regression in the PECL solr extension, https://bugs.php.net/bug.php?id=72740,
        // a boost value is required, even if it is optional; to avoid boosting one among other fields,
        // the explicit boost value will be the default one, for every field.
        $query->addQueryField($fieldname, 1);
    }
}
444
/**
 * Merges the highlighting returned by Solr into the matching response documents.
 *
 * Highlighting entries are keyed by document id. Documents without a highlighting entry
 * are still passed through merge_highlight_field_values() so that its multivalued field
 * check keeps running for them.
 *
 * @throws \core_search\engine_exception
 * @param object $response containing results.
 */
public function add_highlight_content($response) {
    if (!isset($response->highlighting)) {
        // There is no highlighting to add.
        return;
    }

    $highlights = $response->highlighting;
    foreach ($response->response->docs as $doc) {
        $docid = $doc->id;
        // Guard the lookup, as the grouped results path does, instead of reading a property
        // that may not exist for this document.
        $highlighteddoc = isset($highlights->$docid) ? $highlights->$docid : null;
        $this->merge_highlight_field_values($doc, $highlighteddoc);
    }
}
462
/**
 * Replaces highlightable field values of a document with their highlighted versions.
 *
 * Only fields that have a non-empty value in the document are considered; the first
 * highlighted value of a field replaces the document value.
 *
 * @throws \core_search\engine_exception When a highlightable field holds multiple values.
 * @param object $doc containing the results.
 * @param object $highlighteddoc containing the highlighted results values.
 */
public function merge_highlight_field_values($doc, $highlighteddoc) {
    foreach ($this->highlightfields as $field) {
        if (empty($doc->$field)) {
            continue;
        }

        // Check that the returned value is not an array. No way we can make this work with multivalued solr fields.
        if (is_array($doc->{$field})) {
            throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $field);
        }

        if (empty($highlighteddoc->$field)) {
            // Nothing highlighted for this field, keep the stored value.
            continue;
        }

        // Replace by the highlighted result.
        $doc->$field = reset($highlighteddoc->$field);
    }
}
487
/**
 * Filters the response on Moodle side.
 *
 * Grouped (file indexing) responses are handed over to grouped_files_process_response().
 * For standard responses each document is checked for ownership, search area and access;
 * the skipped/processed/total counters are adjusted along the way.
 *
 * @param \SolrObject|false $response Solr object containing the response return from solr server.
 * @param int $limit The maximum number of results to return. 0 for all.
 * @param bool $skipaccesscheck Don't use check_access() on results. Only to be used when results have known access.
 * @return array $results containing final results to be displayed.
 */
protected function process_response($response, $limit = 0, $skipaccesscheck = false) {
    global $USER;

    if (empty($response)) {
        return array();
    }

    if (isset($response->grouped)) {
        return $this->grouped_files_process_response($response, $limit);
    }

    $userid = $USER->id;
    $noownerid = \core_search\manager::NO_OWNER_ID;

    if (!$docs = $response->response->docs) {
        return array();
    }

    $out = array();
    if (empty($response->response->numFound)) {
        return $out;
    }

    $this->add_highlight_content($response);

    // Iterate through the results checking its availability and whether they are available for the user or not.
    foreach ($docs as $docdata) {
        // At this point $docdata is still the raw Solr document, read with array-style access.
        if ($docdata['owneruserid'] != $noownerid && $docdata['owneruserid'] != $userid) {
            // If owneruserid is set, no other user should be able to access this record.
            continue;
        }

        if (!$searcharea = $this->get_search_area($docdata->areaid)) {
            continue;
        }

        // From here on $docdata is a plain array.
        $docdata = $this->standarize_solr_obj($docdata);

        if ($skipaccesscheck) {
            $access = \core_search\manager::ACCESS_GRANTED;
        } else {
            $access = $searcharea->check_access($docdata['itemid']);
        }
        switch ($access) {
            case \core_search\manager::ACCESS_DELETED:
                $this->delete_by_id($docdata['id']);
                // Remove one from our processed and total counters, since we promptly deleted.
                $this->processeddocs--;
                $this->totalenginedocs--;
                break;
            case \core_search\manager::ACCESS_DENIED:
                $this->skippeddocs++;
                break;
            case \core_search\manager::ACCESS_GRANTED:
                // Add the doc.
                $out[] = $this->to_document($searcharea, $docdata);
                break;
        }

        // Stop when we hit our limit.
        if (!empty($limit) && count($out) >= $limit) {
            break;
        }
    }

    return $out;
}
565
cd894f84
EM
/**
 * Processes grouped file results into documents, with attached matching files.
 *
 * Each group stands for one main document plus the files that matched. When the main
 * document itself is not part of the group it is fetched afterwards through
 * get_missing_docs(). The original result order is preserved.
 *
 * @param \SolrObject $response The response returned from solr server
 * @param int $limit The maximum number of results to return. 0 for all.
 * @return array Final results to be displayed.
 */
protected function grouped_files_process_response($response, $limit = 0) {
    // If we can't find the grouping, or there are no matches in the grouping, return empty.
    if (!isset($response->grouped->solr_filegroupingid) || empty($response->grouped->solr_filegroupingid->matches)) {
        return array();
    }

    $numgranted = 0;
    $orderedids = array();
    $completedocs = array();
    $incompletedocs = array();

    // Highlighting may be absent from the response; the per-group isset() check below copes with null.
    $highlightingobj = isset($response->highlighting) ? $response->highlighting : null;

    // Each group represents a "master document".
    $groups = $response->grouped->solr_filegroupingid->groups;
    foreach ($groups as $group) {
        $groupid = $group->groupValue;
        $groupdocs = $group->doclist->docs;
        $firstdoc = reset($groupdocs);

        if (!$searcharea = $this->get_search_area($firstdoc->areaid)) {
            // Well, this is a problem.
            continue;
        }

        // Check for access.
        $access = $searcharea->check_access($firstdoc->itemid);
        switch ($access) {
            case \core_search\manager::ACCESS_DELETED:
                // If deleted from Moodle, delete from index and then continue.
                $this->delete_by_id($firstdoc->id);
                // Remove one from our processed and total counters, since we promptly deleted.
                $this->processeddocs--;
                $this->totalenginedocs--;
                continue 2;
            case \core_search\manager::ACCESS_DENIED:
                // This means we should just skip for the current user.
                $this->skippeddocs++;
                continue 2;
        }
        $numgranted++;

        $maindoc = false;
        $fileids = array();
        // Separate the main document and any files returned.
        foreach ($groupdocs as $groupdoc) {
            if ($groupdoc->id == $groupid) {
                $maindoc = $groupdoc;
            } else if (isset($groupdoc->solr_fileid)) {
                $fileids[] = $groupdoc->solr_fileid;
            }
        }

        // Store the id of this group, in order, for later merging.
        $orderedids[] = $groupid;

        if (!$maindoc) {
            // We don't have the main doc, store what we know for later building.
            $incompletedocs[$groupid] = $fileids;
        } else {
            if (isset($highlightingobj->$groupid)) {
                // Merge the highlighting for this doc.
                $this->merge_highlight_field_values($maindoc, $highlightingobj->$groupid);
            }
            $docdata = $this->standarize_solr_obj($maindoc);
            $doc = $this->to_document($searcharea, $docdata);
            // Now we need to attach the result files to the doc.
            foreach ($fileids as $fileid) {
                $doc->add_stored_file($fileid);
            }
            $completedocs[$groupid] = $doc;
        }

        if (!empty($limit) && $numgranted >= $limit) {
            // We have hit the max results, we will just ignore the rest.
            break;
        }
    }

    $incompletedocs = $this->get_missing_docs($incompletedocs);

    $out = array();
    // Now merge the complete and incomplete documents, in results order.
    foreach ($orderedids as $docid) {
        if (isset($completedocs[$docid])) {
            $out[] = $completedocs[$docid];
        } else if (isset($incompletedocs[$docid])) {
            $out[] = $incompletedocs[$docid];
        }
    }

    return $out;
}
668
/**
 * Retrieve any missing main documents and attach provided files.
 *
 * The missingdocs array should be an array, indexed by document id, of main documents we need to retrieve. The value
 * associated to the key should be an array of stored_files or stored file ids to attach to the result document.
 *
 * Return array also indexed by document id.
 *
 * @param array $missingdocs An array, indexed by document id, with arrays of files/ids to attach.
 * @return document[]
 */
protected function get_missing_docs($missingdocs) {
    if (empty($missingdocs)) {
        return array();
    }

    $wantedids = array_keys($missingdocs);

    // Build a custom query that will get all the missing documents.
    $query = new \SolrQuery();
    $this->set_query($query, '*');
    $this->add_fields($query);
    $query->setRows(count($wantedids));
    $query->addFilterQuery('{!cache=false}id:(' . implode(' OR ', $wantedids) . ')');

    // We know the missing docs have already been checked for access, so don't recheck.
    $founddocs = $this->process_response($this->get_query_response($query), 0, true);

    $out = array();
    foreach ($founddocs as $founddoc) {
        $foundid = $founddoc->get('id');
        if (isset($missingdocs[$foundid])) {
            // Attach the files.
            foreach ($missingdocs[$foundid] as $filedoc) {
                $founddoc->add_stored_file($filedoc);
            }
            $out[$foundid] = $founddoc;
        }
        // Otherwise we got a result we didn't expect, which is skipped.
    }

    return $out;
}
714
95c6aeaf
DM
/**
 * Returns a standard php array from a \SolrObject instance.
 *
 * @param \SolrObject $obj
 * @return array The returned document as an array, keyed by trimmed property name.
 */
public function standarize_solr_obj(\SolrObject $obj) {
    $docdata = array();

    foreach ($obj->getPropertyNames() as $rawname) {
        // Property names are trimmed before use, see
        // http://php.net/manual/en/solrobject.getpropertynames.php#98018.
        $cleanname = trim($rawname);
        $docdata[$cleanname] = $obj->offsetGet($cleanname);
    }

    return $docdata;
}
732
/**
 * Adds a document to the search engine.
 *
 * This does not commit to the search engine.
 *
 * @param document $document
 * @param bool $fileindexing True if file indexing is to be used
 * @return bool True if the document was added, false if adding it failed
 */
public function add_document($document, $fileindexing = false) {
    $added = $this->add_solr_document($document->export_for_engine());

    if ($added && $fileindexing) {
        // This will take care of updating all attached files in the index.
        $this->process_document_files($document);
    }

    return $added ? true : false;
}
95c6aeaf 756
/**
 * Adds a batch of documents to the engine at once.
 *
 * @param \core_search\document[] $documents Documents to add
 * @param bool $fileindexing If true, indexes files (these are done one at a time)
 * @return int[] Array of three elements: successfully processed, failed processed, batch count
 */
public function add_document_batch(array $documents, bool $fileindexing = false): array {
    // Export every document, keeping the order and using a plain zero-based list.
    $docdatabatch = array_values(array_map(function($document) {
        return $document->export_for_engine();
    }, $documents));

    $resultcounts = $this->add_solr_documents($docdatabatch);

    if (!$fileindexing) {
        return $resultcounts;
    }

    // Files are processed one document at a time (if there are files it's slow anyway).
    foreach ($documents as $document) {
        // This will take care of updating all attached files in the index.
        $this->process_document_files($document);
    }

    return $resultcounts;
}
782
/**
 * Removes underlines at the edges of words in the given string.
 *
 * For example '_frogs_' will become 'frogs', '_frogs and toads_' will become 'frogs and toads',
 * and 'frogs_and_toads' will be left as 'frogs_and_toads'.
 *
 * The reason for this is that for italic content_to_text puts _italic_ underlines at the start
 * and end of the italicised phrase (not between words). Solr treats underlines as part of the
 * word, which means that if you search for a word in italic then you can't find it.
 *
 * @param string $str String to replace
 * @return string Replaced string, or the unchanged input if the replacement failed
 */
protected static function replace_underlines(string $str): string {
    $replaced = preg_replace('~\b_|_\b~', '', $str);

    // preg_replace() returns null when the regular expression engine reports an error; null
    // is not a valid value for the declared string return type, so fall back to the input.
    return $replaced === null ? $str : $replaced;
}
799
/**
 * Creates a Solr document object.
 *
 * @param array $doc Array of document fields
 * @return \SolrInputDocument Created document
 */
protected function create_solr_document(array $doc): \SolrInputDocument {
    // Strip underlines at word edges from the content. The reason for this is that for italic
    // text, content_to_text puts _italic_ underlines. Solr treats underlines as part of the
    // word, which means that if you search for a word in italic then you can't find it.
    if (array_key_exists('content', $doc)) {
        $doc['content'] = self::replace_underlines($doc['content']);
    }

    // Set all the fields.
    $inputdoc = new \SolrInputDocument();
    foreach ($doc as $fieldname => $fieldvalue) {
        $inputdoc->addField($fieldname, $fieldvalue);
    }

    return $inputdoc;
}
823
/**
 * Adds a text document to the search engine.
 *
 * Solr client and server exceptions are reported through debugging() and turned into a
 * false return value.
 *
 * @param array $doc Array of document fields; 'id' is used in error messages.
 * @return bool True if the document was handed to Solr without an exception, false otherwise
 */
protected function add_solr_document($doc) {
    $solrdoc = $this->create_solr_document($doc);

    try {
        // The update response is not needed; success is signalled by the absence of an exception.
        $this->get_search_client()->addDocument($solrdoc, true, static::AUTOCOMMIT_WITHIN);
        return true;
    } catch (\SolrClientException $e) {
        debugging('Solr client error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER);
    } catch (\SolrServerException $e) {
        // We only use the first line of the message, as it's a fully java stacktrace behind it.
        $msg = strtok($e->getMessage(), "\n");
        debugging('Solr server error adding document with id ' . $doc['id'] . ': ' . $msg, DEBUG_DEVELOPER);
    }

    return false;
}
846
/**
 * Adds multiple text documents to the search engine.
 *
 * The documents are first sent as one batch. If that raises a Solr client or server
 * exception they are sent again one by one, so that failures can be counted per document.
 *
 * @param array $docs Array of documents (each an array of fields) to add
 * @return int[] Array of success, failure, batch count
 * @throws \core_search\engine_exception
 */
protected function add_solr_documents(array $docs): array {
    $inputdocs = [];
    foreach ($docs as $fields) {
        $inputdocs[] = $this->create_solr_document($fields);
    }

    try {
        // Add documents in a batch and report that they all succeeded.
        $this->get_search_client()->addDocuments($inputdocs, true, static::AUTOCOMMIT_WITHIN);
        return [count($inputdocs), 0, 1];
    } catch (\SolrClientException $e) {
        // If there is an exception, fall through...
        $donothing = true;
    } catch (\SolrServerException $e) {
        // If there is an exception, fall through...
        $donothing = true;
    }

    // When there is an error, we fall back to adding them individually so that we can report
    // which document(s) failed. Since it overwrites, adding the successful ones multiple
    // times won't hurt.
    $succeeded = 0;
    $attempts = 0;
    foreach ($docs as $fields) {
        if ($this->add_solr_document($fields)) {
            $succeeded++;
        }
        $attempts++;
    }

    // Every attempt that did not succeed is a failure; each single add counts as one batch.
    return [$succeeded, $attempts - $succeeded, $attempts];
}
890
cd894f84
EM
/**
 * Index the files attached to the document, ensuring the index matches the current document files.
 *
 * For documents that are not flagged as new, the files already in the index are compared
 * against the files currently attached:
 * - New files are added.
 * - Existing and unchanged files are skipped.
 * - Files that are in the index but no longer on the document are deleted from the index.
 * - Files that have changed are re-indexed.
 *
 * Does nothing when file indexing is disabled.
 *
 * @param document $document
 */
protected function process_document_files($document) {
    if (!$this->file_indexing_enabled()) {
        return;
    }

    // Page size used when reading already-indexed files.
    $pagesize = 500;

    // Files currently attached; entries are looked up by the indexed solr_fileid.
    $files = $document->get_files();

    // Existing documents need their indexed files reconciled first.
    if (!$document->get_is_new()) {
        $offset = 0;
        $stale = array();

        // Read the index page by page so large numbers of files are handled cleanly.
        list($total, $page) = $this->get_indexed_files($document, $offset, $pagesize);

        do {
            foreach ($page as $indexed) {
                $fileid = $indexed->solr_fileid;

                if (!isset($files[$fileid])) {
                    // Indexed but no longer attached. Deletion is deferred because
                    // deleting while paging could reorder the results.
                    $stale[] = $indexed->id;
                    continue;
                }

                $attached = $files[$fileid];

                // Filelib does not guarantee time modified is updated, so the other important
                // values are compared too. The last clause covers files that filtering blocked
                // last time but that the current settings consider indexable.
                $needsreindex = $indexed->modified != $attached->get_timemodified()
                    || strcmp($indexed->title, $attached->get_filename()) !== 0
                    || $indexed->solr_filecontenthash != $attached->get_contenthash()
                    || ($indexed->solr_fileindexstatus == document::INDEXED_FILE_FALSE
                        && $this->file_is_indexable($attached));

                if (!$needsreindex) {
                    // Already indexed and unchanged: drop it from the list of files to send.
                    unset($files[$fileid]);
                }
            }

            $offset += $pagesize;

            if ($offset < $total) {
                // More results remain, so fetch the next page.
                list($total, $page) = $this->get_indexed_files($document, $offset, $pagesize);
            }
        } while ($offset < $total);

        // Remove index entries for files that are no longer attached. The client is used
        // directly, as the engine delete_by_id won't work on file docs.
        foreach ($stale as $staleid) {
            $this->get_search_client()->deleteById($staleid);
        }
    }

    // Whatever is left is new or changed, and gets indexed now.
    foreach ($files as $file) {
        $this->add_stored_file($document, $file);
    }
}
973
/**
 * Get the currently indexed files for a particular document.
 *
 * Returns the total count together with one page of file records.
 *
 * @param document $document
 * @param int $start The row to start the results on. Zero indexed.
 * @param int $rows The number of rows to fetch
 * @return array A two element array, the first is the total number of available results, the second is an array
 *               of documents for the current request.
 */
protected function get_indexed_files($document, $start = 0, $rows = 500) {
    // A stripped-down query of our own: match everything, then narrow with filter queries.
    $query = new \SolrQuery();
    $query->setQuery('*');
    $query->setRows($rows);
    $query->setStart($start);

    // Sort on id so that paging is consistent between requests.
    $query->addSortField('id');

    // Only the bare minimum of fields is requested.
    $wantedfields = ['id', 'modified', 'title', 'solr_fileid', 'solr_filecontenthash', 'solr_fileindexstatus'];
    foreach ($wantedfields as $fieldname) {
        $query->addField($fieldname);
    }

    // Restrict to file records in this document's solr_filegroupingid.
    $query->addFilterQuery('{!cache=false}solr_filegroupingid:(' . $document->get('id') . ')');
    $query->addFilterQuery('type:' . \core_search\manager::TYPE_FILE);

    $response = $this->get_query_response($query);

    if (!empty($response->response->numFound)) {
        return [$response->response->numFound, $this->convert_file_results($response)];
    }

    // Nothing found (or no usable response).
    return [0, []];
}
1013
/**
 * A very lightweight handler for getting information about already indexed files from a Solr response.
 *
 * @param SolrObject $responsedoc A Solr response document
 * @return stdClass[] An array of objects that contain the basic information for file processing.
 */
protected function convert_file_results($responsedoc) {
    $docs = $responsedoc->response->docs;
    if (!$docs) {
        return [];
    }

    $converted = [];
    foreach ($docs as $doc) {
        // Copy only the bare minimum needed; the modified time is converted from the engine format.
        $converted[] = (object)[
            'id' => $doc->id,
            'modified' => document::import_time_from_engine($doc->modified),
            'title' => $doc->title,
            'solr_fileid' => $doc->solr_fileid,
            'solr_filecontenthash' => $doc->solr_filecontenthash,
            'solr_fileindexstatus' => $doc->solr_fileindexstatus,
        ];
    }

    return $converted;
}
1041
/**
 * Adds a file to the search engine.
 *
 * Indexable files are posted to Solr's /update/extract handler. Files that are not
 * indexable, or whose extraction request fails, are still recorded in the index through
 * add_solr_document() with a solr_fileindexstatus describing why there is no content.
 *
 * Notes about Solr and Tika indexing. We do not send the mime type, only the filename.
 * Tika has much better content type detection than Moodle, and we will have many more doc failures
 * if we try to send mime types.
 *
 * @param document $document
 * @param \stored_file $storedfile
 * @return void
 */
protected function add_stored_file($document, $storedfile) {
    $filedoc = $document->export_file_for_engine($storedfile);

    if (!$this->file_is_indexable($storedfile)) {
        // Files we don't consider indexable still get a reference in the search engine.
        $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_FALSE;
        $this->add_solr_document($filedoc);
        return;
    }

    $curl = $this->get_curl_object();
    $url = $this->get_connection_url('/update/extract');

    $fixedparams = [
        // Return results as XML.
        'wt' => 'xml',
        // Prevents solr from automatically making fields for every tika output.
        'uprefix' => 'ignored_',
        // Control how content is captured. This keeps our file content clean of non-important metadata.
        'captureAttr' => 'true',
        // Move the content to a field for indexing.
        'fmap.content' => 'solr_filecontent',
        // Common fields that match the standard *_point dynamic field and cause an error.
        'fmap.media_white_point' => 'ignored_mwp',
        'fmap.media_black_point' => 'ignored_mbp',
    ];
    foreach ($fixedparams as $name => $value) {
        $url->param($name, $value);
    }

    // Copy each document field to the url as a literal. The value is placed in a temporary
    // name and then mapped back to the true field, which prevents errors or Tika
    // overwriting common field names.
    foreach ($filedoc as $field => $value) {
        // Discard any Tika field that matches our schema, so it can't overwrite ours.
        $url->param('fmap.'.$field, 'ignored_'.$field);
        // Place our data in a tmp field.
        $url->param('literal.mdltmp_'.$field, $value);
        // Then move it to the final field.
        $url->param('fmap.mdltmp_'.$field, $field);
    }

    // This sets the true filename for Tika.
    $url->param('resource.name', $storedfile->get_filename());

    // Everything below is error checking around the curl request. It is informational only,
    // since per file/doc results are not tracked.
    try {
        $response = $curl->post($url->out(false), array('myfile' => $storedfile));

        $errno = $curl->get_errno();
        $info = $curl->get_info();

        if ($errno != 0) {
            // An internal cURL error occurred; the error text is in the response.
            $message = 'Curl error '.$errno.' while indexing file with document id '.$filedoc['id'].': '.$response.'.';
            debugging($message, DEBUG_DEVELOPER);
        } else if (isset($info['http_code']) && ($info['http_code'] !== 200)) {
            // Unexpected HTTP response code.
            $message = 'Error while indexing file with document id '.$filedoc['id'];
            // Try to get the error message out of msg or title if it exists.
            if (preg_match('|<str [^>]*name="msg"[^>]*>(.*?)</str>|i', $response, $matches)) {
                $message .= ': '.$matches[1];
            } else if (preg_match('|<title[^>]*>([^>]*)</title>|i', $response, $matches)) {
                $message .= ': '.$matches[1];
            }
            // This is a common error, happening whenever a file fails to index for any reason, so it is kept quieter.
            if (CLI_SCRIPT && !PHPUNIT_TEST) {
                mtrace($message);
            }
        } else if (!preg_match('|<int [^>]*name="status"[^>]*>(\d*)</int>|i', $response, $matches)) {
            // The expected status field is missing: we received an unprocessable response.
            $message = 'Unexpected Solr response while indexing file with document id '.$filedoc['id'].': ';
            $message .= strtok($response, "\n");
            debugging($message, DEBUG_DEVELOPER);
        } else if ((int)$matches[1] !== 0) {
            // A status field is present but is not the expected 0.
            $message = 'Unexpected Solr status code '.(int)$matches[1];
            $message .= ' while indexing file with document id '.$filedoc['id'].'.';
            debugging($message, DEBUG_DEVELOPER);
        } else {
            // The document was successfully indexed.
            return;
        }
    } catch (\Exception $e) {
        // There was an error, but per-file success is not tracked, so we just continue on.
        debugging('Unknown exception while indexing file "'.$storedfile->get_filename().'".', DEBUG_DEVELOPER);
    }

    // Reaching this point means the file content was not indexed due to an error, so only
    // the base info is indexed, without the file.
    $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_ERROR;
    $this->add_solr_document($filedoc);
}
1150
/**
 * Checks to see if a passed file is indexable.
 *
 * A file is rejected when a maxindexfilekb limit is configured and the file exceeds it,
 * or when it is a Moodle backup file.
 *
 * @param \stored_file $file The file to check
 * @return bool True if the file can be indexed
 */
protected function file_is_indexable($file) {
    $toobig = !empty($this->config->maxindexfilekb)
        && ($file->get_filesize() > ($this->config->maxindexfilekb * 1024));
    if ($toobig) {
        // The file is too big to index.
        return false;
    }

    // We don't index Moodle backup files. There is nothing usefully indexable in them.
    return $file->get_mimetype() != 'application/vnd.moodle.backup';
}
1172
95c6aeaf
DM
/**
 * Commits all pending changes by calling commit() on the Solr client.
 *
 * @return void
 */
protected function commit() {
    $solrclient = $this->get_search_client();
    $solrclient->commit();
}
1181
075fa912
EM
/**
 * Do any area cleanup needed, and do anything to confirm contents.
 *
 * Return false to prevent the search area completed time and stats from being updated.
 * This implementation commits pending changes and always returns true; the parameters
 * are not used.
 *
 * @param \core_search\base $searcharea The search area that was complete
 * @param int $numdocs The number of documents that were added to the index
 * @param bool $fullindex True if a full index is being performed
 * @return bool True means that data is considered indexed
 */
public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
    // Flush everything that was queued while the area was being indexed.
    $this->commit();

    return true;
}
1197
cd894f84
EM
/**
 * Return true if file indexing is supported and enabled. False otherwise.
 *
 * Reads the 'fileindexing' engine setting. A missing setting is treated as disabled.
 *
 * @return bool
 */
public function file_indexing_enabled() {
    // The result of !empty() matches a (bool) cast for every defined value, but it does not
    // raise an undefined-property notice when the setting has never been saved.
    return !empty($this->config->fileindexing);
}
1206
95c6aeaf
DM
/**
 * Deletes the specified document.
 *
 * The delete is issued against solr_filegroupingid, so the item and all of its related
 * file records are removed together. The change is committed immediately.
 *
 * @param string $id The document id to delete
 * @return void
 */
public function delete_by_id($id) {
    $groupquery = 'solr_filegroupingid:' . $id;
    $this->get_search_client()->deleteByQuery($groupquery);
    $this->commit();
}
1218
/**
 * Delete all of an area's documents, or every document when no area is given.
 *
 * The change is committed immediately.
 *
 * @param string $areaid Area to clear; a falsy value clears the whole index
 * @return void
 */
public function delete($areaid = null) {
    // Without an area id the match-all query is used.
    $deletequery = $areaid ? 'areaid:' . $areaid : '*:*';

    $this->get_search_client()->deleteByQuery($deletequery);
    $this->commit();
}
1233
/**
 * Pings the Solr server using search_solr config.
 *
 * Checks, in order: the server configuration, whether the schema check should be skipped,
 * whether the schema is at the latest version, and finally that the schema is set up.
 * The first failing check supplies the returned error.
 *
 * @return true|string Returns true if all good or an error string.
 */
public function is_server_ready() {
    $status = $this->is_server_configured();
    if ($status !== true) {
        return $status;
    }

    // The configuration check has already confirmed that the server can be contacted. For
    // pages where performance is important, the full schema check is skipped.
    if ($this->should_skip_schema_check()) {
        return true;
    }

    // Update schema if required/possible.
    $status = $this->check_latest_schema();
    if ($status !== true) {
        return $status;
    }

    // Check that the schema is already set up.
    try {
        $solrschema = new schema($this);
        $solrschema->validate_setup();
    } catch (\moodle_exception $e) {
        return $e->getMessage();
    }

    return true;
}
1268
/**
 * Is the solr server properly configured?
 *
 * Requires a hostname and index name, a usable Solr client, and a server reporting
 * major version 4 or later.
 *
 * @return true|string Returns true if all good or an error string.
 */
public function is_server_configured() {
    $hasbasics = !empty($this->config->server_hostname) && !empty($this->config->indexname);
    if (!$hasbasics) {
        return 'No solr configuration found';
    }

    // Ask for the client without triggering the "server status" exception.
    if (!$this->get_search_client(false)) {
        return get_string('engineserverstatus', 'search');
    }

    try {
        $majorversion = $this->get_solr_major_version();
    } catch (\SolrClientException $ex) {
        debugging('Solr client error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
        return get_string('engineserverstatus', 'search');
    } catch (\SolrServerException $ex) {
        debugging('Solr server error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
        return get_string('engineserverstatus', 'search');
    }

    if ($majorversion < 4) {
        // Minimum solr 4.0.
        return get_string('minimumsolr4', 'search_solr');
    }

    return true;
}
1299
23fc1be8
DM
/**
 * Returns the solr server major version.
 *
 * The value is read from the server's system information once and then kept on the
 * instance for later calls.
 *
 * @return int
 */
public function get_solr_major_version() {
    if ($this->solrmajorversion === null) {
        // We should really ping the server first to see if the specified indexname is valid,
        // but we want to minimise solr server requests as they are expensive. system() emits
        // a warning if it can not connect to the configured index in the configured server,
        // hence the suppression.
        $systemdata = @$this->get_search_client()->system();
        $specversion = $systemdata->getResponse()->offsetGet('lucene')->offsetGet('solr-spec-version');

        // Keep only the part before the first dot.
        $majorpart = substr($specversion, 0, strpos($specversion, '.'));
        $this->solrmajorversion = intval($majorpart);
    }

    return $this->solrmajorversion;
}
1319
95c6aeaf
DM
/**
 * Checks if the PHP Solr extension is available.
 *
 * Availability is detected by the presence of the solr_get_version() function.
 *
 * @return bool
 */
public function is_installed() {
    $extensionloaded = function_exists('solr_get_version');
    return $extensionloaded;
}
1328
/**
 * Returns the solr client instance.
 *
 * The client is built from the engine configuration, plus the site proxy settings when
 * the Solr host is not on the proxy bypass list. It is only kept for reuse when
 * $this->cacheclient is set: we don't reuse SolrClient if we are on libcurl 7.35.0, due
 * to a bug in that version of curl.
 *
 * @throws \core_search\engine_exception
 * @param bool $triggerexception
 * @return \SolrClient
 */
protected function get_search_client($triggerexception = true) {
    global $CFG;

    // Type comparison as it is set to false if not available.
    if ($this->client !== null) {
        return $this->client;
    }

    $config = $this->config;

    $options = [
        'hostname' => $config->server_hostname,
        'path' => '/solr/' . $config->indexname,
    ];

    // Connection settings that fall back to an empty string when not configured.
    $connectionsettings = [
        'login' => 'server_username',
        'password' => 'server_password',
        'port' => 'server_port',
    ];
    foreach ($connectionsettings as $option => $setting) {
        $options[$option] = !empty($config->$setting) ? $config->$setting : '';
    }

    $options['secure'] = !empty($config->secure);

    // SSL settings share their name with the client option and also default to ''.
    foreach (['ssl_cert', 'ssl_key', 'ssl_keypassword', 'ssl_cainfo', 'ssl_capath'] as $sslsetting) {
        $options[$sslsetting] = !empty($config->$sslsetting) ? $config->$sslsetting : '';
    }

    $options['timeout'] = !empty($config->server_timeout) ? $config->server_timeout : '30';

    // Route through the site proxy unless the Solr host is on the bypass list.
    if ($CFG->proxyhost && !is_proxybypass('http://' . $config->server_hostname . '/')) {
        $options['proxy_host'] = $CFG->proxyhost;
        if (!empty($CFG->proxyport)) {
            $options['proxy_port'] = $CFG->proxyport;
        }
        // Proxy credentials are only passed when both parts are present.
        if (!empty($CFG->proxyuser) && !empty($CFG->proxypassword)) {
            $options['proxy_login'] = $CFG->proxyuser;
            $options['proxy_password'] = $CFG->proxypassword;
        }
    }

    if (!class_exists('\SolrClient')) {
        throw new \core_search\engine_exception('enginenotinstalled', 'search', '', 'solr');
    }

    $solrclient = new \SolrClient($options);

    if ($triggerexception && $solrclient === false) {
        throw new \core_search\engine_exception('engineserverstatus', 'search');
    }

    if ($this->cacheclient) {
        $this->client = $solrclient;
    }

    return $solrclient;
}
5dc4624c
EM
1388
/**
 * Returns a curl object for connecting to solr.
 *
 * The object is created once, configured with the SSL, timeout and basic-auth settings
 * from the engine configuration, and then reused on later calls.
 *
 * @return \curl
 */
public function get_curl_object() {
    if ($this->curl !== null) {
        return $this->curl;
    }

    // Connection to Solr is allowed to use 'localhost' and other potentially blocked hosts/ports.
    $this->curl = new \curl(['ignoresecurity' => true]);

    $config = $this->config;
    $curlopts = [];

    // Build the SSL options. Based on pecl-solr and general testing.
    if (!empty($config->secure)) {
        // Each setting maps to the option holding its value and, for certificate and key,
        // a second option that declares the PEM type.
        $sslsettings = [
            'ssl_cert' => ['CURLOPT_SSLCERT', 'CURLOPT_SSLCERTTYPE'],
            'ssl_key' => ['CURLOPT_SSLKEY', 'CURLOPT_SSLKEYTYPE'],
            'ssl_keypassword' => ['CURLOPT_KEYPASSWD', null],
            'ssl_cainfo' => ['CURLOPT_CAINFO', null],
            'ssl_capath' => ['CURLOPT_CAPATH', null],
        ];
        foreach ($sslsettings as $setting => $optnames) {
            if (empty($config->$setting)) {
                continue;
            }
            $curlopts[$optnames[0]] = $config->$setting;
            if ($optnames[1] !== null) {
                $curlopts[$optnames[1]] = 'PEM';
            }
        }
    }

    // Set timeout as for Solr client.
    $curlopts['CURLOPT_TIMEOUT'] = !empty($config->server_timeout) ? $config->server_timeout : '30';

    $this->curl->setopt($curlopts);

    // Basic authentication is only sent when both username and password are configured.
    if (!empty($config->server_username) && !empty($config->server_password)) {
        $credentials = $config->server_username . ':' . $config->server_password;
        $this->curl->setHeader('Authorization: Basic ' . base64_encode($credentials));
    }

    return $this->curl;
}
1440
/**
 * Return a Moodle url object for the server connection.
 *
 * The url has the form protocol://hostname[:port]/solr/indexname/path.
 *
 * @param string $path The solr path to append.
 * @return \moodle_url
 */
public function get_connection_url($path) {
    $config = $this->config;

    // Must use the proper protocol, or SSL will fail.
    $base = (!empty($config->secure) ? 'https' : 'http') . '://' . rtrim($config->server_hostname, '/');

    // The port part is only present when a port is configured.
    $port = !empty($config->server_port) ? ':' . $config->server_port : '';

    return new \moodle_url($base . $port . '/solr/' . $config->indexname . '/' . ltrim($path, '/'));
}
4359ef18 1458
/**
 * Reports that this engine supports group filtering.
 *
 * Solr includes group support in the execute_query function.
 *
 * @return bool Always true
 */
public function supports_group_filtering() {
    return true;
}
1467
/**
 * Updates the Solr schema to a new version.
 *
 * Only schema version 2017091700 is known here; it just requires a setup call to add
 * new fields. The old version is not consulted.
 *
 * @param int $oldversion Previous schema version (unused)
 * @param int $newversion Schema version to update to
 * @return bool|string True on success; otherwise the non-true result of the schema's
 *                     can_setup_server() check, or an error string for an unknown version
 */
protected function update_schema($oldversion, $newversion) {
    // Construct schema.
    $solrschema = new schema($this);

    $cansetup = $solrschema->can_setup_server();
    if ($cansetup !== true) {
        return $cansetup;
    }

    // If we don't know about the schema version we might not have implemented the
    // change correctly, so return.
    if ($newversion != 2017091700) {
        return get_string('schemaversionunknown', 'search');
    }

    // This version just requires a setup call to add new fields.
    $solrschema->setup();

    return true;
}
fc440796 1494
/**
 * Solr supports sort by location within course contexts or below.
 *
 * Starts from the orders supported by the parent class and adds 'location' when the
 * requested context has a course context.
 *
 * @param \context $context Context that the user requested search from
 * @return array Array from order name => display text
 */
public function get_supported_orders(\context $context) {
    $supported = parent::get_supported_orders($context);

    // Outside a course no other kind of sorting is supported. Within a course or
    // activity/block, sort by location is offered as well.
    if ($context->get_course_context(false)) {
        $supported['location'] = get_string('order_location', 'search', $context->get_context_name());
    }

    return $supported;
}
222a97ce 1514
/**
 * Reports that this engine supports search by user id.
 *
 * @return bool Always true
 */
public function supports_users() {
    return true;
}
7ba2a201 1523
/**
 * Reports that this engine supports adding documents in a batch.
 *
 * @return bool Always true
 */
public function supports_add_document_batch(): bool {
    return true;
}
1532
/**
 * Solr supports deleting the index for a context.
 *
 * Deletes every document whose contextid matches and commits. Any exception raised while
 * doing so is rethrown as an engine exception carrying the original message.
 *
 * @param int $oldcontextid Context that has been deleted
 * @return bool True to indicate that any data was actually deleted
 * @throws \core_search\engine_exception
 */
public function delete_index_for_context(int $oldcontextid) {
    $solrclient = $this->get_search_client();

    try {
        $contextquery = 'contextid:' . $oldcontextid;
        $solrclient->deleteByQuery($contextquery);
        $solrclient->commit(true);
    } catch (\Exception $e) {
        throw new \core_search\engine_exception('error_solr', 'search_solr', '', $e->getMessage());
    }

    return true;
}
1550
/**
 * Solr supports deleting the index for a course.
 *
 * Deletes every document whose courseid matches and commits. Any exception raised while
 * doing so is rethrown as an engine exception carrying the original message.
 *
 * @param int $oldcourseid Course that has been deleted
 * @return bool True to indicate that any data was actually deleted
 * @throws \core_search\engine_exception
 */
public function delete_index_for_course(int $oldcourseid) {
    $solrclient = $this->get_search_client();

    try {
        $coursequery = 'courseid:' . $oldcourseid;
        $solrclient->deleteByQuery($coursequery);
        $solrclient->commit(true);
    } catch (\Exception $e) {
        throw new \core_search\engine_exception('error_solr', 'search_solr', '', $e->getMessage());
    }

    return true;
}
679e8d8b 1568
/**
 * Checks if an alternate configuration has been defined.
 *
 * The alternate hostname, index name and port must all be set.
 *
 * @return bool True if alternate configuration is available
 */
public function has_alternate_configuration(): bool {
    $config = $this->config;

    $incomplete = empty($config->alternateserver_hostname)
        || empty($config->alternateindexname)
        || empty($config->alternateserver_port);

    return !$incomplete;
}
95c6aeaf 1579}