weekly release 4.0dev
[moodle.git] / search / engine / solr / classes / engine.php
CommitLineData
95c6aeaf
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
18 * Solr engine.
19 *
20 * @package search_solr
21 * @copyright 2015 Daniel Neis Araujo
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace search_solr;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
30 * Solr engine.
31 *
32 * @package search_solr
33 * @copyright 2015 Daniel Neis Araujo
34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36class engine extends \core_search\engine {
37
38 /**
39 * @var string The date format used by solr.
40 */
41 const DATE_FORMAT = 'Y-m-d\TH:i:s\Z';
42
43 /**
44 * @var int Commit documents interval (number of milliseconds).
45 */
46 const AUTOCOMMIT_WITHIN = 15000;
47
053118a1
EM
48 /**
49 * The maximum number of results to fetch at a time.
50 */
51 const QUERY_SIZE = 120;
52
95c6aeaf 53 /**
4894840d 54 * Highlighting fragsize. Slightly larger than output size (500) to allow for ... appending.
95c6aeaf 55 */
4894840d
EM
56 const FRAG_SIZE = 510;
57
58 /**
59 * Marker for the start of a highlight.
60 */
61 const HIGHLIGHT_START = '@@HI_S@@';
62
63 /**
64 * Marker for the end of a highlight.
65 */
66 const HIGHLIGHT_END = '@@HI_E@@';
95c6aeaf 67
fc440796 68 /** @var float Boost value for matching course in location-ordered searches */
69 const COURSE_BOOST = 1;
70
71 /** @var float Boost value for matching context (in addition to course boost) */
72 const CONTEXT_BOOST = 0.5;
73
95c6aeaf
DM
74 /**
75 * @var \SolrClient
76 */
77 protected $client = null;
78
7a4a0bc8
EM
79 /**
80 * @var bool True if we should reuse SolrClients, false if not.
81 */
82 protected $cacheclient = true;
83
5dc4624c
EM
84 /**
85 * @var \curl Direct curl object.
86 */
87 protected $curl = null;
88
95c6aeaf
DM
89 /**
90 * @var array Fields that can be highlighted.
91 */
4894840d 92 protected $highlightfields = array('title', 'content', 'description1', 'description2');
95c6aeaf 93
053118a1
EM
94 /**
95 * @var int Number of total docs reported by Solr for the last query.
96 */
97 protected $totalenginedocs = 0;
98
99 /**
100 * @var int Number of docs we have processed for the last query.
101 */
102 protected $processeddocs = 0;
103
104 /**
105 * @var int Number of docs that have been skipped while processing the last query.
106 */
107 protected $skippeddocs = 0;
108
895e0059
DM
109 /**
110 * Solr server major version.
111 *
112 * @var int
113 */
114 protected $solrmajorversion = null;
115
7a4a0bc8
EM
/**
 * Sets up the engine and decides whether SolrClient instances may be reused.
 *
 * @return void
 */
public function __construct() {
    parent::__construct();

    // There is a flaw with curl 7.35.x that causes problems with client reuse,
    // so client caching is switched off when that version is detected.
    $versioninfo = curl_version();
    $hasversion = isset($versioninfo['version']);
    if ($hasversion && stripos($versioninfo['version'], '7.35.') === 0) {
        $this->cacheclient = false;
    }
}
130
95c6aeaf
DM
/**
 * Prepares a Solr query, applies filters and executes it returning its results.
 *
 * The first request asks for a small number of rows; further requests of QUERY_SIZE rows
 * are made until the limit is reached or the engine has no more documents to offer.
 * The per-query counters (total, processed and skipped docs) are reset on every call
 * that gets as far as running the query.
 *
 * @throws \core_search\engine_exception
 * @param \stdClass $filters Containing query and filters.
 * @param \stdClass $accessinfo Information about areas user can access.
 * @param int $limit The maximum number of results to return. Empty means \core_search\manager::MAX_RESULTS.
 * @return \core_search\document[] Results, or an empty array if there are none.
 */
public function execute_query($filters, $accessinfo, $limit = 0) {
    if (empty($limit)) {
        $limit = \core_search\manager::MAX_RESULTS;
    }

    // If there is any problem we trigger the exception as soon as possible.
    // The returned client is not needed here; get_query_response() fetches it again.
    $this->get_search_client();

    // Create the query object.
    $query = $this->create_user_query($filters, $accessinfo);

    // If the query cannot have results, return none.
    if (!$query) {
        return [];
    }

    // We expect good match rates, so for our first get, we will get a small number of records.
    // This significantly speeds solr response time for first few pages.
    $query->setRows(min($limit * 3, static::QUERY_SIZE));
    $response = $this->get_query_response($query);

    // Get count data out of the response, and reset our counters.
    list($included, $found) = $this->get_response_counts($response);
    $this->totalenginedocs = $found;
    $this->processeddocs = 0;
    $this->skippeddocs = 0;
    if ($included == 0 || $this->totalenginedocs == 0) {
        // No results.
        return [];
    }

    // Get valid documents out of the response.
    $results = $this->process_response($response, $limit);

    // We have processed all the docs in the response at this point.
    $this->processeddocs += $included;

    // If we haven't reached the limit, and there are more docs left in Solr, lets keep trying.
    while (count($results) < $limit && ($this->totalenginedocs - $this->processeddocs) > 0) {
        // Offset the start of the query, and since we are making another call, get more per call.
        $query->setStart($this->processeddocs);
        $query->setRows(static::QUERY_SIZE);

        $response = $this->get_query_response($query);
        list($included, $found) = $this->get_response_counts($response);
        if ($included == 0 || $found == 0) {
            // No new results were found. Found being empty would be weird, so we will just return.
            return $results;
        }
        $this->totalenginedocs = $found;

        // Get the new response docs, limiting to remaining we need, then add it to the end of the results array.
        $newdocs = $this->process_response($response, $limit - count($results));
        $results = array_merge($results, $newdocs);

        // Add to our processed docs count.
        $this->processeddocs += $included;
    }

    return $results;
}
203
/**
 * Runs a query against the search client and hands back the raw response.
 *
 * Client and server exceptions are not propagated: the message is reported through
 * debugging(), stored in $this->queryerror, and false is returned instead.
 *
 * @param \SolrQuery $query Solr query object.
 * @return \SolrObject|false Response document or false on error.
 */
protected function get_query_response($query) {
    $failure = null;

    try {
        return $this->get_search_client()->query($query)->getResponse();
    } catch (\SolrClientException $clienterror) {
        $failure = $clienterror;
    } catch (\SolrServerException $servererror) {
        $failure = $servererror;
    }

    // Only reached when one of the two exceptions above was caught.
    $message = $failure->getMessage();
    debugging('Error executing the provided query: ' . $message, DEBUG_DEVELOPER);
    $this->queryerror = $message;

    return false;
}
223
/**
 * Returns the total number of documents available for the most recent call to execute_query.
 *
 * @return int
 */
public function get_query_total_count() {
    // Docs we have determined are bad are taken off the total reported by the engine.
    $enginetotal = $this->totalenginedocs;
    $skipped = $this->skippeddocs;

    return $enginetotal - $skipped;
}
233
/**
 * Returns count information for a provided response. Will return 0, 0 for invalid or empty responses.
 *
 * @param \SolrObject|false $response The response from Solr.
 * @return array A two part array. First, how many docs (or groups) are included in this response.
 *               Second, how many results are available in the engine.
 */
protected function get_response_counts($response) {
    // File grouped queries report their numbers on the grouping rather than on the response.
    if (isset($response->grouped->solr_filegroupingid->ngroups)) {
        $grouping = $response->grouped->solr_filegroupingid;
        $total = $grouping->ngroups;
        $returned = count($grouping->groups);
        return array($returned, $total);
    }

    // Anything that is not a standard query response either counts as empty.
    if (!isset($response->response->numFound)) {
        return array(0, 0);
    }

    // Standard queries.
    $total = $response->response->numFound;
    $returned = 0;
    if ($total > 0 && is_array($response->response->docs)) {
        $returned = count($response->response->docs);
    }

    return array($returned, $total);
}
259
/**
 * Builds the dismax query for a user search: search text, returned fields, user-supplied
 * filters, access restrictions, file grouping and optional location boosting.
 *
 * @param \stdClass $filters Containing query and filters.
 * @param \stdClass $accessinfo Information about contexts the user can access
 * @return \SolrDisMaxQuery|null Query object or null if they can't get any results
 */
protected function create_user_query($filters, $accessinfo) {
    global $USER;

    // Work on a copy so the changes below stay internal.
    $data = clone $filters;

    $query = new \SolrDisMaxQuery();
    $this->set_query($query, self::replace_underlines($data->q));
    $this->add_fields($query);

    // Search filters applied, we don't cache these filters as we don't want to pollute the cache with tmp filters
    // we are really interested in caching contexts filters instead.
    if (!empty($data->title)) {
        $query->addFilterQuery('{!field cache=false f=title}' . $data->title);
    }

    // For each id list that was supplied, match documents having any of the given ids.
    $idfilters = array(
        'areaids' => 'areaid',
        'courseids' => 'courseid',
        'groupids' => 'groupid',
        'userids' => 'userid',
    );
    foreach ($idfilters as $property => $solrfield) {
        if (!empty($data->$property)) {
            $query->addFilterQuery('{!cache=false}' . $solrfield . ':(' . implode(' OR ', $data->$property) . ')');
        }
    }

    // Modified time range. A missing bound becomes the open-ended '*'.
    $hasstart = !empty($data->timestart);
    $hasend = !empty($data->timeend);
    if ($hasstart || $hasend) {
        $data->timestart = $hasstart ? \search_solr\document::format_time_for_engine($data->timestart) : '*';
        $data->timeend = $hasend ? \search_solr\document::format_time_for_engine($data->timeend) : '*';

        // No cache.
        $query->addFilterQuery('{!cache=false}modified:[' . $data->timestart . ' TO ' . $data->timeend . ']');
    }

    // Restrict to users who are supposed to be able to see a particular result.
    $query->addFilterQuery('owneruserid:(' . \core_search\manager::NO_OWNER_ID . ' OR ' . $USER->id . ')');

    $restricted = !$accessinfo->everything;

    // And finally restrict it to the contexts the user can access, we want this one cached.
    // When the user can access everything there is nothing to filter.
    if ($restricted && is_array($accessinfo->usercontexts)) {
        // Collect the contexts of every relevant area, keyed by id so they stay unique.
        $allcontexts = array();
        foreach ($accessinfo->usercontexts as $areaid => $areacontexts) {
            $areawanted = empty($data->areaids) || in_array($areaid, $data->areaids);
            if (!$areawanted) {
                // Skip unused areas.
                continue;
            }
            foreach ($areacontexts as $contextid) {
                $allcontexts[$contextid] = $contextid;
            }
        }
        if (empty($allcontexts)) {
            // This means there are no valid contexts for them, so they get no results.
            return null;
        }
        $query->addFilterQuery('contextid:(' . implode(' OR ', $allcontexts) . ')');
    }

    if ($restricted && $accessinfo->separategroupscontexts) {
        // Add another restriction to handle group ids. If there are any contexts using separate
        // groups, then results in that context will not show unless you belong to the group.
        // (Note: Access all groups is taken care of earlier, when computing these arrays.)

        // The exceptions list covers modules with several search areas where one area uses
        // separate groups and another uses visible groups. It is a little inefficient, but
        // this should be rare.
        $exceptions = '';
        if ($accessinfo->visiblegroupscontextsareas) {
            foreach ($accessinfo->visiblegroupscontextsareas as $contextid => $areaids) {
                $exceptions .= ' OR (contextid:' . $contextid . ' AND areaid:(' .
                        implode(' OR ', $areaids) . '))';
            }
        }

        // The document has no groupid...
        $nogroup = '(*:* -groupid:[* TO *]) OR ';
        // ...or the context is not one of the separate groups contexts.
        $notseparate = '(*:* -contextid:(' . implode(' OR ', $accessinfo->separategroupscontexts) . '))';

        if ($accessinfo->usergroups) {
            // ...or the groupid is one that the user belongs to.
            $membership = 'groupid:(' . implode(' OR ', $accessinfo->usergroups) . ') OR ';
            $groupfilter = $nogroup . $membership . $notseparate . $exceptions;
        } else {
            $groupfilter = $nogroup . $notseparate . $exceptions;
        }
        $query->addFilterQuery($groupfilter);
    }

    if ($this->file_indexing_enabled()) {
        // Now group records by solr_filegroupingid. Limit to 3 results per group.
        $query->setGroup(true);
        $query->setGroupLimit(3);
        $query->setGroupNGroups(true);
        $query->addGroupField('solr_filegroupingid');
    } else {
        // Make sure we only get text files, in case the index has pre-existing files.
        $query->addFilterQuery('type:'.\core_search\manager::TYPE_TEXT);
    }

    // If ordering by location, add in boost for the relevant course or context ids.
    $bylocation = !empty($filters->order) && $filters->order === 'location';
    if ($bylocation) {
        $coursecontext = $filters->context->get_course_context();
        $query->addBoostQuery('courseid', $coursecontext->instanceid, self::COURSE_BOOST);
        if ($filters->context->contextlevel !== CONTEXT_COURSE) {
            // If it's a block or activity, also add a boost for the specific context id.
            $query->addBoostQuery('contextid', $filters->context->id, self::CONTEXT_BOOST);
        }
    }

    return $query;
}
394
/**
 * Prepares a query by setting the query string, the default number of rows and highlighting.
 *
 * @param \SolrQuery $query
 * @param string $q The query string to search for.
 */
protected function set_query($query, $q) {
    $query->setQuery($q);

    // A reasonable max.
    $query->setRows(static::QUERY_SIZE);

    // Set highlighting: which fields, fragment size, and the markers wrapped around matches.
    $query->setHighlight(true);
    foreach ($this->highlightfields as $highlightfield) {
        $query->addHighlightField($highlightfield);
    }
    $query->setHighlightFragsize(static::FRAG_SIZE);
    $query->setHighlightSimplePre(self::HIGHLIGHT_START);
    $query->setHighlightSimplePost(self::HIGHLIGHT_END);
    $query->setHighlightMergeContiguous(true);
}
417
/**
 * Sets fields to be returned in the result.
 *
 * For dismax queries, fields flagged as 'mainquery' are also registered as fields the
 * main query runs against.
 *
 * @param \SolrDisMaxQuery|\SolrQuery $query object.
 */
public function add_fields($query) {
    $documentclass = $this->get_document_classname();
    $definitions = $documentclass::get_default_fields_definition();
    $isdismax = $query instanceof \SolrDisMaxQuery;

    foreach ($definitions as $fieldname => $definition) {
        $query->addField($fieldname);

        if (!$isdismax || empty($definition['mainquery'])) {
            continue;
        }

        // Add fields the main query should be run against.
        // Due to a regression in the PECL solr extension, https://bugs.php.net/bug.php?id=72740,
        // a boost value is required, even if it is optional; to avoid boosting one among other fields,
        // the explicit boost value will be the default one, for every field.
        $query->addQueryField($fieldname, 1);
    }
}
443
/**
 * Merges the highlighting section of a response into the matching docs of that response.
 *
 * Highlighting entries are looked up by document id. Does nothing when the response has
 * no highlighting section or no docs.
 *
 * @throws \core_search\engine_exception
 * @param object $response containing results.
 */
public function add_highlight_content($response) {
    if (!isset($response->highlighting)) {
        // There is no highlighting to add.
        return;
    }
    if (empty($response->response->docs)) {
        // There are no docs to add highlighting to.
        return;
    }

    $highlighting = $response->highlighting;
    foreach ($response->response->docs as $doc) {
        $docid = $doc->id;
        // A doc without a highlighting entry still goes through the merge with an empty
        // object, so the multivalued field validation keeps being applied to it.
        $highlighteddoc = isset($highlighting->$docid) ? $highlighting->$docid : new \stdClass();
        $this->merge_highlight_field_values($doc, $highlighteddoc);
    }
}
461
/**
 * Overwrites the highlightable fields of a doc with their highlighted versions, where available.
 *
 * @throws \core_search\engine_exception If a highlightable field of the doc holds an array.
 * @param object $doc containing the results.
 * @param object $highlighteddoc containing the highlighted results values.
 */
public function merge_highlight_field_values($doc, $highlighteddoc) {

    foreach ($this->highlightfields as $fieldname) {
        if (empty($doc->$fieldname)) {
            continue;
        }

        // Check that the returned value is not an array. No way we can make this work with multivalued solr fields.
        if (is_array($doc->{$fieldname})) {
            throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
        }

        if (empty($highlighteddoc->$fieldname)) {
            // Nothing highlighted for this field, keep the stored value.
            continue;
        }

        // Replace by the highlighted result.
        $doc->$fieldname = reset($highlighteddoc->$fieldname);
    }
}
486
/**
 * Filters the response on Moodle side.
 *
 * Grouped (file indexing) responses are delegated to grouped_files_process_response().
 * For standard responses each doc is checked against its owner, its search area and,
 * unless skipped, the area's access check. Deleted docs are removed from the index and
 * the processed/total counters; denied docs are added to the skipped counter.
 *
 * @param \SolrObject|false $response Solr object containing the response return from solr server.
 * @param int $limit The maximum number of results to return. 0 for all.
 * @param bool $skipaccesscheck Don't use check_access() on results. Only to be used when results have known access.
 * @return array $results containing final results to be displayed.
 */
protected function process_response($response, $limit = 0, $skipaccesscheck = false) {
    global $USER;

    if (empty($response)) {
        return array();
    }

    if (isset($response->grouped)) {
        return $this->grouped_files_process_response($response, $limit);
    }

    $userid = $USER->id;
    $noownerid = \core_search\manager::NO_OWNER_ID;

    if (!$docs = $response->response->docs) {
        return array();
    }

    $out = array();
    if (empty($response->response->numFound)) {
        return $out;
    }

    $this->add_highlight_content($response);

    // Iterate through the results checking its availability and whether they are available for the user or not.
    foreach ($docs as $docdata) {
        if ($docdata['owneruserid'] != $noownerid && $docdata['owneruserid'] != $userid) {
            // If owneruserid is set, no other user should be able to access this record.
            continue;
        }

        if (!$searcharea = $this->get_search_area($docdata->areaid)) {
            continue;
        }

        // From here on the doc is a plain array.
        $docdata = $this->standarize_solr_obj($docdata);

        if ($skipaccesscheck) {
            $access = \core_search\manager::ACCESS_GRANTED;
        } else {
            $access = $searcharea->check_access($docdata['itemid']);
        }
        switch ($access) {
            case \core_search\manager::ACCESS_DELETED:
                $this->delete_by_id($docdata['id']);
                // Remove one from our processed and total counters, since we promptly deleted.
                $this->processeddocs--;
                $this->totalenginedocs--;
                break;
            case \core_search\manager::ACCESS_DENIED:
                $this->skippeddocs++;
                break;
            case \core_search\manager::ACCESS_GRANTED:
                // Add the doc.
                $out[] = $this->to_document($searcharea, $docdata);
                break;
        }

        // Stop when we hit our limit.
        if (!empty($limit) && count($out) >= $limit) {
            break;
        }
    }

    return $out;
}
564
cd894f84
EM
/**
 * Processes grouped file results into documents, with attached matching files.
 *
 * Each group is access-checked through its first doc. Groups whose main document is
 * part of the group are built directly; for the others only the file ids are known, so
 * the main documents are fetched afterwards with get_missing_docs(). The output keeps
 * the order of the groups in the response.
 *
 * @throws \core_search\engine_exception
 * @param \SolrObject $response The response returned from solr server
 * @param int $limit The maximum number of results to return. 0 for all.
 * @return array Final results to be displayed.
 */
protected function grouped_files_process_response($response, $limit = 0) {
    // If we can't find the grouping, or there are no matches in the grouping, return empty.
    if (!isset($response->grouped->solr_filegroupingid) || empty($response->grouped->solr_filegroupingid->matches)) {
        return array();
    }

    $numgranted = 0;
    $orderedids = array();
    $completedocs = array();
    $incompletedocs = array();

    // The response may come without highlighting; in that case nothing gets merged below.
    $highlightingobj = isset($response->highlighting) ? $response->highlighting : null;

    // Each group represents a "master document".
    $groups = $response->grouped->solr_filegroupingid->groups;
    foreach ($groups as $group) {
        $groupid = $group->groupValue;
        $groupdocs = $group->doclist->docs;
        $firstdoc = reset($groupdocs);

        if (!$firstdoc) {
            // A group without docs gives us nothing to check access against.
            continue;
        }

        if (!$searcharea = $this->get_search_area($firstdoc->areaid)) {
            // Well, this is a problem.
            continue;
        }

        // Check for access.
        $access = $searcharea->check_access($firstdoc->itemid);
        if ($access == \core_search\manager::ACCESS_DELETED) {
            // If deleted from Moodle, delete from index and then continue.
            $this->delete_by_id($firstdoc->id);
            // Remove one from our processed and total counters, since we promptly deleted.
            $this->processeddocs--;
            $this->totalenginedocs--;
            continue;
        }
        if ($access == \core_search\manager::ACCESS_DENIED) {
            // This means we should just skip for the current user.
            $this->skippeddocs++;
            continue;
        }
        $numgranted++;

        $maindoc = false;
        $fileids = array();
        // Separate the main document and any files returned.
        foreach ($groupdocs as $groupdoc) {
            if ($groupdoc->id == $groupid) {
                $maindoc = $groupdoc;
            } else if (isset($groupdoc->solr_fileid)) {
                $fileids[] = $groupdoc->solr_fileid;
            }
        }

        // Store the id of this group, in order, for later merging.
        $orderedids[] = $groupid;

        if (!$maindoc) {
            // We don't have the main doc, store what we know for later building.
            $incompletedocs[$groupid] = $fileids;
        } else {
            if (isset($highlightingobj->$groupid)) {
                // Merge the highlighting for this doc.
                $this->merge_highlight_field_values($maindoc, $highlightingobj->$groupid);
            }
            $docdata = $this->standarize_solr_obj($maindoc);
            $doc = $this->to_document($searcharea, $docdata);
            // Now we need to attach the result files to the doc.
            foreach ($fileids as $fileid) {
                $doc->add_stored_file($fileid);
            }
            $completedocs[$groupid] = $doc;
        }

        if (!empty($limit) && $numgranted >= $limit) {
            // We have hit the max results, we will just ignore the rest.
            break;
        }
    }

    $incompletedocs = $this->get_missing_docs($incompletedocs);

    $out = array();
    // Now merge the complete and incomplete documents, in results order.
    foreach ($orderedids as $docid) {
        if (isset($completedocs[$docid])) {
            $out[] = $completedocs[$docid];
        } else if (isset($incompletedocs[$docid])) {
            $out[] = $incompletedocs[$docid];
        }
    }

    return $out;
}
667
/**
 * Retrieve any missing main documents and attach provided files.
 *
 * The missingdocs array should be an array, indexed by document id, of main documents we need to retrieve. The value
 * associated to the key should be an array of stored_files or stored file ids to attach to the result document.
 *
 * Return array also indexed by document id.
 *
 * @param array $missingdocs An array, indexed by document id, with arrays of files/ids to attach.
 * @return document[]
 */
protected function get_missing_docs($missingdocs) {
    if (empty($missingdocs)) {
        return array();
    }

    $wantedids = array_keys($missingdocs);

    // Build a custom query that will get all the missing documents.
    $query = new \SolrQuery();
    $this->set_query($query, '*');
    $this->add_fields($query);
    $query->setRows(count($wantedids));
    $query->addFilterQuery('{!cache=false}id:(' . implode(' OR ', $wantedids) . ')');

    // We know the missing docs have already been checked for access, so don't recheck.
    $found = $this->process_response($this->get_query_response($query), 0, true);

    $docsbyid = array();
    foreach ($found as $founddoc) {
        $foundid = $founddoc->get('id');
        if (isset($missingdocs[$foundid])) {
            // Attach the files.
            foreach ($missingdocs[$foundid] as $filedoc) {
                $founddoc->add_stored_file($filedoc);
            }
            $docsbyid[$foundid] = $founddoc;
        }
        // Otherwise we got a result we didn't expect, and it is left out.
    }

    return $docsbyid;
}
713
95c6aeaf
DM
/**
 * Converts a \SolrObject instance into a plain php array keyed by property name.
 *
 * @param \SolrObject $obj
 * @return array The returned document as an array.
 */
public function standarize_solr_obj(\SolrObject $obj) {
    $docdata = array();

    foreach ($obj->getPropertyNames() as $rawname) {
        // Names are trimmed before use, see
        // http://php.net/manual/en/solrobject.getpropertynames.php#98018.
        $key = trim($rawname);
        $docdata[$key] = $obj->offsetGet($key);
    }

    return $docdata;
}
731
/**
 * Adds a document to the search engine.
 *
 * This does not commit to the search engine.
 *
 * @param document $document
 * @param bool $fileindexing True if file indexing is to be used
 * @return bool True if the document was added, false otherwise.
 */
public function add_document($document, $fileindexing = false) {
    $exported = $document->export_for_engine();
    $added = $this->add_solr_document($exported);

    if ($added && $fileindexing) {
        // This will take care of updating all attached files in the index.
        $this->process_document_files($document);
    }

    return $added ? true : false;
}
95c6aeaf 755
/**
 * Removes underlines found at the edges of words.
 *
 * For example '_frogs_' will become 'frogs', '_frogs and toads_' will become 'frogs and toads',
 * and 'frogs_and_toads' will be left as 'frogs_and_toads'.
 *
 * The reason for this is that for italic content_to_text puts _italic_ underlines at the start
 * and end of the italicised phrase (not between words). Solr treats underlines as part of the
 * word, which means that if you search for a word in italic then you can't find it.
 *
 * @param string $str String to process
 * @return string String without the edge underlines
 */
protected static function replace_underlines(string $str): string {
    // An underline right after, or right before, a word boundary.
    $edgeunderline = '~\b_|_\b~';

    return preg_replace($edgeunderline, '', $str);
}
772
091973db
EM
/**
 * Adds a text document to the search engine.
 *
 * Client and server exceptions are reported through debugging() and turned into a
 * false return value.
 *
 * @param array $doc Field values keyed by field name.
 * @return bool True if the document was sent to the engine, false on error.
 */
protected function add_solr_document($doc) {
    $solrdoc = new \SolrInputDocument();

    // Strip underlines at the edges of words in the content. The reason for this is that
    // for italic text, content_to_text puts _italic_ underlines. Solr treats underlines as
    // part of the word, which means that if you search for a word in italic then you can't find it.
    if (array_key_exists('content', $doc)) {
        $doc['content'] = self::replace_underlines($doc['content']);
    }

    foreach ($doc as $field => $value) {
        $solrdoc->addField($field, $value);
    }

    try {
        $this->get_search_client()->addDocument($solrdoc, true, static::AUTOCOMMIT_WITHIN);
        return true;
    } catch (\SolrClientException $e) {
        debugging('Solr client error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER);
    } catch (\SolrServerException $e) {
        // We only use the first line of the message, as it's a fully java stacktrace behind it.
        $msg = strtok($e->getMessage(), "\n");
        debugging('Solr server error adding document with id ' . $doc['id'] . ': ' . $msg, DEBUG_DEVELOPER);
    }

    return false;
}
806
cd894f84
EM
/**
 * Index files attached to the document, ensuring the index matches the current document files.
 *
 * For documents that aren't known to be new, we check the index for existing files.
 * - New files we will add.
 * - Existing and unchanged files we will skip.
 * - File that are in the index but not on the document will be deleted from the index.
 * - Files that have changed will be re-indexed.
 *
 * @param document $document
 */
protected function process_document_files($document) {
    if (!$this->file_indexing_enabled()) {
        return;
    }

    // Maximum rows to process at a time.
    $rows = 500;

    // Get the attached files. Whatever is still in this array at the end gets indexed.
    $files = $document->get_files();

    // If this isn't a new document, we need to check the existing indexed files.
    if (!$document->get_is_new()) {
        // We do this progressively, so we can handle lots of files cleanly.
        list($numfound, $indexedfiles) = $this->get_indexed_files($document, 0, $rows);
        $count = 0;
        $idstodelete = array();

        do {
            foreach ($indexedfiles as $indexedfile) {
                $fileid = $indexedfile->solr_fileid;

                if (!isset($files[$fileid])) {
                    // This means we have found a file that is no longer attached, so we need to delete from the index.
                    // We do it later, since this is progressive, and it could reorder results.
                    $idstodelete[] = $indexedfile->id;
                    continue;
                }

                // Check for changes that would mean we need to re-index the file. If so, just leave in $files.
                // Filelib does not guarantee time modified is updated, so we will check important values.
                // The last condition covers files that filtering blocked the last time they were
                // indexed, but that current settings say are indexable.
                $current = $files[$fileid];
                $needsreindex = $indexedfile->modified != $current->get_timemodified()
                        || strcmp($indexedfile->title, $current->get_filename()) !== 0
                        || $indexedfile->solr_filecontenthash != $current->get_contenthash()
                        || ($indexedfile->solr_fileindexstatus == document::INDEXED_FILE_FALSE &&
                                $this->file_is_indexable($current));

                if (!$needsreindex) {
                    // If the file is already indexed, we can just remove it from the files array and skip it.
                    unset($files[$fileid]);
                }
            }
            $count += $rows;

            if ($count < $numfound) {
                // If we haven't hit the total count yet, fetch the next batch.
                list($numfound, $indexedfiles) = $this->get_indexed_files($document, $count, $rows);
            }

        } while ($count < $numfound);

        // Delete files that are no longer attached.
        foreach ($idstodelete as $id) {
            // We directly delete the item using the client, as the engine delete_by_id won't work on file docs.
            $this->get_search_client()->deleteById($id);
        }
    }

    // Now we can actually index all the remaining files.
    foreach ($files as $file) {
        $this->add_stored_file($document, $file);
    }
}
889
/**
 * Fetch one page of the file records that are currently indexed for a document.
 *
 * A stripped-down query is used: it asks only for the handful of fields needed to decide
 * whether a file must be re-indexed, restricts results to file-type records grouped under
 * the document's id, and sorts by id so that successive pages are consistent.
 *
 * @param document $document The parent document whose indexed files are wanted
 * @param int $start Zero-based offset of the first row to return
 * @param int $rows Maximum number of rows to return
 * @return array Two elements: the total number of available results, then an array of
 *               lightweight records (see convert_file_results()) for this page
 */
protected function get_indexed_files($document, $start = 0, $rows = 500) {
    $query = new \SolrQuery();

    // Match everything; the filter queries below do the real narrowing.
    $query->setQuery('*');
    $query->setRows($rows);
    $query->setStart($start);
    // Consistent ordering between paged requests.
    $query->addSortField('id');

    // Only the bare minimum of fields is requested.
    $wantedfields = [
        'id',
        'modified',
        'title',
        'solr_fileid',
        'solr_filecontenthash',
        'solr_fileindexstatus',
    ];
    foreach ($wantedfields as $fieldname) {
        $query->addField($fieldname);
    }

    // Restrict to file records that belong to this document.
    $query->addFilterQuery('{!cache=false}solr_filegroupingid:(' . $document->get('id') . ')');
    $query->addFilterQuery('type:' . \core_search\manager::TYPE_FILE);

    $response = $this->get_query_response($query);
    if (!empty($response->response->numFound)) {
        return [$response->response->numFound, $this->convert_file_results($response)];
    }

    // Nothing found (or no usable response).
    return [0, []];
}
929
/**
 * Reduce a Solr response to the minimal per-file records used when comparing indexed files.
 *
 * Each returned object carries: id, modified (converted with document::import_time_from_engine()),
 * title, solr_fileid, solr_filecontenthash and solr_fileindexstatus.
 *
 * @param \SolrObject $responsedoc A Solr response document
 * @return \stdClass[] One object per document in the response; empty if there are none
 */
protected function convert_file_results($responsedoc) {
    $docs = $responsedoc->response->docs;
    if (!$docs) {
        return [];
    }

    $files = [];
    foreach ($docs as $solrdoc) {
        // Copy only what file processing needs, in a fixed property order.
        $files[] = (object)[
            'id' => $solrdoc->id,
            'modified' => document::import_time_from_engine($solrdoc->modified),
            'title' => $solrdoc->title,
            'solr_fileid' => $solrdoc->solr_fileid,
            'solr_filecontenthash' => $solrdoc->solr_filecontenthash,
            'solr_fileindexstatus' => $solrdoc->solr_fileindexstatus,
        ];
    }

    return $files;
}
957
/**
 * Send a single stored file to Solr for content extraction and indexing.
 *
 * The file is posted to the /update/extract handler. Only the filename is passed along, never
 * the mime type: Tika has much better content type detection than Moodle, and sending mime
 * types causes many more document failures.
 *
 * Files that are not indexable, and files whose extraction fails for any reason, are still
 * recorded in the index as a plain document (without file content) carrying the matching
 * solr_fileindexstatus value. Failures are reported only through debugging()/mtrace().
 *
 * @param document $document The parent document the file belongs to
 * @param \stored_file $storedfile The file to index
 * @return void
 */
protected function add_stored_file($document, $storedfile) {
    $filedoc = $document->export_file_for_engine($storedfile);

    if (!$this->file_is_indexable($storedfile)) {
        // Not indexable, but we still place a reference to the file in the search engine.
        $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_FALSE;
        $this->add_solr_document($filedoc);
        return;
    }

    $curl = $this->get_curl_object();
    $url = $this->get_connection_url('/update/extract');

    $fixedparams = [
        // Return results as XML.
        'wt' => 'xml',
        // Prevents Solr from automatically creating a field for every Tika output.
        'uprefix' => 'ignored_',
        // Keeps the captured file content clean of non-important metadata.
        'captureAttr' => 'true',
        // Move the extracted content to the field we index.
        'fmap.content' => 'solr_filecontent',
        // Common fields that match the standard *_point dynamic field and cause an error.
        'fmap.media_white_point' => 'ignored_mwp',
        'fmap.media_black_point' => 'ignored_mbp',
    ];
    foreach ($fixedparams as $paramname => $paramvalue) {
        $url->param($paramname, $paramvalue);
    }

    // Pass each of our own fields as a literal. The value goes into a temporary name and is then
    // mapped back to the real field, so Tika output cannot overwrite or clash with our fields.
    foreach ($filedoc as $field => $fieldvalue) {
        // Discard any Tika field that has the same name as one of ours.
        $url->param('fmap.'.$field, 'ignored_'.$field);
        // Place our data in a temporary field.
        $url->param('literal.mdltmp_'.$field, $fieldvalue);
        // Then move it to the final field.
        $url->param('fmap.mdltmp_'.$field, $field);
    }

    // This sets the true filename for Tika.
    $url->param('resource.name', $storedfile->get_filename());

    // Everything below is the request plus error reporting. Reporting is informational only,
    // since per file/doc results are not tracked.
    try {
        $response = $curl->post($url->out(false), ['myfile' => $storedfile]);

        $errno = $curl->get_errno();
        $info = $curl->get_info();

        if ($errno != 0) {
            // An internal cURL error occurred; the error text is in the response.
            debugging('Curl error '.$errno.' while indexing file with document id '.$filedoc['id'].': '.$response.'.',
                    DEBUG_DEVELOPER);
        } else if (isset($info['http_code']) && ($info['http_code'] !== 200)) {
            // Unexpected HTTP response code. Use the msg element, or failing that the title, as detail.
            $message = 'Error while indexing file with document id '.$filedoc['id'];
            if (preg_match('|<str [^>]*name="msg"[^>]*>(.*?)</str>|i', $response, $matches)
                    || preg_match('|<title[^>]*>([^>]*)</title>|i', $response, $matches)) {
                $message .= ': '.$matches[1];
            }
            // This happens whenever a file fails to index for any reason, so it is kept quieter.
            if (CLI_SCRIPT && !PHPUNIT_TEST) {
                mtrace($message);
            }
        } else if (!preg_match('|<int [^>]*name="status"[^>]*>(\d*)</int>|i', $response, $matches)) {
            // No status field: we received an unprocessable response.
            $message = 'Unexpected Solr response while indexing file with document id '.$filedoc['id'].': ';
            $message .= strtok($response, "\n");
            debugging($message, DEBUG_DEVELOPER);
        } else if ((int)$matches[1] !== 0) {
            // A status field is present but is not the expected 0.
            $message = 'Unexpected Solr status code '.(int)$matches[1];
            $message .= ' while indexing file with document id '.$filedoc['id'].'.';
            debugging($message, DEBUG_DEVELOPER);
        } else {
            // The document was successfully indexed.
            return;
        }
    } catch (\Exception $e) {
        // There was an error, but per-file success is not tracked, so we just continue on.
        debugging('Unknown exception while indexing file "'.$storedfile->get_filename().'".', DEBUG_DEVELOPER);
    }

    // Reaching this point means the file was not indexed; index just the base info without the file.
    $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_ERROR;
    $this->add_solr_document($filedoc);
}
1066
/**
 * Decide whether the content of a file should be sent for indexing.
 *
 * A file is rejected when a maxindexfilekb limit is configured and the file is larger than it,
 * or when the file is a Moodle backup (there is nothing usefully indexable in those).
 *
 * @param \stored_file $file The file to check
 * @return bool True if the file can be indexed
 */
protected function file_is_indexable($file) {
    $haslimit = !empty($this->config->maxindexfilekb);
    if ($haslimit && ($file->get_filesize() > ($this->config->maxindexfilekb * 1024))) {
        // The file is too big to index.
        return false;
    }

    // Everything except Moodle backup files is indexable.
    return $file->get_mimetype() != 'application/vnd.moodle.backup';
}
1088
95c6aeaf
DM
/**
 * Ask the Solr client to commit all pending changes.
 *
 * @return void
 */
protected function commit() {
    $client = $this->get_search_client();
    $client->commit();
}
1097
075fa912
EM
/**
 * Finish off indexing of a search area.
 *
 * For Solr this only commits the pending changes; none of the arguments are used.
 * Returning false would prevent the search area completed time and stats from being updated.
 *
 * @param \core_search\base $searcharea The search area that was completed
 * @param int $numdocs The number of documents that were added to the index
 * @param bool $fullindex True if a full index is being performed
 * @return bool Always true, meaning the data is considered indexed
 */
public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
    // Make everything added for this area durable before it is reported as indexed.
    $this->commit();

    return true;
}
1113
cd894f84
EM
/**
 * Return true if file indexing is supported and enabled. False otherwise.
 *
 * The fileindexing setting is read with !empty(), like the other optional settings in this
 * class, so a setting that has not been saved yet counts as disabled instead of raising an
 * undefined property warning. For any value that is set, the result equals a (bool) cast.
 *
 * @return bool
 */
public function file_indexing_enabled() {
    return !empty($this->config->fileindexing);
}
1122
95c6aeaf
DM
/**
 * Defragments the index by asking the Solr client to optimize it.
 *
 * NOTE(review): the three arguments are presumably maxSegments, softCommit and waitSearcher
 * as in the pecl-solr SolrClient::optimize() signature - confirm against the installed version.
 *
 * @return void
 */
public function optimize() {
    $client = $this->get_search_client();
    $client->optimize(1, true, false);
}
1131
/**
 * Deletes the specified document together with its file records, then commits.
 *
 * Deletion is done by solr_filegroupingid rather than by id, so that the item and all
 * related files are removed in one query.
 *
 * @param string $id The document id to delete
 * @return void
 */
public function delete_by_id($id) {
    $client = $this->get_search_client();
    $client->deleteByQuery('solr_filegroupingid:' . $id);

    $this->commit();
}
1143
/**
 * Delete the documents of one search area, or every document in the index, then commit.
 *
 * @param string $areaid The area to clear; when empty the whole index is cleared
 * @return void
 */
public function delete($areaid = null) {
    // No area means "match all documents".
    $deletequery = $areaid ? 'areaid:' . $areaid : '*:*';

    $this->get_search_client()->deleteByQuery($deletequery);
    $this->commit();
}
1158
/**
 * Check that the Solr server can be used, based on the search_solr config.
 *
 * Steps, stopping at the first failure: the server must be configured and reachable, then
 * (unless the schema check is being skipped) the schema must be at the latest version and
 * its setup must validate.
 *
 * @return true|string Returns true if all good or an error string.
 */
public function is_server_ready() {
    $status = $this->is_server_configured();
    if ($status !== true) {
        return $status;
    }

    // The check above already proved we can contact the server. For pages where performance
    // is important, the full schema check is skipped.
    if ($this->should_skip_schema_check()) {
        return true;
    }

    // Update schema if required/possible.
    $status = $this->check_latest_schema();
    if ($status !== true) {
        return $status;
    }

    // Check that the schema is already set up.
    try {
        (new \search_solr\schema())->validate_setup();
    } catch (\moodle_exception $e) {
        return $e->getMessage();
    }

    return true;
}
1193
/**
 * Is the solr server properly configured?
 *
 * Requires a hostname and index name in the config, a usable client, and a server that
 * reports Solr major version 4 or later. Client and server exceptions raised while asking
 * for the version are reported through debugging() and turned into a generic status string.
 *
 * @return true|string Returns true if all good or an error string.
 */
public function is_server_configured() {
    $hashostname = !empty($this->config->server_hostname);
    $hasindexname = !empty($this->config->indexname);
    if (!$hashostname || !$hasindexname) {
        return 'No solr configuration found';
    }

    if (!$this->get_search_client(false)) {
        return get_string('engineserverstatus', 'search');
    }

    try {
        $majorversion = $this->get_solr_major_version();
        if ($majorversion < 4) {
            // Minimum solr 4.0.
            return get_string('minimumsolr4', 'search_solr');
        }
    } catch (\SolrClientException $ex) {
        debugging('Solr client error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
        return get_string('engineserverstatus', 'search');
    } catch (\SolrServerException $ex) {
        debugging('Solr server error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
        return get_string('engineserverstatus', 'search');
    }

    return true;
}
1224
23fc1be8
DM
/**
 * Returns the solr server major version.
 *
 * The value is taken from the 'solr-spec-version' entry of the server's system information and
 * is remembered in $this->solrmajorversion, so the server is asked at most once per instance.
 *
 * @return int The first dot-separated component of the version, e.g. 8 for "8.11.2"
 */
public function get_solr_major_version() {
    if ($this->solrmajorversion !== null) {
        return $this->solrmajorversion;
    }

    // We should really ping first the server to see if the specified indexname is valid but
    // we want to minimise solr server requests as they are expensive. system() emits a warning
    // if it can not connect to the configured index in the configured server.
    $systemdata = @$this->get_search_client()->system();
    $solrversion = $systemdata->getResponse()->offsetGet('lucene')->offsetGet('solr-spec-version');

    // Take everything before the first dot. Splitting (rather than substr() up to strpos())
    // also copes with a version string that has no dot at all, which would otherwise give 0.
    $versionparts = explode('.', (string)$solrversion, 2);
    $this->solrmajorversion = intval($versionparts[0]);

    return $this->solrmajorversion;
}
1244
95c6aeaf
DM
/**
 * Checks if the PHP Solr extension is available.
 *
 * Availability is detected by the presence of the solr_get_version() function.
 *
 * @return bool
 */
public function is_installed() {
    $extensionpresent = function_exists('solr_get_version');

    return $extensionpresent;
}
1253
/**
 * Returns the solr client instance.
 *
 * The client is built from the plugin config (connection, credentials, SSL and timeout) plus
 * the site proxy settings when the Solr host is not on the proxy bypass list. It is stored
 * for reuse only when $this->cacheclient is set. (The original note here says the client is
 * not reused on libcurl 7.35.0 due to a bug in that version of curl; where cacheclient is set
 * is outside this method.)
 *
 * A failure to construct the client is reported by \SolrClient through an exception, never by
 * a false value, so it is caught here and handled according to $triggerexception.
 *
 * @throws \core_search\engine_exception If the Solr extension is missing, or if the client
 *                                       cannot be created and $triggerexception is true
 * @param bool $triggerexception Whether a client creation failure throws (true) or returns false
 * @return \SolrClient|false The client; false only when creation failed and $triggerexception is false
 */
protected function get_search_client($triggerexception = true) {
    global $CFG;

    // Type comparison as it is set to false if not available.
    if ($this->client !== null) {
        return $this->client;
    }

    $options = [
        'hostname' => $this->config->server_hostname,
        'path' => '/solr/' . $this->config->indexname,
        'login' => !empty($this->config->server_username) ? $this->config->server_username : '',
        'password' => !empty($this->config->server_password) ? $this->config->server_password : '',
        'port' => !empty($this->config->server_port) ? $this->config->server_port : '',
        'secure' => !empty($this->config->secure) ? true : false,
        'ssl_cert' => !empty($this->config->ssl_cert) ? $this->config->ssl_cert : '',
        'ssl_key' => !empty($this->config->ssl_key) ? $this->config->ssl_key : '',
        'ssl_keypassword' => !empty($this->config->ssl_keypassword) ? $this->config->ssl_keypassword : '',
        'ssl_cainfo' => !empty($this->config->ssl_cainfo) ? $this->config->ssl_cainfo : '',
        'ssl_capath' => !empty($this->config->ssl_capath) ? $this->config->ssl_capath : '',
        'timeout' => !empty($this->config->server_timeout) ? $this->config->server_timeout : '30',
    ];

    // Route through the site proxy unless the Solr host is configured to bypass it.
    if (!empty($CFG->proxyhost) && !is_proxybypass('http://' . $this->config->server_hostname . '/')) {
        $options['proxy_host'] = $CFG->proxyhost;
        if (!empty($CFG->proxyport)) {
            $options['proxy_port'] = $CFG->proxyport;
        }
        if (!empty($CFG->proxyuser) && !empty($CFG->proxypassword)) {
            $options['proxy_login'] = $CFG->proxyuser;
            $options['proxy_password'] = $CFG->proxypassword;
        }
    }

    if (!class_exists('\SolrClient')) {
        throw new \core_search\engine_exception('enginenotinstalled', 'search', '', 'solr');
    }

    try {
        $client = new \SolrClient($options);
    } catch (\SolrIllegalArgumentException $e) {
        // Invalid options: surface it as the documented engine exception, or as "no client".
        if ($triggerexception) {
            throw new \core_search\engine_exception('engineserverstatus', 'search', '', null, $e->getMessage());
        }
        return false;
    }

    if ($this->cacheclient) {
        $this->client = $client;
    }

    return $client;
}
5dc4624c
EM
1313
/**
 * Returns a curl object for connecting to solr, creating and configuring it on first use.
 *
 * The object is kept in $this->curl and returned as-is on later calls. Configuration covers
 * the SSL options (only when the secure setting is on), the request timeout, and a Basic
 * Authorization header when both a username and a password are configured.
 *
 * @return \curl
 */
public function get_curl_object() {
    if ($this->curl !== null) {
        return $this->curl;
    }

    // Connection to Solr is allowed to use 'localhost' and other potentially blocked hosts/ports.
    $this->curl = new \curl(['ignoresecurity' => true]);

    $options = [];

    // Build the SSL options. Based on pecl-solr and general testing.
    if (!empty($this->config->secure)) {
        // Config setting => [curl option for the value, curl option for its type (if any)].
        $sslsettings = [
            'ssl_cert' => ['CURLOPT_SSLCERT', 'CURLOPT_SSLCERTTYPE'],
            'ssl_key' => ['CURLOPT_SSLKEY', 'CURLOPT_SSLKEYTYPE'],
            'ssl_keypassword' => ['CURLOPT_KEYPASSWD', null],
            'ssl_cainfo' => ['CURLOPT_CAINFO', null],
            'ssl_capath' => ['CURLOPT_CAPATH', null],
        ];
        foreach ($sslsettings as $setting => list($valueoption, $typeoption)) {
            if (empty($this->config->$setting)) {
                continue;
            }
            $options[$valueoption] = $this->config->$setting;
            if ($typeoption !== null) {
                $options[$typeoption] = 'PEM';
            }
        }
    }

    // Set timeout as for Solr client.
    $options['CURLOPT_TIMEOUT'] = !empty($this->config->server_timeout) ? $this->config->server_timeout : '30';

    $this->curl->setopt($options);

    $hascredentials = !empty($this->config->server_username) && !empty($this->config->server_password);
    if ($hascredentials) {
        $credentials = $this->config->server_username . ':' . $this->config->server_password;
        $this->curl->setHeader('Authorization: Basic ' . base64_encode($credentials));
    }

    return $this->curl;
}
1365
/**
 * Return a Moodle url object for the server connection.
 *
 * The URL has the form {protocol}://{hostname}[:{port}]/solr/{indexname}/{path}, where the
 * protocol follows the secure setting and the port is included only when one is configured.
 *
 * @param string $path The solr path to append.
 * @return \moodle_url
 */
public function get_connection_url($path) {
    // Must use the proper protocol, or SSL will fail.
    $scheme = empty($this->config->secure) ? 'http' : 'https';
    $host = rtrim($this->config->server_hostname, '/');
    $port = !empty($this->config->server_port) ? ':' . $this->config->server_port : '';

    $address = $scheme . '://' . $host . $port
            . '/solr/' . $this->config->indexname . '/' . ltrim($path, '/');

    return new \moodle_url($address);
}
4359ef18 1383
/**
 * Reports that this engine handles group filtering itself.
 *
 * Solr includes group support in the execute_query function.
 *
 * @return bool Always true
 */
public function supports_group_filtering() {
    return true;
}
1392
/**
 * Update the Solr schema to a newer version.
 *
 * Nothing is changed unless the schema object reports that the server can be set up and the
 * target version is one this code knows how to reach.
 *
 * @param int $oldversion The schema version being upgraded from (not used here)
 * @param int $newversion The schema version being upgraded to
 * @return bool|string True on success; otherwise the non-true result of can_setup_server()
 *                     or an "unknown schema version" message
 */
protected function update_schema($oldversion, $newversion) {
    // Construct schema.
    $schema = new schema();

    $setupcheck = $schema->can_setup_server();
    if ($setupcheck !== true) {
        return $setupcheck;
    }

    switch ($newversion) {
        // This version just requires a setup call to add new fields.
        case 2017091700:
            $schema->setup();
            return true;

        // If we don't know about the schema version we might not have implemented the
        // change correctly, so return.
        default:
            return get_string('schemaversionunknown', 'search');
    }
}
fc440796 1419
/**
 * List the result orderings available for a search started from the given context.
 *
 * Starts from the parent engine's orders and adds a 'location' order when the context is a
 * course context or below (i.e. it has a course context).
 *
 * @param \context $context Context that the user requested search from
 * @return array Array from order name => display text
 */
public function get_supported_orders(\context $context) {
    $orders = parent::get_supported_orders($context);

    // If not within a course, no other kind of sorting supported.
    if (!$context->get_course_context(false)) {
        return $orders;
    }

    // Within a course or activity/block, support sort by location.
    $orders['location'] = get_string('order_location', 'search', $context->get_context_name());

    return $orders;
}
222a97ce 1439
/**
 * Reports that this engine can search by user id.
 *
 * @return bool Always true
 */
public function supports_users() {
    return true;
}
7ba2a201 1448
/**
 * Remove every indexed document that belongs to a deleted context, then commit.
 *
 * @param int $oldcontextid Context that has been deleted
 * @return bool Always true when no error occurs
 * @throws \core_search\engine_exception If the delete or the commit raises any exception
 */
public function delete_index_for_context(int $oldcontextid) {
    $client = $this->get_search_client();

    try {
        $client->deleteByQuery('contextid:' . $oldcontextid);
        $client->commit(true);
    } catch (\Exception $e) {
        // Re-raise as an engine exception carrying the underlying message.
        throw new \core_search\engine_exception('error_solr', 'search_solr', '', $e->getMessage());
    }

    return true;
}
1466
/**
 * Remove every indexed document that belongs to a deleted course, then commit.
 *
 * @param int $oldcourseid Course that has been deleted
 * @return bool Always true when no error occurs
 * @throws \core_search\engine_exception If the delete or the commit raises any exception
 */
public function delete_index_for_course(int $oldcourseid) {
    $client = $this->get_search_client();

    try {
        $client->deleteByQuery('courseid:' . $oldcourseid);
        $client->commit(true);
    } catch (\Exception $e) {
        // Re-raise as an engine exception carrying the underlying message.
        throw new \core_search\engine_exception('error_solr', 'search_solr', '', $e->getMessage());
    }

    return true;
}
95c6aeaf 1484}