Commit | Line | Data |
---|---|---|
95c6aeaf DM |
1 | <?php |
2 | // This file is part of Moodle - http://moodle.org/ | |
3 | // | |
4 | // Moodle is free software: you can redistribute it and/or modify | |
5 | // it under the terms of the GNU General Public License as published by | |
6 | // the Free Software Foundation, either version 3 of the License, or | |
7 | // (at your option) any later version. | |
8 | // | |
9 | // Moodle is distributed in the hope that it will be useful, | |
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | // GNU General Public License for more details. | |
13 | // | |
14 | // You should have received a copy of the GNU General Public License | |
15 | // along with Moodle. If not, see <http://www.gnu.org/licenses/>. | |
16 | ||
17 | /** | |
18 | * Solr engine. | |
19 | * | |
20 | * @package search_solr | |
21 | * @copyright 2015 Daniel Neis Araujo | |
22 | * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later | |
23 | */ | |
24 | ||
25 | namespace search_solr; | |
26 | ||
27 | defined('MOODLE_INTERNAL') || die(); | |
28 | ||
29 | /** | |
30 | * Solr engine. | |
31 | * | |
32 | * @package search_solr | |
33 | * @copyright 2015 Daniel Neis Araujo | |
34 | * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later | |
35 | */ | |
36 | class engine extends \core_search\engine { | |
37 | ||
38 | /** | |
39 | * @var string The date format used by solr. | |
40 | */ | |
41 | const DATE_FORMAT = 'Y-m-d\TH:i:s\Z'; | |
42 | ||
43 | /** | |
44 | * @var int Commit documents interval (number of milliseconds). |
45 | */ | |
46 | const AUTOCOMMIT_WITHIN = 15000; | |
47 | ||
053118a1 EM |
48 | /** |
49 | * The maximum number of results to fetch at a time. | |
50 | */ | |
51 | const QUERY_SIZE = 120; | |
52 | ||
95c6aeaf | 53 | /** |
4894840d | 54 | * Highlighting fragsize. Slightly larger than output size (500) to allow for ... appending. |
95c6aeaf | 55 | */ |
4894840d EM |
56 | const FRAG_SIZE = 510; |
57 | ||
58 | /** | |
59 | * Marker for the start of a highlight. | |
60 | */ | |
61 | const HIGHLIGHT_START = '@@HI_S@@'; | |
62 | ||
63 | /** | |
64 | * Marker for the end of a highlight. | |
65 | */ | |
66 | const HIGHLIGHT_END = '@@HI_E@@'; | |
95c6aeaf | 67 | |
fc440796 | 68 | /** @var float Boost value for matching course in location-ordered searches */ |
69 | const COURSE_BOOST = 1; | |
70 | ||
71 | /** @var float Boost value for matching context (in addition to course boost) */ | |
72 | const CONTEXT_BOOST = 0.5; | |
73 | ||
95c6aeaf DM |
74 | /** |
75 | * @var \SolrClient | |
76 | */ | |
77 | protected $client = null; | |
78 | ||
7a4a0bc8 EM |
79 | /** |
80 | * @var bool True if we should reuse SolrClients, false if not. | |
81 | */ | |
82 | protected $cacheclient = true; | |
83 | ||
5dc4624c EM |
84 | /** |
85 | * @var \curl Direct curl object. | |
86 | */ | |
87 | protected $curl = null; | |
88 | ||
95c6aeaf DM |
89 | /** |
90 | * @var array Fields that can be highlighted. | |
91 | */ | |
4894840d | 92 | protected $highlightfields = array('title', 'content', 'description1', 'description2'); |
95c6aeaf | 93 | |
053118a1 EM |
94 | /** |
95 | * @var int Number of total docs reported by Solr for the last query. |
96 | */ | |
97 | protected $totalenginedocs = 0; | |
98 | ||
99 | /** | |
100 | * @var int Number of docs we have processed for the last query. | |
101 | */ | |
102 | protected $processeddocs = 0; | |
103 | ||
104 | /** | |
105 | * @var int Number of docs that have been skipped while processing the last query. | |
106 | */ | |
107 | protected $skippeddocs = 0; | |
108 | ||
895e0059 DM |
109 | /** |
110 | * Solr server major version. | |
111 | * | |
112 | * @var int | |
113 | */ | |
114 | protected $solrmajorversion = null; | |
115 | ||
7a4a0bc8 EM |
/**
 * Sets up the engine and decides whether Solr client objects may be reused.
 *
 * Client reuse stays enabled unless the installed curl reports a 7.35.x version.
 */
public function __construct() {
    parent::__construct();

    $curlinfo = curl_version();
    if (!isset($curlinfo['version'])) {
        // No version information available, keep the default (reuse enabled).
        return;
    }

    // There is a flaw with curl 7.35.0 that causes problems with client reuse.
    if (stripos($curlinfo['version'], '7.35.') === 0) {
        $this->cacheclient = false;
    }
}
130 | ||
95c6aeaf DM |
/**
 * Builds the Solr query for the given filters, runs it and returns the resulting documents.
 *
 * Results are fetched page by page. The first request asks for min($limit * 3, QUERY_SIZE) rows;
 * further requests ask for QUERY_SIZE rows each and continue until $limit documents have been
 * collected or the engine reports nothing left to process. The counters backing
 * get_query_total_count() are reset on every call.
 *
 * @throws \core_search\engine_exception
 * @param \stdClass $filters Containing query and filters.
 * @param \stdClass $accessinfo Information about areas user can access.
 * @param int $limit The maximum number of results to return. Empty means \core_search\manager::MAX_RESULTS.
 * @return \core_search\document[] The documents found, or an empty array when there are none.
 */
public function execute_query($filters, $accessinfo, $limit = 0) {
    if (empty($limit)) {
        $limit = \core_search\manager::MAX_RESULTS;
    }

    // If there is any problem we trigger the exception as soon as possible.
    $this->get_search_client();

    // A falsy query means the query cannot have results.
    $query = $this->create_user_query($filters, $accessinfo);
    if (!$query) {
        return [];
    }

    // We expect good match rates, so the first request is kept small. This significantly speeds up
    // the solr response time for the first few pages.
    $query->setRows(min($limit * 3, static::QUERY_SIZE));
    $response = $this->get_query_response($query);

    // Reset the per-query counters from the first response.
    list($pagecount, $enginecount) = $this->get_response_counts($response);
    $this->totalenginedocs = $enginecount;
    $this->processeddocs = 0;
    $this->skippeddocs = 0;
    if ($pagecount == 0 || $enginecount == 0) {
        // No results.
        return array();
    }

    $results = $this->process_response($response, $limit);

    // Every doc of the first response has been looked at by now.
    $this->processeddocs += $pagecount;

    // Keep asking Solr for more while we are short of the limit and it still has unprocessed docs.
    while (count($results) < $limit && $this->totalenginedocs > $this->processeddocs) {
        // Continue where we stopped, and since this is an extra round trip, fetch a full page.
        $query->setStart($this->processeddocs);
        $query->setRows(static::QUERY_SIZE);

        $response = $this->get_query_response($query);
        list($pagecount, $enginecount) = $this->get_response_counts($response);
        if ($pagecount == 0 || $enginecount == 0) {
            // Nothing new came back. An empty total would be weird, so just stop here.
            break;
        }
        $this->totalenginedocs = $enginecount;

        // Only ask for as many documents as are still missing, and append them to what we have.
        $stillneeded = $limit - count($results);
        $results = array_merge($results, $this->process_response($response, $stillneeded));

        $this->processeddocs += $pagecount;
    }

    return $results;
}
203 | ||
/**
 * Runs a query against Solr and returns the response object.
 *
 * Solr client and server exceptions are not propagated: the exception message is reported through
 * debugging(), stored in $this->queryerror, and false is returned.
 *
 * @param \SolrQuery $query Solr query object.
 * @return \SolrObject|false Response document or false on error.
 */
protected function get_query_response($query) {
    $errormessage = null;

    try {
        $client = $this->get_search_client();
        $queryresponse = $client->query($query);
        return $queryresponse->getResponse();
    } catch (\SolrClientException $ex) {
        $errormessage = $ex->getMessage();
    } catch (\SolrServerException $ex) {
        $errormessage = $ex->getMessage();
    }

    // Only reached when one of the exceptions above was caught.
    debugging('Error executing the provided query: ' . $errormessage, DEBUG_DEVELOPER);
    $this->queryerror = $errormessage;
    return false;
}
223 | ||
/**
 * Returns the total number of documents available for the most recent call to execute_query.
 *
 * This is the total reported by the engine minus the docs counted as skipped while processing.
 *
 * @return int
 */
public function get_query_total_count() {
    $reported = $this->totalenginedocs;
    $skipped = $this->skippeddocs;

    return $reported - $skipped;
}
233 | ||
/**
 * Returns count information for a provided response. Will return 0, 0 for invalid or empty responses.
 *
 * Both response layouts are handled: file-grouped responses (grouped by solr_filegroupingid) and
 * standard responses. In either case the included count is only taken from a real array, so a
 * non-array placeholder never reaches count().
 *
 * @param \SolrObject|false $response The response from Solr, or false if the query failed.
 * @return array A two part array. First how many response docs are in the response.
 *               Second, how many results are available in the engine.
 */
protected function get_response_counts($response) {
    $found = 0;
    $included = 0;

    if (isset($response->grouped->solr_filegroupingid->ngroups)) {
        // Get the number of results for file grouped queries.
        $grouping = $response->grouped->solr_filegroupingid;
        $found = $grouping->ngroups;
        // Same guard as the standard branch: only count a genuine array of groups.
        if (isset($grouping->groups) && is_array($grouping->groups)) {
            $included = count($grouping->groups);
        }
    } else if (isset($response->response->numFound)) {
        // Get the number of results for standard queries.
        $found = $response->response->numFound;
        if ($found > 0 && is_array($response->response->docs)) {
            $included = count($response->response->docs);
        }
    }

    return array($included, $found);
}
259 | ||
/**
 * Prepares a new query object with needed limits, filters, etc.
 *
 * Filter queries are added in this order: title, area ids, course ids, group ids, user ids,
 * modified time range, document owner, accessible contexts, separate-groups restriction and,
 * when file indexing is disabled, document type. A location ordering adds boost queries.
 *
 * @param \stdClass $filters Containing query and filters.
 * @param \stdClass $accessinfo Information about contexts the user can access
 * @return \SolrDisMaxQuery|null Query object or null if they can't get any results
 */
protected function create_user_query($filters, $accessinfo) {
    global $USER;

    // Work on a copy so the caller's filter object is left alone.
    $data = clone $filters;

    $query = new \SolrDisMaxQuery();

    $this->set_query($query, $data->q);
    $this->add_fields($query);

    // Search filters applied. These are not cached: we don't want to pollute the cache with
    // temporary filters, the context filters further down are the ones worth caching.
    if (!empty($data->title)) {
        $query->addFilterQuery('{!field cache=false f=title}' . $data->title);
    }

    // Solr field => filter property. Each filter matches documents having any of the given values.
    $listfilters = array(
        'areaid' => 'areaids',
        'courseid' => 'courseids',
        'groupid' => 'groupids',
        'userid' => 'userids',
    );
    foreach ($listfilters as $solrfield => $filtername) {
        if (!empty($data->$filtername)) {
            $query->addFilterQuery('{!cache=false}' . $solrfield . ':(' . implode(' OR ', $data->$filtername) . ')');
        }
    }

    // Modified time range, open ended ('*') on whichever side was not supplied.
    $hasstart = !empty($data->timestart);
    $hasend = !empty($data->timeend);
    if ($hasstart || $hasend) {
        $from = $hasstart ? \search_solr\document::format_time_for_engine($data->timestart) : '*';
        $to = $hasend ? \search_solr\document::format_time_for_engine($data->timeend) : '*';

        // No cache.
        $query->addFilterQuery('{!cache=false}modified:[' . $from . ' TO ' . $to . ']');
    }

    // Restrict to users who are supposed to be able to see a particular result.
    $query->addFilterQuery('owneruserid:(' . \core_search\manager::NO_OWNER_ID . ' OR ' . $USER->id . ')');

    $restricted = !$accessinfo->everything;

    // Restrict to the contexts the user can access, we want this one cached. Users flagged as
    // able to access everything need no context filtering.
    if ($restricted && is_array($accessinfo->usercontexts)) {
        // Flatten the per-area context lists, keyed by context id to keep them unique.
        $allcontexts = array();
        foreach ($accessinfo->usercontexts as $areaid => $areacontexts) {
            $areawanted = empty($data->areaids) || in_array($areaid, $data->areaids);
            if (!$areawanted) {
                // Skip unused areas.
                continue;
            }
            foreach ($areacontexts as $contextid) {
                $allcontexts[$contextid] = $contextid;
            }
        }
        if (empty($allcontexts)) {
            // This means there are no valid contexts for them, so they get no results.
            return null;
        }
        $query->addFilterQuery('contextid:(' . implode(' OR ', $allcontexts) . ')');
    }

    if ($restricted && $accessinfo->separategroupscontexts) {
        // Group restriction: in contexts using separate groups, results only show if the user
        // belongs to the group. (Access all groups was already dealt with when these arrays
        // were computed.)

        // Exceptions cover modules with several search areas in one context where one area uses
        // separate groups and another uses visible groups. A little inefficient, but rare.
        $exceptions = '';
        if ($accessinfo->visiblegroupscontextsareas) {
            foreach ($accessinfo->visiblegroupscontextsareas as $contextid => $areaids) {
                $exceptions .= ' OR (contextid:' . $contextid . ' AND areaid:(' .
                        implode(' OR ', $areaids) . '))';
            }
        }

        // The document has no groupid...
        $clauses = array('(*:* -groupid:[* TO *])');
        if ($accessinfo->usergroups) {
            // ...or its groupid is one the user belongs to...
            $clauses[] = 'groupid:(' . implode(' OR ', $accessinfo->usergroups) . ')';
        }
        // ...or its context is not one of the separate groups contexts.
        $clauses[] = '(*:* -contextid:(' . implode(' OR ', $accessinfo->separategroupscontexts) . '))';

        $query->addFilterQuery(implode(' OR ', $clauses) . $exceptions);
    }

    if ($this->file_indexing_enabled()) {
        // Now group records by solr_filegroupingid. Limit to 3 results per group.
        $query->setGroup(true);
        $query->setGroupLimit(3);
        $query->setGroupNGroups(true);
        $query->addGroupField('solr_filegroupingid');
    } else {
        // Make sure we only get text files, in case the index has pre-existing files.
        $query->addFilterQuery('type:'.\core_search\manager::TYPE_TEXT);
    }

    // If ordering by location, add in boost for the relevant course or context ids.
    if (!empty($filters->order) && $filters->order === 'location') {
        $context = $filters->context;
        $coursecontext = $context->get_course_context();
        $query->addBoostQuery('courseid', $coursecontext->instanceid, self::COURSE_BOOST);
        if ($context->contextlevel !== CONTEXT_COURSE) {
            // If it's a block or activity, also add a boost for the specific context id.
            $query->addBoostQuery('contextid', $context->id, self::CONTEXT_BOOST);
        }
    }

    return $query;
}
394 | ||
/**
 * Applies the highlighting settings, the query itself and the default row count to a query object.
 *
 * @param \SolrQuery $query The query object to configure.
 * @param mixed $q The value handed to setQuery(); callers in this class pass the user query or '*'.
 */
protected function set_query($query, $q) {
    // Turn highlighting on for every highlightable field.
    $query->setHighlight(true);
    foreach ($this->highlightfields as $highlightfield) {
        $query->addHighlightField($highlightfield);
    }

    // Fragment size and the markers that wrap each highlighted section.
    $query->setHighlightFragsize(static::FRAG_SIZE);
    $query->setHighlightSimplePre(self::HIGHLIGHT_START);
    $query->setHighlightSimplePost(self::HIGHLIGHT_END);
    $query->setHighlightMergeContiguous(true);

    $query->setQuery($q);

    // A reasonable max.
    $query->setRows(static::QUERY_SIZE);
}
417 | ||
/**
 * Sets fields to be returned in the result.
 *
 * Every field from the document class' default definition is requested. For dismax queries, fields
 * flagged with 'mainquery' are additionally registered as fields the main query runs against.
 *
 * @param \SolrDisMaxQuery|\SolrQuery $query object.
 */
public function add_fields($query) {
    $documentclass = $this->get_document_classname();
    $definitions = $documentclass::get_default_fields_definition();

    $isdismax = ($query instanceof \SolrDisMaxQuery);

    foreach ($definitions as $fieldname => $definition) {
        $query->addField($fieldname);

        if (!$isdismax || empty($definition['mainquery'])) {
            continue;
        }

        // Add fields the main query should be run against.
        $query->addQueryField($fieldname);
    }
}
440 | ||
/**
 * Merges the highlighting section of a response into the matching docs of that response.
 *
 * Highlighting entries are looked up by document id. A doc without a highlighting entry is still
 * passed on (with null highlighting) so that its fields are validated like any other doc.
 *
 * @throws \core_search\engine_exception
 * @param object $response containing results.
 */
public function add_highlight_content($response) {
    if (!isset($response->highlighting)) {
        // There is no highlighting to add.
        return;
    }

    if (empty($response->response->docs)) {
        // There are no docs to add highlighting to.
        return;
    }

    $highlighting = $response->highlighting;
    foreach ($response->response->docs as $doc) {
        $docid = $doc->id;
        // Not every doc is guaranteed a highlighting entry, so check before reading it.
        $highlighteddoc = isset($highlighting->$docid) ? $highlighting->$docid : null;
        $this->merge_highlight_field_values($doc, $highlighteddoc);
    }
}
458 | ||
/**
 * Replaces the highlightable field values of a doc with their highlighted versions.
 *
 * Only fields that are non-empty in the doc are considered. A field is replaced by the first
 * highlighted value when the highlighting data has a non-empty entry for it.
 *
 * @throws \core_search\engine_exception If a highlightable field of the doc holds an array.
 * @param object $doc containing the results.
 * @param object $highlighteddoc containing the highlighted results values.
 */
public function merge_highlight_field_values($doc, $highlighteddoc) {
    foreach ($this->highlightfields as $fieldname) {
        if (empty($doc->$fieldname)) {
            continue;
        }

        // Check that the returned value is not an array. No way we can make this work with multivalued solr fields.
        if (is_array($doc->{$fieldname})) {
            throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
        }

        if (empty($highlighteddoc->$fieldname)) {
            // Nothing highlighted for this field, keep the original value.
            continue;
        }

        // Replace by the highlighted result.
        $doc->$fieldname = reset($highlighteddoc->$fieldname);
    }
}
483 | ||
/**
 * Filters the response on Moodle side.
 *
 * Grouped responses are delegated to grouped_files_process_response() (without $skipaccesscheck).
 * For standard responses each doc is checked for ownership, search area and access; docs found
 * to be deleted are removed from the index, and the query counters are updated along the way.
 *
 * @param \SolrObject $response Solr object containing the response return from solr server.
 * @param int $limit The maximum number of results to return. 0 for all.
 * @param bool $skipaccesscheck Don't use check_access() on results. Only to be used when results have known access.
 * @return array $results containing final results to be displayed.
 */
protected function process_response($response, $limit = 0, $skipaccesscheck = false) {
    global $USER;

    if (empty($response)) {
        return array();
    }

    if (isset($response->grouped)) {
        return $this->grouped_files_process_response($response, $limit);
    }

    $userid = $USER->id;
    $noownerid = \core_search\manager::NO_OWNER_ID;

    if (!$docs = $response->response->docs) {
        return array();
    }

    if (empty($response->response->numFound)) {
        return array();
    }

    $this->add_highlight_content($response);

    // Iterate through the results checking its availability and whether they are available for the user or not.
    $out = array();
    foreach ($docs as $solrdoc) {
        // If owneruserid is set, no other user should be able to access this record.
        $visibletouser = ($solrdoc['owneruserid'] == $noownerid || $solrdoc['owneruserid'] == $userid);
        if (!$visibletouser) {
            continue;
        }

        $searcharea = $this->get_search_area($solrdoc->areaid);
        if (!$searcharea) {
            continue;
        }

        $docdata = $this->standarize_solr_obj($solrdoc);

        $access = $skipaccesscheck
            ? \core_search\manager::ACCESS_GRANTED
            : $searcharea->check_access($docdata['itemid']);

        if ($access == \core_search\manager::ACCESS_DELETED) {
            $this->delete_by_id($docdata['id']);
            // Remove one from our processed and total counters, since we promptly deleted.
            $this->processeddocs--;
            $this->totalenginedocs--;
        } else if ($access == \core_search\manager::ACCESS_DENIED) {
            $this->skippeddocs++;
        } else if ($access == \core_search\manager::ACCESS_GRANTED) {
            // Add the doc.
            $out[] = $this->to_document($searcharea, $docdata);
        }

        // Stop when we hit our limit.
        if (!empty($limit) && count($out) >= $limit) {
            break;
        }
    }

    return $out;
}
561 | ||
cd894f84 EM |
/**
 * Processes grouped file results into documents, with attached matching files.
 *
 * Each group is access checked through its first doc. Groups whose main document is part of the
 * response are converted straight away; for the others the main document is fetched afterwards
 * with get_missing_docs(). The returned list keeps the order of the groups in the response.
 *
 * @param \SolrObject $response The response returned from solr server
 * @param int $limit The maximum number of results to return. 0 for all.
 * @return array Final results to be displayed.
 */
protected function grouped_files_process_response($response, $limit = 0) {
    // If we can't find the grouping, or there are no matches in the grouping, return empty.
    if (!isset($response->grouped->solr_filegroupingid) || empty($response->grouped->solr_filegroupingid->matches)) {
        return array();
    }

    $numgranted = 0;
    $orderedids = array();
    $completedocs = array();
    $incompletedocs = array();

    // The response may carry no highlighting section; only read it when present.
    $highlightingobj = isset($response->highlighting) ? $response->highlighting : null;

    // Each group represents a "master document".
    $groups = $response->grouped->solr_filegroupingid->groups;
    foreach ($groups as $group) {
        $groupid = $group->groupValue;
        $groupdocs = $group->doclist->docs;
        if (empty($groupdocs)) {
            // A group without docs gives us nothing to check access against or to build from.
            continue;
        }
        $firstdoc = reset($groupdocs);

        if (!$searcharea = $this->get_search_area($firstdoc->areaid)) {
            // Well, this is a problem.
            continue;
        }

        // Check for access.
        $access = $searcharea->check_access($firstdoc->itemid);
        if ($access == \core_search\manager::ACCESS_DELETED) {
            // If deleted from Moodle, delete from index and then continue.
            $this->delete_by_id($firstdoc->id);
            // Remove one from our processed and total counters, since we promptly deleted.
            $this->processeddocs--;
            $this->totalenginedocs--;
            continue;
        }
        if ($access == \core_search\manager::ACCESS_DENIED) {
            // This means we should just skip for the current user.
            $this->skippeddocs++;
            continue;
        }
        $numgranted++;

        $maindoc = false;
        $fileids = array();
        // Separate the main document and any files returned.
        foreach ($groupdocs as $groupdoc) {
            if ($groupdoc->id == $groupid) {
                $maindoc = $groupdoc;
            } else if (isset($groupdoc->solr_fileid)) {
                $fileids[] = $groupdoc->solr_fileid;
            }
        }

        // Store the id of this group, in order, for later merging.
        $orderedids[] = $groupid;

        if (!$maindoc) {
            // We don't have the main doc, store what we know for later building.
            $incompletedocs[$groupid] = $fileids;
        } else {
            if (isset($highlightingobj->$groupid)) {
                // Merge the highlighting for this doc.
                $this->merge_highlight_field_values($maindoc, $highlightingobj->$groupid);
            }
            $docdata = $this->standarize_solr_obj($maindoc);
            $doc = $this->to_document($searcharea, $docdata);
            // Now we need to attach the result files to the doc.
            foreach ($fileids as $fileid) {
                $doc->add_stored_file($fileid);
            }
            $completedocs[$groupid] = $doc;
        }

        if (!empty($limit) && $numgranted >= $limit) {
            // We have hit the max results, we will just ignore the rest.
            break;
        }
    }

    // Fetch the main documents that were not part of the grouped response.
    $incompletedocs = $this->get_missing_docs($incompletedocs);

    $out = array();
    // Now merge the complete and incomplete documents, in results order.
    foreach ($orderedids as $docid) {
        if (isset($completedocs[$docid])) {
            $out[] = $completedocs[$docid];
        } else if (isset($incompletedocs[$docid])) {
            $out[] = $incompletedocs[$docid];
        }
    }

    return $out;
}
664 | ||
/**
 * Retrieve any missing main documents and attach provided files.
 *
 * The missingdocs array should be an array, indexed by document id, of main documents we need to retrieve. The value
 * associated to the key should be an array of stored_files or stored file ids to attach to the result document.
 *
 * Return array also indexed by document id.
 *
 * @param array $missingdocs An array, indexed by document id, with arrays of files/ids to attach.
 * @return document[]
 */
protected function get_missing_docs($missingdocs) {
    if (empty($missingdocs)) {
        return array();
    }

    $wantedids = array_keys($missingdocs);

    // Build a custom query that will get all the missing documents, one row per wanted id.
    $query = new \SolrQuery();
    $this->set_query($query, '*');
    $this->add_fields($query);
    $query->setRows(count($wantedids));
    $query->addFilterQuery('{!cache=false}id:(' . implode(' OR ', $wantedids) . ')');

    // We know the missing docs have already been checked for access, so don't recheck.
    $founddocs = $this->process_response($this->get_query_response($query), 0, true);

    $out = array();
    foreach ($founddocs as $founddoc) {
        $foundid = $founddoc->get('id');
        if (isset($missingdocs[$foundid])) {
            // Attach the files.
            foreach ($missingdocs[$foundid] as $file) {
                $founddoc->add_stored_file($file);
            }
            $out[$foundid] = $founddoc;
        }
        // Anything else is a result we didn't expect, and is left out.
    }

    return $out;
}
710 | ||
95c6aeaf DM |
/**
 * Converts a \SolrObject instance into a plain php array keyed by (trimmed) property name.
 *
 * @param \SolrObject $obj
 * @return array The returned document as an array.
 */
public function standarize_solr_obj(\SolrObject $obj) {
    $docdata = array();

    foreach ($obj->getPropertyNames() as $rawname) {
        // Names are trimmed before use, see http://php.net/manual/en/solrobject.getpropertynames.php#98018.
        $name = trim($rawname);
        $docdata[$name] = $obj->offsetGet($name);
    }

    return $docdata;
}
728 | ||
/**
 * Adds a document to the search engine.
 *
 * This does not commit to the search engine.
 *
 * @param document $document
 * @param bool $fileindexing True if file indexing is to be used
 * @return bool False if the document itself could not be added, true otherwise.
 */
public function add_document($document, $fileindexing = false) {
    $added = $this->add_solr_document($document->export_for_engine());
    if (!$added) {
        return false;
    }

    if ($fileindexing) {
        // This will take care of updating all attached files in the index.
        $this->process_document_files($document);
    }

    return true;
}
95c6aeaf | 752 | |
091973db EM |
/**
 * Adds a text document to the search engine.
 *
 * Solr client and server exceptions are reported through debugging() and turned into a false
 * return value instead of being propagated.
 *
 * @param array $doc Field name => value pairs of the document; the 'id' entry is used in error messages.
 * @return bool True if the document was handed to Solr without an exception, false otherwise.
 */
protected function add_solr_document($doc) {
    $solrdoc = new \SolrInputDocument();
    foreach ($doc as $fieldname => $fieldvalue) {
        $solrdoc->addField($fieldname, $fieldvalue);
    }

    $added = false;
    try {
        $this->get_search_client()->addDocument($solrdoc, true, static::AUTOCOMMIT_WITHIN);
        $added = true;
    } catch (\SolrClientException $e) {
        debugging('Solr client error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER);
    } catch (\SolrServerException $e) {
        // We only use the first line of the message, as it's a fully java stacktrace behind it.
        $msg = strtok($e->getMessage(), "\n");
        debugging('Solr server error adding document with id ' . $doc['id'] . ': ' . $msg, DEBUG_DEVELOPER);
    }

    return $added;
}
778 | ||
cd894f84 EM |
/**
 * Index files attached to the document, ensuring the index matches the current document files.
 *
 * For documents that aren't known to be new, the index is checked for existing files:
 * - New files are added.
 * - Existing and unchanged files are skipped.
 * - Files that are in the index but no longer on the document are deleted from the index.
 * - Files that have changed are re-indexed.
 *
 * @param document $document
 */
protected function process_document_files($document) {
    if (!$this->file_indexing_enabled()) {
        return;
    }

    // Maximum number of indexed file records to fetch per request.
    $batchsize = 500;

    // Files currently attached to the document, keyed by file id. Whatever is left at the end gets indexed.
    $files = $document->get_files();

    // If this isn't a new document, we need to compare against the existing indexed files.
    if (!$document->get_is_new()) {
        $offset = 0;
        $idstodelete = array();

        // Fetched progressively, so that documents with lots of files are handled cleanly.
        list($total, $batch) = $this->get_indexed_files($document, $offset, $batchsize);

        do {
            foreach ($batch as $indexed) {
                $fileid = $indexed->solr_fileid;

                if (!isset($files[$fileid])) {
                    // The file is no longer attached. Deletion is deferred, because deleting
                    // while paging through the results could reorder them.
                    $idstodelete[] = $indexed->id;
                    continue;
                }

                $current = $files[$fileid];

                // Filelib does not guarantee time modified is updated, so the other important values
                // are compared too. The last clause covers files that filtering blocked the last
                // time round, but that the current settings consider indexable.
                $needsreindex = $indexed->modified != $current->get_timemodified()
                        || strcmp($indexed->title, $current->get_filename()) !== 0
                        || $indexed->solr_filecontenthash != $current->get_contenthash()
                        || ($indexed->solr_fileindexstatus == document::INDEXED_FILE_FALSE
                                && $this->file_is_indexable($current));

                if (!$needsreindex) {
                    // Already indexed and unchanged, so there is nothing to send for this file.
                    unset($files[$fileid]);
                }
            }

            $offset += $batchsize;
            $hasmore = $offset < $total;

            if ($hasmore) {
                // The total has not been reached yet, so fetch the next batch and re-check against the new total.
                list($total, $batch) = $this->get_indexed_files($document, $offset, $batchsize);
                $hasmore = $offset < $total;
            }
        } while ($hasmore);

        // Delete files that are no longer attached.
        $client = $this->get_search_client();
        foreach ($idstodelete as $id) {
            // The client is used directly, as the engine delete_by_id won't work on file docs.
            $client->deleteById($id);
        }
    }

    // Now actually index all the remaining files.
    foreach ($files as $file) {
        $this->add_stored_file($document, $file);
    }
}
861 | ||
/**
 * Get the currently indexed files for a particular document: the total count plus one page of file records.
 *
 * @param document $document
 * @param int $start The row to start the results on. Zero indexed.
 * @param int $rows The number of rows to fetch
 * @return array A two element array: the total number of available results, then an array
 *               of file records for the current request.
 */
protected function get_indexed_files($document, $start = 0, $rows = 500) {
    // A stripped down, purpose-built query is used for efficiency.
    $query = new \SolrQuery();
    $query->setQuery('*');
    $query->setRows($rows);
    $query->setStart($start);

    // Paging only works with a consistent sort order.
    $query->addSortField('id');

    // Request only the bare minimum of fields.
    $fields = array('id', 'modified', 'title', 'solr_fileid', 'solr_filecontenthash', 'solr_fileindexstatus');
    foreach ($fields as $field) {
        $query->addField($field);
    }

    // Restrict to file records that belong to this document's solr_filegroupingid.
    $query->addFilterQuery('{!cache=false}solr_filegroupingid:(' . $document->get('id') . ')');
    $query->addFilterQuery('type:' . \core_search\manager::TYPE_FILE);

    $response = $this->get_query_response($query);
    if (!empty($response->response->numFound)) {
        return array($response->response->numFound, $this->convert_file_results($response));
    }

    return array(0, array());
}
901 | ||
/**
 * A very lightweight converter that pulls the information about already indexed files out of a Solr response.
 *
 * @param \SolrObject $responsedoc A Solr response document
 * @return \stdClass[] Objects holding the basic information needed for file processing.
 */
protected function convert_file_results($responsedoc) {
    $records = array();

    $docs = $responsedoc->response->docs;
    if (!$docs) {
        return $records;
    }

    foreach ($docs as $doc) {
        // Copy only the fields that file processing needs; the modified time goes through the engine time import.
        $records[] = (object)array(
            'id' => $doc->id,
            'modified' => document::import_time_from_engine($doc->modified),
            'title' => $doc->title,
            'solr_fileid' => $doc->solr_fileid,
            'solr_filecontenthash' => $doc->solr_filecontenthash,
            'solr_fileindexstatus' => $doc->solr_fileindexstatus,
        );
    }

    return $records;
}
929 | ||
/**
 * Adds a file to the search engine.
 *
 * Notes about Solr and Tika indexing. We do not send the mime type, only the filename.
 * Tika has much better content type detection than Moodle, and we will have many more doc failures
 * if we try to send mime types.
 *
 * Files that are not indexable, or whose extraction request fails, are still recorded in the index
 * with their base fields only, flagged through solr_fileindexstatus.
 *
 * @param document $document
 * @param \stored_file $storedfile
 * @return void
 */
protected function add_stored_file($document, $storedfile) {
    $filedoc = $document->export_file_for_engine($storedfile);

    if (!$this->file_is_indexable($storedfile)) {
        // For files that we don't consider indexable, we will still place a reference in the search engine.
        $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_FALSE;
        $this->add_solr_document($filedoc);
        return;
    }

    $curl = $this->get_curl_object();

    $url = $this->get_connection_url('/update/extract');

    // Return results as XML.
    $url->param('wt', 'xml');

    // This will prevent solr from automatically making fields for every tika output.
    $url->param('uprefix', 'ignored_');

    // Control how content is captured. This will keep our file content clean of non-important metadata.
    $url->param('captureAttr', 'true');
    // Move the content to a field for indexing.
    $url->param('fmap.content', 'solr_filecontent');

    // These are common fields that match the standard *_point dynamic field and cause an error.
    $url->param('fmap.media_white_point', 'ignored_mwp');
    $url->param('fmap.media_black_point', 'ignored_mbp');

    // Copy each key to the url with literal.
    // We place in a temp name then copy back to the true field, which prevents errors or Tika overwriting common field names.
    foreach ($filedoc as $key => $value) {
        // This will take any fields from tika that match our schema and discard them, so they don't overwrite ours.
        $url->param('fmap.'.$key, 'ignored_'.$key);
        // Place data in a tmp field.
        $url->param('literal.mdltmp_'.$key, $value);
        // Then move to the final field.
        $url->param('fmap.mdltmp_'.$key, $key);
    }

    // This sets the true filename for Tika.
    $url->param('resource.name', $storedfile->get_filename());

    // A giant block of code that is really just error checking around the curl request.
    try {
        // Now actually do the request.
        $result = $curl->post($url->out(false), array('myfile' => $storedfile));

        $code = $curl->get_errno();
        $info = $curl->get_info();

        // Now error handling. It is just informational, since we aren't tracking per file/doc results.
        if ($code != 0) {
            // This means an internal cURL error occurred; the error text is in the result.
            $message = 'Curl error '.$code.' while indexing file with document id '.$filedoc['id'].': '.$result.'.';
            debugging($message, DEBUG_DEVELOPER);
        } else if (isset($info['http_code']) && ($info['http_code'] !== 200)) {
            // Unexpected HTTP response code.
            $message = 'Error while indexing file with document id '.$filedoc['id'];
            // Try to get error message out of msg or title if it exists.
            if (preg_match('|<str [^>]*name="msg"[^>]*>(.*?)</str>|i', $result, $matches)) {
                $message .= ': '.$matches[1];
            } else if (preg_match('|<title[^>]*>([^>]*)</title>|i', $result, $matches)) {
                $message .= ': '.$matches[1];
            }
            // This is a common error, happening whenever a file fails to index for any reason, so we will make it quieter.
            if (CLI_SCRIPT && !PHPUNIT_TEST) {
                mtrace($message);
            }
        } else {
            // Check for the expected status field.
            if (preg_match('|<int [^>]*name="status"[^>]*>(\d*)</int>|i', $result, $matches)) {
                // Now check for the expected status of 0, if not, error.
                if ((int)$matches[1] !== 0) {
                    $message = 'Unexpected Solr status code '.(int)$matches[1];
                    $message .= ' while indexing file with document id '.$filedoc['id'].'.';
                    debugging($message, DEBUG_DEVELOPER);
                } else {
                    // The document was successfully indexed.
                    return;
                }
            } else {
                // We received an unprocessable response.
                $message = 'Unexpected Solr response while indexing file with document id '.$filedoc['id'].': ';
                $message .= strtok($result, "\n");
                debugging($message, DEBUG_DEVELOPER);
            }
        }
    } catch (\Exception $e) {
        // There was an error, but we are not tracking per-file success, so we just continue on.
        // The exception text is included, otherwise the cause of the failure is lost entirely.
        debugging('Unknown exception while indexing file "'.$storedfile->get_filename().'": '.$e->getMessage(),
                DEBUG_DEVELOPER);
    }

    // If we get here, the document was not indexed due to an error. So we will index just the base info without the file.
    $filedoc['solr_fileindexstatus'] = document::INDEXED_FILE_ERROR;
    $this->add_solr_document($filedoc);
}
1038 | ||
/**
 * Decides whether the content of a file should be sent for indexing.
 *
 * @param \stored_file $file The file to check
 * @return bool True if the file can be indexed
 */
protected function file_is_indexable($file) {
    // The size limit is configured in KB and only applies when it is set to a non-empty value.
    $toobig = !empty($this->config->maxindexfilekb)
            && $file->get_filesize() > ($this->config->maxindexfilekb * 1024);
    if ($toobig) {
        return false;
    }

    // We don't index Moodle backup files. There is nothing usefully indexable in them.
    return $file->get_mimetype() != 'application/vnd.moodle.backup';
}
1060 | ||
95c6aeaf DM |
/**
 * Asks the Solr client to commit all pending changes.
 *
 * @return void
 */
protected function commit() {
    $client = $this->get_search_client();
    $client->commit();
}
1069 | ||
075fa912 EM |
/**
 * Called once a search area has been processed; commits the pending changes.
 *
 * Returning false would prevent the search area completed time and stats from being updated;
 * this implementation always returns true.
 *
 * @param \core_search\base $searcharea The search area that was completed
 * @param int $numdocs The number of documents that were added to the index
 * @param bool $fullindex True if a full index is being performed
 * @return bool True means that data is considered indexed
 */
public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
    // None of the arguments are needed here, the commit covers everything that is pending.
    $this->commit();

    return true;
}
1085 | ||
cd894f84 EM |
/**
 * Return true if file indexing is supported and enabled. False otherwise.
 *
 * A missing 'fileindexing' setting is treated as disabled.
 *
 * @return bool
 */
public function file_indexing_enabled() {
    // Using !empty() gives the same result as a bool cast, without a notice when the setting is absent.
    return !empty($this->config->fileindexing);
}
1094 | ||
95c6aeaf DM |
/**
 * Defragments the index.
 *
 * @return void
 */
public function optimize() {
    $client = $this->get_search_client();
    // NOTE(review): per the pecl-solr signature these look like (maxSegments, softCommit, waitSearcher) - confirm.
    $client->optimize(1, true, false);
}
1103 | ||
/**
 * Deletes the specified document, then commits.
 *
 * @param string $id The document id to delete
 * @return void
 */
public function delete_by_id($id) {
    // Matching on solr_filegroupingid removes the item together with all of its related files.
    $query = 'solr_filegroupingid:' . $id;

    $this->get_search_client()->deleteByQuery($query);
    $this->commit();
}
1115 | ||
/**
 * Deletes the documents of one search area, or every document when no area is given, then commits.
 *
 * @param string $areaid The area to wipe; any empty value deletes the whole index content
 * @return void
 */
public function delete($areaid = null) {
    $query = $areaid ? 'areaid:' . $areaid : '*:*';

    $this->get_search_client()->deleteByQuery($query);
    $this->commit();
}
1130 | ||
/**
 * Checks that the Solr server described by the search_solr config is usable.
 *
 * @return true|string Returns true if all good or an error string.
 */
public function is_server_ready() {
    $status = $this->is_server_configured();
    if ($status !== true) {
        return $status;
    }

    // The check above has already contacted the server. On pages where performance
    // is important the full schema check is skipped.
    if ($this->should_skip_schema_check()) {
        return true;
    }

    // Update schema if required/possible.
    $status = $this->check_latest_schema();
    if ($status !== true) {
        return $status;
    }

    // Finally make sure that the schema is already set up.
    try {
        $schema = new schema();
        $schema->validate_setup();
        return true;
    } catch (\moodle_exception $e) {
        return $e->getMessage();
    }
}
1165 | ||
/**
 * Is the solr server properly configured?
 *
 * Checks that a hostname and index name are set, that a client can be obtained and that
 * the server reports at least Solr 4.
 *
 * @return true|string Returns true if all good or an error string.
 */
public function is_server_configured() {

    if (empty($this->config->server_hostname) || empty($this->config->indexname)) {
        return 'No solr configuration found';
    }

    // Only the availability of a client matters here, the instance itself is not needed.
    if (!$this->get_search_client(false)) {
        return get_string('engineserverstatus', 'search');
    }

    try {
        if ($this->get_solr_major_version() < 4) {
            // Minimum solr 4.0.
            return get_string('minimumsolr4', 'search_solr');
        }
    } catch (\SolrClientException $ex) {
        debugging('Solr client error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
        return get_string('engineserverstatus', 'search');
    } catch (\SolrServerException $ex) {
        debugging('Solr server error: ' . html_to_text($ex->getMessage()), DEBUG_DEVELOPER);
        return get_string('engineserverstatus', 'search');
    }

    return true;
}
1196 | ||
23fc1be8 DM |
/**
 * Returns the solr server major version.
 *
 * The value is requested from the server once and then kept in $this->solrmajorversion.
 *
 * @return int
 */
public function get_solr_major_version() {
    if ($this->solrmajorversion !== null) {
        return $this->solrmajorversion;
    }

    // We should really ping first the server to see if the specified indexname is valid but
    // we want to minimise solr server requests as they are expensive. system() emits a warning
    // if it can not connect to the configured index in the configured server.
    $systemdata = @$this->get_search_client()->system();
    $solrversion = $systemdata->getResponse()->offsetGet('lucene')->offsetGet('solr-spec-version');

    // intval() reads the leading digits only, so "6.6.2" gives 6. Unlike cutting the string at the
    // first dot, this also works for a version string that contains no dot at all.
    $this->solrmajorversion = intval($solrversion);

    return $this->solrmajorversion;
}
1216 | ||
95c6aeaf DM |
/**
 * Checks if the PHP Solr extension is available.
 *
 * @return bool True when the solr_get_version() function exists
 */
public function is_installed() {
    $available = function_exists('solr_get_version');

    return $available;
}
1225 | ||
/**
 * Returns the solr client instance.
 *
 * The client is built from the plugin config and the site proxy settings. It is only kept for
 * reuse when $this->cacheclient is set; per the original note, the client is not reused on
 * libcurl 7.35.0 due to a bug in that version of curl.
 *
 * @throws \core_search\engine_exception
 * @param bool $triggerexception
 * @return \SolrClient
 */
protected function get_search_client($triggerexception = true) {
    global $CFG;

    // Reuse the client if one has been stored already.
    if ($this->client !== null) {
        return $this->client;
    }

    $options = array(
        'hostname' => $this->config->server_hostname,
        'path' => '/solr/' . $this->config->indexname,
        'login' => !empty($this->config->server_username) ? $this->config->server_username : '',
        'password' => !empty($this->config->server_password) ? $this->config->server_password : '',
        'port' => !empty($this->config->server_port) ? $this->config->server_port : '',
        'secure' => !empty($this->config->secure) ? true : false,
        'ssl_cert' => !empty($this->config->ssl_cert) ? $this->config->ssl_cert : '',
        'ssl_key' => !empty($this->config->ssl_key) ? $this->config->ssl_key : '',
        'ssl_keypassword' => !empty($this->config->ssl_keypassword) ? $this->config->ssl_keypassword : '',
        'ssl_cainfo' => !empty($this->config->ssl_cainfo) ? $this->config->ssl_cainfo : '',
        'ssl_capath' => !empty($this->config->ssl_capath) ? $this->config->ssl_capath : '',
        'timeout' => !empty($this->config->server_timeout) ? $this->config->server_timeout : '30'
    );

    // Go through the site proxy unless the Solr host is on the bypass list. The host setting is
    // checked with !empty(), like the other proxy settings, so a missing value raises no notice.
    if (!empty($CFG->proxyhost) && !is_proxybypass('http://' . $this->config->server_hostname . '/')) {
        $options['proxy_host'] = $CFG->proxyhost;
        if (!empty($CFG->proxyport)) {
            $options['proxy_port'] = $CFG->proxyport;
        }
        if (!empty($CFG->proxyuser) && !empty($CFG->proxypassword)) {
            $options['proxy_login'] = $CFG->proxyuser;
            $options['proxy_password'] = $CFG->proxypassword;
        }
    }

    // A missing extension is always reported with an exception, whatever $triggerexception says.
    if (!class_exists('\SolrClient')) {
        throw new \core_search\engine_exception('enginenotinstalled', 'search', '', 'solr');
    }

    $client = new \SolrClient($options);

    // NOTE(review): "new" does not evaluate to false, so this branch looks unreachable; kept as it was.
    if ($client === false && $triggerexception) {
        throw new \core_search\engine_exception('engineserverstatus', 'search');
    }

    if ($this->cacheclient) {
        $this->client = $client;
    }

    return $client;
}
5dc4624c EM |
1285 | |
/**
 * Returns a curl object for connecting to solr, creating and configuring it on first use.
 *
 * @return \curl
 */
public function get_curl_object() {
    if ($this->curl !== null) {
        return $this->curl;
    }

    // Connection to Solr is allowed to use 'localhost' and other potentially blocked hosts/ports.
    $this->curl = new \curl(['ignoresecurity' => true]);

    $options = array();

    // Build the SSL options. Based on pecl-solr and general testing.
    if (!empty($this->config->secure)) {
        // Config setting => [curl option for the value, optional curl option for its type].
        $sslsettings = array(
            'ssl_cert' => array('CURLOPT_SSLCERT', 'CURLOPT_SSLCERTTYPE'),
            'ssl_key' => array('CURLOPT_SSLKEY', 'CURLOPT_SSLKEYTYPE'),
            'ssl_keypassword' => array('CURLOPT_KEYPASSWD', null),
            'ssl_cainfo' => array('CURLOPT_CAINFO', null),
            'ssl_capath' => array('CURLOPT_CAPATH', null),
        );

        foreach ($sslsettings as $setting => $curlopts) {
            if (empty($this->config->$setting)) {
                continue;
            }
            $options[$curlopts[0]] = $this->config->$setting;
            if ($curlopts[1] !== null) {
                // Certificates and keys are declared as PEM.
                $options[$curlopts[1]] = 'PEM';
            }
        }
    }

    // Set timeout as for Solr client.
    $timeout = '30';
    if (!empty($this->config->server_timeout)) {
        $timeout = $this->config->server_timeout;
    }
    $options['CURLOPT_TIMEOUT'] = $timeout;

    $this->curl->setopt($options);

    // Basic authentication is only sent when both a username and a password are configured.
    $hascredentials = !empty($this->config->server_username) && !empty($this->config->server_password);
    if ($hascredentials) {
        $credentials = $this->config->server_username . ':' . $this->config->server_password;
        $this->curl->setHeader('Authorization: Basic ' . base64_encode($credentials));
    }

    return $this->curl;
}
1337 | ||
/**
 * Builds a Moodle url object pointing at a path of the configured Solr index.
 *
 * @param string $path The solr path to append.
 * @return \moodle_url
 */
public function get_connection_url($path) {
    // Must use the proper protocol, or SSL will fail.
    $scheme = empty($this->config->secure) ? 'http' : 'https';

    // The port is only added when one is configured.
    $port = empty($this->config->server_port) ? '' : ':' . $this->config->server_port;

    $base = $scheme . '://' . rtrim($this->config->server_hostname, '/') . $port;

    return new \moodle_url($base . '/solr/' . $this->config->indexname . '/' . ltrim($path, '/'));
}
4359ef18 | 1355 | |
/**
 * Reports that group filtering is supported; Solr handles groups in the execute_query function.
 *
 * @return bool Always true
 */
public function supports_group_filtering() {
    $supported = true;

    return $supported;
}
1364 | ||
/**
 * Updates the Solr schema to a new version.
 *
 * @param int $oldversion The previous schema version (not used by this implementation)
 * @param int $newversion The schema version to update to
 * @return bool|string True on success, otherwise the result of the server setup check
 *                     or an error string for an unknown version
 */
protected function update_schema($oldversion, $newversion) {
    // Construct schema and make sure the server can be set up at all.
    $schema = new schema();
    $cansetup = $schema->can_setup_server();
    if ($cansetup !== true) {
        return $cansetup;
    }

    // Versions that just require a setup call to add new fields.
    $setupversions = array(2017091700);

    // Loose comparison on purpose: it matches the way the version was compared before.
    if (!in_array($newversion, $setupversions)) {
        // If we don't know about the schema version we might not have implemented the
        // change correctly, so return.
        return get_string('schemaversionunknown', 'search');
    }

    $schema->setup();

    return true;
}
fc440796 | 1391 | |
/**
 * Lists the supported result orders; adds sort by location within course contexts or below.
 *
 * @param \context $context Context that the user requested search from
 * @return array Array from order name => display text
 */
public function get_supported_orders(\context $context) {
    $orders = parent::get_supported_orders($context);

    // If not within a course, no other kind of sorting is supported.
    if (!$context->get_course_context(false)) {
        return $orders;
    }

    // Within a course or activity/block, support sort by location.
    $contextname = $context->get_context_name();
    $orders['location'] = get_string('order_location', 'search', $contextname);

    return $orders;
}
222a97ce | 1411 | |
/**
 * Reports that searching by user id is supported by Solr.
 *
 * @return bool Always true
 */
public function supports_users() {
    $supported = true;

    return $supported;
}
95c6aeaf | 1420 | } |