Commit | Line | Data |
---|---|---|
95c6aeaf DM |
1 | <?php |
2 | // This file is part of Moodle - http://moodle.org/ | |
3 | // | |
4 | // Moodle is free software: you can redistribute it and/or modify | |
5 | // it under the terms of the GNU General Public License as published by | |
6 | // the Free Software Foundation, either version 3 of the License, or | |
7 | // (at your option) any later version. | |
8 | // | |
9 | // Moodle is distributed in the hope that it will be useful, | |
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | // GNU General Public License for more details. | |
13 | // | |
14 | // You should have received a copy of the GNU General Public License | |
15 | // along with Moodle. If not, see <http://www.gnu.org/licenses/>. | |
16 | ||
17 | /** | |
18 | * Solr engine. | |
19 | * | |
20 | * @package search_solr | |
21 | * @copyright 2015 Daniel Neis Araujo | |
22 | * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later | |
23 | */ | |
24 | ||
25 | namespace search_solr; | |
26 | ||
27 | defined('MOODLE_INTERNAL') || die(); | |
28 | ||
29 | /** | |
30 | * Solr engine. | |
31 | * | |
32 | * @package search_solr | |
33 | * @copyright 2015 Daniel Neis Araujo | |
34 | * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later | |
35 | */ | |
36 | class engine extends \core_search\engine { | |
37 | ||
38 | /** | |
39 | * @var string The date format used by solr. | |
40 | */ | |
41 | const DATE_FORMAT = 'Y-m-d\TH:i:s\Z'; | |
42 | ||
43 | /** | |
44 | * @var int Commit documents interval (number of milliseconds). | |
45 | */ | |
46 | const AUTOCOMMIT_WITHIN = 15000; | |
47 | ||
48 | /** | |
49 | * @var int Highlighting fragsize. | |
50 | */ | |
51 | const FRAG_SIZE = 500; | |
52 | ||
53 | /** | |
54 | * @var \SolrClient | |
55 | */ | |
56 | protected $client = null; | |
57 | ||
5dc4624c EM |
58 | /** |
59 | * @var \curl Direct curl object. | |
60 | */ | |
61 | protected $curl = null; | |
62 | ||
95c6aeaf DM |
63 | /** |
64 | * @var array Fields that can be highlighted. | |
65 | */ | |
66 | protected $highlightfields = array('content', 'description1', 'description2'); | |
67 | ||
/**
 * Prepares a Solr query, applies filters and executes it returning its results.
 *
 * The filters are applied in this order: title, area, modification time range,
 * document owner and finally the contexts the user has access to.
 *
 * @throws \core_search\engine_exception If the server is not ready.
 * @param stdClass $filters Containing query and filters.
 * @param array $usercontexts Contexts where the user has access. True if the user can access all contexts.
 * @return \core_search\document[] Results, an empty array if there are no results or the query failed.
 */
public function execute_query($filters, $usercontexts) {
    global $USER;

    // Let's keep these changes internal.
    $data = clone $filters;

    // If there is any problem we trigger the exception as soon as possible.
    $this->client = $this->get_search_client();

    $serverstatus = $this->is_server_ready();
    if ($serverstatus !== true) {
        throw new \core_search\engine_exception('engineserverstatus', 'search');
    }

    $query = new \SolrQuery();
    $this->set_query($query, $data->q);
    $this->add_fields($query);

    // Search filters applied, we don't cache these filters as we don't want to pollute the cache with tmp filters
    // we are really interested in caching contexts filters instead.
    if (!empty($data->title)) {
        $query->addFilterQuery('{!field cache=false f=title}' . $data->title);
    }
    if (!empty($data->areaid)) {
        // Even if it is only supposed to contain PARAM_ALPHANUMEXT, better to prevent.
        $query->addFilterQuery('{!field cache=false f=areaid}' . $data->areaid);
    }

    if (!empty($data->timestart) or !empty($data->timeend)) {
        // An open end of the range is expressed with the '*' wildcard.
        if (empty($data->timestart)) {
            $data->timestart = '*';
        } else {
            $data->timestart = \search_solr\document::format_time_for_engine($data->timestart);
        }
        if (empty($data->timeend)) {
            $data->timeend = '*';
        } else {
            $data->timeend = \search_solr\document::format_time_for_engine($data->timeend);
        }

        // No cache.
        $query->addFilterQuery('{!cache=false}modified:[' . $data->timestart . ' TO ' . $data->timeend . ']');
    }

    // Restrict to users who are supposed to be able to see a particular result.
    $query->addFilterQuery('owneruserid:(' . \core_search\manager::NO_OWNER_ID . ' OR ' . $USER->id . ')');

    // And finally restrict it to the context where the user can access, we want this one cached.
    // If the user can access all contexts $usercontexts value is just true, we don't need to filter
    // in that case.
    if ($usercontexts && is_array($usercontexts)) {
        if (!empty($data->areaid)) {
            if (empty($usercontexts[$data->areaid])) {
                // The user has no accessible context in the requested area, so nothing can match.
                // Returning here avoids sending a malformed 'contextid:()' filter to the server.
                return array();
            }
            $query->addFilterQuery('contextid:(' . implode(' OR ', $usercontexts[$data->areaid]) . ')');
        } else {
            // Join all area contexts into a single array and implode.
            $allcontexts = array();
            foreach ($usercontexts as $areacontexts) {
                foreach ($areacontexts as $contextid) {
                    // Ensure they are unique.
                    $allcontexts[$contextid] = $contextid;
                }
            }
            if (empty($allcontexts)) {
                // Same as above: an empty list of contexts would produce an invalid filter.
                return array();
            }
            $query->addFilterQuery('contextid:(' . implode(' OR ', $allcontexts) . ')');
        }
    }

    try {
        return $this->query_response($this->client->query($query));
    } catch (\SolrClientException $ex) {
        debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
        $this->queryerror = $ex->getMessage();
        return array();
    } catch (\SolrServerException $ex) {
        debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
        $this->queryerror = $ex->getMessage();
        return array();
    }

}
155 | ||
/**
 * Initialises a query: search text, maximum number of rows and highlighting options.
 *
 * @param SolrQuery $query The query object to configure.
 * @param string $q The text to search for (execute_query passes the 'q' attribute of the filters).
 */
protected function set_query($query, $q) {

    $query->setQuery($q);

    // A reasonable max.
    $query->setRows(\core_search\manager::MAX_RESULTS);

    // Highlighting: matches are wrapped between the '__' markers.
    $query->setHighlight(true);
    foreach ($this->highlightfields as $highlightfield) {
        $query->addHighlightField($highlightfield);
    }
    $query->setHighlightFragsize(static::FRAG_SIZE);
    $query->setHighlightSimplePre('__');
    $query->setHighlightSimplePost('__');
}
177 | ||
/**
 * Asks the engine to return every field of the default document definition.
 *
 * @param SolrQuery $query The query the fields are added to.
 */
public function add_fields($query) {
    $documentclass = $this->get_document_classname();
    $definition = $documentclass::get_default_fields_definition();

    // Only the field names (the keys of the definition) are needed here.
    foreach (array_keys($definition) as $fieldname) {
        $query->addField($fieldname);
    }
}
190 | ||
/**
 * Merges the highlighted fragments of the response into its documents.
 *
 * Documents and highlighting entries are matched by document id. A response without
 * a highlighting section, or without an entry for a given document, is tolerated:
 * the document is still validated by merge_highlight_field_values() but left untouched.
 *
 * @throws \core_search\engine_exception If a highlightable field is multivalued.
 * @param object $response containing results.
 */
public function add_highlight_content($response) {
    // The highlighting section may be missing from the response.
    $highlightedobject = isset($response->highlighting) ? $response->highlighting : null;

    foreach ($response->response->docs as $doc) {
        $docid = $doc->id;

        $highlighteddoc = null;
        if ($highlightedobject !== null && isset($highlightedobject->$docid)) {
            $highlighteddoc = $highlightedobject->$docid;
        }

        // Called even when there is nothing to merge so multivalued fields are still detected.
        $this->merge_highlight_field_values($doc, $highlighteddoc);
    }
}
203 | ||
/**
 * Replaces the highlightable fields of a document with their highlighted version.
 *
 * Only fields listed in $this->highlightfields that have a value in the document are considered.
 *
 * @throws \core_search\engine_exception If one of those fields holds an array (multivalued field).
 * @param object $doc containing the results.
 * @param object $highlighteddoc containing the highlighted results values.
 */
public function merge_highlight_field_values($doc, $highlighteddoc) {

    foreach ($this->highlightfields as $fieldname) {
        if (empty($doc->$fieldname)) {
            // Nothing to highlight in this field.
            continue;
        }

        // Check that the returned value is not an array. No way we can make this work with multivalued solr fields.
        if (is_array($doc->{$fieldname})) {
            throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
        }

        if (empty($highlighteddoc->$fieldname)) {
            // No highlighted fragment, keep the original value.
            continue;
        }

        // Replace by the highlighted result (the first fragment).
        $doc->$fieldname = reset($highlighteddoc->$fieldname);
    }
}
228 | ||
/**
 * Filters the response on Moodle side.
 *
 * Documents owned by another user, documents whose search area can not be found and
 * documents the area denies access to are removed. Documents reported as deleted are
 * also removed from the index. Granted documents are converted using to_document().
 *
 * @param object $queryresponse containing the response return from solr server.
 * @return array $results containing final results to be displayed.
 */
public function query_response($queryresponse) {
    global $USER;

    $currentuserid = $USER->id;
    $unownedid = \core_search\manager::NO_OWNER_ID;
    $maxresults = \core_search\manager::MAX_RESULTS;

    $response = $queryresponse->getResponse();

    if (!$docs = $response->response->docs) {
        return array();
    }

    if (empty($response->response->numFound)) {
        // Nothing to filter, the documents are returned as they came.
        return $docs;
    }

    $this->add_highlight_content($response);

    // Iterate through the results checking its availability and whether they are available for the user or not.
    $granted = 0;
    foreach ($docs as $index => $solrdoc) {
        $ownerid = $solrdoc['owneruserid'];
        if ($ownerid != $unownedid && $ownerid != $currentuserid) {
            // If owneruserid is set, no other user should be able to access this record.
            unset($docs[$index]);
            continue;
        }

        $area = $this->get_search_area($solrdoc->areaid);
        if (!$area) {
            unset($docs[$index]);
            continue;
        }

        $fields = $this->standarize_solr_obj($solrdoc);

        switch ($area->check_access($fields['itemid'])) {
            case \core_search\manager::ACCESS_DELETED:
                // The item is gone, drop it from the index as well.
                $this->delete_by_id($fields['id']);
                unset($docs[$index]);
                break;
            case \core_search\manager::ACCESS_DENIED:
                unset($docs[$index]);
                break;
            case \core_search\manager::ACCESS_GRANTED:
                $granted++;

                // Add the doc.
                $docs[$index] = $this->to_document($area, $fields);
                break;
        }

        // This should never happen.
        if ($granted >= $maxresults) {
            $docs = array_slice($docs, 0, $maxresults, true);
            break;
        }
    }

    return $docs;
}
293 | ||
/**
 * Converts a \SolrObject instance into a plain php array indexed by property name.
 *
 * @param \SolrObject $obj
 * @return array The returned document as an array.
 */
public function standarize_solr_obj(\SolrObject $obj) {
    $converted = array();

    foreach ($obj->getPropertyNames() as $rawname) {
        // Names are trimmed before use, see http://php.net/manual/en/solrobject.getpropertynames.php#98018.
        $key = trim($rawname);
        $converted[$key] = $obj->offsetGet($key);
    }

    return $converted;
}
311 | ||
/**
 * Sends a document to the search engine.
 *
 * No explicit commit is issued here; the document is added with a
 * commit-within interval of AUTOCOMMIT_WITHIN. Client and server errors
 * are reported through debugging() and do not interrupt the caller.
 *
 * @param array $doc Field values indexed by field name.
 * @return void
 */
public function add_document($doc) {

    $inputdoc = new \SolrInputDocument();
    foreach ($doc as $fieldname => $fieldvalue) {
        $inputdoc->addField($fieldname, $fieldvalue);
    }

    try {
        $client = $this->get_search_client();
        $client->addDocument($inputdoc, true, static::AUTOCOMMIT_WITHIN);
    } catch (\SolrClientException $e) {
        debugging('Solr client error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER);
    } catch (\SolrServerException $e) {
        // We only use the first line of the message, as it's a fully java stacktrace behind it.
        $firstline = strtok($e->getMessage(), "\n");
        debugging('Solr server error adding document with id ' . $doc['id'] . ': ' . $firstline, DEBUG_DEVELOPER);
    }
}
337 | ||
/**
 * Asks the Solr client to commit all pending changes.
 *
 * @return void
 */
protected function commit() {
    $client = $this->get_search_client();
    $client->commit();
}
346 | ||
075fa912 EM |
/**
 * Called once a search area has been indexed: commits the pending changes.
 *
 * Returning false would prevent the search area completed time and stats from being updated;
 * this engine always reports success.
 *
 * @param \core_search\area\base $searcharea The search area that was complete
 * @param int $numdocs The number of documents that were added to the index
 * @param bool $fullindex True if a full index is being performed
 * @return bool True means that data is considered indexed
 */
public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
    // None of the arguments is needed, committing is all there is to do.
    $this->commit();
    return true;
}
362 | ||
95c6aeaf DM |
/**
 * Defragments the index.
 *
 * @return void
 */
public function optimize() {
    // Argument names follow the SolrClient::optimize() signature in the PHP manual.
    $maxsegments = 1;
    $softcommit = true;
    $waitsearcher = false;

    $this->get_search_client()->optimize($maxsegments, $softcommit, $waitsearcher);
}
371 | ||
/**
 * Removes a single document from the index and commits the change straight away.
 *
 * @param string $id The document id to delete
 * @return void
 */
public function delete_by_id($id) {
    $client = $this->get_search_client();
    $client->deleteById($id);

    $this->commit();
}
382 | ||
/**
 * Deletes the documents of a search area, or the whole index contents if no area is specified.
 *
 * The change is committed straight away.
 *
 * @param string $areaid The area to clean up, all documents are deleted if empty.
 * @return void
 */
public function delete($areaid = null) {
    // '*:*' matches every document.
    $deletequery = $areaid ? 'areaid:' . $areaid : '*:*';

    $this->get_search_client()->deleteByQuery($deletequery);
    $this->commit();
}
397 | ||
/**
 * Checks that the Solr server is usable with the current search_solr config.
 *
 * The checks are, in order: required settings are present, a client can be
 * obtained, the server answers a ping and the schema setup validates.
 *
 * @return true|string Returns true if all good or an error string.
 */
public function is_server_ready() {

    $hashostname = !empty($this->config->server_hostname);
    $hasindexname = !empty($this->config->indexname);
    if (!$hashostname || !$hasindexname) {
        return 'No solr configuration found';
    }

    $this->client = $this->get_search_client(false);
    if (!$this->client) {
        return get_string('engineserverstatus', 'search');
    }

    try {
        // Errors raised by the ping are silenced, only exceptions are reported.
        @$this->client->ping();
    } catch (\SolrClientException $clienterror) {
        return 'Solr client error: ' . $clienterror->getMessage();
    } catch (\SolrServerException $servererror) {
        return 'Solr server error: ' . $servererror->getMessage();
    }

    // Check that setup schema has already run.
    try {
        $schema = new \search_solr\schema();
        $schema->validate_setup();
    } catch (\moodle_exception $setuperror) {
        return $setuperror->getMessage();
    }

    return true;
}
431 | ||
/**
 * Tells whether the PHP Solr extension is available.
 *
 * The check is based on the existence of the solr_get_version() function.
 *
 * @return bool
 */
public function is_installed() {
    $extensionloaded = function_exists('solr_get_version');
    return $extensionloaded;
}
440 | ||
/**
 * Returns the solr client instance, building it from the search_solr config on first use.
 *
 * The client is kept in $this->client and reused by later calls. A failed attempt is
 * not cached, so the next call tries to build the client again.
 *
 * @throws \core_search\engine_exception If the client can not be created and $triggerexception is true.
 * @param bool $triggerexception Whether a failure should throw an exception instead of returning false.
 * @return \SolrClient|false The client, false if it could not be created and $triggerexception is false.
 */
protected function get_search_client($triggerexception = true) {

    // Reuse the client if there is one. A false value (is_server_ready() may store one after a
    // failed attempt) is not considered a usable client.
    if ($this->client) {
        return $this->client;
    }

    $options = array(
        'hostname' => $this->config->server_hostname,
        'path' => '/solr/' . $this->config->indexname,
        'login' => !empty($this->config->server_username) ? $this->config->server_username : '',
        'password' => !empty($this->config->server_password) ? $this->config->server_password : '',
        'port' => !empty($this->config->server_port) ? $this->config->server_port : '',
        'secure' => !empty($this->config->secure) ? true : false,
        'ssl_cert' => !empty($this->config->ssl_cert) ? $this->config->ssl_cert : '',
        'ssl_key' => !empty($this->config->ssl_key) ? $this->config->ssl_key : '',
        'ssl_keypassword' => !empty($this->config->ssl_keypassword) ? $this->config->ssl_keypassword : '',
        'ssl_cainfo' => !empty($this->config->ssl_cainfo) ? $this->config->ssl_cainfo : '',
        'ssl_capath' => !empty($this->config->ssl_capath) ? $this->config->ssl_capath : '',
        'timeout' => !empty($this->config->server_timeout) ? $this->config->server_timeout : '30'
    );

    try {
        // The constructor never returns false, it reports invalid options by throwing.
        $client = new \SolrClient($options);
    } catch (\SolrIllegalArgumentException $e) {
        debugging('Error creating the Solr client: ' . $e->getMessage(), DEBUG_DEVELOPER);
        if ($triggerexception) {
            throw new \core_search\engine_exception('engineserverstatus', 'search');
        }
        return false;
    }

    $this->client = $client;

    return $this->client;
}
5dc4624c EM |
478 | |
/**
 * Returns a curl object for connecting to solr.
 *
 * The object is created once and reused. SSL options are only set when the
 * 'secure' setting is enabled, and basic authentication only when both the
 * username and the password are configured.
 *
 * @return \curl
 */
public function get_curl_object() {
    if ($this->curl !== null) {
        return $this->curl;
    }

    $this->curl = new \curl();

    // Build the SSL options. Based on pecl-solr and general testing.
    $curloptions = array();
    if (!empty($this->config->secure)) {
        if (!empty($this->config->ssl_cert)) {
            $curloptions['CURLOPT_SSLCERT'] = $this->config->ssl_cert;
            $curloptions['CURLOPT_SSLCERTTYPE'] = 'PEM';
        }

        if (!empty($this->config->ssl_key)) {
            $curloptions['CURLOPT_SSLKEY'] = $this->config->ssl_key;
            $curloptions['CURLOPT_SSLKEYTYPE'] = 'PEM';
        }

        // Settings that map directly to a single curl option.
        $directoptions = array(
            'ssl_keypassword' => 'CURLOPT_KEYPASSWD',
            'ssl_cainfo' => 'CURLOPT_CAINFO',
            'ssl_capath' => 'CURLOPT_CAPATH'
        );
        foreach ($directoptions as $setting => $curloption) {
            if (!empty($this->config->$setting)) {
                $curloptions[$curloption] = $this->config->$setting;
            }
        }
    }

    $this->curl->setopt($curloptions);

    $hascredentials = !empty($this->config->server_username) && !empty($this->config->server_password);
    if ($hascredentials) {
        $credentials = $this->config->server_username . ':' . $this->config->server_password;
        $this->curl->setHeader('Authorization', 'Basic ' . base64_encode($credentials));
    }

    return $this->curl;
}
526 | ||
/**
 * Builds the Moodle url object of a path inside the configured solr index.
 *
 * The url looks like {protocol}://{hostname}[:{port}]/solr/{indexname}/{path}.
 *
 * @param string $path The solr path to append.
 * @return \moodle_url
 */
public function get_connection_url($path) {
    // Must use the proper protocol, or SSL will fail.
    $scheme = empty($this->config->secure) ? 'http' : 'https';
    $host = rtrim($this->config->server_hostname, '/');

    // The port is optional.
    $port = empty($this->config->server_port) ? '' : ':' . $this->config->server_port;

    $location = '/solr/' . $this->config->indexname . '/' . ltrim($path, '/');

    return new \moodle_url($scheme . '://' . $host . $port . $location);
}
95c6aeaf | 544 | } |