MDL-53515 search: Extend search API to allow file indexing
[moodle.git] / search / engine / solr / classes / engine.php
CommitLineData
95c6aeaf
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
18 * Solr engine.
19 *
20 * @package search_solr
21 * @copyright 2015 Daniel Neis Araujo
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace search_solr;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
30 * Solr engine.
31 *
32 * @package search_solr
33 * @copyright 2015 Daniel Neis Araujo
34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36class engine extends \core_search\engine {
37
38 /**
39 * @var string The date format used by solr.
40 */
41 const DATE_FORMAT = 'Y-m-d\TH:i:s\Z';
42
43 /**
44 * @var int Commit documents interval (number of milliseconds).
45 */
46 const AUTOCOMMIT_WITHIN = 15000;
47
48 /**
49 * @var int Highlighting fragsize.
50 */
51 const FRAG_SIZE = 500;
52
53 /**
54 * @var \SolrClient
55 */
56 protected $client = null;
57
5dc4624c
EM
58 /**
59 * @var \curl Direct curl object.
60 */
61 protected $curl = null;
62
95c6aeaf
DM
63 /**
64 * @var array Fields that can be highlighted.
65 */
66 protected $highlightfields = array('content', 'description1', 'description2');
67
/**
 * Prepares a Solr query, applies filters and executes it returning its results.
 *
 * @throws \core_search\engine_exception If the server is not ready to be queried.
 * @param stdClass $filters Containing query and filters.
 * @param array|bool $usercontexts Contexts where the user has access, indexed by area id. True if the user can access all contexts.
 * @return \core_search\document[] Results; an empty array if there are none or if the query failed.
 */
public function execute_query($filters, $usercontexts) {
    global $USER;

    // Let's keep these changes internal.
    $data = clone $filters;

    // If there is any problem we trigger the exception as soon as possible.
    $this->client = $this->get_search_client();

    $serverstatus = $this->is_server_ready();
    if ($serverstatus !== true) {
        throw new \core_search\engine_exception('engineserverstatus', 'search');
    }

    $query = new \SolrQuery();
    $this->set_query($query, $data->q);
    $this->add_fields($query);

    // Search filters applied, we don't cache these filters as we don't want to pollute the cache with tmp filters
    // we are really interested in caching contexts filters instead.
    if (!empty($data->title)) {
        $query->addFilterQuery('{!field cache=false f=title}' . $data->title);
    }
    if (!empty($data->areaid)) {
        // Even if it is only supposed to contain PARAM_ALPHANUMEXT, better to prevent.
        $query->addFilterQuery('{!field cache=false f=areaid}' . $data->areaid);
    }

    if (!empty($data->timestart) || !empty($data->timeend)) {
        // An open end of the range is expressed with the '*' wildcard.
        if (empty($data->timestart)) {
            $data->timestart = '*';
        } else {
            $data->timestart = \search_solr\document::format_time_for_engine($data->timestart);
        }
        if (empty($data->timeend)) {
            $data->timeend = '*';
        } else {
            $data->timeend = \search_solr\document::format_time_for_engine($data->timeend);
        }

        // No cache.
        $query->addFilterQuery('{!cache=false}modified:[' . $data->timestart . ' TO ' . $data->timeend . ']');
    }

    // Restrict to users who are supposed to be able to see a particular result.
    $query->addFilterQuery('owneruserid:(' . \core_search\manager::NO_OWNER_ID . ' OR ' . $USER->id . ')');

    // And finally restrict it to the context where the user can access, we want this one cached.
    // If the user can access all contexts $usercontexts value is just true, we don't need to filter
    // in that case.
    if ($usercontexts && is_array($usercontexts)) {
        $allcontexts = array();
        if (!empty($data->areaid)) {
            // The requested area may be missing from the list if the user can not access any of its contexts.
            if (isset($usercontexts[$data->areaid])) {
                $allcontexts = $usercontexts[$data->areaid];
            }
        } else {
            // Join all area contexts into a single array.
            foreach ($usercontexts as $areacontexts) {
                foreach ($areacontexts as $contextid) {
                    // Ensure they are unique.
                    $allcontexts[$contextid] = $contextid;
                }
            }
        }

        if (empty($allcontexts)) {
            // No accessible context means no results; 'contextid:()' would also be an invalid filter.
            return array();
        }

        $query->addFilterQuery('contextid:(' . implode(' OR ', $allcontexts) . ')');
    }

    try {
        return $this->query_response($this->client->query($query));
    } catch (\SolrClientException $ex) {
        debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
        $this->queryerror = $ex->getMessage();
        return array();
    } catch (\SolrServerException $ex) {
        debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
        $this->queryerror = $ex->getMessage();
        return array();
    }
}
155
/**
 * Prepares a new query: enables highlighting, sets the query string and caps the number of rows.
 *
 * @param SolrQuery $query The query object to configure.
 * @param string $q The query string, passed straight to SolrQuery::setQuery().
 */
protected function set_query($query, $q) {

    // Highlighting is requested for every highlightable field.
    $query->setHighlight(true);
    foreach ($this->highlightfields as $highlightfield) {
        $query->addHighlightField($highlightfield);
    }

    // Size of each highlighted fragment and the markers that surround a match.
    $query->setHighlightFragsize(static::FRAG_SIZE);
    $query->setHighlightSimplePre('__');
    $query->setHighlightSimplePost('__');

    // What we are searching for.
    $query->setQuery($q);

    // Never ask the server for more rows than the manager allows.
    $query->setRows(\core_search\manager::MAX_RESULTS);
}
177
/**
 * Asks Solr to return every field listed in the document class default fields definition.
 *
 * @param SolrQuery $query object.
 */
public function add_fields($query) {
    $classname = $this->get_document_classname();
    $definition = $classname::get_default_fields_definition();

    // Only the field names (the definition keys) are relevant here.
    foreach (array_keys($definition) as $fieldname) {
        $query->addField($fieldname);
    }
}
190
/**
 * Pairs each returned doc with its highlighting entry (both share the document id) and merges them.
 *
 * @param object $response containing results.
 */
public function add_highlight_content($response) {
    $highlighting = $response->highlighting;

    foreach ($response->response->docs as $doc) {
        // The highlighting section is keyed by document id.
        $docid = $doc->id;
        $this->merge_highlight_field_values($doc, $highlighting->$docid);
    }
}
203
/**
 * Replaces the highlightable fields of a doc with their highlighted version, when there is one.
 *
 * @throws \core_search\engine_exception If a highlightable field of the doc is multivalued.
 * @param object $doc containing the results.
 * @param object $highlighteddoc containing the highlighted results values.
 */
public function merge_highlight_field_values($doc, $highlighteddoc) {

    foreach ($this->highlightfields as $fieldname) {
        // Nothing to do for fields the doc does not contain.
        if (empty($doc->$fieldname)) {
            continue;
        }

        // Check that the returned value is not an array. No way we can make this work with multivalued solr fields.
        if (is_array($doc->{$fieldname})) {
            throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
        }

        // No highlighted version available, keep the original value.
        if (empty($highlighteddoc->$fieldname)) {
            continue;
        }

        // Replace by the highlighted result.
        $doc->$fieldname = reset($highlighteddoc->$fieldname);
    }
}
228
/**
 * Filters the response on Moodle side.
 *
 * Results owned by another user, results of unknown search areas and results the area
 * reports as deleted or denied are dropped; granted ones are converted to documents.
 *
 * @param object $queryresponse containing the response return from solr server.
 * @return array $results containing final results to be displayed.
 */
public function query_response($queryresponse) {
    global $USER;

    $currentuserid = $USER->id;
    $nobodyid = \core_search\manager::NO_OWNER_ID;
    $maxresults = \core_search\manager::MAX_RESULTS;

    $response = $queryresponse->getResponse();

    $docs = $response->response->docs;
    if (!$docs) {
        return array();
    }

    if (empty($response->response->numFound)) {
        // Nothing was found, so there is nothing to filter.
        return $docs;
    }

    $this->add_highlight_content($response);

    // Iterate through the results checking its availability and whether they are available for the user or not.
    $granted = 0;
    foreach ($docs as $key => $solrdoc) {
        $ownerid = $solrdoc['owneruserid'];
        if ($ownerid != $nobodyid && $ownerid != $currentuserid) {
            // If owneruserid is set, no other user should be able to access this record.
            unset($docs[$key]);
            continue;
        }

        $searcharea = $this->get_search_area($solrdoc->areaid);
        if (!$searcharea) {
            unset($docs[$key]);
            continue;
        }

        $docdata = $this->standarize_solr_obj($solrdoc);

        $access = $searcharea->check_access($docdata['itemid']);
        if ($access == \core_search\manager::ACCESS_DELETED) {
            // The item is gone, so remove it from the index as well as from the results.
            $this->delete_by_id($docdata['id']);
            unset($docs[$key]);
        } else if ($access == \core_search\manager::ACCESS_DENIED) {
            unset($docs[$key]);
        } else if ($access == \core_search\manager::ACCESS_GRANTED) {
            $granted++;

            // Add the doc.
            $docs[$key] = $this->to_document($searcharea, $docdata);
        }

        // This should never happen.
        if ($granted >= $maxresults) {
            $docs = array_slice($docs, 0, $maxresults, true);
            break;
        }
    }

    return $docs;
}
293
/**
 * Converts a \SolrObject instance into a plain php array indexed by property name.
 *
 * @param \SolrObject $obj
 * @return array The returned document as an array.
 */
public function standarize_solr_obj(\SolrObject $obj) {
    $docdata = array();

    foreach ($obj->getPropertyNames() as $rawname) {
        // Names are trimmed before use, see http://php.net/manual/en/solrobject.getpropertynames.php#98018.
        $key = trim($rawname);
        $docdata[$key] = $obj->offsetGet($key);
    }

    return $docdata;
}
311
/**
 * Adds a document to the search engine.
 *
 * This does not commit to the search engine.
 *
 * @param document $document
 * @param bool $fileindexing True if file indexing is to be used (not read by this implementation)
 * @return bool
 */
public function add_document($document, $fileindexing = false) {
    // The exported data is sent as a plain text document; the outcome is reported as a strict boolean.
    $exported = $document->export_for_engine();

    return (bool) $this->add_text_document($exported);
}
95c6aeaf 330
091973db
EM
/**
 * Adds a text document to the search engine.
 *
 * Client and server errors are reported through debugging() and result in a false return value.
 *
 * @param array $doc Field name => value pairs; the 'id' entry is used in the error messages.
 * @return bool True if the document was handed to the server, false if an error was reported.
 */
protected function add_text_document($doc) {
    $solrdoc = new \SolrInputDocument();
    foreach ($doc as $field => $value) {
        $solrdoc->addField($field, $value);
    }

    try {
        // The response is not inspected, failures are reported through exceptions.
        $this->get_search_client()->addDocument($solrdoc, true, static::AUTOCOMMIT_WITHIN);
        return true;
    } catch (\SolrClientException $e) {
        debugging('Solr client error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER);
    } catch (\SolrServerException $e) {
        // We only use the first line of the message, as it's a fully java stacktrace behind it.
        $msg = strtok($e->getMessage(), "\n");
        debugging('Solr server error adding document with id ' . $doc['id'] . ': ' . $msg, DEBUG_DEVELOPER);
    }

    return false;
}
356
/**
 * Asks the Solr client to commit.
 *
 * @return void
 */
protected function commit() {
    $client = $this->get_search_client();
    $client->commit();
}
365
075fa912
EM
/**
 * Called once a search area has been indexed; commits the pending changes.
 *
 * Returning false would prevent the search area completed time and stats from being updated;
 * this implementation always returns true.
 *
 * @param \core_search\area\base $searcharea The search area that was complete
 * @param int $numdocs The number of documents that were added to the index
 * @param bool $fullindex True if a full index is being performed
 * @return bool True means that data is considered indexed
 */
public function area_index_complete($searcharea, $numdocs = 0, $fullindex = false) {
    // None of the arguments are needed, committing is all there is to do.
    $this->commit();

    return true;
}
381
95c6aeaf
DM
/**
 * Defragments the index.
 *
 * @return void
 */
public function optimize() {
    $client = $this->get_search_client();
    $client->optimize(1, true, false);
}
390
/**
 * Removes a single document from the index and commits straight away.
 *
 * @param string $id The document id to delete
 * @return void
 */
public function delete_by_id($id) {
    $client = $this->get_search_client();
    $client->deleteById($id);

    $this->commit();
}
401
/**
 * Deletes the documents of one area, or every document when no area is given, and commits.
 *
 * @param string $areaid
 * @return void
 */
public function delete($areaid = null) {
    // Without an area the match-all query is used, wiping the whole index.
    $deletequery = $areaid ? 'areaid:' . $areaid : '*:*';

    $this->get_search_client()->deleteByQuery($deletequery);
    $this->commit();
}
416
/**
 * Checks the search_solr config, pings the Solr server and validates the schema setup.
 *
 * @return true|string Returns true if all good or an error string.
 */
public function is_server_ready() {

    // Both the hostname and the index name are required.
    $missingconfig = empty($this->config->server_hostname) || empty($this->config->indexname);
    if ($missingconfig) {
        return 'No solr configuration found';
    }

    // Ask for the client without triggering exceptions, we report problems as strings here.
    $this->client = $this->get_search_client(false);
    if (!$this->client) {
        return get_string('engineserverstatus', 'search');
    }

    try {
        @$this->client->ping();
    } catch (\SolrClientException $clienterror) {
        return 'Solr client error: ' . $clienterror->getMessage();
    } catch (\SolrServerException $servererror) {
        return 'Solr server error: ' . $servererror->getMessage();
    }

    // Check that setup schema has already run.
    try {
        $solrschema = new \search_solr\schema();
        $solrschema->validate_setup();
    } catch (\moodle_exception $setuperror) {
        return $setuperror->getMessage();
    }

    return true;
}
450
/**
 * Checks if the PHP Solr extension is available.
 *
 * @return bool True if the solr_get_version() function exists.
 */
public function is_installed() {
    $extensionavailable = function_exists('solr_get_version');

    return $extensionavailable;
}
459
/**
 * Returns the solr client instance, creating it from the plugin config on first use.
 *
 * @throws \core_search\engine_exception If the client can not be created and $triggerexception is true.
 * @param bool $triggerexception Whether to throw an exception when the client can not be created.
 * @return \SolrClient|false The client; false if it can not be created and $triggerexception is false.
 */
protected function get_search_client($triggerexception = true) {

    // Reuse the client if it has already been created.
    if ($this->client !== null) {
        return $this->client;
    }

    $options = array(
        'hostname' => $this->config->server_hostname,
        'path' => '/solr/' . $this->config->indexname,
        'login' => !empty($this->config->server_username) ? $this->config->server_username : '',
        'password' => !empty($this->config->server_password) ? $this->config->server_password : '',
        'port' => !empty($this->config->server_port) ? $this->config->server_port : '',
        'secure' => !empty($this->config->secure) ? true : false,
        'ssl_cert' => !empty($this->config->ssl_cert) ? $this->config->ssl_cert : '',
        'ssl_key' => !empty($this->config->ssl_key) ? $this->config->ssl_key : '',
        'ssl_keypassword' => !empty($this->config->ssl_keypassword) ? $this->config->ssl_keypassword : '',
        'ssl_cainfo' => !empty($this->config->ssl_cainfo) ? $this->config->ssl_cainfo : '',
        'ssl_capath' => !empty($this->config->ssl_capath) ? $this->config->ssl_capath : '',
        'timeout' => !empty($this->config->server_timeout) ? $this->config->server_timeout : '30'
    );

    try {
        // The constructor never returns false, it reports unusable options through an exception.
        $this->client = new \SolrClient($options);
    } catch (\SolrIllegalArgumentException $e) {
        // The client property is left untouched (null) so a later call can try again.
        if ($triggerexception) {
            throw new \core_search\engine_exception('engineserverstatus', 'search');
        }
        return false;
    }

    return $this->client;
}
5dc4624c
EM
497
/**
 * Returns a curl object for connecting to solr.
 *
 * The object is created on first use and then reused. SSL options are only set when the
 * 'secure' setting is enabled; basic authentication is added when both a username and a
 * password are configured.
 *
 * @return \curl
 */
public function get_curl_object() {
    if (!is_null($this->curl)) {
        return $this->curl;
    }

    $this->curl = new \curl();

    $options = array();
    // Build the SSL options. Based on pecl-solr and general testing.
    if (!empty($this->config->secure)) {
        if (!empty($this->config->ssl_cert)) {
            $options['CURLOPT_SSLCERT'] = $this->config->ssl_cert;
            $options['CURLOPT_SSLCERTTYPE'] = 'PEM';
        }

        if (!empty($this->config->ssl_key)) {
            $options['CURLOPT_SSLKEY'] = $this->config->ssl_key;
            $options['CURLOPT_SSLKEYTYPE'] = 'PEM';
        }

        if (!empty($this->config->ssl_keypassword)) {
            $options['CURLOPT_KEYPASSWD'] = $this->config->ssl_keypassword;
        }

        if (!empty($this->config->ssl_cainfo)) {
            $options['CURLOPT_CAINFO'] = $this->config->ssl_cainfo;
        }

        if (!empty($this->config->ssl_capath)) {
            $options['CURLOPT_CAPATH'] = $this->config->ssl_capath;
        }
    }

    $this->curl->setopt($options);

    if (!empty($this->config->server_username) && !empty($this->config->server_password)) {
        $authorization = $this->config->server_username . ':' . $this->config->server_password;
        // setHeader() takes the complete header line as its single argument, so name and value
        // must be joined here; passed separately the value would never be sent.
        $this->curl->setHeader('Authorization: Basic ' . base64_encode($authorization));
    }

    return $this->curl;
}
545
/**
 * Builds the url of a path inside the configured solr index.
 *
 * @param string $path The solr path to append.
 * @return \moodle_url
 */
public function get_connection_url($path) {
    // Must use the proper protocol, or SSL will fail.
    $scheme = empty($this->config->secure) ? 'http' : 'https';
    $host = rtrim($this->config->server_hostname, '/');

    // The port is only included when one has been configured.
    $port = empty($this->config->server_port) ? '' : ':' . $this->config->server_port;

    $indexpath = '/solr/' . $this->config->indexname . '/' . ltrim($path, '/');

    return new \moodle_url($scheme . '://' . $host . $port . $indexpath);
}
95c6aeaf 563}