MDL-31989 search_solr: Solr search engine
[moodle.git] / search / engine / solr / classes / engine.php
CommitLineData
95c6aeaf
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
18 * Solr engine.
19 *
20 * @package search_solr
21 * @copyright 2015 Daniel Neis Araujo
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace search_solr;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
30 * Solr engine.
31 *
32 * @package search_solr
33 * @copyright 2015 Daniel Neis Araujo
34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36class engine extends \core_search\engine {
37
38 /**
39 * @var string The date format used by solr.
40 */
41 const DATE_FORMAT = 'Y-m-d\TH:i:s\Z';
42
43 /**
44 * @var int Commit documents interval (number of milliseconds).
45 */
46 const AUTOCOMMIT_WITHIN = 15000;
47
48 /**
49 * @var int Highlighting fragsize.
50 */
51 const FRAG_SIZE = 500;
52
53 /**
54 * @var \SolrClient
55 */
56 protected $client = null;
57
58 /**
59 * @var array Fields that can be highlighted.
60 */
61 protected $highlightfields = array('content', 'description1', 'description2');
62
/**
 * Prepares a Solr query, applies filters and executes it returning its results.
 *
 * The title, areaid and modified-range filters are sent with cache=false; the context
 * restriction is sent as a regular filter query so that it can be cached.
 *
 * @throws \core_search\engine_exception If the server is not ready to be queried.
 * @param stdClass $filters Containing query and filters.
 * @param array $usercontexts Contexts where the user has access. True if the user can access all contexts.
 * @return \core_search\document[] Results, or an empty array if there are no results or the query failed.
 */
public function execute_query($filters, $usercontexts) {

    // Let's keep these changes internal.
    $data = clone $filters;

    // If there is any problem we trigger the exception as soon as possible.
    $this->client = $this->get_search_client();

    $serverstatus = $this->is_server_ready();
    if ($serverstatus !== true) {
        throw new \core_search\engine_exception('engineserverstatus', 'search');
    }

    $query = new \SolrQuery();
    $this->set_query($query, $data->q);
    $this->add_fields($query);

    // Search filters applied, we don't cache these filters as we don't want to pollute the cache with tmp filters
    // we are really interested in caching contexts filters instead.
    if (!empty($data->title)) {
        $query->addFilterQuery('{!field cache=false f=title}' . $data->title);
    }
    if (!empty($data->areaid)) {
        // The {!field} parser takes the value as a raw field value, even if it is only
        // supposed to contain PARAM_ALPHANUMEXT.
        $query->addFilterQuery('{!field cache=false f=areaid}' . $data->areaid);
    }

    if (!empty($data->timestart) or !empty($data->timeend)) {
        // An open end of the range is expressed with a wildcard.
        if (empty($data->timestart)) {
            $data->timestart = '*';
        } else {
            $data->timestart = \search_solr\document::format_time_for_engine($data->timestart);
        }
        if (empty($data->timeend)) {
            $data->timeend = '*';
        } else {
            $data->timeend = \search_solr\document::format_time_for_engine($data->timeend);
        }

        // No cache.
        $query->addFilterQuery('{!cache=false}modified:[' . $data->timestart . ' TO ' . $data->timeend . ']');
    }

    // And finally restrict it to the context where the user can access, we want this one cached.
    // If the user can access all contexts $usercontexts value is just true, we don't need to filter
    // in that case.
    if ($usercontexts && is_array($usercontexts)) {
        $allcontexts = array();
        if (!empty($data->areaid)) {
            // Only the contexts of the requested area; the area may be missing from the list.
            if (!empty($usercontexts[$data->areaid])) {
                $allcontexts = $usercontexts[$data->areaid];
            }
        } else {
            // Join all area contexts into a single array.
            foreach ($usercontexts as $areacontexts) {
                foreach ($areacontexts as $contextid) {
                    // Ensure they are unique.
                    $allcontexts[$contextid] = $contextid;
                }
            }
        }

        if (empty($allcontexts)) {
            // The user has no context to search in. An empty 'contextid:()' clause is not
            // a usable filter, so there is nothing to ask the server for.
            return array();
        }

        $query->addFilterQuery('contextid:(' . implode(' OR ', $allcontexts) . ')');
    }

    try {
        return $this->query_response($this->client->query($query));
    } catch (\SolrClientException $ex) {
        debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
        $this->queryerror = $ex->getMessage();
        return array();
    } catch (\SolrServerException $ex) {
        debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
        $this->queryerror = $ex->getMessage();
        return array();
    }

}
146
/**
 * Sets up highlighting, the query itself and the maximum number of rows on a query object.
 *
 * @param SolrQuery $query The query object to configure.
 * @param mixed $q The query, handed unchanged to SolrQuery::setQuery().
 */
protected function set_query($query, $q) {

    // Switch highlighting on for every field that supports it.
    $query->setHighlight(true);
    foreach ($this->highlightfields as $highlightfield) {
        $query->addHighlightField($highlightfield);
    }

    // Fragment size and the markers placed before and after each highlighted match.
    $query->setHighlightFragsize(static::FRAG_SIZE)
        ->setHighlightSimplePre('__')
        ->setHighlightSimplePost('__');

    $query->setQuery($q);

    // Never ask for more rows than the search manager allows.
    $query->setRows(\core_search\manager::MAX_RESULTS);
}
168
/**
 * Requests every default document field to be returned in the results.
 *
 * The field names are the keys of the document class default fields definition.
 *
 * @param SolrQuery $query object.
 */
public function add_fields($query) {
    $classname = $this->get_document_classname();
    $definitions = $classname::get_default_fields_definition();
    foreach ($definitions as $fieldname => $unused) {
        $query->addField($fieldname);
    }
}
181
/**
 * Merges the highlighting section of the response into the returned docs.
 *
 * Highlighting entries are looked up by document id. A document without a
 * highlighting entry (or a response without a highlighting section at all)
 * is still passed to merge_highlight_field_values() so its fields get validated.
 *
 * @throws \core_search\engine_exception
 * @param object $response containing results.
 */
public function add_highlight_content($response) {
    // The highlighting section may be missing from the response.
    $highlightedobject = isset($response->highlighting) ? $response->highlighting : null;

    foreach ($response->response->docs as $doc) {
        $docid = $doc->id;

        // Not every returned doc necessarily has highlighting info.
        $highlighteddoc = null;
        if ($highlightedobject !== null && isset($highlightedobject->$docid)) {
            $highlighteddoc = $highlightedobject->$docid;
        }

        $this->merge_highlight_field_values($doc, $highlighteddoc);
    }
}
194
/**
 * Overwrites the highlightable fields of a doc with their highlighted version.
 *
 * Only fields that have a value in the doc are considered; a field is replaced
 * by the first highlighted value when one is available.
 *
 * @throws \core_search\engine_exception If a highlightable field of the doc is multivalued.
 * @param object $doc containing the results.
 * @param object $highlighteddoc containing the highlighted results values.
 */
public function merge_highlight_field_values($doc, $highlighteddoc) {

    foreach ($this->highlightfields as $fieldname) {
        // Nothing to replace if the doc has no value for this field.
        if (empty($doc->$fieldname)) {
            continue;
        }

        // Multivalued solr fields are not supported here, there is no way to make them work.
        if (is_array($doc->$fieldname)) {
            throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
        }

        // Keep the original value when there is no highlighted one.
        if (empty($highlighteddoc->$fieldname)) {
            continue;
        }

        // Use the first highlighted value.
        $doc->$fieldname = reset($highlighteddoc->$fieldname);
    }
}
219
/**
 * Filters the response on Moodle side.
 *
 * Each returned doc is checked against its search area: docs whose area is not
 * available, docs the user can not access and docs with an unrecognised access
 * value are dropped; docs reported as deleted are also removed from the index.
 * Granted docs are converted through to_document(). Original array keys are kept.
 *
 * @param object $queryresponse containing the response return from solr server.
 * @return array $results containing final results to be displayed.
 */
public function query_response($queryresponse) {

    $response = $queryresponse->getResponse();
    $numgranted = 0;

    if (!$docs = $response->response->docs) {
        return array();
    }

    if (!empty($response->response->numFound)) {
        $this->add_highlight_content($response);

        // Iterate through the results checking its availability and whether they are available for the user or not.
        foreach ($docs as $key => $docdata) {
            if (!$searcharea = $this->get_search_area($docdata->areaid)) {
                unset($docs[$key]);
                continue;
            }

            $docdata = $this->standarize_solr_obj($docdata);

            $access = $searcharea->check_access($docdata['itemid']);
            switch ($access) {
                case \core_search\manager::ACCESS_DELETED:
                    // The item is gone, clean it from the index too.
                    $this->delete_by_id($docdata['id']);
                    unset($docs[$key]);
                    break;
                case \core_search\manager::ACCESS_GRANTED:
                    $numgranted++;

                    // Add the doc.
                    $docs[$key] = $this->to_document($searcharea, $docdata);
                    break;
                case \core_search\manager::ACCESS_DENIED:
                default:
                    // Anything that is not explicitly granted is dropped, so a raw
                    // unchecked solr doc can never reach the results.
                    unset($docs[$key]);
                    break;
            }

            // This should never happen.
            if ($numgranted >= \core_search\manager::MAX_RESULTS) {
                $docs = array_slice($docs, 0, \core_search\manager::MAX_RESULTS, true);
                break;
            }
        }
    }

    return $docs;
}
274
/**
 * Converts a \SolrObject instance into a plain php array.
 *
 * @param \SolrObject $obj
 * @return array The document data, indexed by (trimmed) property name.
 */
public function standarize_solr_obj(\SolrObject $obj) {
    $result = array();

    foreach ($obj->getPropertyNames() as $propertyname) {
        // Property names are trimmed before use, see
        // http://php.net/manual/en/solrobject.getpropertynames.php#98018.
        $key = trim($propertyname);
        $result[$key] = $obj->offsetGet($key);
    }

    return $result;
}
292
/**
 * Adds a document to the search engine.
 *
 * This does not commit to the search engine; the document is sent with a
 * commit-within of AUTOCOMMIT_WITHIN. Client and server errors are reported
 * through debugging() and do not interrupt the caller.
 *
 * @param array $doc Field name => value pairs; the 'id' field is used in error messages.
 * @return void
 */
public function add_document($doc) {

    $solrdoc = new \SolrInputDocument();
    foreach ($doc as $field => $value) {
        $solrdoc->addField($field, $value);
    }

    try {
        $this->get_search_client()->addDocument($solrdoc, true, static::AUTOCOMMIT_WITHIN);
    } catch (\SolrClientException $e) {
        debugging('Solr client error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER);
    } catch (\SolrServerException $e) {
        // Server side failures are handled like client side ones, as the other methods of this class do.
        debugging('Solr server error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER);
    }
}
314
/**
 * Asks the search client to commit all pending changes.
 *
 * @return void
 */
public function commit() {
    $client = $this->get_search_client();
    $client->commit();
}
323
/**
 * Asks the search client to optimize (defragment) the index.
 *
 * @return void
 */
public function optimize() {
    $client = $this->get_search_client();
    $client->optimize();
}
332
/**
 * Removes a single document from the index.
 *
 * @param string $id The id of the document to delete
 * @return void
 */
public function delete_by_id($id) {
    $client = $this->get_search_client();
    $client->deleteById($id);
}
342
/**
 * Deletes the documents of one search area, or every document.
 *
 * @param string $areaid Area whose documents are deleted; all documents are deleted when it is empty.
 * @return void
 */
public function delete($areaid = null) {
    // Match a single area when one is given, everything otherwise.
    $deletequery = $areaid ? 'areaid:' . $areaid : '*:*';
    $this->get_search_client()->deleteByQuery($deletequery);
}
356
/**
 * Checks that the Solr server can be used with the current search_solr config.
 *
 * The checks are, in order: hostname and index name are configured, a client
 * is available, the server answers a ping and the schema setup validates.
 *
 * @return true|string Returns true if all good or an error string.
 */
public function is_server_ready() {

    // Without these two settings there is nothing to connect to.
    if (empty($this->config->server_hostname) || empty($this->config->indexname)) {
        return 'No solr configuration found';
    }

    // Do not let the client getter throw, a status string is returned instead.
    $this->client = $this->get_search_client(false);
    if (!$this->client) {
        return get_string('engineserverstatus', 'search');
    }

    try {
        @$this->client->ping();
    } catch (\SolrClientException $clienterror) {
        return 'Solr client error: ' . $clienterror->getMessage();
    } catch (\SolrServerException $servererror) {
        return 'Solr server error: ' . $servererror->getMessage();
    }

    // The schema setup must have been run already.
    try {
        $solrschema = new \search_solr\schema();
        $solrschema->validate_setup();
    } catch (\moodle_exception $setuperror) {
        return $setuperror->getMessage();
    }

    return true;
}
390
/**
 * Tells whether the PHP Solr extension is available.
 *
 * The check looks for the solr_get_version() function.
 *
 * @return bool
 */
public function is_installed() {
    $extensionloaded = function_exists('solr_get_version');
    return $extensionloaded;
}
399
/**
 * Returns the solr client instance, creating it from the plugin config on first use.
 *
 * The client is only stored once it has been created successfully, so a failed
 * attempt is retried on the next call.
 *
 * @throws \core_search\engine_exception If the client can not be created and $triggerexception is true.
 * @param bool $triggerexception Whether a failure should throw an exception or just return false.
 * @return \SolrClient|false The client, or false if it could not be created and $triggerexception is false.
 */
protected function get_search_client($triggerexception = true) {

    // Reuse the client if there is one already; null or false mean that there is none.
    if ($this->client) {
        return $this->client;
    }

    $options = array(
        'hostname' => $this->config->server_hostname,
        'path' => '/solr/' . $this->config->indexname,
    );

    // Optional settings: client option name => plugin config name. Unset ones are sent as ''.
    $optionalsettings = array(
        'login' => 'server_username',
        'password' => 'server_password',
        'port' => 'server_port',
        'issecure' => 'secure',
        'ssl_cert' => 'ssl_cert',
        'ssl_cert_only' => 'ssl_cert_only',
        'ssl_key' => 'ssl_key',
        'ssl_password' => 'ssl_keypassword',
        'ssl_cainfo' => 'ssl_cainfo',
        'ssl_capath' => 'ssl_capath',
    );
    foreach ($optionalsettings as $option => $setting) {
        $options[$option] = !empty($this->config->$setting) ? $this->config->$setting : '';
    }

    try {
        // The constructor does not return false on failure, it throws on invalid options.
        $client = new \SolrClient($options);
    } catch (\SolrIllegalArgumentException $e) {
        debugging('Solr client could not be created: ' . $e->getMessage(), DEBUG_DEVELOPER);
        if ($triggerexception) {
            throw new \core_search\engine_exception('engineserverstatus', 'search');
        }
        return false;
    }

    $this->client = $client;

    return $this->client;
}
437}