Commit | Line | Data |
---|---|---|
95c6aeaf DM |
1 | <?php |
2 | // This file is part of Moodle - http://moodle.org/ | |
3 | // | |
4 | // Moodle is free software: you can redistribute it and/or modify | |
5 | // it under the terms of the GNU General Public License as published by | |
6 | // the Free Software Foundation, either version 3 of the License, or | |
7 | // (at your option) any later version. | |
8 | // | |
9 | // Moodle is distributed in the hope that it will be useful, | |
10 | // but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | // GNU General Public License for more details. | |
13 | // | |
14 | // You should have received a copy of the GNU General Public License | |
15 | // along with Moodle. If not, see <http://www.gnu.org/licenses/>. | |
16 | ||
17 | /** | |
18 | * Solr engine. | |
19 | * | |
20 | * @package search_solr | |
21 | * @copyright 2015 Daniel Neis Araujo | |
22 | * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later | |
23 | */ | |
24 | ||
25 | namespace search_solr; | |
26 | ||
27 | defined('MOODLE_INTERNAL') || die(); | |
28 | ||
29 | /** | |
30 | * Solr engine. | |
31 | * | |
32 | * @package search_solr | |
33 | * @copyright 2015 Daniel Neis Araujo | |
34 | * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later | |
35 | */ | |
36 | class engine extends \core_search\engine { | |
37 | ||
38 | /** | |
39 | * @var string The date format used by solr. | |
40 | */ | |
41 | const DATE_FORMAT = 'Y-m-d\TH:i:s\Z'; | |
42 | ||
43 | /** | |
44 | * @var int Commit documents interval (number of milliseconds). | |
45 | */ | |
46 | const AUTOCOMMIT_WITHIN = 15000; | |
47 | ||
48 | /** | |
49 | * @var int Highlighting fragsize. | |
50 | */ | |
51 | const FRAG_SIZE = 500; | |
52 | ||
53 | /** | |
54 | * @var \SolrClient | |
55 | */ | |
56 | protected $client = null; | |
57 | ||
58 | /** | |
59 | * @var array Fields that can be highlighted. | |
60 | */ | |
61 | protected $highlightfields = array('content', 'description1', 'description2'); | |
62 | ||
/**
 * Prepares a Solr query, applies filters and executes it returning its results.
 *
 * The title, area and modification time filters are sent with cache=false, the
 * context restriction is sent as a regular filter query so Solr can cache it.
 *
 * @throws \core_search\engine_exception If is_server_ready() does not return true.
 * @param \stdClass $filters Containing query and filters. It is cloned, the caller's object is not modified.
 * @param array|bool $usercontexts Contexts where the user has access, grouped by area id.
 *                                 True if the user can access all contexts.
 * @return array The value returned by query_response(), or an empty array if the user has no
 *               accessible context for the requested search or the query could not be executed.
 */
public function execute_query($filters, $usercontexts) {

    // Let's keep these changes internal.
    $data = clone $filters;

    // If there is any problem we trigger the exception as soon as possible.
    $this->client = $this->get_search_client();

    $serverstatus = $this->is_server_ready();
    if ($serverstatus !== true) {
        throw new \core_search\engine_exception('engineserverstatus', 'search');
    }

    $query = new \SolrQuery();
    $this->set_query($query, $data->q);
    $this->add_fields($query);

    // Search filters applied, we don't cache these filters as we don't want to pollute the cache with tmp filters
    // we are really interested in caching contexts filters instead.
    if (!empty($data->title)) {
        $query->addFilterQuery('{!field cache=false f=title}' . $data->title);
    }
    if (!empty($data->areaid)) {
        // Even if it is only supposed to contain PARAM_ALPHANUMEXT, better to prevent.
        $query->addFilterQuery('{!field cache=false f=areaid}' . $data->areaid);
    }

    if (!empty($data->timestart) || !empty($data->timeend)) {
        // An open end of the range is expressed with Solr's '*' wildcard.
        if (empty($data->timestart)) {
            $data->timestart = '*';
        } else {
            $data->timestart = \search_solr\document::format_time_for_engine($data->timestart);
        }
        if (empty($data->timeend)) {
            $data->timeend = '*';
        } else {
            $data->timeend = \search_solr\document::format_time_for_engine($data->timeend);
        }

        // No cache.
        $query->addFilterQuery('{!cache=false}modified:[' . $data->timestart . ' TO ' . $data->timeend . ']');
    }

    // And finally restrict it to the context where the user can access, we want this one cached.
    // If the user can access all contexts $usercontexts value is just true, we don't need to filter
    // in that case.
    if ($usercontexts && is_array($usercontexts)) {
        $allowedcontexts = array();
        if (!empty($data->areaid)) {
            // The requested area may be missing from the list when the user has no access to it.
            if (!empty($usercontexts[$data->areaid])) {
                $allowedcontexts = $usercontexts[$data->areaid];
            }
        } else {
            // Join all area contexts into a single array.
            foreach ($usercontexts as $areacontexts) {
                foreach ($areacontexts as $contextid) {
                    // Ensure they are unique.
                    $allowedcontexts[$contextid] = $contextid;
                }
            }
        }

        if (empty($allowedcontexts)) {
            // No valid contexts for this search: return no results instead of sending
            // an empty 'contextid:()' clause to the server.
            return array();
        }

        $query->addFilterQuery('contextid:(' . implode(' OR ', $allowedcontexts) . ')');
    }

    try {
        return $this->query_response($this->client->query($query));
    } catch (\SolrClientException $ex) {
        debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
        $this->queryerror = $ex->getMessage();
        return array();
    } catch (\SolrServerException $ex) {
        debugging('Error executing the provided query: ' . $ex->getMessage(), DEBUG_DEVELOPER);
        $this->queryerror = $ex->getMessage();
        return array();
    }

}
146 | ||
/**
 * Configures a query object: highlighting options, the search terms and the row limit.
 *
 * @param \SolrQuery $query The query object to configure.
 * @param mixed $q Value handed to SolrQuery::setQuery(); execute_query() passes the "q" property of its filters.
 */
protected function set_query($query, $q) {

    // Highlighting is enabled for every field listed in $this->highlightfields.
    $query->setHighlight(true);
    foreach ($this->highlightfields as $highlightfield) {
        $query->addHighlightField($highlightfield);
    }
    $query->setHighlightFragsize(static::FRAG_SIZE);

    // The same marker is used before and after each highlighted fragment.
    $marker = '__';
    $query->setHighlightSimplePre($marker);
    $query->setHighlightSimplePost($marker);

    $query->setQuery($q);

    // Never ask the server for more rows than the search manager limit.
    $query->setRows(\core_search\manager::MAX_RESULTS);
}
168 | ||
/**
 * Requests every field of the document class default definition in the query results.
 *
 * @param \SolrQuery $query The query the fields are added to.
 */
public function add_fields($query) {
    $classname = $this->get_document_classname();

    // Only the field names (the keys of the definition) are needed here.
    foreach ($classname::get_default_fields_definition() as $fieldname => $unused) {
        $query->addField($fieldname);
    }
}
181 | ||
/**
 * Merges the highlighting section of a response into its documents.
 *
 * The highlighting section is looked up by document id; each match is handed to
 * merge_highlight_field_values() together with the document it belongs to.
 *
 * @param object $response containing results.
 */
public function add_highlight_content($response) {
    if (!isset($response->highlighting)) {
        // There is no highlighting to add.
        return;
    }

    $highlightedobject = $response->highlighting;
    foreach ($response->response->docs as $doc) {
        $docid = $doc->id;
        if (!isset($highlightedobject->$docid)) {
            // Nothing was highlighted for this document, keep its original values.
            continue;
        }
        $this->merge_highlight_field_values($doc, $highlightedobject->$docid);
    }
}
194 | ||
/**
 * Overwrites the highlightable fields of a document with their highlighted version.
 *
 * Only the fields listed in $this->highlightfields are considered, and only when
 * the document has a non-empty value for them.
 *
 * @throws \core_search\engine_exception If one of those document fields holds an array.
 * @param object $doc containing the results.
 * @param object $highlighteddoc containing the highlighted results values.
 */
public function merge_highlight_field_values($doc, $highlighteddoc) {

    foreach ($this->highlightfields as $fieldname) {
        if (empty($doc->$fieldname)) {
            // The document has nothing in this field, so there is nothing to replace.
            continue;
        }

        // Multivalued solr fields come back as arrays and can not be merged.
        if (is_array($doc->{$fieldname})) {
            throw new \core_search\engine_exception('multivaluedfield', 'search_solr', '', $fieldname);
        }

        if (empty($highlighteddoc->$fieldname)) {
            continue;
        }

        // Use the first highlighted value as the new field value.
        $doc->$fieldname = reset($highlighteddoc->$fieldname);
    }
}
219 | ||
/**
 * Filters the response on Moodle side.
 *
 * Every returned document is checked against its search area: documents whose area is
 * unknown or whose access is not explicitly granted are dropped, documents reported as
 * deleted are also removed from the index, and granted ones are converted with to_document().
 *
 * @param object $queryresponse containing the response return from solr server.
 * @return array Final results to be displayed; the original keys are preserved, so the array may have gaps.
 */
public function query_response($queryresponse) {

    $response = $queryresponse->getResponse();
    $numgranted = 0;

    if (!$docs = $response->response->docs) {
        return array();
    }

    if (!empty($response->response->numFound)) {
        $this->add_highlight_content($response);

        // Iterate through the results checking its availability and whether they are available for the user or not.
        foreach ($docs as $key => $docdata) {
            if (!$searcharea = $this->get_search_area($docdata->areaid)) {
                unset($docs[$key]);
                continue;
            }

            $docdata = $this->standarize_solr_obj($docdata);

            $access = $searcharea->check_access($docdata['itemid']);
            switch ($access) {
                case \core_search\manager::ACCESS_GRANTED:
                    $numgranted++;

                    // Add the doc.
                    $docs[$key] = $this->to_document($searcharea, $docdata);
                    break;
                case \core_search\manager::ACCESS_DELETED:
                    // The item no longer exists, remove it from the index too.
                    $this->delete_by_id($docdata['id']);
                    unset($docs[$key]);
                    break;
                case \core_search\manager::ACCESS_DENIED:
                default:
                    // Anything that is not explicitly granted is withheld, otherwise the raw,
                    // unchecked solr document would stay in the returned results.
                    unset($docs[$key]);
                    break;
            }

            // This should never happen.
            if ($numgranted >= \core_search\manager::MAX_RESULTS) {
                $docs = array_slice($docs, 0, \core_search\manager::MAX_RESULTS, true);
                break;
            }
        }
    }

    return $docs;
}
274 | ||
/**
 * Copies the properties of a \SolrObject instance into a plain php array.
 *
 * @param \SolrObject $obj The object to convert.
 * @return array Property values indexed by their trimmed property name.
 */
public function standarize_solr_obj(\SolrObject $obj) {
    $converted = array();

    foreach ($obj->getPropertyNames() as $rawname) {
        // Names are trimmed before use, see http://php.net/manual/en/solrobject.getpropertynames.php#98018.
        $key = trim($rawname);
        $converted[$key] = $obj->offsetGet($key);
    }

    return $converted;
}
292 | ||
/**
 * Adds a document to the search engine.
 *
 * The document is sent with a commitWithin delay of AUTOCOMMIT_WITHIN milliseconds,
 * no explicit commit is requested here. Client and server errors are reported through
 * debugging() and do not interrupt the caller.
 *
 * @param array $doc Field values indexed by field name; its 'id' value is used in error messages.
 * @return void
 */
public function add_document($doc) {

    $solrdoc = new \SolrInputDocument();
    foreach ($doc as $field => $value) {
        $solrdoc->addField($field, $value);
    }

    try {
        $this->get_search_client()->addDocument($solrdoc, true, static::AUTOCOMMIT_WITHIN);
    } catch (\SolrClientException $e) {
        debugging('Solr client error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER);
    } catch (\SolrServerException $e) {
        // The server rejected the document; report it like a client error instead of aborting the caller.
        debugging('Solr server error adding document with id ' . $doc['id'] . ': ' . $e->getMessage(), DEBUG_DEVELOPER);
    }
}
314 | ||
/**
 * Asks the Solr client to commit.
 *
 * @return void
 */
public function commit() {
    $client = $this->get_search_client();
    $client->commit();
}
323 | ||
/**
 * Asks the Solr client to optimize (defragment) the index.
 *
 * @return void
 */
public function optimize() {
    $client = $this->get_search_client();
    $client->optimize();
}
332 | ||
/**
 * Removes a single document from the index.
 *
 * @param string $id The id of the document to delete.
 * @return void
 */
public function delete_by_id($id) {
    $client = $this->get_search_client();
    $client->deleteById($id);
}
342 | ||
/**
 * Deletes the documents of one search area, or every document.
 *
 * @param string|null $areaid The area whose documents are deleted; any empty value (the default) deletes everything.
 * @return void
 */
public function delete($areaid = null) {
    // Without an area id the delete query matches the whole index.
    $deletequery = $areaid ? 'areaid:' . $areaid : '*:*';
    $this->get_search_client()->deleteByQuery($deletequery);
}
356 | ||
/**
 * Checks that the configured Solr server can be used.
 *
 * The checks run in this order: the hostname and index name are configured, a client
 * is available, the server answers a ping, and the schema setup validates.
 *
 * @return true|string Returns true if all good or an error string.
 */
public function is_server_ready() {

    $configured = !empty($this->config->server_hostname) && !empty($this->config->indexname);
    if (!$configured) {
        return 'No solr configuration found';
    }

    // Ask for the client without exceptions so a failure is reported as a message.
    $this->client = $this->get_search_client(false);
    if (!$this->client) {
        return get_string('engineserverstatus', 'search');
    }

    try {
        @$this->client->ping();
    } catch (\SolrClientException $clienterror) {
        return 'Solr client error: ' . $clienterror->getMessage();
    } catch (\SolrServerException $servererror) {
        return 'Solr server error: ' . $servererror->getMessage();
    }

    // Check that setup schema has already run.
    try {
        $solrschema = new \search_solr\schema();
        $solrschema->validate_setup();
    } catch (\moodle_exception $setuperror) {
        return $setuperror->getMessage();
    }

    return true;
}
390 | ||
/**
 * Reports whether the solr_get_version() function is defined, which is used
 * here as the sign that the PHP Solr extension is available.
 *
 * @return bool
 */
public function is_installed() {
    $extensionavailable = function_exists('solr_get_version');
    return $extensionavailable;
}
399 | ||
/**
 * Returns the solr client instance, creating it from the plugin configuration on first use.
 *
 * The result is kept in $this->client: null means no attempt has been made yet and false
 * means the client could not be created (Solr extension missing or options rejected).
 *
 * @throws \core_search\engine_exception If the client is not available and $triggerexception is true.
 * @param bool $triggerexception Whether an unavailable client is reported with an exception instead of false.
 * @return \SolrClient|false The client, or false if it is not available and $triggerexception is false.
 */
protected function get_search_client($triggerexception = true) {

    // Type comparison as it is set to false if not available.
    if ($this->client === null) {
        $options = array(
            'hostname' => $this->config->server_hostname,
            'path' => '/solr/' . $this->config->indexname,
            'login' => !empty($this->config->server_username) ? $this->config->server_username : '',
            'password' => !empty($this->config->server_password) ? $this->config->server_password : '',
            'port' => !empty($this->config->server_port) ? $this->config->server_port : '',
            'issecure' => !empty($this->config->secure) ? $this->config->secure : '',
            'ssl_cert' => !empty($this->config->ssl_cert) ? $this->config->ssl_cert : '',
            'ssl_cert_only' => !empty($this->config->ssl_cert_only) ? $this->config->ssl_cert_only : '',
            'ssl_key' => !empty($this->config->ssl_key) ? $this->config->ssl_key : '',
            'ssl_password' => !empty($this->config->ssl_keypassword) ? $this->config->ssl_keypassword : '',
            'ssl_cainfo' => !empty($this->config->ssl_cainfo) ? $this->config->ssl_cainfo : '',
            'ssl_capath' => !empty($this->config->ssl_capath) ? $this->config->ssl_capath : '',
        );

        if (!class_exists('\SolrClient')) {
            // The PHP Solr extension is not loaded.
            $this->client = false;
        } else {
            try {
                $this->client = new \SolrClient($options);
            } catch (\SolrIllegalArgumentException $e) {
                // The constructor signals invalid options with an exception, it never returns false.
                debugging('Error creating the Solr client: ' . $e->getMessage(), DEBUG_DEVELOPER);
                $this->client = false;
            }
        }
    }

    if ($this->client === false && $triggerexception) {
        throw new \core_search\engine_exception('engineserverstatus', 'search');
    }

    return $this->client;
}
437 | } |