MDL-55725 search: Index proper time modified with indexed files
[moodle.git] / search / engine / solr / classes / document.php
CommitLineData
95c6aeaf
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
18 * Document representation.
19 *
20 * @package search_solr
21 * @copyright 2015 David Monllao {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace search_solr;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
30 * Represents a document to index.
31 *
32 * @copyright 2015 David Monllao {@link http://www.davidmonllao.com}
33 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
34 */
class document extends \core_search\document {
    /**
     * Indicates the file contents were not indexed due to an error.
     */
    const INDEXED_FILE_ERROR = -1;

    /**
     * Indicates the file contents were not indexed due to filtering/settings.
     */
    const INDEXED_FILE_FALSE = 0;

    /**
     * Indicates the file contents are indexed with the record.
     */
    const INDEXED_FILE_TRUE = 1;

    /**
     * Any fields that are engine specific. These are fields that are solely used by a search engine plugin
     * for internal purposes.
     *
     * @var array
     */
    protected static $enginefields = array(
        'solr_filegroupingid' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        'solr_fileid' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        'solr_filecontenthash' => array(
            'type' => 'string',
            'stored' => true,
            'indexed' => true
        ),
        // Stores the status of file indexing.
        'solr_fileindexstatus' => array(
            'type' => 'int',
            'stored' => true,
            'indexed' => true
        ),
        // Field to index, but not store, file contents.
        'solr_filecontent' => array(
            'type' => 'text',
            'stored' => false,
            'indexed' => true,
            'mainquery' => true
        )
    );

    /**
     * Formats the timestamp according to the search engine needs.
     *
     * @param int $timestamp
     * @return string
     */
    public static function format_time_for_engine($timestamp) {
        return gmdate(\search_solr\engine::DATE_FORMAT, $timestamp);
    }

    /**
     * Formats a string value according to the search engine needs.
     *
     * The string is cut so that it does not exceed the maximum number of bytes allowed.
     *
     * @param string $string
     * @return string
     */
    public static function format_string_for_engine($string) {
        // 2^15 default. We could convert this to a setting as it is possible to
        // change the max in solr.
        return \core_text::str_max_bytes($string, 32766);
    }

    /**
     * Returns a timestamp from the value stored in the search engine.
     *
     * @param string $time
     * @return int
     */
    public static function import_time_from_engine($time) {
        return strtotime($time);
    }

    /**
     * Overwritten to use HTML format, as format_text() produces HTML markup for solr highlighting.
     *
     * @return int
     */
    protected function get_text_format() {
        return FORMAT_HTML;
    }

    /**
     * Formats a text string coming from the search engine.
     *
     * The text is HTML-escaped first, then the engine highlight markers are converted into
     * span tags, and finally the span tags are balanced.
     *
     * @param string $text Text to format
     * @return string HTML text to be rendered
     */
    protected function format_text($text) {
        // Since we allow output for highlighting, we need to encode html entities.
        // This ensures plaintext html chars don't become valid html.
        $out = s($text);

        $startcount = 0;
        $endcount = 0;

        // Remove end/start pairs that span a few common separation characters. Allows us to highlight phrases instead of words.
        $regex = '|'.engine::HIGHLIGHT_END.'([ .,-]{0,3})'.engine::HIGHLIGHT_START.'|';
        $out = preg_replace($regex, '$1', $out);

        // Now replace our start and end highlight markers.
        $out = str_replace(engine::HIGHLIGHT_START, '<span class="highlight">', $out, $startcount);
        $out = str_replace(engine::HIGHLIGHT_END, '</span>', $out, $endcount);

        // This makes sure any highlight tags are balanced, in case truncation or the highlight text contained our markers.
        while ($startcount > $endcount) {
            $out .= '</span>';
            $endcount++;
        }
        while ($startcount < $endcount) {
            $out = '<span class="highlight">' . $out;
            // Count the opening tag just added, otherwise the loop never terminates.
            $startcount++;
        }

        return parent::format_text($out);
    }

    /**
     * Apply any defaults to unset fields before export. Called after document building, but before export.
     *
     * Sub-classes of this should make sure to call parent::apply_defaults().
     */
    protected function apply_defaults() {
        parent::apply_defaults();

        // We want to set the solr_filegroupingid to id if it isn't set.
        if (!isset($this->data['solr_filegroupingid'])) {
            $this->data['solr_filegroupingid'] = $this->data['id'];
        }
    }

    /**
     * Export the data for the given file in relation to this document.
     *
     * The exported array starts from this document's own export, drops the text content fields
     * and replaces the id, type, title, modified time and solr_file* fields with values taken from the file.
     *
     * @param \stored_file $file The stored file we are talking about.
     * @return array
     */
    public function export_file_for_engine($file) {
        $data = $this->export_for_engine();

        // Content is indexed in the main document.
        unset($data['content']);
        unset($data['description1']);
        unset($data['description2']);

        // Going to append the fileid to give it a unique id.
        $data['id'] = $data['id'].'-solrfile'.$file->get_id();
        $data['type'] = \core_search\manager::TYPE_FILE;
        $data['solr_fileid'] = $file->get_id();
        $data['solr_filecontenthash'] = $file->get_contenthash();
        $data['solr_fileindexstatus'] = self::INDEXED_FILE_TRUE;
        $data['title'] = $file->get_filename();
        $data['modified'] = self::format_time_for_engine($file->get_timemodified());

        return $data;
    }
}