MDL-58859 analytics: Analytics API added to core
[moodle.git] / analytics / classes / local / analyser / base.php
CommitLineData
369389c9
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
18 *
19 * @package core_analytics
20 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
21 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
22 */
23
24namespace core_analytics\local\analyser;
25
26defined('MOODLE_INTERNAL') || die();
27
/**
 * Base class for analysers.
 *
 * An analyser is bound to one model (model id, target, indicators and time splitting methods).
 * Subclasses define what a sample is and which analysables are iterated; this class turns a single
 * analysable into dataset files, one per time splitting method, and keeps track of what has already
 * been analysed.
 *
 * @package core_analytics
 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
abstract class base {

    /**
     * @var int Id of the model this analyser works for (assumed numeric; it is used as a DB field value).
     */
    protected $modelid;

    /**
     * @var \core_analytics\local\target\base The model target.
     */
    protected $target;

    /**
     * @var \core_analytics\local\indicator\base[] The model indicators.
     */
    protected $indicators;

    /**
     * @var array Time splitting method instances to process (each one must provide get_id(), get_name(), etc.).
     */
    protected $timesplittings;

    /**
     * @var array Analyser options. 'evaluation' is always set (false by default); 'reuseprevanalysed' is optional.
     */
    protected $options;

    /**
     * @var array Messages logged while processing analysables.
     */
    protected $log;

    /**
     * Constructor.
     *
     * @throws \core_analytics\requirements_exception If an indicator needs sample data this analyser does not provide.
     * @param int $modelid
     * @param \core_analytics\local\target\base $target
     * @param \core_analytics\local\indicator\base[] $indicators
     * @param array $timesplittings Time splitting method instances
     * @param array $options Supported keys (as read by this class): 'evaluation' and 'reuseprevanalysed'
     */
    public function __construct($modelid, \core_analytics\local\target\base $target, $indicators, $timesplittings, $options) {
        $this->modelid = $modelid;
        $this->target = $target;
        $this->indicators = $indicators;
        $this->timesplittings = $timesplittings;

        // Normalise the evaluation flag, process_time_splitting() compares it strictly against false.
        if (empty($options['evaluation'])) {
            $options['evaluation'] = false;
        }
        $this->options = $options;

        $this->log = array();

        // Checks if the analyser satisfies the indicators requirements.
        $this->check_indicators_requirements();
    }

    /**
     * This function returns the list of samples that can be calculated.
     *
     * @param \core_analytics\analysable $analysable
     * @return array array[0] = int[], array[1] = array
     */
    abstract protected function get_all_samples(\core_analytics\analysable $analysable);

    /**
     * Returns the samples identified by the provided ids.
     *
     * NOTE(review): the return format is not visible from this class; presumably it matches
     * get_all_samples() - confirm against the implementations.
     *
     * @param int[] $sampleids
     * @return array
     */
    abstract public function get_samples($sampleids);

    /**
     * Returns the identifier of the samples origin.
     *
     * The returned value is the only sample data this analyser provides by default (see provided_sample_data())
     * and it is also forwarded to the time splitting method when calculating.
     *
     * @return string
     */
    abstract protected function get_samples_origin();

    /**
     * moodle/analytics:listinsights will be required at this level to access the sample predictions.
     *
     * @param int $sampleid
     * @return \context
     */
    abstract public function sample_access_context($sampleid);

    /**
     * Describes a sample.
     *
     * NOTE(review): the return format is defined by the implementations, it is not used in this class.
     *
     * @param int $sampleid
     * @param int $contextid
     * @param array $sampledata
     * @return mixed
     */
    abstract public function sample_description($sampleid, $contextid, $sampledata);

    /**
     * Returns the list of sample data this analyser provides to the indicators.
     *
     * By default only the samples origin; analysers providing more data should overwrite it.
     *
     * @return string[]
     */
    protected function provided_sample_data() {
        return array($this->get_samples_origin());
    }

    /**
     * Main analyser method which processes the site analysables.
     *
     * \core_analytics\local\analyser\by_course and \core_analytics\local\analyser\sitewide are implementing
     * this method returning site courses (by_course) and the whole system (sitewide) as analysables.
     * In most of the cases you should have enough extending from one of these classes so you don't need
     * to reimplement this method.
     *
     * @param bool $includetarget Whether the target should be calculated (labelled data) or not.
     * @return \stored_file[]
     */
    abstract public function get_analysable_data($includetarget);

    /**
     * Returns the analysable data including the target calculation.
     *
     * @return \stored_file[]
     */
    public function get_labelled_data() {
        return $this->get_analysable_data(true);
    }

    /**
     * Returns the analysable data without the target calculation.
     *
     * @return \stored_file[]
     */
    public function get_unlabelled_data() {
        return $this->get_analysable_data(false);
    }

    /**
     * Checks if the analyser satisfies all the model indicators requirements.
     *
     * @throws \core_analytics\requirements_exception
     * @return void
     */
    protected function check_indicators_requirements() {

        foreach ($this->indicators as $indicator) {
            $missingrequired = $this->check_indicator_requirements($indicator);
            if ($missingrequired !== true) {
                throw new \core_analytics\requirements_exception(get_class($indicator) . ' indicator requires ' .
                    json_encode($missingrequired) . ' sample data which is not provided by ' . get_class($this));
            }
        }
    }

    /**
     * Checks that this analyser provides all the sample data required by the indicator.
     *
     * @param \core_analytics\local\indicator\base $indicator
     * @return true|string[] True if all good, missing requirements list otherwise
     */
    public function check_indicator_requirements(\core_analytics\local\indicator\base $indicator) {

        $requiredsampledata = $indicator::required_sample_data();
        if (empty($requiredsampledata)) {
            // The indicator does not need any sample data.
            return true;
        }

        // Note that array_diff preserves the keys of the required sample data list.
        $missingrequired = array_diff($requiredsampledata, $this->provided_sample_data());
        if (empty($missingrequired)) {
            return true;
        }

        return $missingrequired;
    }

    /**
     * Processes an analysable
     *
     * This method returns an array of files indexed by time splitting method id. Analysables that are
     * not valid for the target, or that do not generate any file, are reported through get_logs().
     *
     * @param \core_analytics\analysable $analysable
     * @param bool $includetarget
     * @return \stored_file[] Files by time splitting method
     */
    public function process_analysable($analysable, $includetarget) {

        $files = array();

        // Target instances scope is per-analysable (it can't be lower as calculations run once per
        // analysable, not time splitting method nor time range).
        $target = forward_static_call(array($this->target, 'instance'));

        // We need to check that the analysable is valid for the target even if we don't include targets
        // as we still need to discard invalid analysables for the target.
        $result = $target->is_valid_analysable($analysable, $includetarget);
        if ($result !== true) {
            $a = new \stdClass();
            $a->analysableid = $analysable->get_id();
            $a->result = $result;
            $this->log[] = get_string('analysablenotvalidfortarget', 'analytics', $a);
            return array();
        }

        // Process all provided time splitting methods.
        $results = array();
        foreach ($this->timesplittings as $timesplitting) {

            $timesplittingid = $timesplitting->get_id();

            // For evaluation purposes we don't need to be that strict about how updated the data is,
            // if this analysable was analysed less than 1 week ago we skip generating a new one. This
            // helps scale the evaluation process as sites with tons of courses may take a lot of time to
            // complete an evaluation.
            if (!empty($this->options['evaluation']) && !empty($this->options['reuseprevanalysed'])) {

                $previousanalysis = \core_analytics\dataset_manager::get_evaluation_analysable_file($this->modelid,
                    $analysable->get_id(), $timesplittingid);
                $boundary = time() - WEEKSECS;
                if ($previousanalysis && $previousanalysis->get_timecreated() > $boundary) {
                    // Recover the previous analysed file and avoid generating a new one.

                    // Don't bother filling a result object as it is only useful when there are no files generated.
                    $files[$timesplittingid] = $previousanalysis;
                    continue;
                }
            }

            if ($includetarget) {
                $result = $this->process_time_splitting($timesplitting, $analysable, $target);
            } else {
                $result = $this->process_time_splitting($timesplitting, $analysable);
            }

            if (!empty($result->file)) {
                $files[$timesplittingid] = $result->file;
            }

            // Indexed by time splitting method id so the errors below can be attributed to each method.
            $results[$timesplittingid] = $result;
        }

        if (empty($files)) {
            $errors = array();
            foreach ($results as $timesplittingid => $result) {
                $errors[] = $timesplittingid . ': ' . $result->message;
            }

            $a = new \stdClass();
            $a->analysableid = $analysable->get_id();
            $a->errors = implode(', ', $errors);
            $this->log[] = get_string('analysablenotused', 'analytics', $a);
        }

        return $files;
    }

    /**
     * Returns the messages logged while processing analysables.
     *
     * @return array
     */
    public function get_logs() {
        return $this->log;
    }

    /**
     * Processes the analysable using the provided time splitting method.
     *
     * The returned object always contains 'status' and 'message'; 'file' is only set when a dataset
     * file has been generated.
     *
     * @param object $timesplitting The time splitting method instance
     * @param \core_analytics\analysable $analysable
     * @param \core_analytics\local\target\base|false $target The target instance, false when getting data for predictions
     * @return \stdClass
     */
    protected function process_time_splitting($timesplitting, $analysable, $target = false) {

        $result = new \stdClass();

        if (!$timesplitting->is_valid_analysable($analysable)) {
            $result->status = \core_analytics\model::ANALYSE_REJECTED_RANGE_PROCESSOR;
            $result->message = get_string('invalidanalysablefortimesplitting', 'analytics',
                $timesplitting->get_name());
            return $result;
        }
        $timesplitting->set_analysable($analysable);

        if (CLI_SCRIPT && !PHPUNIT_TEST) {
            mtrace('Analysing id "' . $analysable->get_id() . '" with "' . $timesplitting->get_name() . '" time splitting method...');
        }

        // What is a sample is defined by the analyser, it can be an enrolment, a course, a user, a question
        // attempt... it is on what we will base indicators calculations.
        list($sampleids, $samplesdata) = $this->get_all_samples($analysable);

        if (count($sampleids) === 0) {
            $result->status = \core_analytics\model::ANALYSE_REJECTED_RANGE_PROCESSOR;
            $result->message = get_string('nodata', 'analytics');
            return $result;
        }

        if ($target) {
            // All ranges are used when we are calculating data for training.
            $ranges = $timesplitting->get_all_ranges();
        } else {
            // Only some ranges can be used for prediction (it depends on the time range where we are right now).
            $ranges = $this->get_prediction_ranges($timesplitting);
        }

        // There is no need to keep track of the evaluated samples and ranges as we always evaluate the whole dataset.
        if ($this->options['evaluation'] === false) {

            if (empty($ranges)) {
                $result->status = \core_analytics\model::ANALYSE_REJECTED_RANGE_PROCESSOR;
                $result->message = get_string('nonewdata', 'analytics');
                return $result;
            }

            // We skip all samples that are already part of a training dataset, even if they have not been used for training yet.
            $sampleids = $this->filter_out_train_samples($sampleids, $timesplitting);

            if (count($sampleids) === 0) {
                $result->status = \core_analytics\model::ANALYSE_REJECTED_RANGE_PROCESSOR;
                $result->message = get_string('nonewdata', 'analytics');
                return $result;
            }

            // TODO We may be interested in limiting $samplesdata contents to $sampleids after filtering out some sampleids.

            // Only when processing data for predictions.
            if ($target === false) {
                // We also filter out ranges that have already been used for predictions.
                $ranges = $this->filter_out_prediction_ranges($ranges, $timesplitting);
            }

            if (count($ranges) === 0) {
                $result->status = \core_analytics\model::ANALYSE_REJECTED_RANGE_PROCESSOR;
                $result->message = get_string('nonewtimeranges', 'analytics');
                return $result;
            }
        }

        $dataset = new \core_analytics\dataset_manager($this->modelid, $analysable->get_id(), $timesplitting->get_id(),
            $this->options['evaluation'], !empty($target));

        // Flag the model + analysable + timesplitting as being analysed (prevent concurrent executions).
        $dataset->init_process();

        foreach ($this->indicators as $indicator) {
            // The analyser attaches the main entities the sample depends on and are provided to the
            // indicator to calculate the sample.
            $indicator->set_sample_data($samplesdata);
        }

        // Here we start the memory intensive process that will last until $data var is
        // unset (until the method is finished basically).
        $data = $timesplitting->calculate($sampleids, $this->get_samples_origin(), $this->indicators, $ranges, $target);

        if (!$data) {
            // The process was flagged as started above, it has to be closed on this exit path as well,
            // otherwise the "being analysed" flag would stay set for this model + analysable + timesplitting.
            $dataset->close_process();

            $result->status = \core_analytics\model::ANALYSE_REJECTED_RANGE_PROCESSOR;
            $result->message = get_string('novaliddata', 'analytics');
            return $result;
        }

        // Write all calculated data to a file.
        $file = $dataset->store($data);

        // Flag the model + analysable + timesplitting as analysed.
        $dataset->close_process();

        // No need to keep track of analysed stuff when evaluating.
        if ($this->options['evaluation'] === false) {
            // Save the samples that have been already analysed so they are not analysed again in future.

            if ($target) {
                $this->save_train_samples($sampleids, $timesplitting, $file);
            } else {
                $this->save_prediction_ranges($ranges, $timesplitting);
            }
        }

        $result->status = \core_analytics\model::OK;
        $result->message = get_string('successfullyanalysed', 'analytics');
        $result->file = $file;
        return $result;
    }

    /**
     * Returns the time splitting method ranges that are ready to be used for predictions.
     *
     * @param object $timesplitting The time splitting method instance, the analysable must be already set
     * @return array The ready ranges, indexed as in $timesplitting->get_all_ranges()
     */
    protected function get_prediction_ranges($timesplitting) {

        // We already provided the analysable to the time splitting method, there is no need to feed it back.
        $predictionranges = array();
        foreach ($timesplitting->get_all_ranges() as $rangeindex => $range) {
            if ($timesplitting->ready_to_predict($range)) {
                // We need to maintain the same indexes.
                $predictionranges[$rangeindex] = $range;
            }
        }

        return $predictionranges;
    }

    /**
     * Filters out the samples that are already part of a training dataset.
     *
     * Samples are matched by array key against the lists stored by save_train_samples().
     *
     * @param array $sampleids
     * @param object $timesplitting The time splitting method instance, the analysable must be already set
     * @return array The samples that have not been stored as training samples yet
     */
    protected function filter_out_train_samples($sampleids, $timesplitting) {
        global $DB;

        $params = array('modelid' => $this->modelid, 'analysableid' => $timesplitting->get_analysable()->get_id(),
            'timesplitting' => $timesplitting->get_id());

        $trainingsamples = $DB->get_records('analytics_train_samples', $params);

        // Skip each file trained samples.
        foreach ($trainingsamples as $trainingfile) {

            $usedsamples = json_decode($trainingfile->sampleids, true);

            // Nothing to discard if the stored list is empty or can not be decoded.
            if (!empty($usedsamples)) {
                // Reset $sampleids to $sampleids minus this file's $usedsamples.
                $sampleids = array_diff_key($sampleids, $usedsamples);
            }
        }

        return $sampleids;
    }

    /**
     * Filters out the ranges that have already been used for predictions.
     *
     * @param array $ranges Ranges indexed by range index
     * @param object $timesplitting The time splitting method instance, the analysable must be already set
     * @return array The ranges that have not been used for predictions yet, original indexes are preserved
     */
    protected function filter_out_prediction_ranges($ranges, $timesplitting) {
        global $DB;

        $params = array('modelid' => $this->modelid, 'analysableid' => $timesplitting->get_analysable()->get_id(),
            'timesplitting' => $timesplitting->get_id());

        $predictedranges = $DB->get_records('analytics_predict_ranges', $params);
        foreach ($predictedranges as $predictedrange) {
            // Unsetting a missing index is harmless, so the range is discarded regardless of its contents.
            unset($ranges[$predictedrange->rangeindex]);
        }

        return $ranges;
    }

    /**
     * Stores the samples that have been included in a training dataset file.
     *
     * @param array $sampleids
     * @param object $timesplitting The time splitting method instance, the analysable must be already set
     * @param \stored_file $file The dataset file
     * @return mixed Whatever $DB->insert_record() returns
     */
    protected function save_train_samples($sampleids, $timesplitting, $file) {
        global $DB;

        $trainingsamples = new \stdClass();
        $trainingsamples->modelid = $this->modelid;
        $trainingsamples->analysableid = $timesplitting->get_analysable()->get_id();
        $trainingsamples->timesplitting = $timesplitting->get_id();
        $trainingsamples->fileid = $file->get_id();

        // TODO We just need the keys, we can save some space by removing the values.
        $trainingsamples->sampleids = json_encode($sampleids);
        $trainingsamples->timecreated = time();

        return $DB->insert_record('analytics_train_samples', $trainingsamples);
    }

    /**
     * Stores the ranges that have been used for predictions, one record per range index.
     *
     * @param array $ranges Ranges indexed by range index, only the indexes are stored
     * @param object $timesplitting The time splitting method instance, the analysable must be already set
     * @return void
     */
    protected function save_prediction_ranges($ranges, $timesplitting) {
        global $DB;

        $predictionrange = new \stdClass();
        $predictionrange->modelid = $this->modelid;
        $predictionrange->analysableid = $timesplitting->get_analysable()->get_id();
        $predictionrange->timesplitting = $timesplitting->get_id();
        $predictionrange->timecreated = time();

        foreach (array_keys($ranges) as $rangeindex) {
            $predictionrange->rangeindex = $rangeindex;
            $DB->insert_record('analytics_predict_ranges', $predictionrange);
        }
    }
}
431}