MDL-57791 analytics: Second review round
[moodle.git] / analytics / classes / model.php
CommitLineData
369389c9
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
b94dbb55 18 * Prediction model representation.
369389c9
DM
19 *
20 * @package core_analytics
21 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace core_analytics;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
b94dbb55 30 * Prediction model representation.
369389c9
DM
31 *
32 * @package core_analytics
33 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36class model {
37
413f19bc
DM
/**
 * Status code: everything went as expected.
 */
const OK = 0;

/**
 * Status code: a generic problem occurred.
 */
const GENERAL_ERROR = 1;

/**
 * Status code: there was no dataset to analyse.
 */
const NO_DATASET = 2;

/**
 * Evaluation status: the model's prediction accuracy is low.
 */
const EVALUATE_LOW_SCORE = 4;

/**
 * Evaluation status: there was not enough data to evaluate the model properly.
 */
const EVALUATE_NOT_ENOUGH_DATA = 8;

/**
 * Analysable status: not valid for the time splitting method.
 */
const ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD = 4;

/**
 * Analysable status: not valid for any of the time splitting methods.
 */
const ANALYSABLE_STATUS_INVALID_FOR_RANGEPROCESSORS = 8;

/**
 * Analysable status: not valid for the target.
 */
const ANALYSABLE_STATUS_INVALID_FOR_TARGET = 16;

/**
 * Lowest score at which a non-static prediction model is considered good.
 */
const MIN_SCORE = 0.7;

/**
 * Largest standard deviation between evaluation repetitions for which the
 * evaluation results are still considered stable.
 */
const ACCEPTED_DEVIATION = 0.05;

/**
 * How many times the evaluation is repeated.
 */
const EVALUATION_ITERATIONS = 10;

/**
 * The model record.
 *
 * @var \stdClass
 */
protected $model = null;

/**
 * The analyser instance, created lazily.
 *
 * @var \core_analytics\local\analyser\base
 */
protected $analyser = null;

/**
 * The target instance, created lazily.
 *
 * @var \core_analytics\local\target\base
 */
protected $target = null;

/**
 * The indicator instances, loaded lazily.
 *
 * @var \core_analytics\local\indicator\base[]
 */
protected $indicators = null;

/**
 * Unique model id built from site info and the last model modification.
 *
 * @var string
 */
protected $uniqueid = null;
119
120 /**
1cc2b4ba 121 * Constructor.
369389c9 122 *
1cc2b4ba 123 * @param int|\stdClass $model
369389c9
DM
124 * @return void
125 */
public function __construct($model) {
    global $DB;

    // A scalar is treated as the id of an analytics_models row; any other value is
    // stored as received and is expected to already be the model record.
    if (is_scalar($model)) {
        // MUST_EXIST makes get_record() throw when the row is missing, so a falsy
        // result can never reach this point and no further existence check is needed.
        $model = $DB->get_record('analytics_models', array('id' => $model), '*', MUST_EXIST);
    }
    $this->model = $model;
}
137
138 /**
1cc2b4ba 139 * Returns the model id.
369389c9
DM
140 *
141 * @return int
142 */
public function get_id() {
    // The id comes straight from the stored model record.
    $record = $this->model;
    return $record->id;
}
146
147 /**
1cc2b4ba 148 * Returns a plain \stdClass with the model data.
369389c9
DM
149 *
150 * @return \stdClass
151 */
public function get_model_obj() {
    // Hands back the very same record object held by this instance (no copy is made).
    $record = $this->model;
    return $record;
}
155
156 /**
1cc2b4ba 157 * Returns the model target.
369389c9
DM
158 *
159 * @return \core_analytics\local\target\base
160 */
public function get_target() {
    // Resolve the target through the manager on first use and keep it for later calls.
    if ($this->target === null) {
        $this->target = \core_analytics\manager::get_target($this->model->target);
    }

    return $this->target;
}
170
171 /**
1cc2b4ba 172 * Returns the model indicators.
369389c9
DM
173 *
174 * @return \core_analytics\local\indicator\base[]
175 */
public function get_indicators() {
    // Already loaded during a previous call.
    if ($this->indicators !== null) {
        return $this->indicators;
    }

    // The record stores the indicators as a JSON-encoded list of class names.
    $classnames = json_decode($this->model->indicators);
    if (is_array($classnames) === false) {
        throw new \coding_exception('Model ' . $this->model->id . ' indicators can not be read');
    }

    $this->indicators = array();
    foreach ($classnames as $classname) {
        $indicator = \core_analytics\manager::get_indicator($classname);
        if (!$indicator) {
            // Indicators that can not be loaded are reported and left out.
            debugging('Can\'t load ' . $classname . ' indicator', DEBUG_DEVELOPER);
            continue;
        }
        $this->indicators[$classname] = $indicator;
    }

    return $this->indicators;
}
199
200 /**
201 * Returns the list of indicators that could potentially be used by the model target.
202 *
203 * It includes the indicators that are part of the model.
204 *
a40952d3 205 * @return \core_analytics\local\indicator\base[]
369389c9
DM
206 */
public function get_potential_indicators() {

    $candidates = \core_analytics\manager::get_all_indicators();

    // Without an analyser yet, initialise one in evaluation mode.
    if (empty($this->analyser)) {
        $this->init_analyser(array('evaluation' => true));
    }

    // Keep (with their original keys) only the indicators whose requirements the analyser accepts.
    $analyser = $this->analyser;
    return array_filter($candidates, function($indicator) use ($analyser) {
        return $analyser->check_indicator_requirements($indicator) === true;
    });
}
222
223 /**
1cc2b4ba 224 * Returns the model analyser (defined by the model target).
369389c9
DM
225 *
226 * @return \core_analytics\local\analyser\base
227 */
public function get_analyser() {
    // Lazily initialise the analyser, using no options, the first time it is requested.
    if ($this->analyser === null) {
        $this->init_analyser();
    }

    return $this->analyser;
}
238
239 /**
1cc2b4ba 240 * Initialises the model analyser.
369389c9 241 *
1cc2b4ba 242 * @throws \coding_exception
369389c9
DM
243 * @param array $options
244 * @return void
245 */
protected function init_analyser($options = array()) {
    // Builds the analyser class named by the target and stores it in $this->analyser.
    // Throws \moodle_exception when there is no target or no usable time splitting
    // method, and \coding_exception when the analyser class does not exist.

    $target = $this->get_target();
    $indicators = $this->get_indicators();

    if (empty($target)) {
        throw new \moodle_exception('errornotarget', 'analytics');
    }

    if (!empty($options['evaluation'])) {
        // The evaluation process will run using all available time splitting methods unless one is specified.
        if (!empty($options['timesplitting'])) {
            $timesplitting = \core_analytics\manager::get_time_splitting($options['timesplitting']);
            $timesplittings = array($timesplitting->get_id() => $timesplitting);
        } else {
            $timesplittings = \core_analytics\manager::get_enabled_time_splitting_methods();
        }
    } else {

        if (empty($this->model->timesplitting)) {
            throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
        }

        // Returned as an array as all actions (evaluation, training and prediction) go through the same process.
        $timesplittings = array($this->model->timesplitting => $this->get_time_splitting());
    }

    if (empty($timesplittings)) {
        throw new \moodle_exception('errornotimesplittings', 'analytics');
    }

    $classname = $target->get_analyser_class();
    if (!class_exists($classname)) {
        // "new" is required: without it PHP would try to call a function named
        // coding_exception instead of throwing the exception.
        throw new \coding_exception($classname . ' class does not exists');
    }

    // Expected to be a \core_analytics\local\analyser\base subclass.
    $this->analyser = new $classname($this->model->id, $target, $indicators, $timesplittings, $options);
}
285
286 /**
1cc2b4ba 287 * Returns the model time splitting method.
369389c9 288 *
1cc2b4ba 289 * @return \core_analytics\local\time_splitting\base|false Returns false if no time splitting.
369389c9
DM
290 */
public function get_time_splitting() {
    // Only models with a time splitting method set can resolve an instance.
    if (!empty($this->model->timesplitting)) {
        return \core_analytics\manager::get_time_splitting($this->model->timesplitting);
    }

    return false;
}
297
298 /**
a40952d3 299 * Creates a new model. Enables it if $timesplittingid is specified.
369389c9
DM
300 *
301 * @param \core_analytics\local\target\base $target
302 * @param \core_analytics\local\indicator\base[] $indicators
a40952d3 303 * @param string $timesplittingid The time splitting method id (its fully qualified class name)
369389c9
DM
304 * @return \core_analytics\model
305 */
public static function create(\core_analytics\local\target\base $target, array $indicators, $timesplittingid = false) {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $indicatorclasses = self::indicator_classes($indicators);

    // One timestamp is shared by the version and both time fields.
    $now = time();

    $record = (object)[
        'target' => $target->get_id(),
        'indicators' => json_encode($indicatorclasses),
        'version' => $now,
        'timecreated' => $now,
        'timemodified' => $now,
        'usermodified' => $USER->id,
    ];

    $newid = $DB->insert_record('analytics_models', $record);

    // Read the row back so the fields filled in by db defaults are present as well.
    $record = $DB->get_record('analytics_models', array('id' => $newid), '*', MUST_EXIST);

    $instance = new static($record);

    // A time splitting method was provided, so the model gets enabled with it.
    if ($timesplittingid) {
        $instance->enable($timesplittingid);
    }

    // Static models are flagged as trained right away.
    if ($instance->is_static()) {
        $instance->mark_as_trained();
    }

    return $instance;
}
340
a40952d3 341 /**
1cc2b4ba 342 * Updates the model.
a40952d3
DM
343 *
344 * @param int|bool $enabled
345 * @param \core_analytics\local\indicator\base[] $indicators
346 * @param string $timesplittingid
347 * @return void
348 */
public function update($enabled, $indicators, $timesplittingid = '') {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $now = time();

    $indicatorsstr = json_encode(self::indicator_classes($indicators));

    $sametimesplitting = ($this->model->timesplitting === $timesplittingid);
    $sameindicators = ($this->model->indicators === $indicatorsstr);

    if (!$sametimesplitting || !$sameindicators) {
        // A new version keeps data produced with different time splittings or indicators apart.
        $this->model->version = $now;

        // Drop what was generated for the previous configuration.
        $this->clear_model();

        // Remove every file generated for this model.
        \core_analytics\dataset_manager::clear_model_files($this->model->id);

        // The model is no longer considered trained.
        $this->model->trained = 0;
    }

    $this->model->enabled = intval($enabled);
    $this->model->indicators = $indicatorsstr;
    $this->model->timesplitting = $timesplittingid;
    $this->model->timemodified = $now;
    $this->model->usermodified = $USER->id;

    $DB->update_record('analytics_models', $this->model);

    // Forget the cached unique id, it may already have been computed from the old data.
    $this->uniqueid = null;
}
384
d16cf374
DM
385 /**
386 * Removes the model.
387 *
388 * @return void
389 */
public function delete() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    // Clear the model's generated data first, then remove the model row itself.
    $this->clear_model();

    $conditions = array('id' => $this->model->id);
    $DB->delete_records('analytics_models', $conditions);
}
398
369389c9 399 /**
1cc2b4ba 400 * Evaluates the model.
369389c9 401 *
1cc2b4ba
DM
402 * This method gets the site contents (through the analyser) creates a .csv dataset
403 * with them and evaluates the model prediction accuracy multiple times using the
404 * machine learning backend. It returns an object where the model score is the average
405 * prediction accuracy of all executed evaluations.
369389c9
DM
406 *
407 * @param array $options
408 * @return \stdClass[]
409 */
public function evaluate($options = array()) {
    // Always returns an array of result objects. For non-static models the array
    // is keyed by time splitting id and each result carries status, info, score and logid.

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        // Static models are not evaluated; only a log entry is added.
        $this->get_analyser()->add_log(get_string('noevaluationbasedassumptions', 'analytics'));
        $result = new \stdClass();
        $result->status = self::OK;
        // Wrapped in an array so callers get the same return shape as on every other path.
        return array($result);
    }

    $options['evaluation'] = true;
    $this->init_analyser($options);

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_labelled_data call so we get an early exception if it is not ready.
    $predictor = \core_analytics\manager::get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // No datasets generated.
    if (empty($datasets)) {
        $result = new \stdClass();
        $result->status = self::NO_DATASET;
        $result->info = $this->get_analyser()->get_logs();
        return array($result);
    }

    if (!PHPUNIT_TEST && CLI_SCRIPT) {
        echo PHP_EOL . get_string('processingsitecontents', 'analytics') . PHP_EOL;
    }

    $results = array();
    foreach ($datasets as $timesplittingid => $dataset) {

        $timesplitting = \core_analytics\manager::get_time_splitting($timesplittingid);

        $result = new \stdClass();

        // Backslashes are stripped from the class name before using it as an output dir component.
        $dashestimesplittingid = str_replace('\\', '', $timesplittingid);
        $outputdir = $this->get_output_dir(array('evaluation', $dashestimesplittingid));

        // Evaluate the dataset, the deviation we accept in the results depends on the amount of iterations.
        $predictorresult = $predictor->evaluate($this->model->id, self::ACCEPTED_DEVIATION,
            self::EVALUATION_ITERATIONS, $dataset, $outputdir);

        $result->status = $predictorresult->status;
        $result->info = $predictorresult->info;

        if (isset($predictorresult->score)) {
            $result->score = $predictorresult->score;
        } else {
            // Prediction processors may return an error, default to 0 score in that case.
            $result->score = 0;
        }

        $dir = false;
        if (!empty($predictorresult->dir)) {
            $dir = $predictorresult->dir;
        }

        $result->logid = $this->log_result($timesplitting->get_id(), $result->score, $dir, $result->info);

        $results[$timesplitting->get_id()] = $result;
    }

    return $results;
}
483
484 /**
1cc2b4ba
DM
485 * Trains the model using the site contents.
486 *
487 * This method prepares a dataset from the site contents (through the analyser)
488 * and passes it to the machine learning backends. Static models are skipped as
489 * they do not require training.
369389c9
DM
490 *
491 * @return \stdClass
492 */
public function train() {
    // Returns a result object with a status and, except for static models, an info property.

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        // Static models are not trained; only a log entry is added.
        $this->get_analyser()->add_log(get_string('notrainingbasedassumptions', 'analytics'));
        $result = new \stdClass();
        $result->status = self::OK;
        return $result;
    }

    if (!$this->is_enabled() || empty($this->model->timesplitting)) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_labelled_data call so we get an early exception if it is not writable.
    $outputdir = $this->get_output_dir(array('execution'));

    // Before get_labelled_data call so we get an early exception if it is not ready.
    $predictor = \core_analytics\manager::get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // No training if no files have been provided.
    if (empty($datasets) || empty($datasets[$this->model->timesplitting])) {

        $result = new \stdClass();
        $result->status = self::NO_DATASET;
        $result->info = $this->get_analyser()->get_logs();
        return $result;
    }
    $samplesfile = $datasets[$this->model->timesplitting];

    // Train using the dataset.
    $predictorresult = $predictor->train($this->get_unique_id(), $samplesfile, $outputdir);

    $result = new \stdClass();
    $result->status = $predictorresult->status;
    $result->info = $predictorresult->info;

    // A failed training must not consume the dataset nor flag the model as trained,
    // otherwise the same data would never be retried.
    if ((int)$result->status !== self::OK) {
        return $result;
    }

    $this->flag_file_as_used($samplesfile, 'trained');

    // Mark the model as trained if it wasn't.
    if ($this->model->trained == false) {
        $this->mark_as_trained();
    }

    return $result;
}
548
549 /**
1cc2b4ba
DM
550 * Get predictions from the site contents.
551 *
552 * It analyses the site contents (through analyser classes) looking for samples
553 * ready to receive predictions. It generates a dataset with all samples ready to
554 * get predictions and it passes it to the machine learning backends or to the
555 * targets based on assumptions to get the predictions.
369389c9
DM
556 *
557 * @return \stdClass
558 */
public function predict() {
    // Returns a result object with status and info, plus a predictions property
    // once a samples file has been processed.
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    if (!$this->is_enabled() || empty($this->model->timesplitting)) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_unlabelled_data call so we get an early exception if it is not writable.
    $outputdir = $this->get_output_dir(array('execution'));

    // Before get_unlabelled_data call so we get an early exception if it is not ready.
    // Static models do not use a predictions processor.
    if (!$this->is_static()) {
        $predictor = \core_analytics\manager::get_predictions_processor();
    }

    $samplesdata = $this->get_analyser()->get_unlabelled_data();

    // Get the prediction samples file.
    if (empty($samplesdata) || empty($samplesdata[$this->model->timesplitting])) {

        $result = new \stdClass();
        $result->status = self::NO_DATASET;
        $result->info = $this->get_analyser()->get_logs();
        return $result;
    }
    $samplesfile = $samplesdata[$this->model->timesplitting];

    // We need to throw an exception if we are trying to predict stuff that was already predicted.
    $params = array('modelid' => $this->model->id, 'fileid' => $samplesfile->get_id(), 'action' => 'predicted');
    if ($DB->record_exists('analytics_used_files', $params)) {
        throw new \moodle_exception('erroralreadypredict', 'analytics', '', $samplesfile->get_id());
    }

    $indicatorcalculations = \core_analytics\dataset_manager::get_structured_data($samplesfile);

    // Prepare the results object.
    $result = new \stdClass();

    if ($this->is_static()) {
        // Prediction based on assumptions.
        $result->status = self::OK;
        $result->info = [];
        $result->predictions = $this->get_static_predictions($indicatorcalculations);

    } else {
        // Prediction process runs on the machine learning backend.
        $predictorresult = $predictor->predict($this->get_unique_id(), $samplesfile, $outputdir);
        $result->status = $predictorresult->status;
        $result->info = $predictorresult->info;
        $result->predictions = $this->format_predictor_predictions($predictorresult);
    }

    // On failure do not run callbacks nor flag the file as predicted, so the same
    // samples can be processed again later.
    if ((int)$result->status !== self::OK) {
        return $result;
    }

    if ($result->predictions) {
        $samplecontexts = $this->execute_prediction_callbacks($result->predictions, $indicatorcalculations);
    }

    // $samplecontexts is only set when there were predictions, empty() covers both cases.
    if (!empty($samplecontexts) && $this->uses_insights()) {
        $this->trigger_insights($samplecontexts);
    }

    $this->flag_file_as_used($samplesfile, 'predicted');

    return $result;
}
631
632 /**
633 * Formats the predictor results.
634 *
635 * @param array $predictorresult
636 * @return array
637 */
private function format_predictor_predictions($predictorresult) {
    // Turns the rows in $predictorresult->predictions into an array, keyed by unique
    // sample id, of objects with 'prediction' and 'predictionscore' properties.
    // Each row is [uniquesampleid, prediction] or [uniquesampleid, prediction, score].

    $predictions = array();
    if ($predictorresult->predictions) {
        foreach ($predictorresult->predictions as $sampleinfo) {

            // We parse each prediction.
            switch (count($sampleinfo)) {
                case 1:
                    // For whatever reason the predictions processor could not process this sample, we
                    // skip it and do nothing with it.
                    debugging($this->model->id . ' model predictions processor could not process the sample with id ' .
                        $sampleinfo[0], DEBUG_DEVELOPER);
                    // "continue 2" is required: a plain "continue" inside a switch behaves like
                    // "break" and the code below would run with undefined or stale values.
                    continue 2;
                case 2:
                    // Prediction processors that do not return a prediction score will have the maximum prediction
                    // score.
                    list($uniquesampleid, $prediction) = $sampleinfo;
                    $predictionscore = 1;
                    break;
                case 3:
                    list($uniquesampleid, $prediction, $predictionscore) = $sampleinfo;
                    break;
                default:
                    // Unexpected row shape, nothing can be extracted from it so the row is skipped.
                    continue 2;
            }
            $predictiondata = (object)['prediction' => $prediction, 'predictionscore' => $predictionscore];
            $predictions[$uniquesampleid] = $predictiondata;
        }
    }
    return $predictions;
}
670
671 /**
672 * Execute the prediction callbacks defined by the target.
673 *
674 * @param \stdClass[] $predictions
413f19bc 675 * @param array $indicatorcalculations
1611308b
DM
676 * @return array
677 */
protected function execute_prediction_callbacks($predictions, $indicatorcalculations) {

    // Contexts of the stored predictions, indexed by context id.
    $contexts = array();

    foreach ($predictions as $uniquesampleid => $item) {

        // Predictions that do not trigger the target callback are neither stored nor notified.
        if (!$this->get_target()->triggers_callback($item->prediction, $item->predictionscore)) {
            continue;
        }

        // Split the unique sample id into the sample id and the range index.
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        // Persist the prediction together with the JSON-encoded indicator calculations of the sample.
        $calculations = json_encode($indicatorcalculations[$uniquesampleid]);
        $context = $this->save_prediction($sampleid, $rangeindex, $item->prediction,
            $item->predictionscore, $calculations);

        // Remember the context so later steps can act on it.
        $contexts[$context->id] = $context;

        $this->get_target()->prediction_callback($this->model->id, $sampleid, $rangeindex, $context,
            $item->prediction, $item->predictionscore);
    }

    return $contexts;
}
369389c9 704
1611308b
DM
705 /**
706 * Generates insights and updates the cache.
707 *
708 * @param \context[] $samplecontexts
709 * @return void
710 */
protected function trigger_insights($samplecontexts) {

    // Let the target generate its insight notifications for these contexts.
    $this->get_target()->generate_insight_notifications($this->model->id, $samplecontexts);

    // Then bring the 'contextwithinsights' cache up to date.
    $cache = \cache::make('core', 'contextwithinsights');
    foreach ($samplecontexts as $context) {
        $cachedids = $cache->get($context->id);

        if (!$cachedids) {
            // An empty entry may mean "no insights here" or "cache purged"; we can not tell
            // which, so the entry is rebuilt from the models with insights in this context.
            $models = \core_analytics\manager::get_models_with_insights($context);
            $cache->set($context->id, array_keys($models));
            continue;
        }

        if (!in_array($this->get_id(), $cachedids)) {
            // Add this model to the ones already listed for the context.
            array_push($cachedids, $this->get_id());
            $cache->set($context->id, $cachedids);
        }
    }
}
732
a40952d3 733 /**
1611308b 734 * Get predictions from a static model.
a40952d3
DM
735 *
736 * @param array $indicatorcalculations
737 * @return \stdClass[]
738 */
protected function get_static_predictions(&$indicatorcalculations) {
    // Returns an array, keyed by unique sample id, of objects with 'prediction' and
    // 'predictionscore' properties. $indicatorcalculations is received by reference:
    // entries of samples without a calculated value are removed from it.

    // Group samples by analysable for \core_analytics\local\target::calculate.
    $analysables = array();
    // List all sampleids together.
    $sampleids = array();

    foreach ($indicatorcalculations as $uniquesampleid => $indicators) {
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
        $analysableclass = get_class($analysable);
        if (empty($analysables[$analysableclass])) {
            $analysables[$analysableclass] = array();
        }
        if (empty($analysables[$analysableclass][$rangeindex])) {
            $analysables[$analysableclass][$rangeindex] = (object)[
                'analysable' => $analysable,
                'indicatorsdata' => array(),
                'sampleids' => array()
            ];
        }
        // Using the sampleid as a key so we can easily merge indicators data later.
        $analysables[$analysableclass][$rangeindex]->indicatorsdata[$sampleid] = $indicators;
        // We could use indicatorsdata keys but the amount of redundant data is not that big and leaves code below cleaner.
        $analysables[$analysableclass][$rangeindex]->sampleids[$sampleid] = $sampleid;

        // Accumulate sample ids to get all their associated data in 1 single db query (analyser::get_samples).
        $sampleids[$sampleid] = $sampleid;
    }

    // Get all samples data.
    list($sampleids, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Calculate the targets.
    $predictions = array();
    foreach ($analysables as $analysableclass => $rangedata) {
        foreach ($rangedata as $rangeindex => $data) {

            // Sample ids of this group only, taken from indicatorsdata because
            // filter_out_invalid_samples receives $data->sampleids and may alter it.
            $groupsampleids = array_keys($data->indicatorsdata);

            // Attach samples data and calculated indicators data.
            $this->get_target()->clear_sample_data();
            $this->get_target()->add_sample_data($samplesdata);
            $this->get_target()->add_sample_data($data->indicatorsdata);

            // Append new elements (we can not get duplicates because sample-analysable relation is N-1).
            $range = $this->get_time_splitting()->get_range_by_index($rangeindex);
            $this->get_target()->filter_out_invalid_samples($data->sampleids, $data->analysable, false);
            $calculations = $this->get_target()->calculate($data->sampleids, $data->analysable, $range['start'], $range['end']);

            // Samples of this group that got no value (filtered out as invalid, or calculated
            // as null) must not be stored as calculated by self::save_prediction. Only this
            // group's entries are touched, entries of other analysables and ranges are kept.
            foreach ($groupsampleids as $sampleid) {
                if (!isset($calculations[$sampleid])) {
                    $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);
                    unset($indicatorcalculations[$uniquesampleid]);
                }
            }

            foreach ($calculations as $sampleid => $value) {

                $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);

                // Null means that the target couldn't calculate the sample, we also remove them from $indicatorcalculations.
                if (is_null($value)) {
                    unset($indicatorcalculations[$uniquesampleid]);
                    continue;
                }

                // Even if static predictions are based on assumptions we flag them as 100% because they are 100%
                // true according to what the developer defined.
                $predictions[$uniquesampleid] = (object)['prediction' => $value, 'predictionscore' => 1];
            }
        }
    }
    return $predictions;
}
817
369389c9 818 /**
1cc2b4ba 819 * Stores the prediction in the database.
369389c9
DM
820 *
821 * @param int $sampleid
822 * @param int $rangeindex
823 * @param int $prediction
824 * @param float $predictionscore
825 * @param string $calculations
826 * @return \context
827 */
protected function save_prediction($sampleid, $rangeindex, $prediction, $predictionscore, $calculations) {
    global $DB;

    // The context the analyser reports for this sample; it is stored with the row and returned.
    $context = $this->get_analyser()->sample_access_context($sampleid);

    $row = (object)[
        'modelid' => $this->model->id,
        'contextid' => $context->id,
        'sampleid' => $sampleid,
        'rangeindex' => $rangeindex,
        'prediction' => $prediction,
        'predictionscore' => $predictionscore,
        'calculations' => $calculations,
        'timecreated' => time(),
    ];
    $DB->insert_record('analytics_predictions', $row);

    return $context;
}
846
847 /**
1cc2b4ba 848 * Enabled the model using the provided time splitting method.
369389c9
DM
849 *
850 * @param string $timesplittingid
851 * @return void
852 */
public function enable($timesplittingid = false) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $now = time();

    $switching = $timesplittingid && $timesplittingid !== $this->model->timesplitting;
    if ($switching) {

        // The id must be a valid time splitting class and must be given with a leading backslash.
        if (!\core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base') ||
                substr($timesplittingid, 0, 1) !== '\\') {
            throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
        }

        // A different time splitting method also means a new model version.
        $this->model->timesplitting = $timesplittingid;
        $this->model->version = $now;
    }

    $this->model->enabled = 1;
    $this->model->timemodified = $now;

    // Persist the whole record.
    $DB->update_record('analytics_models', $this->model);

    // Forget the cached unique id, it may already have been computed from the old data.
    $this->uniqueid = null;
}
882
a40952d3 883 /**
1cc2b4ba
DM
884 * Is this a static model (as defined by the target)?.
885 *
886 * Static models are based on assumptions instead of in machine learning
887 * backends results.
a40952d3
DM
888 *
889 * @return bool
890 */
public function is_static() {
    // The target decides whether the model is based on assumptions.
    $target = $this->get_target();
    return (bool)$target->based_on_assumptions();
}
894
369389c9 895 /**
1cc2b4ba 896 * Is this model enabled?
369389c9
DM
897 *
898 * @return bool
899 */
public function is_enabled() {
    // The 'enabled' field of the record, cast to a boolean.
    $enabled = $this->model->enabled;
    return (bool)$enabled;
}
903
904 /**
1cc2b4ba 905 * Is this model already trained?
369389c9
DM
906 *
907 * @return bool
908 */
public function is_trained() {
    if ($this->model->trained) {
        return true;
    }

    // Models whose targets are based on assumptions need no training.
    return $this->is_static();
}
913
914 /**
1cc2b4ba 915 * Marks the model as trained
369389c9
DM
916 *
917 * @return void
918 */
public function mark_as_trained() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    // Set the flag on the in-memory record and persist the record.
    $record = $this->model;
    $record->trained = 1;
    $DB->update_record('analytics_models', $record);
}
927
928 /**
1cc2b4ba 929 * Get the contexts with predictions.
369389c9
DM
930 *
931 * @return \stdClass[]
932 */
public function get_predictions_contexts() {
    global $DB;

    // One row per distinct context id that has predictions stored for this model.
    $params = array($this->model->id);
    $sql = "SELECT DISTINCT contextid FROM {analytics_predictions} WHERE modelid = ?";

    return $DB->get_records_sql($sql, $params);
}
939
f9e7447f
DM
/**
 * Tells whether this model has already produced predictions.
 *
 * The analytics_predictions table is intentionally not checked: targets are
 * able to ignore some predicted values and, when they do, the predictions
 * are not even stored in the database.
 *
 * @return bool True if there is a predicted range for this model and its current time splitting method.
 */
public function any_prediction_obtained() {
    global $DB;

    $conditions = array(
        'modelid' => $this->model->id,
        'timesplitting' => $this->model->timesplitting,
    );

    return $DB->record_exists('analytics_predict_ranges', $conditions);
}
954
/**
 * Tells whether this model generates insights, which is up to the model's target.
 *
 * @return bool
 */
public function uses_insights() {
    $modeltarget = $this->get_target();

    // The target answers through a static method.
    return $modeltarget::uses_insights();
}
964
369389c9
DM
/**
 * Tells whether this model has any prediction stored for the provided context.
 *
 * @param \context $context
 * @return bool
 */
public function predictions_exist(\context $context) {
    global $DB;

    // Any prediction of this model in the context counts, whatever its time range is.
    $conditions = array(
        'modelid' => $this->model->id,
        'contextid' => $context->id,
    );
    $where = "modelid = :modelid AND contextid = :contextid";

    return $DB->record_exists_select('analytics_predictions', $where, $conditions);
}
979
/**
 * Returns this model's predictions in the provided context.
 *
 * For each sample only the prediction with the highest range index is
 * returned. Predictions whose sample data is not returned by the analyser
 * are left out.
 *
 * @param \context $context
 * @return \core_analytics\prediction[] Indexed the same way as the records read from the database.
 */
public function get_predictions(\context $context) {
    global $DB;

    \core_analytics\manager::check_can_list_insights($context);

    $modelid = $this->model->id;
    $contextid = $context->id;

    // Filters out previous predictions keeping only the last time range one.
    $sql = "SELECT tip.*
              FROM {analytics_predictions} tip
              JOIN (
                SELECT sampleid, max(rangeindex) AS rangeindex
                  FROM {analytics_predictions}
                 WHERE modelid = ? and contextid = ?
                GROUP BY sampleid
              ) tipsub
                ON tip.sampleid = tipsub.sampleid AND tip.rangeindex = tipsub.rangeindex
             WHERE tip.modelid = ? and tip.contextid = ?";
    $records = $DB->get_records_sql($sql, array($modelid, $contextid, $modelid, $contextid));
    if (!$records) {
        return array();
    }

    // Collect the predicted samples' ids, keeping the keys of the records.
    $sampleids = array();
    foreach ($records as $key => $record) {
        $sampleids[$key] = $record->sampleid;
    }

    // Only the second element returned by the analyser (the samples data) is needed.
    list(, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Turn each \stdClass record into a \core_analytics\prediction that also carries its sample data.
    $predictions = array();
    foreach ($records as $key => $record) {
        $sampleid = $record->sampleid;

        // Skip predictions which samples are not available anymore.
        if (empty($samplesdata[$sampleid])) {
            continue;
        }

        $predictions[$key] = new \core_analytics\prediction($record, $samplesdata[$sampleid]);
    }

    return $predictions;
}
1033
/**
 * Returns the sample data of a prediction.
 *
 * @throws \moodle_exception When the analyser returns no data for the prediction's sample.
 * @param \stdClass $predictionobj Prediction record, its sampleid is the only field read here.
 * @return array
 */
public function prediction_sample_data($predictionobj) {

    $sampleid = $predictionobj->sampleid;

    // Only the second element returned by the analyser (the samples data) is needed.
    list(, $samplesdata) = $this->get_analyser()->get_samples(array($sampleid));

    if (!empty($samplesdata[$sampleid])) {
        return $samplesdata[$sampleid];
    }

    throw new \moodle_exception('errorsamplenotavailable', 'analytics');
}
1050
/**
 * Returns the description of the sample a prediction refers to.
 *
 * The description is provided by the model's analyser, which receives the
 * sample id, the context id and the sample data of the prediction.
 *
 * @param \core_analytics\prediction $prediction
 * @return array 2 elements: list(string, \renderable)
 */
public function prediction_sample_description(\core_analytics\prediction $prediction) {
    $analyser = $this->get_analyser();

    $sampleid = $prediction->get_prediction_data()->sampleid;
    $contextid = $prediction->get_prediction_data()->contextid;
    $sampledata = $prediction->get_sample_data();

    return $analyser->sample_description($sampleid, $contextid, $sampledata);
}
1061
/**
 * Returns the output directory for prediction processors.
 *
 * Directory structure as follows:
 * - Evaluation runs:
 *   models/$model->id/$model->version/evaluation/$model->timesplitting
 * - Training & prediction runs:
 *   models/$model->id/$model->version/execution
 *
 * The base directory is the 'modeloutputdir' setting of the 'analytics'
 * component or, if that is empty, a 'models' directory inside dataroot.
 * The resulting path is passed to make_writable_directory() before it is
 * returned.
 *
 * @param array $subdirs Directory names appended, in order, after the model id and version.
 * @return string
 */
protected function get_output_dir($subdirs = array()) {
    global $CFG;

    $outputdir = get_config('analytics', 'modeloutputdir');
    if (empty($outputdir)) {
        // Nothing configured, apply the default value.
        $outputdir = rtrim($CFG->dataroot, '/') . DIRECTORY_SEPARATOR . 'models';
    }

    // Model id and version go first, the requested subdirectories follow.
    $segments = array($this->model->id, $this->model->version);
    foreach ($subdirs as $subdir) {
        $segments[] = $subdir;
    }
    foreach ($segments as $segment) {
        $outputdir .= DIRECTORY_SEPARATOR . $segment;
    }

    make_writable_directory($outputdir);

    return $outputdir;
}
1095
/**
 * Returns a unique id for this model.
 *
 * This id should be unique for this site. It is calculated once and kept
 * in the instance until it is reset.
 *
 * @return string
 */
public function get_unique_id() {
    global $CFG;

    if ($this->uniqueid === null) {
        // The hash covers the site (wwwroot, dirroot and db prefix), the model id and the model version.
        $parts = array($CFG->wwwroot, $CFG->dirroot, $CFG->prefix, $this->model->id, $this->model->version);
        $this->uniqueid = sha1(implode('$$', $parts));
    }

    return $this->uniqueid;
}
1117
/**
 * Exports the model data.
 *
 * The returned object is a copy of the model record where the target, the
 * time splitting method (if the model has one) and the indicators are
 * replaced by their names.
 *
 * @return \stdClass
 */
public function export() {

    \core_analytics\manager::check_can_manage_models();

    // Work on a copy so the record kept in this instance is not altered.
    $exported = clone $this->model;
    $exported->target = $this->get_target()->get_name();

    $timesplitting = $this->get_time_splitting();
    if ($timesplitting) {
        $exported->timesplitting = $timesplitting->get_name();
    }

    $names = array();
    foreach ($this->get_indicators() as $indicator) {
        $names[] = $indicator->get_name();
    }
    $exported->indicators = $names;

    return $exported;
}
1140
584ffa4f
DM
/**
 * Returns the model logs data, most recent entries first.
 *
 * @param int $limitfrom Passed through to the database query.
 * @param int $limitnum Passed through to the database query.
 * @return \stdClass[]
 */
public function get_logs($limitfrom = 0, $limitnum = 0) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $conditions = array('modelid' => $this->get_id());

    return $DB->get_records('analytics_models_log', $conditions, 'timecreated DESC', '*', $limitfrom, $limitnum);
}
1156
/**
 * Flag the provided file as used for training or prediction.
 *
 * A record with the model id, the file id, the action and the current
 * time is added to analytics_used_files.
 *
 * @param \stored_file $file
 * @param string $action
 * @return void
 */
protected function flag_file_as_used(\stored_file $file, $action) {
    global $DB;

    $usedfile = (object)array(
        'modelid' => $this->model->id,
        'fileid' => $file->get_id(),
        'action' => $action,
        'time' => time(),
    );

    $DB->insert_record('analytics_used_files', $usedfile);
}
1174
/**
 * Log the evaluation results in the database.
 *
 * The log entry also records the model's current version, target and
 * indicators, the current time and the current user.
 *
 * @param string $timesplittingid
 * @param float $score
 * @param string|false $dir Defaults to false.
 * @param array|false $info Only stored when not empty; defaults to false.
 * @return int The inserted log id
 */
protected function log_result($timesplittingid, $score, $dir = false, $info = false) {
    global $DB, $USER;

    $fields = array(
        'modelid' => $this->get_id(),
        'version' => $this->model->version,
        'target' => $this->model->target,
        'indicators' => $this->model->indicators,
        'timesplitting' => $timesplittingid,
        'dir' => $dir,
    );

    if ($info) {
        // Ensure it is not an associative array.
        $fields['info'] = json_encode(array_values($info));
    }

    $fields['score'] = $score;
    $fields['timecreated'] = time();
    $fields['usermodified'] = $USER->id;

    return $DB->insert_record('analytics_models_log', (object)$fields);
}
1204
/**
 * Utility method to return the indicator ids from a list of indicator objects.
 *
 * Each element is validated against \core_analytics\local\indicator\base and
 * the value returned by its get_id() method is collected. The keys of the
 * provided list are ignored.
 *
 * @throws \moodle_exception If any element of the list is not a valid indicator.
 * @param \core_analytics\local\indicator\base[] $indicators
 * @return string[]
 */
private static function indicator_classes($indicators) {

    // What we want to check and store are the indicator classes not the keys.
    $indicatorclasses = array();
    foreach ($indicators as $indicator) {
        if (!\core_analytics\manager::is_valid($indicator, '\core_analytics\local\indicator\base')) {
            // Work out something printable to describe the offending value in the error message.
            if (is_object($indicator)) {
                $indicator = get_class($indicator);
            } else if (!is_scalar($indicator)) {
                // Arrays, null and resources do not convert to a meaningful string (an array
                // would even raise an "Array to string conversion" warning), so report the
                // type of the value instead.
                $indicator = gettype($indicator);
            }
            throw new \moodle_exception('errorinvalidindicator', 'analytics', '', $indicator);
        }
        $indicatorclasses[] = $indicator->get_id();
    }

    return $indicatorclasses;
}
1229
/**
 * Clears the model training and prediction data.
 *
 * Executed after updating model critical elements like the time splitting method
 * or the indicators.
 *
 * @return void
 */
private function clear_model() {
    global $DB;

    // All these tables hold data linked to the model through its id.
    $conditions = array('modelid' => $this->model->id);
    $tables = array(
        'analytics_predict_ranges',
        'analytics_predictions',
        'analytics_train_samples',
        'analytics_used_files',
    );
    foreach ($tables as $table) {
        $DB->delete_records($table, $conditions);
    }

    // We don't expect people to clear models regularly and the cost of filling the cache is
    // 1 db read per context.
    \cache::make('core', 'contextwithinsights')->purge();
}
1251
1611308b
DM
/**
 * Increases system memory and time limits.
 *
 * @return void
 */
private function heavy_duty_mode() {
    // A memory_limit of -1 means that PHP is not limiting the memory, nothing to raise then.
    $unlimited = (ini_get('memory_limit') == -1);
    if (!$unlimited) {
        raise_memory_limit(MEMORY_HUGE);
    }

    \core_php_time_limit::raise();
}
369389c9 1263}