MDL-59060 analytics: Allow all predictions to be retrieved
[moodle.git] / analytics / classes / model.php
CommitLineData
369389c9
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
b94dbb55 18 * Prediction model representation.
369389c9
DM
19 *
20 * @package core_analytics
21 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace core_analytics;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
b94dbb55 30 * Prediction model representation.
369389c9
DM
31 *
32 * @package core_analytics
33 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36class model {
37
413f19bc
DM
/**
 * Status code: all as expected.
 */
const OK = 0;

/**
 * Status code: there was a problem.
 */
const GENERAL_ERROR = 1;

/**
 * Status code: no dataset to analyse.
 */
const NO_DATASET = 2;

/**
 * Status code: model with low prediction accuracy.
 *
 * NOTE(review): shares the numeric value 4 with ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD,
 * presumably because they are used in different contexts - confirm before combining them.
 */
const EVALUATE_LOW_SCORE = 4;

/**
 * Status code: not enough data to evaluate the model properly.
 *
 * NOTE(review): shares the numeric value 8 with ANALYSABLE_STATUS_INVALID_FOR_RANGEPROCESSORS.
 */
const EVALUATE_NOT_ENOUGH_DATA = 8;

/**
 * Invalid analysable for the time splitting method.
 */
const ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD = 4;

/**
 * Invalid analysable for all time splitting methods.
 */
const ANALYSABLE_STATUS_INVALID_FOR_RANGEPROCESSORS = 8;

/**
 * Invalid analysable for the target.
 */
const ANALYSABLE_STATUS_INVALID_FOR_TARGET = 16;

/**
 * Minimum score to consider a non-static prediction model as good.
 */
const MIN_SCORE = 0.7;

/**
 * Maximum standard deviation between different evaluation repetitions to consider that evaluation results are stable.
 */
const ACCEPTED_DEVIATION = 0.05;

/**
 * Number of evaluation repetitions.
 */
const EVALUATION_ITERATIONS = 10;

/**
 * Model data: the analytics_models record loaded by the constructor, or the object passed to it.
 *
 * @var \stdClass
 */
protected $model = null;

/**
 * Analyser instance; null until init_analyser() creates it.
 *
 * @var \core_analytics\local\analyser\base
 */
protected $analyser = null;

/**
 * Target instance; null until get_target() loads it for the first time.
 *
 * @var \core_analytics\local\target\base
 */
protected $target = null;

/**
 * Indicator instances indexed by their class name; null until get_indicators() loads them.
 *
 * @var \core_analytics\local\indicator\base[]
 */
protected $indicators = null;

/**
 * Unique Model id created from site info and last model modification.
 *
 * Reset to null by update() and enable() so that it is not reused after the model changes.
 *
 * @var string
 */
protected $uniqueid = null;
119
/**
 * Constructor.
 *
 * Accepts either the id of an existing model or an already loaded model object.
 * When a scalar is provided the analytics_models record with that id is fetched.
 *
 * @throws \dml_exception If a scalar id is provided and the record can not be read (MUST_EXIST).
 * @param int|\stdClass $model Model id or model data object.
 * @return void
 */
public function __construct($model) {
    global $DB;

    if (is_scalar($model)) {
        // MUST_EXIST makes get_record() throw when there is no such record, so the returned
        // value is always a record here and no extra "not found" check is required.
        $model = $DB->get_record('analytics_models', array('id' => $model), '*', MUST_EXIST);
    }
    $this->model = $model;
}
137
3a396286
DM
/**
 * Quick safety check to discard site models which required components are not available anymore.
 *
 * The model is considered available when its target can be loaded and the
 * analyser class that the target declares exists.
 *
 * @return bool
 */
public function is_available() {
    $target = $this->get_target();
    if (!$target) {
        return false;
    }

    // The analyser is defined by the target, so we only need to know that its class can be found.
    return class_exists($target->get_analyser_class());
}
157
/**
 * Gets the id of this model.
 *
 * @return int The id attribute of the model data object.
 */
public function get_id() {
    $modeldata = $this->model;
    return $modeldata->id;
}
166
/**
 * Gets the model data as a plain object.
 *
 * The returned object is the one held internally by this instance, not a copy.
 *
 * @return \stdClass
 */
public function get_model_obj() {
    $modeldata = $this->model;
    return $modeldata;
}
175
/**
 * Gets the target of this model.
 *
 * The target is resolved through the analytics manager the first time it is
 * requested and reused afterwards.
 *
 * @return \core_analytics\local\target\base
 */
public function get_target() {
    if ($this->target === null) {
        $this->target = \core_analytics\manager::get_target($this->model->target);
    }

    return $this->target;
}
190
/**
 * Gets the indicators used by this model.
 *
 * The model stores its indicators as a JSON encoded list of class names. Each of
 * them is resolved through the analytics manager; the ones that can not be loaded
 * are skipped with a developer debugging message.
 *
 * @throws \coding_exception If the stored list of indicators can not be decoded to an array.
 * @return \core_analytics\local\indicator\base[] Indexed by indicator class name.
 */
public function get_indicators() {
    if ($this->indicators !== null) {
        // Already loaded.
        return $this->indicators;
    }

    $decoded = json_decode($this->model->indicators);
    if (is_array($decoded) === false) {
        throw new \coding_exception('Model ' . $this->model->id . ' indicators can not be read');
    }

    $this->indicators = [];
    foreach ($decoded as $indicatorclass) {
        $indicator = \core_analytics\manager::get_indicator($indicatorclass);
        if (!$indicator) {
            debugging('Can\'t load ' . $indicatorclass . ' indicator', DEBUG_DEVELOPER);
            continue;
        }
        $this->indicators[$indicatorclass] = $indicator;
    }

    return $this->indicators;
}
219
/**
 * Gets all the indicators this model could use.
 *
 * Starts from every indicator known to the analytics manager and discards the ones
 * that do not pass the analyser requirements check. If no analyser has been set up
 * yet, one is initialised in evaluation mode.
 *
 * @return \core_analytics\local\indicator\base[]
 */
public function get_potential_indicators() {

    $candidates = \core_analytics\manager::get_all_indicators();

    if (empty($this->analyser)) {
        $this->init_analyser(['evaluation' => true]);
    }

    foreach ($candidates as $indicatorclass => $candidate) {
        $meetsrequirements = ($this->analyser->check_indicator_requirements($candidate) === true);
        if (!$meetsrequirements) {
            unset($candidates[$indicatorclass]);
        }
    }

    return $candidates;
}
242
/**
 * Gets the analyser of this model (its class is defined by the model target).
 *
 * If the analyser has not been set up yet it is initialised with no options.
 *
 * @return \core_analytics\local\analyser\base
 */
public function get_analyser() {
    if ($this->analyser === null) {
        // Not initialised yet, go for the default initialisation.
        $this->init_analyser();
    }

    return $this->analyser;
}
258
/**
 * Initialises the model analyser.
 *
 * In evaluation mode ($options['evaluation'] not empty) the analyser receives the time
 * splitting method specified in $options['timesplitting'] or, if none is specified, all
 * the enabled time splitting methods. Otherwise it receives the time splitting method
 * of the model.
 *
 * @throws \moodle_exception If there is no target, the model has no time splitting method (when not
 *                           evaluating) or there are no time splitting methods to use.
 * @throws \coding_exception If the analyser class defined by the target does not exist.
 * @param array $options
 * @return void
 */
protected function init_analyser($options = array()) {

    $target = $this->get_target();
    $indicators = $this->get_indicators();

    if (empty($target)) {
        throw new \moodle_exception('errornotarget', 'analytics');
    }

    if (!empty($options['evaluation'])) {
        // The evaluation process will run using all available time splitting methods unless one is specified.
        if (!empty($options['timesplitting'])) {
            $timesplitting = \core_analytics\manager::get_time_splitting($options['timesplitting']);
            $timesplittings = array($timesplitting->get_id() => $timesplitting);
        } else {
            $timesplittings = \core_analytics\manager::get_enabled_time_splitting_methods();
        }
    } else {

        if (empty($this->model->timesplitting)) {
            throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
        }

        // Returned as an array as all actions (evaluation, training and prediction) go through the same process.
        $timesplittings = array($this->model->timesplitting => $this->get_time_splitting());
    }

    if (empty($timesplittings)) {
        throw new \moodle_exception('errornotimesplittings', 'analytics');
    }

    $classname = $target->get_analyser_class();
    if (!class_exists($classname)) {
        // The exception has to be instantiated, throwing the result of a function call named
        // \coding_exception() would end up in an undefined function error instead.
        throw new \coding_exception($classname . ' class does not exists');
    }

    // Returns a \core_analytics\local\analyser\base class.
    $this->analyser = new $classname($this->model->id, $target, $indicators, $timesplittings, $options);
}
305
/**
 * Gets the time splitting method of this model.
 *
 * @return \core_analytics\local\time_splitting\base|false False if the model has no time splitting method set.
 */
public function get_time_splitting() {
    if (!empty($this->model->timesplitting)) {
        return \core_analytics\manager::get_time_splitting($this->model->timesplitting);
    }

    return false;
}
317
/**
 * Creates a new model and stores it in the analytics_models table.
 *
 * The new model is enabled if a time splitting method is provided and it is
 * flagged as trained straight away if it is a static model.
 *
 * @param \core_analytics\local\target\base $target
 * @param \core_analytics\local\indicator\base[] $indicators
 * @param string $timesplittingid The time splitting method id (its fully qualified class name)
 * @return \core_analytics\model
 */
public static function create(\core_analytics\local\target\base $target, array $indicators, $timesplittingid = false) {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $classes = self::indicator_classes($indicators);
    $timestamp = time();

    $record = (object)[
        'target' => $target->get_id(),
        'indicators' => json_encode($classes),
        'version' => $timestamp,
        'timecreated' => $timestamp,
        'timemodified' => $timestamp,
        'usermodified' => $USER->id,
    ];
    $newid = $DB->insert_record('analytics_models', $record);

    // Read the record back so the object also contains the database default values.
    $stored = $DB->get_record('analytics_models', ['id' => $newid], '*', MUST_EXIST);
    $instance = new static($stored);

    if ($timesplittingid) {
        $instance->enable($timesplittingid);
    }

    if ($instance->is_static()) {
        $instance->mark_as_trained();
    }

    return $instance;
}
360
/**
 * Updates the model and stores the changes.
 *
 * If the time splitting method or the list of indicators change, the model gets a
 * new version, its predictions and generated files are cleared and it is flagged
 * as not trained.
 *
 * @param int|bool $enabled
 * @param \core_analytics\local\indicator\base[] $indicators
 * @param string $timesplittingid
 * @return void
 */
public function update($enabled, $indicators, $timesplittingid = '') {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $timestamp = time();
    $encodedindicators = json_encode(self::indicator_classes($indicators));

    $unchanged = ($this->model->timesplitting === $timesplittingid &&
        $this->model->indicators === $encodedindicators);

    if (!$unchanged) {
        // A new version prevents data generated using different settings from being mixed up.
        $this->model->version = $timestamp;

        // Get rid of what the previous version of the model generated.
        $this->clear_model();
        \core_analytics\dataset_manager::clear_model_files($this->model->id);

        // The model needs to be trained again.
        $this->model->trained = 0;
    }

    $this->model->enabled = intval($enabled);
    $this->model->indicators = $encodedindicators;
    $this->model->timesplitting = $timesplittingid;
    $this->model->timemodified = $timestamp;
    $this->model->usermodified = $USER->id;

    $DB->update_record('analytics_models', $this->model);

    // Discard the unique id, it may have been used already.
    $this->uniqueid = null;
}
404
d16cf374
DM
/**
 * Deletes the model.
 *
 * The model is cleared first and its analytics_models record is removed afterwards.
 *
 * @return void
 */
public function delete() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $this->clear_model();

    $conditions = ['id' => $this->model->id];
    $DB->delete_records('analytics_models', $conditions);
}
418
/**
 * Evaluates the model.
 *
 * This method gets the site contents (through the analyser) creates a .csv dataset
 * with them and evaluates the model prediction accuracy multiple times using the
 * machine learning backend. It returns an object where the model score is the average
 * prediction accuracy of all executed evaluations.
 *
 * Static models are not evaluated, a single result with an OK status is returned for them.
 * All return paths return an array of results; results of evaluated datasets are indexed
 * by time splitting method id.
 *
 * @throws \moodle_exception If the model has no indicators.
 * @param array $options
 * @return \stdClass[]
 */
public function evaluate($options = array()) {

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('noevaluationbasedassumptions', 'analytics'));
        $result = new \stdClass();
        $result->status = self::OK;
        // Wrapped in an array so callers can iterate through the results no matter
        // which kind of model they are evaluating.
        return array($result);
    }

    $options['evaluation'] = true;
    $this->init_analyser($options);

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_labelled_data call so we get an early exception if it is not ready.
    $predictor = \core_analytics\manager::get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // No datasets generated.
    if (empty($datasets)) {
        $result = new \stdClass();
        $result->status = self::NO_DATASET;
        $result->info = $this->get_analyser()->get_logs();
        return array($result);
    }

    if (!PHPUNIT_TEST && CLI_SCRIPT) {
        echo PHP_EOL . get_string('processingsitecontents', 'analytics') . PHP_EOL;
    }

    $results = array();
    foreach ($datasets as $timesplittingid => $dataset) {

        $timesplitting = \core_analytics\manager::get_time_splitting($timesplittingid);

        $result = new \stdClass();

        // Backslashes are removed from the class name before it is used as part of the output directory.
        $dashestimesplittingid = str_replace('\\', '', $timesplittingid);
        $outputdir = $this->get_output_dir(array('evaluation', $dashestimesplittingid));

        // Evaluate the dataset, the deviation we accept in the results depends on the amount of iterations.
        $predictorresult = $predictor->evaluate($this->model->id, self::ACCEPTED_DEVIATION,
            self::EVALUATION_ITERATIONS, $dataset, $outputdir);

        $result->status = $predictorresult->status;
        $result->info = $predictorresult->info;

        if (isset($predictorresult->score)) {
            $result->score = $predictorresult->score;
        } else {
            // Prediction processors may return an error, default to 0 score in that case.
            $result->score = 0;
        }

        $dir = false;
        if (!empty($predictorresult->dir)) {
            $dir = $predictorresult->dir;
        }

        $result->logid = $this->log_result($timesplitting->get_id(), $result->score, $dir, $result->info);

        $results[$timesplitting->get_id()] = $result;
    }

    return $results;
}
503
/**
 * Trains the model with the site contents.
 *
 * A labelled dataset is obtained through the analyser and the file that belongs to
 * the model time splitting method is passed to the predictions processor. The file
 * is then flagged as used for training and the model is marked as trained if it
 * was not. Static models skip the whole process as they do not need training.
 *
 * @throws \moodle_exception If the model is not enabled, has no time splitting method or has no indicators.
 * @return \stdClass Object with a status attribute and, unless the model is static, an info attribute.
 */
public function train() {

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('notrainingbasedassumptions', 'analytics'));
        return (object)['status' => self::OK];
    }

    $ready = ($this->is_enabled() && !empty($this->model->timesplitting));
    if (!$ready) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Both the output directory and the predictions processor are requested before analysing
    // the site contents so that problems with any of them are reported as early as possible.
    $outputdir = $this->get_output_dir(['execution']);
    $predictor = \core_analytics\manager::get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // Nothing to train with if there is no file for the model time splitting method.
    if (empty($datasets) || empty($datasets[$this->model->timesplitting])) {
        return (object)[
            'status' => self::NO_DATASET,
            'info' => $this->get_analyser()->get_logs(),
        ];
    }
    $trainingfile = $datasets[$this->model->timesplitting];

    $predictorresult = $predictor->train($this->get_unique_id(), $trainingfile, $outputdir);

    $result = (object)[
        'status' => $predictorresult->status,
        'info' => $predictorresult->info,
    ];

    $this->flag_file_as_used($trainingfile, 'trained');

    if (!$this->model->trained) {
        // First time this model is trained.
        $this->mark_as_trained();
    }

    return $result;
}
568
/**
 * Gets predictions for the site contents.
 *
 * The analyser generates a dataset with the samples that are ready to receive
 * predictions. Static models get their predictions from the target, the rest of
 * models get them from the predictions processor. Prediction callbacks are then
 * executed, insights are triggered if the model uses them and the dataset file
 * is flagged as used for predictions.
 *
 * @throws \moodle_exception If the model is not enabled, has no time splitting method, has no
 *                           indicators or the dataset file was already used to get predictions.
 * @return \stdClass Object with status and info attributes, plus a predictions attribute when a dataset was processed.
 */
public function predict() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $ready = ($this->is_enabled() && !empty($this->model->timesplitting));
    if (!$ready) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // The output directory and the predictions processor (not needed by static models) are
    // requested before analysing the site contents so that problems are reported early.
    $outputdir = $this->get_output_dir(['execution']);
    if (!$this->is_static()) {
        $predictor = \core_analytics\manager::get_predictions_processor();
    }

    $unlabelled = $this->get_analyser()->get_unlabelled_data();

    // Nothing to predict if there is no file for the model time splitting method.
    if (empty($unlabelled) || empty($unlabelled[$this->model->timesplitting])) {
        return (object)[
            'status' => self::NO_DATASET,
            'info' => $this->get_analyser()->get_logs(),
        ];
    }
    $samplesfile = $unlabelled[$this->model->timesplitting];

    // The same file can not be used twice to get predictions.
    $usedfileparams = [
        'modelid' => $this->model->id,
        'fileid' => $samplesfile->get_id(),
        'action' => 'predicted',
    ];
    $alreadyused = $DB->get_record('analytics_used_files', $usedfileparams);
    if ($alreadyused) {
        throw new \moodle_exception('erroralreadypredict', 'analytics', '', $samplesfile->get_id());
    }

    $indicatorcalculations = \core_analytics\dataset_manager::get_structured_data($samplesfile);

    $result = new \stdClass();
    if ($this->is_static()) {
        // Predictions based on the assumptions defined by the target.
        $result->status = self::OK;
        $result->info = [];
        $result->predictions = $this->get_static_predictions($indicatorcalculations);
    } else {
        // Predictions coming from the machine learning backend.
        $predictorresult = $predictor->predict($this->get_unique_id(), $samplesfile, $outputdir);
        $result->status = $predictorresult->status;
        $result->info = $predictorresult->info;
        $result->predictions = $this->format_predictor_predictions($predictorresult);
    }

    $samplecontexts = [];
    if ($result->predictions) {
        $samplecontexts = $this->execute_prediction_callbacks($result->predictions, $indicatorcalculations);
    }

    if (!empty($samplecontexts) && $this->uses_insights()) {
        $this->trigger_insights($samplecontexts);
    }

    $this->flag_file_as_used($samplesfile, 'predicted');

    return $result;
}
651
/**
 * Formats the predictor results.
 *
 * Each prediction returned by the predictions processor is a list of 2 or 3 values:
 * the unique sample id, the prediction and, optionally, the prediction score. Samples
 * that come in any other shape are skipped with a developer debugging message.
 *
 * @param \stdClass $predictorresult The predictions processor result, predictions are read from its predictions attribute.
 * @return \stdClass[] Objects with prediction and predictionscore attributes indexed by unique sample id.
 */
private function format_predictor_predictions($predictorresult) {

    $predictions = array();
    if (empty($predictorresult->predictions)) {
        return $predictions;
    }

    foreach ($predictorresult->predictions as $sampleinfo) {

        // We parse each prediction.
        switch (count($sampleinfo)) {
            case 1:
                // For whatever reason the predictions processor could not process this sample, we
                // skip it and do nothing with it.
                debugging($this->model->id . ' model predictions processor could not process the sample with id ' .
                    $sampleinfo[0], DEBUG_DEVELOPER);
                // PHP considers switch a looping structure, a plain continue would behave like a break
                // and this sample would be stored using the values of the previous one.
                continue 2;
            case 2:
                // Prediction processors that do not return a prediction score will have the maximum prediction
                // score.
                list($uniquesampleid, $prediction) = $sampleinfo;
                $predictionscore = 1;
                break;
            case 3:
                list($uniquesampleid, $prediction, $predictionscore) = $sampleinfo;
                break;
            default:
                // We do not know how to read this sample, storing it would reuse the values of the previous one.
                debugging($this->model->id . ' model predictions processor returned a sample with an unexpected ' .
                    'number of values', DEBUG_DEVELOPER);
                continue 2;
        }
        $predictiondata = (object)['prediction' => $prediction, 'predictionscore' => $predictionscore];
        $predictions[$uniquesampleid] = $predictiondata;
    }

    return $predictions;
}
690
/**
 * Runs the target prediction callbacks.
 *
 * Only the predictions the target wants a callback for are processed: they are
 * stored in the database and the target prediction callback is executed for them.
 *
 * @param \stdClass[] $predictions Objects with prediction and predictionscore attributes indexed by unique sample id.
 * @param array $indicatorcalculations Indicators calculations indexed by unique sample id.
 * @return array The contexts of the stored predictions indexed by context id.
 */
protected function execute_prediction_callbacks($predictions, $indicatorcalculations) {

    // Contexts of all stored predictions, later used to limit which users will see them.
    $contexts = [];

    foreach ($predictions as $uniquesampleid => $item) {
        $value = $item->prediction;
        $score = $item->predictionscore;

        if (!$this->get_target()->triggers_callback($value, $score)) {
            // The target is not interested in this prediction.
            continue;
        }

        // Both the sample id and the range index are part of the unique sample id.
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        // Store the prediction along with the calculations it is based on.
        $encodedcalculations = json_encode($indicatorcalculations[$uniquesampleid]);
        $context = $this->save_prediction($sampleid, $rangeindex, $value, $score, $encodedcalculations);

        // Keep the context so insights (or whatever the target wants to do) can be generated later.
        $contexts[$context->id] = $context;

        $this->get_target()->prediction_callback($this->model->id, $sampleid, $rangeindex, $context, $value, $score);
    }

    return $contexts;
}
369389c9 724
1611308b
DM
/**
 * Generates the insights notifications and updates the contexts with insights cache.
 *
 * @param \context[] $samplecontexts
 * @return void
 */
protected function trigger_insights($samplecontexts) {

    // Let the target know that all predictions have been processed.
    $this->get_target()->generate_insight_notifications($this->model->id, $samplecontexts);

    $cache = \cache::make('core', 'contextwithinsights');
    foreach ($samplecontexts as $context) {
        $cachedmodelids = $cache->get($context->id);

        if ($cachedmodelids) {
            // Add this model to the list of models with insights in the context if it is not there yet.
            if (!in_array($this->get_id(), $cachedmodelids)) {
                $cachedmodelids[] = $this->get_id();
                $cache->set($context->id, $cachedmodelids);
            }
            continue;
        }

        // Nothing cached. We can not tell if that is because there are no insights in this context
        // or because the cache was purged, so we pay 1 db read to fill the cache up.
        $models = \core_analytics\manager::get_models_with_insights($context);
        $cache->set($context->id, array_keys($models));
    }
}
752
/**
 * Get predictions from a static model.
 *
 * Samples are grouped by analysable class and range index and the target calculates
 * each of these groups. Samples that the target discards or can not calculate are
 * removed from $indicatorcalculations (it is received by reference) so they are not
 * stored later as if they were calculated.
 *
 * @param array $indicatorcalculations Indicators calculations indexed by unique sample id, modified by this method.
 * @return \stdClass[] Objects with prediction and predictionscore attributes indexed by unique sample id.
 */
protected function get_static_predictions(&$indicatorcalculations) {

    // Group samples by analysable for \core_analytics\local\target::calculate.
    $analysables = array();
    // List all sampleids together.
    $sampleids = array();

    foreach ($indicatorcalculations as $uniquesampleid => $indicators) {
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
        $analysableclass = get_class($analysable);
        if (empty($analysables[$analysableclass])) {
            $analysables[$analysableclass] = array();
        }
        if (empty($analysables[$analysableclass][$rangeindex])) {
            $analysables[$analysableclass][$rangeindex] = (object)[
                'analysable' => $analysable,
                'indicatorsdata' => array(),
                'sampleids' => array()
            ];
        }
        // Using the sampleid as a key so we can easily merge indicators data later.
        $analysables[$analysableclass][$rangeindex]->indicatorsdata[$sampleid] = $indicators;
        // We could use indicatorsdata keys but the amount of redundant data is not that big and leaves code below cleaner.
        $analysables[$analysableclass][$rangeindex]->sampleids[$sampleid] = $sampleid;

        // Accumulate sample ids to get all their associated data in 1 single db query (analyser::get_samples).
        $sampleids[$sampleid] = $sampleid;
    }

    // Get all samples data.
    list($sampleids, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Calculate the targets.
    $predictions = array();
    foreach ($analysables as $analysableclass => $rangedata) {
        foreach ($rangedata as $rangeindex => $data) {

            // Attach samples data and calculated indicators data.
            $this->get_target()->clear_sample_data();
            $this->get_target()->add_sample_data($samplesdata);
            $this->get_target()->add_sample_data($data->indicatorsdata);

            // Append new elements (we can not get duplicates because sample-analysable relation is N-1).
            $range = $this->get_time_splitting()->get_range_by_index($rangeindex);
            $this->get_target()->filter_out_invalid_samples($data->sampleids, $data->analysable, false);
            $calculations = $this->get_target()->calculate($data->sampleids, $data->analysable, $range['start'], $range['end']);

            // Missing $indicatorcalculations values in $calculations are caused by is_valid_sample. We need to remove
            // these $uniquesampleid from $indicatorcalculations because otherwise they will be stored as calculated
            // by self::save_prediction. Only the samples of the group we have just calculated can be judged here,
            // $calculations contains nothing about samples in other ranges or groups so they must be kept untouched.
            $groupsamples = $data->indicatorsdata;
            $indicatorcalculations = array_filter($indicatorcalculations,
                function($indicators, $uniquesampleid) use ($calculations, $rangeindex, $groupsamples) {
                    list($sampleid, $samplerangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);
                    // Loose comparison on purpose, array keys are integers while inferred range indexes may be strings.
                    if ($samplerangeindex != $rangeindex || !array_key_exists($sampleid, $groupsamples)) {
                        return true;
                    }
                    return isset($calculations[$sampleid]);
                }, ARRAY_FILTER_USE_BOTH);

            foreach ($calculations as $sampleid => $value) {

                $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);

                // Null means that the target couldn't calculate the sample, we also remove them from $indicatorcalculations.
                if (is_null($value)) {
                    unset($indicatorcalculations[$uniquesampleid]);
                    continue;
                }

                // Even if static predictions are based on assumptions we flag them as 100% because they are 100%
                // true according to what the developer defined.
                $predictions[$uniquesampleid] = (object)['prediction' => $value, 'predictionscore' => 1];
            }
        }
    }
    return $predictions;
}
837
/**
 * Saves a prediction in the analytics_predictions table.
 *
 * The context stored with the prediction is the one the analyser returns as
 * the sample access context.
 *
 * @param int $sampleid
 * @param int $rangeindex
 * @param int $prediction
 * @param float $predictionscore
 * @param string $calculations
 * @return \context The context the prediction was stored with.
 */
protected function save_prediction($sampleid, $rangeindex, $prediction, $predictionscore, $calculations) {
    global $DB;

    $samplecontext = $this->get_analyser()->sample_access_context($sampleid);

    $row = (object)[
        'modelid' => $this->model->id,
        'contextid' => $samplecontext->id,
        'sampleid' => $sampleid,
        'rangeindex' => $rangeindex,
        'prediction' => $prediction,
        'predictionscore' => $predictionscore,
        'calculations' => $calculations,
        'timecreated' => time(),
    ];
    $DB->insert_record('analytics_predictions', $row);

    return $samplecontext;
}
866
/**
 * Enables the model, optionally switching it to the provided time splitting method.
 *
 * When a time splitting method different from the current one is provided it is
 * validated and set, and the model gets a new version.
 *
 * @throws \moodle_exception If the provided time splitting method is not valid or does not start with a backslash.
 * @param string $timesplittingid
 * @return void
 */
public function enable($timesplittingid = false) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $timestamp = time();

    $switching = ($timesplittingid && $timesplittingid !== $this->model->timesplitting);
    if ($switching) {

        // It has to be a valid time splitting class and its name has to be fully qualified.
        $valid = \core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base');
        if (!$valid || substr($timesplittingid, 0, 1) !== '\\') {
            throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
        }

        $this->model->timesplitting = $timesplittingid;
        $this->model->version = $timestamp;
    }

    $this->model->enabled = 1;
    // Note that timemodified is updated on every call, even if the time splitting method does not change.
    $this->model->timemodified = $timestamp;

    $DB->update_record('analytics_models', $this->model);

    // Discard the unique id, it may have been used already.
    $this->uniqueid = null;
}
902
/**
 * Whether this is a static model or not.
 *
 * It is the target who defines it: static models are the ones which targets
 * are based on assumptions instead of on machine learning backends results.
 *
 * @return bool
 */
public function is_static() {
    $target = $this->get_target();
    return (bool)$target->based_on_assumptions();
}
914
/**
 * Whether the model is enabled or not.
 *
 * @return bool The model enabled attribute as a boolean.
 */
public function is_enabled() {
    $enabled = (bool)$this->model->enabled;
    return $enabled;
}
923
/**
 * Whether the model is trained or not.
 *
 * Static models are always considered trained as they do not need training.
 *
 * @return bool
 */
public function is_trained() {
    if ((bool)$this->model->trained) {
        return true;
    }

    // Not flagged as trained, it only counts as trained if its target is based on assumptions.
    return $this->is_static();
}
933
/**
 * Flags the model as trained and stores the change.
 *
 * @return void
 */
public function mark_as_trained() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $modeldata = $this->model;
    $modeldata->trained = 1;
    $DB->update_record('analytics_models', $modeldata);
}
947
/**
 * Gets the contexts this model has predictions in.
 *
 * @return \stdClass[] Records containing the distinct contextid values found in analytics_predictions for this model.
 */
public function get_predictions_contexts() {
    global $DB;

    $params = [$this->model->id];
    $sql = "SELECT DISTINCT contextid FROM {analytics_predictions} WHERE modelid = ?";

    return $DB->get_records_sql($sql, $params);
}
959
f9e7447f
DM
/**
 * Tells whether this model has already produced predictions.
 *
 * The analytics_predict_ranges table is checked instead of analytics_predictions
 * because targets are able to ignore some predicted values and, when they do,
 * those predictions are never stored in the database.
 *
 * @return bool
 */
public function any_prediction_obtained() {
    global $DB;

    $conditions = array(
        'modelid' => $this->model->id,
        'timesplitting' => $this->model->timesplitting,
    );

    return $DB->record_exists('analytics_predict_ranges', $conditions);
}
974
/**
 * Tells whether the model generates insights, as declared by its target.
 *
 * @return bool
 */
public function uses_insights() {
    // The answer comes from a static method of the target class.
    $modeltarget = $this->get_target();

    return $modeltarget::uses_insights();
}
984
369389c9
DM
/**
 * Tells whether this model has at least one prediction stored for the given context.
 *
 * @param \context $context
 * @return bool
 */
public function predictions_exist(\context $context) {
    global $DB;

    // Any prediction of this model in the context counts, whatever its time range.
    $conditions = array(
        'modelid' => $this->model->id,
        'contextid' => $context->id,
    );

    return $DB->record_exists_select(
        'analytics_predictions',
        "modelid = :modelid AND contextid = :contextid",
        $conditions
    );
}
999
/**
 * Gets the predictions for this context.
 *
 * Only the prediction with the highest range index of each sample is considered and
 * predictions whose sample is not available any more are discarded. Pagination is
 * applied in PHP after that filtering, so the returned total counts every available
 * prediction, not just the ones in the requested page.
 *
 * @param \context $context
 * @param int|false $page The zero-based page of results to fetch. False for all results.
 * @param int $perpage The max number of results to fetch. Ignored if $page is false.
 * @return array($total, \core_analytics\prediction[]) The predictions are indexed by prediction id.
 */
public function get_predictions(\context $context, $page = false, $perpage = 100) {
    global $DB;

    \core_analytics\manager::check_can_list_insights($context);

    // Filters out previous predictions keeping only the last time range one.
    $sql = "SELECT tip.*
              FROM {analytics_predictions} tip
              JOIN (
                SELECT sampleid, max(rangeindex) AS rangeindex
                  FROM {analytics_predictions}
                 WHERE modelid = ? and contextid = ?
                GROUP BY sampleid
              ) tipsub
              ON tip.sampleid = tipsub.sampleid AND tip.rangeindex = tipsub.rangeindex
            WHERE tip.modelid = ? and tip.contextid = ?";
    $params = array($this->model->id, $context->id, $this->model->id, $context->id);

    $predictions = $DB->get_records_sql($sql, $params);
    if (!$predictions) {
        // Keep the documented array($total, $predictions) shape so callers can always destructure
        // the result; this is also what is returned when every sample gets filtered out below.
        return array(0, array());
    }

    // Get predicted samples' ids.
    $sampleids = array_map(function($prediction) {
        return $prediction->sampleid;
    }, $predictions);

    list($unused, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Pagination window; only meaningful when a page was requested.
    $paginate = ($page !== false);
    $offset = 0;
    $limit = 0;
    if ($paginate) {
        $offset = $page * $perpage;
        $limit = $offset + $perpage;
    }

    $total = 0;
    foreach ($predictions as $predictionid => $predictiondata) {

        $sampleid = $predictiondata->sampleid;

        // Filter out predictions which samples are not available anymore.
        if (empty($samplesdata[$sampleid])) {
            unset($predictions[$predictionid]);
            continue;
        }

        // Return paginated dataset - we cannot paginate in the DB because we post filter the list.
        if (!$paginate || ($total >= $offset && $total < $limit)) {
            // Replace \stdClass object by \core_analytics\prediction objects.
            $predictions[$predictionid] = new \core_analytics\prediction($predictiondata, $samplesdata[$sampleid]);
        } else {
            unset($predictions[$predictionid]);
        }

        $total++;
    }

    return array($total, $predictions);
}
1068
/**
 * Fetches, through the model's analyser, the data of the sample a prediction refers to.
 *
 * @throws \moodle_exception If the analyser returns no data for the sample.
 * @param \stdClass $predictionobj Object with a sampleid property.
 * @return array
 */
public function prediction_sample_data($predictionobj) {

    $sampleid = $predictionobj->sampleid;

    // The first element returned by get_samples() is not needed here.
    list($unused, $samplesdata) = $this->get_analyser()->get_samples(array($sampleid));

    if (!empty($samplesdata[$sampleid])) {
        return $samplesdata[$sampleid];
    }

    throw new \moodle_exception('errorsamplenotavailable', 'analytics');
}
1085
/**
 * Asks the model's analyser to describe the sample of the provided prediction.
 *
 * @param \core_analytics\prediction $prediction
 * @return array 2 elements: list(string, \renderable)
 */
public function prediction_sample_description(\core_analytics\prediction $prediction) {
    $analyser = $this->get_analyser();

    $sampleid = $prediction->get_prediction_data()->sampleid;
    $contextid = $prediction->get_prediction_data()->contextid;
    $sampledata = $prediction->get_sample_data();

    return $analyser->sample_description($sampleid, $contextid, $sampledata);
}
1096
/**
 * Builds, creates and returns the output directory used by prediction processors.
 *
 * The base directory is the 'modeloutputdir' analytics setting, falling back to
 * a 'models' directory inside $CFG->dataroot. The resulting structure is:
 * - Evaluation runs:
 *   models/$model->id/$model->version/evaluation/$model->timesplitting
 * - Training & prediction runs:
 *   models/$model->id/$model->version/execution
 *
 * @param array $subdirs Extra path components appended after the model version.
 * @return string
 */
protected function get_output_dir($subdirs = array()) {
    global $CFG;

    $basedir = get_config('analytics', 'modeloutputdir');
    if (empty($basedir)) {
        // Apply default value.
        $basedir = rtrim($CFG->dataroot, '/') . DIRECTORY_SEPARATOR . 'models';
    }

    // Base dir, then model id and version, then the requested subdirs.
    $parts = array($basedir, $this->model->id, $this->model->version);
    foreach ($subdirs as $subdir) {
        $parts[] = $subdir;
    }
    $outputdir = implode(DIRECTORY_SEPARATOR, $parts);

    make_writable_directory($outputdir);

    return $outputdir;
}
1130
/**
 * Returns a unique id for this model.
 *
 * The id is a SHA-1 hash of the site's wwwroot, dirroot and database prefix
 * combined with the model id and version. It is computed once and then kept in
 * $this->uniqueid until that property is reset to null.
 *
 * @return string
 */
public function get_unique_id() {
    global $CFG;

    if (is_null($this->uniqueid)) {
        // Site identifiers first, then the model id and its version.
        $components = array(
            $CFG->wwwroot,
            $CFG->dirroot,
            $CFG->prefix,
            $this->model->id,
            $this->model->version,
        );
        $this->uniqueid = sha1(implode('$$', $components));
    }

    return $this->uniqueid;
}
1152
/**
 * Exports the model data.
 *
 * The returned object is a copy of the model record where the target, the time
 * splitting method (when there is one) and the indicators are replaced by the
 * values returned by their get_name() methods.
 *
 * @return \stdClass
 */
public function export() {

    \core_analytics\manager::check_can_manage_models();

    // Work on a copy so the model record itself is left untouched.
    $exported = clone $this->model;
    $exported->target = $this->get_target()->get_name();

    $timesplitting = $this->get_time_splitting();
    if ($timesplitting) {
        $exported->timesplitting = $timesplitting->get_name();
    }

    $indicatornames = array();
    foreach ($this->get_indicators() as $indicator) {
        $indicatornames[] = $indicator->get_name();
    }
    $exported->indicators = $indicatornames;

    return $exported;
}
1175
584ffa4f
DM
/**
 * Returns the log records of this model, most recently created first.
 *
 * @param int $limitfrom Passed through to the database layer as the starting offset.
 * @param int $limitnum Passed through to the database layer as the number of records.
 * @return \stdClass[]
 */
public function get_logs($limitfrom = 0, $limitnum = 0) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $conditions = array('modelid' => $this->get_id());
    $sort = 'timecreated DESC';

    return $DB->get_records('analytics_models_log', $conditions, $sort, '*', $limitfrom, $limitnum);
}
1191
/**
 * Records in analytics_used_files that this model used the provided file for an action.
 *
 * @param \stored_file $file
 * @param string $action
 * @return void
 */
protected function flag_file_as_used(\stored_file $file, $action) {
    global $DB;

    $record = (object)array(
        'modelid' => $this->model->id,
        'fileid' => $file->get_id(),
        'action' => $action,
        'time' => time(),
    );

    $DB->insert_record('analytics_used_files', $record);
}
1209
/**
 * Stores an evaluation result in the analytics_models_log table.
 *
 * The model version, target and indicators are copied from the model record,
 * and the current user is stored as the author of the log entry.
 *
 * @param string $timesplittingid
 * @param float $score
 * @param string $dir
 * @param array $info Only stored, JSON encoded, when it is not empty.
 * @return int The inserted log id
 */
protected function log_result($timesplittingid, $score, $dir = false, $info = false) {
    global $DB, $USER;

    $entry = (object)array(
        'modelid' => $this->get_id(),
        'version' => $this->model->version,
        'target' => $this->model->target,
        'indicators' => $this->model->indicators,
        'timesplitting' => $timesplittingid,
        'dir' => $dir,
    );

    if ($info) {
        // Ensure it is not an associative array.
        $entry->info = json_encode(array_values($info));
    }

    $entry->score = $score;
    $entry->timecreated = time();
    $entry->usermodified = $USER->id;

    return $DB->insert_record('analytics_models_log', $entry);
}
1239
/**
 * Utility method that maps a list of indicator objects to the values of their get_id() methods.
 *
 * @throws \moodle_exception On the first element that is not a valid indicator.
 * @param \core_analytics\local\indicator\base[] $indicators
 * @return string[]
 */
private static function indicator_classes($indicators) {

    // What we want to check and store are the indicator classes not the keys.
    $classes = array();
    foreach ($indicators as $indicator) {
        if (\core_analytics\manager::is_valid($indicator, '\core_analytics\local\indicator\base')) {
            $classes[] = $indicator->get_id();
            continue;
        }

        // Invalid element: turn it into something printable for the error message.
        if (is_object($indicator)) {
            $indicator = '\\' . get_class($indicator);
        } else if (!is_scalar($indicator)) {
            $indicator = strval($indicator);
        }
        throw new \moodle_exception('errorinvalidindicator', 'analytics', '', $indicator);
    }

    return $classes;
}
1264
/**
 * Deletes the training and prediction data stored for this model.
 *
 * Executed after updating model critical elements like the time splitting method
 * or the indicators.
 *
 * @return void
 */
private function clear_model() {
    global $DB;

    $tables = array(
        'analytics_predict_ranges',
        'analytics_predictions',
        'analytics_train_samples',
        'analytics_used_files',
    );
    foreach ($tables as $table) {
        $DB->delete_records($table, array('modelid' => $this->model->id));
    }

    // The whole cache is purged: models are not expected to be cleared regularly
    // and refilling the cache costs 1 db read per context.
    \cache::make('core', 'contextwithinsights')->purge();
}
1286
1611308b
DM
/**
 * Raises the memory limit to MEMORY_HUGE, unless it is already set to -1, and raises the PHP time limit.
 *
 * @return void
 */
private function heavy_duty_mode() {
    $nomemorylimit = (ini_get('memory_limit') == -1);
    if (!$nomemorylimit) {
        raise_memory_limit(MEMORY_HUGE);
    }

    \core_php_time_limit::raise();
}
369389c9 1298}