MDL-59265 analytics: Remove dirroot from the model unique id
[moodle.git] / analytics / classes / model.php
CommitLineData
369389c9
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
b94dbb55 18 * Prediction model representation.
369389c9
DM
19 *
20 * @package core_analytics
21 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace core_analytics;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
b94dbb55 30 * Prediction model representation.
369389c9
DM
31 *
32 * @package core_analytics
33 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36class model {
37
413f19bc
DM
38 /**
39 * All as expected.
40 */
369389c9 41 const OK = 0;
413f19bc
DM
42
43 /**
44 * There was a problem.
45 */
369389c9 46 const GENERAL_ERROR = 1;
413f19bc
DM
47
48 /**
49 * No dataset to analyse.
50 */
369389c9
DM
51 const NO_DATASET = 2;
52
413f19bc
DM
53 /**
54 * Model with low prediction accuracy.
55 */
369389c9 56 const EVALUATE_LOW_SCORE = 4;
413f19bc
DM
57
58 /**
59 * Not enough data to evaluate the model properly.
60 */
369389c9
DM
61 const EVALUATE_NOT_ENOUGH_DATA = 8;
62
413f19bc
DM
63 /**
64 * Invalid analysable for the time splitting method.
65 */
66 const ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD = 4;
67
68 /**
69 * Invalid analysable for all time splitting methods.
70 */
369389c9 71 const ANALYSABLE_STATUS_INVALID_FOR_RANGEPROCESSORS = 8;
413f19bc
DM
72
73 /**
74 * Invalid analysable for the target
75 */
369389c9
DM
76 const ANALYSABLE_STATUS_INVALID_FOR_TARGET = 16;
77
413f19bc
DM
78 /**
79 * Minimum score to consider a non-static prediction model as good.
80 */
369389c9 81 const MIN_SCORE = 0.7;
413f19bc
DM
82
83 /**
84 * Maximum standard deviation between different evaluation repetitions to consider that evaluation results are stable.
85 */
369389c9 86 const ACCEPTED_DEVIATION = 0.05;
413f19bc
DM
87
88 /**
89 * Number of evaluation repetitions.
90 */
369389c9
DM
91 const EVALUATION_ITERATIONS = 10;
92
93 /**
94 * @var \stdClass
95 */
96 protected $model = null;
97
98 /**
99 * @var \core_analytics\local\analyser\base
100 */
101 protected $analyser = null;
102
103 /**
104 * @var \core_analytics\local\target\base
105 */
106 protected $target = null;
107
108 /**
109 * @var \core_analytics\local\indicator\base[]
110 */
111 protected $indicators = null;
112
113 /**
114 * Unique Model id created from site info and last model modification.
115 *
116 * @var string
117 */
118 protected $uniqueid = null;
119
120 /**
1cc2b4ba 121 * Constructor.
369389c9 122 *
1cc2b4ba 123 * @param int|\stdClass $model
369389c9
DM
124 * @return void
125 */
public function __construct($model) {
    global $DB;

    // A scalar value is treated as a model id and resolved to its database record;
    // anything else is stored as received.
    if (is_scalar($model)) {
        // MUST_EXIST makes get_record() throw when no record matches, so there is no
        // falsy return value to check for afterwards.
        $model = $DB->get_record('analytics_models', array('id' => $model), '*', MUST_EXIST);
    }
    $this->model = $model;
}
137
3a396286
DM
138 /**
139 * Quick safety check to discard site models whose required components are not available anymore.
140 *
141 * @return bool
142 */
public function is_available() {
    // Without a target the model can not be used.
    $modeltarget = $this->get_target();
    if (!$modeltarget) {
        return false;
    }

    // The analyser class named by the target has to be loadable as well.
    return class_exists($modeltarget->get_analyser_class());
}
156
369389c9 157 /**
1cc2b4ba 158 * Returns the model id.
369389c9
DM
159 *
160 * @return int
161 */
public function get_id() {
    // The id comes straight from the stored model record.
    $record = $this->model;
    return $record->id;
}
165
166 /**
1cc2b4ba 167 * Returns a plain \stdClass with the model data.
369389c9
DM
168 *
169 * @return \stdClass
170 */
public function get_model_obj() {
    // The stored record itself is returned, not a copy of it.
    $record = $this->model;
    return $record;
}
174
175 /**
1cc2b4ba 176 * Returns the model target.
369389c9
DM
177 *
178 * @return \core_analytics\local\target\base
179 */
public function get_target() {
    // Ask the manager for the target instance only while none has been stored yet.
    if ($this->target === null) {
        $this->target = \core_analytics\manager::get_target($this->model->target);
    }

    return $this->target;
}
189
190 /**
1cc2b4ba 191 * Returns the model indicators.
369389c9
DM
192 *
193 * @return \core_analytics\local\indicator\base[]
194 */
public function get_indicators() {
    if ($this->indicators === null) {

        // The model record stores the indicators as a JSON encoded list of class names.
        $classnames = json_decode($this->model->indicators);
        if (!is_array($classnames)) {
            throw new \coding_exception('Model ' . $this->model->id . ' indicators can not be read');
        }

        $this->indicators = array();
        foreach ($classnames as $classname) {
            $indicator = \core_analytics\manager::get_indicator($classname);
            if (!$indicator) {
                // Indicators that can not be loaded are reported to developers and left out.
                debugging('Can\'t load ' . $classname . ' indicator', DEBUG_DEVELOPER);
                continue;
            }
            $this->indicators[$classname] = $indicator;
        }
    }

    return $this->indicators;
}
218
219 /**
220 * Returns the list of indicators that could potentially be used by the model target.
221 *
222 * It includes the indicators that are part of the model.
223 *
a40952d3 224 * @return \core_analytics\local\indicator\base[]
369389c9
DM
225 */
public function get_potential_indicators() {

    $allindicators = \core_analytics\manager::get_all_indicators();

    // An analyser is required to check the indicators; initialise one in evaluation mode if needed.
    if (empty($this->analyser)) {
        $this->init_analyser(array('evaluation' => true));
    }

    // Keep only the indicators whose requirements check returns exactly true (keys are preserved).
    return array_filter($allindicators, function($indicator) {
        return $this->analyser->check_indicator_requirements($indicator) === true;
    });
}
241
242 /**
1cc2b4ba 243 * Returns the model analyser (defined by the model target).
369389c9
DM
244 *
245 * @return \core_analytics\local\analyser\base
246 */
public function get_analyser() {
    if ($this->analyser === null) {
        // Not initialised yet: default initialisation with no options.
        $this->init_analyser();
    }

    return $this->analyser;
}
257
258 /**
1cc2b4ba 259 * Initialises the model analyser.
369389c9 260 *
1cc2b4ba 261 * @throws \coding_exception
369389c9
DM
262 * @param array $options
263 * @return void
264 */
protected function init_analyser($options = array()) {

    $target = $this->get_target();
    $indicators = $this->get_indicators();

    if (empty($target)) {
        throw new \moodle_exception('errornotarget', 'analytics');
    }

    $evaluating = !empty($options['evaluation']);

    if (!$evaluating) {
        // Outside evaluation the model must have its own time splitting method.
        if (empty($this->model->timesplitting)) {
            throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
        }

        // Wrapped in an array because evaluation, training and prediction share the same process.
        $timesplittings = array($this->model->timesplitting => $this->get_time_splitting());

    } else if (!empty($options['timesplitting'])) {
        // Evaluation restricted to the requested time splitting method.
        $requested = \core_analytics\manager::get_time_splitting($options['timesplitting']);
        $timesplittings = array($requested->get_id() => $requested);

    } else {
        // Evaluation using all enabled time splitting methods.
        $timesplittings = \core_analytics\manager::get_enabled_time_splitting_methods();
    }

    if (empty($timesplittings)) {
        throw new \moodle_exception('errornotimesplittings', 'analytics');
    }

    if ($evaluating) {
        // Flag every time splitting method as running in evaluation mode.
        foreach ($timesplittings as $method) {
            $method->set_evaluating(true);
        }
    }

    $analyserclass = $target->get_analyser_class();
    if (!class_exists($analyserclass)) {
        throw new \coding_exception($analyserclass . ' class does not exists');
    }

    // The analyser class is defined by the target; it is a \core_analytics\local\analyser\base class.
    $this->analyser = new $analyserclass($this->model->id, $target, $indicators, $timesplittings, $options);
}
310
311 /**
1cc2b4ba 312 * Returns the model time splitting method.
369389c9 313 *
1cc2b4ba 314 * @return \core_analytics\local\time_splitting\base|false Returns false if no time splitting.
369389c9
DM
315 */
public function get_time_splitting() {
    // False when the model has no time splitting method set, the manager's instance otherwise.
    return empty($this->model->timesplitting)
        ? false
        : \core_analytics\manager::get_time_splitting($this->model->timesplitting);
}
322
323 /**
a40952d3 324 * Creates a new model. Enables it if $timesplittingid is specified.
369389c9
DM
325 *
326 * @param \core_analytics\local\target\base $target
327 * @param \core_analytics\local\indicator\base[] $indicators
a40952d3 328 * @param string $timesplittingid The time splitting method id (its fully qualified class name)
369389c9
DM
329 * @return \core_analytics\model
330 */
public static function create(\core_analytics\local\target\base $target, array $indicators, $timesplittingid = false) {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $indicatorclasses = self::indicator_classes($indicators);
    $now = time();

    // The creation time is also used as the initial model version.
    $record = (object)array(
        'target' => $target->get_id(),
        'indicators' => json_encode($indicatorclasses),
        'version' => $now,
        'timecreated' => $now,
        'timemodified' => $now,
        'usermodified' => $USER->id,
    );
    $newid = $DB->insert_record('analytics_models', $record);

    // Read the record back so the database defaults are included.
    $stored = $DB->get_record('analytics_models', array('id' => $newid), '*', MUST_EXIST);
    $model = new static($stored);

    // Providing a time splitting method enables the model straight away.
    if ($timesplittingid) {
        $model->enable($timesplittingid);
    }

    // Static models are flagged as trained from the start.
    if ($model->is_static()) {
        $model->mark_as_trained();
    }

    return $model;
}
365
e709e544
DM
366 /**
367 * Does this model exist?
368 *
369 * If no indicators are provided it considers any model with the provided
370 * target a match.
371 *
372 * @param \core_analytics\local\target\base $target
373 * @param \core_analytics\local\indicator\base[]|false $indicators
374 * @return bool
375 */
public static function exists(\core_analytics\local\target\base $target, $indicators = false) {
    global $DB;

    $existingmodels = $DB->get_records('analytics_models', array('target' => $target->get_id()));

    if (!$indicators) {
        // No indicators to compare: any model using this target is a match. Returning here also
        // prevents a non-array value (false) from reaching array_keys() when the target has no models.
        return !empty($existingmodels);
    }

    // Compare sorted indicator ids so the order in which indicators were provided does not matter.
    $indicatorids = array_keys($indicators);
    sort($indicatorids);

    foreach ($existingmodels as $modelobj) {
        $model = new \core_analytics\model($modelobj);
        $modelindicatorids = array_keys($model->get_indicators());
        sort($modelindicatorids);

        if ($indicatorids === $modelindicatorids) {
            return true;
        }
    }
    return false;
}
399
a40952d3 400 /**
1cc2b4ba 401 * Updates the model.
a40952d3
DM
402 *
403 * @param int|bool $enabled
5c140ac4
DM
404 * @param \core_analytics\local\indicator\base[]|false $indicators False to respect current indicators
405 * @param string|false $timesplittingid False to respect current time splitting method
a40952d3
DM
406 * @return void
407 */
public function update($enabled, $indicators = false, $timesplittingid = '') {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $now = time();

    // False means that the current indicators are respected.
    $indicatorsstr = ($indicators === false)
        ? $this->model->indicators
        : json_encode(self::indicator_classes($indicators));

    // False means that the current time splitting method is respected.
    if ($timesplittingid === false) {
        $timesplittingid = $this->model->timesplitting;
    }

    $timesplittingchanged = ($this->model->timesplitting !== $timesplittingid);
    $indicatorschanged = ($this->model->indicators !== $indicatorsstr);

    if ($timesplittingchanged || $indicatorschanged) {
        // A new version keeps data from different time splittings from being mixed up.
        $this->model->version = $now;

        // Generated predictions and generated files are discarded.
        $this->clear_model();
        \core_analytics\dataset_manager::clear_model_files($this->model->id);

        // The trained flag is reset.
        $this->model->trained = 0;
    }

    $this->model->enabled = intval($enabled);
    $this->model->indicators = $indicatorsstr;
    $this->model->timesplitting = $timesplittingid;
    $this->model->timemodified = $now;
    $this->model->usermodified = $USER->id;

    $DB->update_record('analytics_models', $this->model);

    // The unique id may have been used already, so it is reset.
    $this->uniqueid = null;
}
453
d16cf374
DM
454 /**
455 * Removes the model.
456 *
457 * @return void
458 */
public function delete() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    // Clear the model first, then delete its record.
    $this->clear_model();
    $conditions = array('id' => $this->model->id);
    $DB->delete_records('analytics_models', $conditions);
}
467
369389c9 468 /**
1cc2b4ba 469 * Evaluates the model.
369389c9 470 *
1cc2b4ba
DM
471 * This method gets the site contents (through the analyser) creates a .csv dataset
472 * with them and evaluates the model prediction accuracy multiple times using the
473 * machine learning backend. It returns an object where the model score is the average
474 * prediction accuracy of all executed evaluations.
369389c9
DM
475 *
476 * @param array $options
477 * @return \stdClass[]
478 */
public function evaluate($options = array()) {

    \core_analytics\manager::check_can_manage_models();

    // Static models are not evaluated: a log entry is added and a NO_DATASET result is returned.
    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('noevaluationbasedassumptions', 'analytics'));
        $staticresult = new \stdClass();
        $staticresult->status = self::NO_DATASET;
        return array($this->get_time_splitting()->get_id() => $staticresult);
    }

    // The analyser is always re-initialised in evaluation mode.
    $options['evaluation'] = true;
    $this->init_analyser($options);

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Requested before get_labelled_data so we get an early exception if it is not ready.
    $predictor = \core_analytics\manager::get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    if (empty($datasets)) {
        // No datasets generated, return the analyser logs as info.
        $nodataset = new \stdClass();
        $nodataset->status = self::NO_DATASET;
        $nodataset->info = $this->get_analyser()->get_logs();
        return array($nodataset);
    }

    if (!PHPUNIT_TEST && CLI_SCRIPT) {
        echo PHP_EOL . get_string('processingsitecontents', 'analytics') . PHP_EOL;
    }

    $results = array();
    foreach ($datasets as $timesplittingid => $dataset) {

        $timesplitting = \core_analytics\manager::get_time_splitting($timesplittingid);

        // Backslashes are removed from the class name to build the output directory.
        $cleantimesplittingid = str_replace('\\', '', $timesplittingid);
        $outputdir = $this->get_output_dir(array('evaluation', $cleantimesplittingid));

        // Evaluate the dataset, the deviation we accept in the results depends on the amount of iterations.
        $predictorresult = $predictor->evaluate($this->model->id, self::ACCEPTED_DEVIATION,
            self::EVALUATION_ITERATIONS, $dataset, $outputdir);

        $result = new \stdClass();
        $result->status = $predictorresult->status;
        $result->info = $predictorresult->info;

        // Prediction processors may return an error, default to 0 score in that case.
        $result->score = isset($predictorresult->score) ? $predictorresult->score : 0;

        $dir = !empty($predictorresult->dir) ? $predictorresult->dir : false;

        $result->logid = $this->log_result($timesplitting->get_id(), $result->score, $dir, $result->info);

        $results[$timesplitting->get_id()] = $result;
    }

    return $results;
}
552
553 /**
1cc2b4ba
DM
554 * Trains the model using the site contents.
555 *
556 * This method prepares a dataset from the site contents (through the analyser)
557 * and passes it to the machine learning backends. Static models are skipped as
558 * they do not require training.
369389c9
DM
559 *
560 * @return \stdClass
561 */
public function train() {

    \core_analytics\manager::check_can_manage_models();

    // Static models are skipped: a log entry is added and an OK status is returned.
    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('notrainingbasedassumptions', 'analytics'));
        $skipped = new \stdClass();
        $skipped->status = self::OK;
        return $skipped;
    }

    if (!$this->is_enabled() || empty($this->model->timesplitting)) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Both requested before get_labelled_data so we get an early exception if the output
    // directory is not writable or the predictions processor is not ready.
    $outputdir = $this->get_output_dir(array('execution'));
    $predictor = \core_analytics\manager::get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // No training if there is no dataset for the model time splitting method.
    if (empty($datasets) || empty($datasets[$this->model->timesplitting])) {
        return (object)array(
            'status' => self::NO_DATASET,
            'info' => $this->get_analyser()->get_logs(),
        );
    }
    $samplesfile = $datasets[$this->model->timesplitting];

    // Train using the dataset.
    $predictorresult = $predictor->train($this->get_unique_id(), $samplesfile, $outputdir);

    $result = new \stdClass();
    $result->status = $predictorresult->status;
    $result->info = $predictorresult->info;

    $this->flag_file_as_used($samplesfile, 'trained');

    // Mark the model as trained if it wasn't.
    if (!$this->model->trained) {
        $this->mark_as_trained();
    }

    return $result;
}
617
618 /**
1cc2b4ba
DM
619 * Get predictions from the site contents.
620 *
621 * It analyses the site contents (through analyser classes) looking for samples
622 * ready to receive predictions. It generates a dataset with all samples ready to
623 * get predictions and it passes it to the machine learning backends or to the
624 * targets based on assumptions to get the predictions.
369389c9
DM
625 *
626 * @return \stdClass
627 */
public function predict() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    if (!$this->is_enabled() || empty($this->model->timesplitting)) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Requested before get_unlabelled_data so we get an early exception if it is not writable.
    $outputdir = $this->get_output_dir(array('execution'));

    // Requested before get_unlabelled_data so we get an early exception if it is not ready.
    // Static models do not use a predictions processor.
    if (!$this->is_static()) {
        $predictor = \core_analytics\manager::get_predictions_processor();
    }

    $samplesdata = $this->get_analyser()->get_unlabelled_data();

    // Nothing to predict if there is no samples file for the model time splitting method.
    if (empty($samplesdata) || empty($samplesdata[$this->model->timesplitting])) {
        return (object)array(
            'status' => self::NO_DATASET,
            'info' => $this->get_analyser()->get_logs(),
        );
    }
    $samplesfile = $samplesdata[$this->model->timesplitting];

    // An exception is thrown if this file was already used to get predictions.
    $usedfileparams = array(
        'modelid' => $this->model->id,
        'fileid' => $samplesfile->get_id(),
        'action' => 'predicted',
    );
    if ($DB->get_record('analytics_used_files', $usedfileparams)) {
        throw new \moodle_exception('erroralreadypredict', 'analytics', '', $samplesfile->get_id());
    }

    $indicatorcalculations = \core_analytics\dataset_manager::get_structured_data($samplesfile);

    // Prepare the results object.
    $result = new \stdClass();

    if (!$this->is_static()) {
        // Prediction process runs on the machine learning backend.
        $predictorresult = $predictor->predict($this->get_unique_id(), $samplesfile, $outputdir);
        $result->status = $predictorresult->status;
        $result->info = $predictorresult->info;
        $result->predictions = $this->format_predictor_predictions($predictorresult);

    } else {
        // Prediction based on assumptions.
        $result->status = self::OK;
        $result->info = [];
        $result->predictions = $this->get_static_predictions($indicatorcalculations);
    }

    // Callbacks only run when there are predictions; they return the contexts of the affected samples.
    $samplecontexts = array();
    if ($result->predictions) {
        $samplecontexts = $this->execute_prediction_callbacks($result->predictions, $indicatorcalculations);
    }

    if (!empty($samplecontexts) && $this->uses_insights()) {
        $this->trigger_insights($samplecontexts);
    }

    $this->flag_file_as_used($samplesfile, 'predicted');

    return $result;
}
700
/**
 * Formats the predictor results.
 *
 * Each item in the predictions processor output is converted to an object with
 * 'prediction' and 'predictionscore' properties, indexed by its unique sample id.
 * Items that can not be parsed are skipped.
 *
 * @param \stdClass $predictorresult Predictions processor result, its 'predictions' property is read.
 * @return \stdClass[]
 */
private function format_predictor_predictions($predictorresult) {

    $predictions = array();
    if ($predictorresult->predictions) {
        foreach ($predictorresult->predictions as $sampleinfo) {

            // We parse each prediction.
            switch (count($sampleinfo)) {
                case 1:
                    // For whatever reason the predictions processor could not process this sample, we
                    // skip it and do nothing with it.
                    debugging($this->model->id . ' model predictions processor could not process the sample with id ' .
                        $sampleinfo[0], DEBUG_DEVELOPER);
                    // A plain "continue" inside a switch behaves like "break" in PHP, which would store
                    // this sample using undefined or stale values. "continue 2" moves to the next sample.
                    continue 2;
                case 2:
                    // Prediction processors that do not return a prediction score will have the maximum prediction
                    // score.
                    list($uniquesampleid, $prediction) = $sampleinfo;
                    $predictionscore = 1;
                    break;
                case 3:
                    list($uniquesampleid, $prediction, $predictionscore) = $sampleinfo;
                    break;
                default:
                    // Unexpected number of elements, there is nothing reliable to store for this sample.
                    continue 2;
            }
            $predictiondata = (object)['prediction' => $prediction, 'predictionscore' => $predictionscore];
            $predictions[$uniquesampleid] = $predictiondata;
        }
    }
    return $predictions;
}
739
740 /**
741 * Execute the prediction callbacks defined by the target.
742 *
743 * @param \stdClass[] $predictions
413f19bc 744 * @param array $indicatorcalculations
1611308b
DM
745 * @return array
746 */
protected function execute_prediction_callbacks($predictions, $indicatorcalculations) {

    // Contexts of all stored predictions, indexed by context id. They are used to limit
    // which users will see those predictions.
    $contexts = array();

    foreach ($predictions as $uniquesampleid => $item) {

        // Only predictions that trigger the target callback are stored.
        if (!$this->get_target()->triggers_callback($item->prediction, $item->predictionscore)) {
            continue;
        }

        // The unique sample id contains both the sampleid and the rangeindex.
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        // Store the predicted values along with the JSON encoded indicator calculations.
        $calculations = json_encode($indicatorcalculations[$uniquesampleid]);
        $context = $this->save_prediction($sampleid, $rangeindex, $item->prediction,
            $item->predictionscore, $calculations);

        // Keep the context to later generate insights or whatever action the target wants to perform.
        $contexts[$context->id] = $context;

        $this->get_target()->prediction_callback($this->model->id, $sampleid, $rangeindex, $context,
            $item->prediction, $item->predictionscore);
    }

    return $contexts;
}
369389c9 773
1611308b
DM
774 /**
775 * Generates insights and updates the cache.
776 *
777 * @param \context[] $samplecontexts
778 * @return void
779 */
protected function trigger_insights($samplecontexts) {

    // Notify the target that all predictions have been processed.
    $this->get_target()->generate_insight_notifications($this->model->id, $samplecontexts);

    // Update the cache of models with insights for each context.
    $cache = \cache::make('core', 'contextwithinsights');
    foreach ($samplecontexts as $context) {
        $cachedmodelids = $cache->get($context->id);

        if (!$cachedmodelids) {
            // The cache is empty, but we don't know if it is empty because there are no insights
            // in this context or because cache/s have been purged, we need to be conservative and
            // "pay" 1 db read to fill up the cache.
            $modelswithinsights = \core_analytics\manager::get_models_with_insights($context);
            $cache->set($context->id, array_keys($modelswithinsights));
            continue;
        }

        if (in_array($this->get_id(), $cachedmodelids)) {
            // This model is already listed for the context.
            continue;
        }

        $cachedmodelids[] = $this->get_id();
        $cache->set($context->id, $cachedmodelids);
    }
}
801
a40952d3 802 /**
1611308b 803 * Get predictions from a static model.
a40952d3
DM
804 *
805 * @param array $indicatorcalculations
806 * @return \stdClass[]
807 */
protected function get_static_predictions(&$indicatorcalculations) {
    // Overview: samples are grouped by analysable class and range index, the target calculates
    // a value for each group, and every non-null value becomes a prediction with score 1.
    // $indicatorcalculations is received by reference and entries are removed from it along the way.

    // Group samples by analysable for \core_analytics\local\target::calculate.
    $analysables = array();
    // List all sampleids together.
    $sampleids = array();

    foreach ($indicatorcalculations as $uniquesampleid => $indicators) {
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
        // NOTE(review): groups are keyed by the analysable class name, so the analysable stored below
        // is the first one seen for that class and range index - confirm that all samples of a class
        // are expected to share the same analysable.
        $analysableclass = get_class($analysable);
        if (empty($analysables[$analysableclass])) {
            $analysables[$analysableclass] = array();
        }
        if (empty($analysables[$analysableclass][$rangeindex])) {
            $analysables[$analysableclass][$rangeindex] = (object)[
                'analysable' => $analysable,
                'indicatorsdata' => array(),
                'sampleids' => array()
            ];
        }
        // Using the sampleid as a key so we can easily merge indicators data later.
        $analysables[$analysableclass][$rangeindex]->indicatorsdata[$sampleid] = $indicators;
        // We could use indicatorsdata keys but the amount of redundant data is not that big and leaves code below cleaner.
        $analysables[$analysableclass][$rangeindex]->sampleids[$sampleid] = $sampleid;

        // Accumulate sample ids to get all their associated data in 1 single db query (analyser::get_samples).
        $sampleids[$sampleid] = $sampleid;
    }

    // Get all samples data.
    list($sampleids, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Calculate the targets.
    $predictions = array();
    foreach ($analysables as $analysableclass => $rangedata) {
        foreach ($rangedata as $rangeindex => $data) {

            // Attach samples data and calculated indicators data.
            $this->get_target()->clear_sample_data();
            $this->get_target()->add_sample_data($samplesdata);
            $this->get_target()->add_sample_data($data->indicatorsdata);

            // Append new elements (we can not get duplicates because sample-analysable relation is N-1).
            $range = $this->get_time_splitting()->get_range_by_index($rangeindex);
            // NOTE(review): the return value is not used, presumably $data->sampleids is modified
            // by reference - verify against the target implementation.
            $this->get_target()->filter_out_invalid_samples($data->sampleids, $data->analysable, false);
            $calculations = $this->get_target()->calculate($data->sampleids, $data->analysable, $range['start'], $range['end']);

            // Missing $indicatorcalculations values in $calculations are caused by is_valid_sample. We need to remove
            // these $uniquesampleid from $indicatorcalculations because otherwise they will be stored as calculated
            // by self::save_prediction.
            // NOTE(review): the filter runs over the whole $indicatorcalculations array using only this
            // group's $calculations, so entries belonging to other groups whose sample id is not in
            // $calculations are removed too - confirm this is intended when there are several groups.
            $indicatorcalculations = array_filter($indicatorcalculations, function($indicators, $uniquesampleid) use ($calculations) {
                list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);
                if (!isset($calculations[$sampleid])) {
                    return false;
                }
                return true;
            }, ARRAY_FILTER_USE_BOTH);

            foreach ($calculations as $sampleid => $value) {

                // Rebuild the unique sample id used as key in $indicatorcalculations and $predictions.
                $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);

                // Null means that the target couldn't calculate the sample, we also remove them from $indicatorcalculations.
                if (is_null($calculations[$sampleid])) {
                    unset($indicatorcalculations[$uniquesampleid]);
                    continue;
                }

                // Even if static predictions are based on assumptions we flag them as 100% because they are 100%
                // true according to what the developer defined.
                $predictions[$uniquesampleid] = (object)['prediction' => $value, 'predictionscore' => 1];
            }
        }
    }
    return $predictions;
}
886
369389c9 887 /**
1cc2b4ba 888 * Stores the prediction in the database.
369389c9
DM
889 *
890 * @param int $sampleid
891 * @param int $rangeindex
892 * @param int $prediction
893 * @param float $predictionscore
894 * @param string $calculations
895 * @return \context
896 */
protected function save_prediction($sampleid, $rangeindex, $prediction, $predictionscore, $calculations) {
    global $DB;

    // The context comes from the analyser, it is stored with the prediction and returned to the caller.
    $samplecontext = $this->get_analyser()->sample_access_context($sampleid);

    $row = (object)array(
        'modelid' => $this->model->id,
        'contextid' => $samplecontext->id,
        'sampleid' => $sampleid,
        'rangeindex' => $rangeindex,
        'prediction' => $prediction,
        'predictionscore' => $predictionscore,
        'calculations' => $calculations,
        'timecreated' => time(),
    );
    $DB->insert_record('analytics_predictions', $row);

    return $samplecontext;
}
915
916 /**
1cc2b4ba 917 * Enables the model using the provided time splitting method.
369389c9 918 *
5c140ac4 919 * @param string|false $timesplittingid False to respect the current time splitting method.
369389c9
DM
920 * @return void
921 */
public function enable($timesplittingid = false) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $now = time();

    // The time splitting method only changes when one is provided and it differs from the current one.
    $changing = $timesplittingid && $timesplittingid !== $this->model->timesplitting;
    if ($changing) {

        // It must be a valid time splitting class and its name must start with a backslash.
        $isvalid = \core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base');
        if (!$isvalid || substr($timesplittingid, 0, 1) !== '\\') {
            throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
        }

        // A different time splitting method also means a new model version.
        $this->model->timesplitting = $timesplittingid;
        $this->model->version = $now;
    }

    $this->model->enabled = 1;
    $this->model->timemodified = $now;

    // NOTE(review): the original comment here said timemodified is intentionally not always updated
    // (reserved for target, indicators or timesplitting updates), but it is set unconditionally
    // above - confirm which one is intended.
    $DB->update_record('analytics_models', $this->model);

    // The unique id may have been used already, so it is reset.
    $this->uniqueid = null;
}
951
/**
 * Tells whether this is a static model, as reported by its target.
 *
 * Static models are based on assumptions instead of on machine learning
 * backends results.
 *
 * @return bool
 */
public function is_static() {
    $target = $this->get_target();
    return (bool)$target->based_on_assumptions();
}
963
/**
 * Tells whether the model record is flagged as enabled.
 *
 * @return bool
 */
public function is_enabled() {
    $enabled = $this->model->enabled;
    return $enabled ? true : false;
}
972
/**
 * Tells whether the model can be considered trained.
 *
 * @return bool
 */
public function is_trained() {
    if ($this->model->trained) {
        return true;
    }

    // Models whose targets are based on assumptions do not need training.
    return $this->is_static();
}
982
/**
 * Flags the model as trained and persists the model record.
 *
 * @return void
 */
public function mark_as_trained() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $modelrecord = $this->model;
    $modelrecord->trained = 1;
    $DB->update_record('analytics_models', $modelrecord);
}
996
/**
 * Returns the ids of the contexts this model has predictions for.
 *
 * The join against the context table leaves out predictions whose context
 * record does not exist.
 *
 * @return \stdClass[] Records with a single contextid field.
 */
public function get_predictions_contexts() {
    global $DB;

    $params = [$this->model->id];
    $sql = "SELECT DISTINCT ap.contextid FROM {analytics_predictions} ap
              JOIN {context} ctx ON ctx.id = ap.contextid
             WHERE ap.modelid = ?";

    return $DB->get_records_sql($sql, $params);
}
1010
f9e7447f
DM
/**
 * Tells whether this model has already produced predictions.
 *
 * The analytics_predictions table is not checked because targets have the ability to
 * ignore some predicted values, in which case the predictions are not even stored
 * in the database. The analytics_predict_samples table is queried instead.
 *
 * @return bool
 */
public function any_prediction_obtained() {
    global $DB;

    $conditions = [
        'modelid' => $this->model->id,
        'timesplitting' => $this->model->timesplitting,
    ];
    return $DB->record_exists('analytics_predict_samples', $conditions);
}
1025
/**
 * Tells whether this model generates insights, which is decided by the model's target.
 *
 * @return bool
 */
public function uses_insights() {
    $modeltarget = $this->get_target();

    // The target exposes this as a static method.
    return $modeltarget::uses_insights();
}
1035
369389c9
DM
/**
 * Tells whether this model has any prediction stored for the provided context.
 *
 * @param \context $context
 * @return bool
 */
public function predictions_exist(\context $context) {
    global $DB;

    // Any prediction of this model in this context counts, regardless of its range index.
    $conditions = [
        'modelid' => $this->model->id,
        'contextid' => $context->id,
    ];
    return $DB->record_exists_select('analytics_predictions', "modelid = :modelid AND contextid = :contextid", $conditions);
}
1050
/**
 * Gets the predictions for this context.
 *
 * For each sample only the prediction with the highest range index is considered, and
 * predictions whose sample is not available anymore are discarded. The returned total
 * counts all the remaining predictions, not just the ones in the requested page.
 *
 * @param \context $context
 * @param int|false $page The page of results to fetch, starting at 0. False for all results.
 * @param int $perpage The max number of results to fetch. Ignored if $page is false.
 * @return array($total, \core_analytics\prediction[])
 */
public function get_predictions(\context $context, $page = false, $perpage = 100) {
    global $DB;

    \core_analytics\manager::check_can_list_insights($context);

    // Filters out previous predictions keeping only the last time range one.
    $sql = "SELECT ap.*
              FROM {analytics_predictions} ap
              JOIN (
                SELECT sampleid, max(rangeindex) AS rangeindex
                  FROM {analytics_predictions}
                 WHERE modelid = ? and contextid = ?
              GROUP BY sampleid
              ) apsub
              ON ap.sampleid = apsub.sampleid AND ap.rangeindex = apsub.rangeindex
             WHERE ap.modelid = ? and ap.contextid = ?";
    $params = array($this->model->id, $context->id, $this->model->id, $context->id);
    if (!$predictions = $DB->get_records_sql($sql, $params)) {
        // Keep the documented ($total, $predictions) shape so callers can always unpack the result.
        return [0, []];
    }

    // Get predicted samples' ids.
    $sampleids = array_map(function($prediction) {
        return $prediction->sampleid;
    }, $predictions);

    list($unused, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Number of valid predictions seen so far; it ends up being the total returned to the caller.
    $current = 0;

    if ($page !== false) {
        $offset = $page * $perpage;
        $limit = $offset + $perpage;
    }

    foreach ($predictions as $predictionid => $predictiondata) {

        $sampleid = $predictiondata->sampleid;

        // Filter out predictions which samples are not available anymore.
        if (empty($samplesdata[$sampleid])) {
            unset($predictions[$predictionid]);
            continue;
        }

        // Return paginated dataset - we cannot paginate in the DB because we post filter the list.
        if ($page === false || ($current >= $offset && $current < $limit)) {
            // Replace \stdClass object by \core_analytics\prediction objects.
            $prediction = new \core_analytics\prediction($predictiondata, $samplesdata[$sampleid]);
            $predictions[$predictionid] = $prediction;
        } else {
            unset($predictions[$predictionid]);
        }

        $current++;
    }

    return [$current, $predictions];
}
1118
/**
 * Returns the sample data the analyser provides for the sample of a prediction.
 *
 * @throws \moodle_exception If the analyser returns no data for the sample.
 * @param \stdClass $predictionobj
 * @return array
 */
public function prediction_sample_data($predictionobj) {

    $sampleid = $predictionobj->sampleid;

    // Only the second element returned by get_samples() is used here.
    list(, $samples) = $this->get_analyser()->get_samples([$sampleid]);

    if (empty($samples[$sampleid])) {
        throw new \moodle_exception('errorsamplenotavailable', 'analytics');
    }

    return $samples[$sampleid];
}
1135
/**
 * Returns the description of the sample a prediction refers to, as provided by the analyser.
 *
 * @param \core_analytics\prediction $prediction
 * @return array 2 elements: list(string, \renderable)
 */
public function prediction_sample_description(\core_analytics\prediction $prediction) {
    $analyser = $this->get_analyser();
    $predictiondata = $prediction->get_prediction_data();

    return $analyser->sample_description($predictiondata->sampleid, $predictiondata->contextid,
        $prediction->get_sample_data());
}
1146
/**
 * Returns the output directory for prediction processors, creating it if needed.
 *
 * The base directory is the 'modeloutputdir' analytics setting, falling back to a
 * 'models' directory inside dataroot when the setting is empty.
 *
 * Directory structure as follows:
 * - Evaluation runs:
 *   models/$model->id/$model->version/evaluation/$model->timesplitting
 * - Training & prediction runs:
 *   models/$model->id/$model->version/execution
 *
 * @param array $subdirs Extra directory levels appended after the model version.
 * @return string
 */
protected function get_output_dir($subdirs = array()) {
    global $CFG;

    $basedir = get_config('analytics', 'modeloutputdir');
    if (empty($basedir)) {
        // Apply default value.
        $basedir = rtrim($CFG->dataroot, '/') . DIRECTORY_SEPARATOR . 'models';
    }

    // Base dir + model id + model version + subdirs, all joined by the directory separator.
    $pathparts = array_merge([$basedir, $this->model->id, $this->model->version], $subdirs);
    $outputdir = implode(DIRECTORY_SEPARATOR, $pathparts);

    make_writable_directory($outputdir);

    return $outputdir;
}
1180
/**
 * Returns a unique id for this model.
 *
 * This id should be unique for this site. It is calculated once and cached in the instance.
 *
 * @return string
 */
public function get_unique_id() {
    global $CFG;

    if ($this->uniqueid === null) {
        // The id is a hash of the site url, the database prefix, the model id and the model version.
        $parts = [$CFG->wwwroot, $CFG->prefix, $this->model->id, $this->model->version];
        $this->uniqueid = sha1(implode('$$', $parts));
    }

    return $this->uniqueid;
}
1202
/**
 * Exports the model data.
 *
 * The returned object is a copy of the model record where the target, the time splitting
 * method (if there is one) and the indicators are replaced by their names.
 *
 * @return \stdClass
 */
public function export() {

    \core_analytics\manager::check_can_manage_models();

    $exported = clone $this->model;
    $exported->target = $this->get_target()->get_name();

    $timesplitting = $this->get_time_splitting();
    if ($timesplitting) {
        $exported->timesplitting = $timesplitting->get_name();
    }

    $indicatornames = [];
    foreach ($this->get_indicators() as $indicator) {
        $indicatornames[] = $indicator->get_name();
    }
    $exported->indicators = $indicatornames;

    return $exported;
}
1225
584ffa4f
DM
/**
 * Returns this model's log records, most recent first.
 *
 * @param int $limitfrom
 * @param int $limitnum
 * @return \stdClass[]
 */
public function get_logs($limitfrom = 0, $limitnum = 0) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $conditions = ['modelid' => $this->get_id()];
    $sort = 'timecreated DESC';

    return $DB->get_records('analytics_models_log', $conditions, $sort, '*', $limitfrom, $limitnum);
}
1241
d126f838
DM
/**
 * Merges all training data files into one and returns it.
 *
 * @return \stored_file|false False if the model has no time splitting method or
 *                            if the dataset manager returns no file.
 */
public function get_training_data() {

    \core_analytics\manager::check_can_manage_models();

    // The time splitting method may not be set; there is nothing to export in that case.
    $timesplitting = $this->get_time_splitting();
    if (!$timesplitting) {
        return false;
    }

    return \core_analytics\dataset_manager::export_training_data($this->get_id(), $timesplitting->get_id());
}
1254
/**
 * Records in analytics_used_files that the provided file was used for the given action.
 *
 * @param \stored_file $file
 * @param string $action
 * @return void
 */
protected function flag_file_as_used(\stored_file $file, $action) {
    global $DB;

    $row = (object)[
        'modelid' => $this->model->id,
        'fileid' => $file->get_id(),
        'action' => $action,
        'time' => time(),
    ];
    $DB->insert_record('analytics_used_files', $row);
}
1273 /**
1cc2b4ba 1274 * Log the evaluation results in the database.
369389c9
DM
1275 *
1276 * @param string $timesplittingid
1277 * @param float $score
1278 * @param string $dir
1279 * @param array $info
1280 * @return int The inserted log id
1281 */
1282 protected function log_result($timesplittingid, $score, $dir = false, $info = false) {
1283 global $DB, $USER;
1284
1285 $log = new \stdClass();
1286 $log->modelid = $this->get_id();
1287 $log->version = $this->model->version;
1288 $log->target = $this->model->target;
1289 $log->indicators = $this->model->indicators;
1290 $log->timesplitting = $timesplittingid;
1291 $log->dir = $dir;
1292 if ($info) {
1293 // Ensure it is not an associative array.
1294 $log->info = json_encode(array_values($info));
1295 }
1296 $log->score = $score;
1297 $log->timecreated = time();
1298 $log->usermodified = $USER->id;
1299
1300 return $DB->insert_record('analytics_models_log', $log);
1301 }
1302
/**
 * Utility method to return the indicator ids from a list of indicator objects.
 *
 * @throws \moodle_exception If any of the elements is not a valid indicator.
 * @param \core_analytics\local\indicator\base[] $indicators
 * @return string[]
 */
private static function indicator_classes($indicators) {

    // What we want to check and store are the indicator ids, not the keys of the provided list.
    $ids = [];
    foreach ($indicators as $candidate) {

        if (\core_analytics\manager::is_valid($candidate, '\core_analytics\local\indicator\base')) {
            $ids[] = $candidate->get_id();
            continue;
        }

        // Invalid indicator: convert it to something printable for the error message.
        if (is_object($candidate)) {
            $candidate = '\\' . get_class($candidate);
        } else if (!is_scalar($candidate)) {
            $candidate = strval($candidate);
        }
        throw new \moodle_exception('errorinvalidindicator', 'analytics', '', $candidate);
    }

    return $ids;
}
1327
/**
 * Clears the model training and prediction data.
 *
 * Executed after updating model critical elements like the time splitting method
 * or the indicators.
 *
 * @return void
 */
private function clear_model() {
    global $DB;

    $tables = [
        'analytics_predictions',
        'analytics_predict_samples',
        'analytics_train_samples',
        'analytics_used_files',
    ];
    foreach ($tables as $table) {
        $DB->delete_records($table, ['modelid' => $this->model->id]);
    }

    // We don't expect people to clear models regularly and the cost of filling the cache is
    // 1 db read per context.
    \cache::make('core', 'contextwithinsights')->purge();
}
1349
1611308b
DM
/**
 * Raises the memory limit (unless it is already unlimited) and the PHP time limit.
 *
 * @return void
 */
private function heavy_duty_mode() {
    $unlimitedmemory = (ini_get('memory_limit') == -1);
    if (!$unlimitedmemory) {
        raise_memory_limit(MEMORY_HUGE);
    }

    \core_php_time_limit::raise();
}
369389c9 1361}