MDL-57791 analytics: Add missing 'new' to throw
[moodle.git] / analytics / classes / model.php
CommitLineData
369389c9
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
b94dbb55 18 * Prediction model representation.
369389c9
DM
19 *
20 * @package core_analytics
21 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace core_analytics;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
b94dbb55 30 * Prediction model representation.
369389c9
DM
31 *
32 * @package core_analytics
33 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36class model {
37
413f19bc
DM
/**
 * Status: everything went as expected.
 */
const OK = 0;

/**
 * Status: there was a problem.
 */
const GENERAL_ERROR = 1;

/**
 * Status: there is no dataset to analyse.
 */
const NO_DATASET = 2;

/**
 * Evaluation status: the model has a low prediction accuracy.
 */
const EVALUATE_LOW_SCORE = 4;

/**
 * Evaluation status: not enough data to evaluate the model properly.
 */
const EVALUATE_NOT_ENOUGH_DATA = 8;

/**
 * Analysable status: invalid analysable for the time splitting method.
 *
 * NOTE(review): shares the value 4 with EVALUATE_LOW_SCORE; presumably both are never used in the same status field.
 */
const ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD = 4;

/**
 * Analysable status: invalid analysable for all time splitting methods.
 *
 * NOTE(review): shares the value 8 with EVALUATE_NOT_ENOUGH_DATA; presumably both are never used in the same status field.
 */
const ANALYSABLE_STATUS_INVALID_FOR_RANGEPROCESSORS = 8;

/**
 * Analysable status: invalid analysable for the target.
 */
const ANALYSABLE_STATUS_INVALID_FOR_TARGET = 16;

/**
 * Minimum score required to consider a non-static prediction model as good.
 */
const MIN_SCORE = 0.7;

/**
 * Maximum standard deviation between evaluation repetitions for the evaluation results to be considered stable.
 */
const ACCEPTED_DEVIATION = 0.05;

/**
 * Number of evaluation repetitions.
 */
const EVALUATION_ITERATIONS = 10;

/**
 * The analytics_models record backing this instance.
 *
 * @var \stdClass
 */
protected $model = null;

/**
 * Lazily initialised analyser, see init_analyser().
 *
 * @var \core_analytics\local\analyser\base
 */
protected $analyser = null;

/**
 * Lazily loaded target, see get_target().
 *
 * @var \core_analytics\local\target\base
 */
protected $target = null;

/**
 * Lazily loaded indicators indexed by class name, see get_indicators().
 *
 * @var \core_analytics\local\indicator\base[]
 */
protected $indicators = null;

/**
 * Unique Model id created from site info and last model modification.
 *
 * @var string
 */
protected $uniqueid = null;
119
120 /**
1cc2b4ba 121 * Constructor.
369389c9 122 *
1cc2b4ba 123 * @param int|\stdClass $model
369389c9
DM
124 * @return void
125 */
126 public function __construct($model) {
127 global $DB;
128
129 if (is_scalar($model)) {
1611308b 130 $model = $DB->get_record('analytics_models', array('id' => $model), '*', MUST_EXIST);
f9e7447f
DM
131 if (!$model) {
132 throw new \moodle_exception('errorunexistingmodel', 'analytics', '', $model);
133 }
369389c9
DM
134 }
135 $this->model = $model;
136 }
137
3a396286
DM
/**
 * Cheap sanity check used to discard models whose required components are gone.
 *
 * The model is considered available when its target can be loaded and the
 * analyser class declared by that target exists.
 *
 * @return bool
 */
public function is_available() {
    $target = $this->get_target();

    // Without a target we can not even know which analyser the model needs.
    return $target ? class_exists($target->get_analyser_class()) : false;
}
156
/**
 * Returns the id of the model record.
 *
 * @return int
 */
public function get_id() {
    $record = $this->model;
    return $record->id;
}
165
/**
 * Returns the plain \stdClass record holding the model data.
 *
 * The returned object is the instance used internally, not a copy.
 *
 * @return \stdClass
 */
public function get_model_obj() {
    $record = $this->model;
    return $record;
}
174
/**
 * Returns the model target.
 *
 * The target is resolved through the analytics manager on first use and cached afterwards.
 *
 * @return \core_analytics\local\target\base
 */
public function get_target() {
    if ($this->target === null) {
        $this->target = \core_analytics\manager::get_target($this->model->target);
    }

    return $this->target;
}
189
/**
 * Returns the model indicators, indexed by their full class name.
 *
 * The list is decoded from the JSON stored in the model record on first use and cached afterwards.
 * Indicators that can not be loaded are skipped with a developer debugging message.
 *
 * @throws \coding_exception If the stored indicators do not decode to an array.
 * @return \core_analytics\local\indicator\base[]
 */
public function get_indicators() {
    if ($this->indicators === null) {

        $classnames = json_decode($this->model->indicators);
        if (!is_array($classnames)) {
            throw new \coding_exception('Model ' . $this->model->id . ' indicators can not be read');
        }

        $this->indicators = array();
        foreach ($classnames as $classname) {
            $indicator = \core_analytics\manager::get_indicator($classname);
            if (!$indicator) {
                debugging('Can\'t load ' . $classname . ' indicator', DEBUG_DEVELOPER);
                continue;
            }
            $this->indicators[$classname] = $indicator;
        }
    }

    return $this->indicators;
}
218
/**
 * Returns the indicators that could potentially be used by the model target.
 *
 * The indicators that are already part of the model are included. An indicator is kept only
 * when the analyser reports (with a strict true) that its requirements are fulfilled.
 *
 * @return \core_analytics\local\indicator\base[]
 */
public function get_potential_indicators() {

    $candidates = \core_analytics\manager::get_all_indicators();

    // Only initialise an (evaluation) analyser if there is none yet.
    if (empty($this->analyser)) {
        $this->init_analyser(array('evaluation' => true));
    }

    // array_filter preserves the class name keys.
    return array_filter($candidates, function($indicator) {
        return $this->analyser->check_indicator_requirements($indicator) === true;
    });
}
241
/**
 * Returns the model analyser (defined by the model target).
 *
 * When no analyser has been initialised yet, one is created with the default (empty) options.
 *
 * @return \core_analytics\local\analyser\base
 */
public function get_analyser() {
    if ($this->analyser === null) {
        // Default initialisation with no options.
        $this->init_analyser();
    }

    return $this->analyser;
}
257
/**
 * Initialises the model analyser.
 *
 * With the 'evaluation' option set, the analyser works with the time splitting method given in the
 * 'timesplitting' option or, if none is given, with all enabled time splitting methods. Otherwise
 * the analyser works with the time splitting method stored in the model.
 *
 * @throws \moodle_exception If there is no target, or no valid time splitting method is available.
 * @throws \coding_exception If the analyser class declared by the target does not exist.
 * @param array $options Supported keys seen here: 'evaluation' and 'timesplitting'; the whole array is passed to the analyser.
 * @return void
 */
protected function init_analyser($options = array()) {

    $target = $this->get_target();
    $indicators = $this->get_indicators();

    if (empty($target)) {
        throw new \moodle_exception('errornotarget', 'analytics');
    }

    if (!empty($options['evaluation'])) {
        // The evaluation process will run using all available time splitting methods unless one is specified.
        if (!empty($options['timesplitting'])) {
            $timesplitting = \core_analytics\manager::get_time_splitting($options['timesplitting']);
            // Assumes the manager returns a falsy value for an id it can not resolve; without this
            // guard the get_id() call below would be a fatal error instead of a catchable exception.
            if (!$timesplitting) {
                throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
            }
            $timesplittings = array($timesplitting->get_id() => $timesplitting);
        } else {
            $timesplittings = \core_analytics\manager::get_enabled_time_splitting_methods();
        }
    } else {

        if (empty($this->model->timesplitting)) {
            throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
        }

        $timesplitting = $this->get_time_splitting();
        // Same guard as above: never hand an unresolved time splitting method to the analyser.
        if (!$timesplitting) {
            throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
        }

        // Returned as an array as all actions (evaluation, training and prediction) go through the same process.
        $timesplittings = array($this->model->timesplitting => $timesplitting);
    }

    if (empty($timesplittings)) {
        throw new \moodle_exception('errornotimesplittings', 'analytics');
    }

    $classname = $target->get_analyser_class();
    if (!class_exists($classname)) {
        throw new \coding_exception($classname . ' class does not exists');
    }

    // Returns a \core_analytics\local\analyser\base class.
    $this->analyser = new $classname($this->model->id, $target, $indicators, $timesplittings, $options);
}
304
/**
 * Returns the time splitting method of the model.
 *
 * @return \core_analytics\local\time_splitting\base|false False if the model has no time splitting method set.
 */
public function get_time_splitting() {
    $timesplittingid = $this->model->timesplitting;

    return empty($timesplittingid) ? false : \core_analytics\manager::get_time_splitting($timesplittingid);
}
316
/**
 * Creates a new model and stores it in the database.
 *
 * The model is enabled when $timesplittingid is provided and it is flagged as trained
 * straight away when it is a static model.
 *
 * @param \core_analytics\local\target\base $target
 * @param \core_analytics\local\indicator\base[] $indicators
 * @param string $timesplittingid The time splitting method id (its fully qualified class name)
 * @return \core_analytics\model
 */
public static function create(\core_analytics\local\target\base $target, array $indicators, $timesplittingid = false) {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $classes = self::indicator_classes($indicators);
    $timestamp = time();

    $record = new \stdClass();
    $record->target = $target->get_id();
    $record->indicators = json_encode($classes);
    $record->version = $timestamp;
    $record->timecreated = $timestamp;
    $record->timemodified = $timestamp;
    $record->usermodified = $USER->id;

    $newid = $DB->insert_record('analytics_models', $record);

    // Read the record back so the fields left to the database defaults are populated too.
    $record = $DB->get_record('analytics_models', array('id' => $newid), '*', MUST_EXIST);

    $instance = new static($record);

    if ($timesplittingid) {
        $instance->enable($timesplittingid);
    }

    // Static models have nothing to learn.
    if ($instance->is_static()) {
        $instance->mark_as_trained();
    }

    return $instance;
}
359
/**
 * Updates the model.
 *
 * When the time splitting method or the indicators change, the model gets a new version,
 * its predictions and generated files are removed and it is flagged as not trained.
 *
 * @param int|bool $enabled
 * @param \core_analytics\local\indicator\base[] $indicators
 * @param string $timesplittingid
 * @return void
 */
public function update($enabled, $indicators, $timesplittingid = '') {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $timestamp = time();
    $encodedindicators = json_encode(self::indicator_classes($indicators));

    $timesplittingchanged = $this->model->timesplitting !== $timesplittingid;
    if ($timesplittingchanged || $this->model->indicators !== $encodedindicators) {
        // A new version prevents data from different time splitting methods being mixed up.
        $this->model->version = $timestamp;

        // Drop everything generated with the previous settings: predictions first, files afterwards.
        $this->clear_model();
        \core_analytics\dataset_manager::clear_model_files($this->model->id);

        // The model has to be trained again.
        $this->model->trained = 0;
    }

    $this->model->enabled = intval($enabled);
    $this->model->indicators = $encodedindicators;
    $this->model->timesplitting = $timesplittingid;
    $this->model->timemodified = $timestamp;
    $this->model->usermodified = $USER->id;

    $DB->update_record('analytics_models', $this->model);

    // Reset the cached unique id, it may have been generated with the previous model data.
    $this->uniqueid = null;
}
403
d16cf374
DM
/**
 * Removes the model.
 *
 * The model data is cleared (clear_model()) before the analytics_models record is deleted.
 *
 * @return void
 */
public function delete() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $this->clear_model();

    $conditions = array('id' => $this->model->id);
    $DB->delete_records('analytics_models', $conditions);
}
417
/**
 * Evaluates the model.
 *
 * This method gets the site contents (through the analyser) creates a .csv dataset
 * with them and evaluates the model prediction accuracy multiple times using the
 * machine learning backend. Each returned object holds the status, the info and the
 * score reported by the predictions processor for one time splitting method.
 *
 * Static models are not evaluated; a single self::OK result is returned for them.
 *
 * @throws \moodle_exception If the model has no indicators.
 * @param array $options Options passed to the analyser, 'evaluation' is always forced to true.
 * @return \stdClass[] One result per evaluated time splitting method (indexed by time splitting id), or
 *                     a single-element array when the model is static or no dataset could be generated.
 */
public function evaluate($options = array()) {

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('noevaluationbasedassumptions', 'analytics'));
        $result = new \stdClass();
        $result->status = self::OK;
        // Expose the log we have just added, same as the "no dataset" result below does.
        $result->info = $this->get_analyser()->get_logs();
        // Always return an array of results so callers can iterate regardless of the model type.
        return array($result);
    }

    $options['evaluation'] = true;
    $this->init_analyser($options);

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_labelled_data call so we get an early exception if it is not ready.
    $predictor = \core_analytics\manager::get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // No datasets generated.
    if (empty($datasets)) {
        $result = new \stdClass();
        $result->status = self::NO_DATASET;
        $result->info = $this->get_analyser()->get_logs();
        return array($result);
    }

    if (!PHPUNIT_TEST && CLI_SCRIPT) {
        echo PHP_EOL . get_string('processingsitecontents', 'analytics') . PHP_EOL;
    }

    $results = array();
    foreach ($datasets as $timesplittingid => $dataset) {

        $timesplitting = \core_analytics\manager::get_time_splitting($timesplittingid);

        $result = new \stdClass();

        // Class name without namespace separators, used as a directory name.
        $dashestimesplittingid = str_replace('\\', '', $timesplittingid);
        $outputdir = $this->get_output_dir(array('evaluation', $dashestimesplittingid));

        // Evaluate the dataset, the deviation we accept in the results depends on the amount of iterations.
        $predictorresult = $predictor->evaluate($this->model->id, self::ACCEPTED_DEVIATION,
            self::EVALUATION_ITERATIONS, $dataset, $outputdir);

        $result->status = $predictorresult->status;
        $result->info = $predictorresult->info;

        if (isset($predictorresult->score)) {
            $result->score = $predictorresult->score;
        } else {
            // Prediction processors may return an error, default to 0 score in that case.
            $result->score = 0;
        }

        $dir = false;
        if (!empty($predictorresult->dir)) {
            $dir = $predictorresult->dir;
        }

        $result->logid = $this->log_result($timesplitting->get_id(), $result->score, $dir, $result->info);

        $results[$timesplitting->get_id()] = $result;
    }

    return $results;
}
502
/**
 * Trains the model using the site contents.
 *
 * A labelled dataset is generated from the site contents (through the analyser) and
 * handed over to the predictions processor. Static models are skipped because they
 * do not require training.
 *
 * @throws \moodle_exception If the model is not enabled, has no time splitting method or has no indicators.
 * @return \stdClass Object with a status and, except for static models, an info property.
 */
public function train() {

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('notrainingbasedassumptions', 'analytics'));
        $outcome = new \stdClass();
        $outcome->status = self::OK;
        return $outcome;
    }

    $timesplittingid = $this->model->timesplitting;
    if (!$this->is_enabled() || empty($timesplittingid)) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Resolve the output directory and the predictions processor before the (expensive)
    // get_labelled_data call, so problems with any of them raise an exception early.
    $outputdir = $this->get_output_dir(array('execution'));
    $processor = \core_analytics\manager::get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // Nothing to train with if no file has been generated for the model time splitting method.
    if (empty($datasets) || empty($datasets[$timesplittingid])) {
        $outcome = new \stdClass();
        $outcome->status = self::NO_DATASET;
        $outcome->info = $this->get_analyser()->get_logs();
        return $outcome;
    }
    $trainingfile = $datasets[$timesplittingid];

    // Train using the dataset.
    $processorresult = $processor->train($this->get_unique_id(), $trainingfile, $outputdir);

    $outcome = new \stdClass();
    $outcome->status = $processorresult->status;
    $outcome->info = $processorresult->info;

    $this->flag_file_as_used($trainingfile, 'trained');

    // Flag the model as trained unless it already was.
    if (!$this->model->trained) {
        $this->mark_as_trained();
    }

    return $outcome;
}
567
/**
 * Get predictions from the site contents.
 *
 * It analyses the site contents (through analyser classes) looking for samples
 * ready to receive predictions. It generates a dataset with all samples ready to
 * get predictions and it passes it to the machine learning backends or to the
 * targets based on assumptions to get the predictions.
 *
 * @throws \moodle_exception If the model is not enabled, has no time splitting method, has no
 *                           indicators or the samples file has already been used to predict.
 * @return \stdClass Object with status and info and, unless no dataset was available, predictions.
 */
public function predict() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    if (!$this->is_enabled() || empty($this->model->timesplitting)) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_unlabelled_data call so we get an early exception if it is not writable.
    $outputdir = $this->get_output_dir(array('execution'));

    // Before get_unlabelled_data call so we get an early exception if it is not ready.
    // Static models do not use a predictions processor at all.
    $predictor = null;
    if (!$this->is_static()) {
        $predictor = \core_analytics\manager::get_predictions_processor();
    }

    $samplesdata = $this->get_analyser()->get_unlabelled_data();

    // Get the prediction samples file.
    if (empty($samplesdata) || empty($samplesdata[$this->model->timesplitting])) {

        $result = new \stdClass();
        $result->status = self::NO_DATASET;
        $result->info = $this->get_analyser()->get_logs();
        return $result;
    }
    $samplesfile = $samplesdata[$this->model->timesplitting];

    // We need to throw an exception if we are trying to predict stuff that was already predicted.
    $params = array('modelid' => $this->model->id, 'fileid' => $samplesfile->get_id(), 'action' => 'predicted');
    if ($DB->record_exists('analytics_used_files', $params)) {
        throw new \moodle_exception('erroralreadypredict', 'analytics', '', $samplesfile->get_id());
    }

    $indicatorcalculations = \core_analytics\dataset_manager::get_structured_data($samplesfile);

    // Prepare the results object.
    $result = new \stdClass();

    if ($this->is_static()) {
        // Prediction based on assumptions. Note that $indicatorcalculations is passed by
        // reference and samples that could not be calculated are removed from it.
        $result->status = self::OK;
        $result->info = [];
        $result->predictions = $this->get_static_predictions($indicatorcalculations);

    } else {
        // Prediction process runs on the machine learning backend.
        $predictorresult = $predictor->predict($this->get_unique_id(), $samplesfile, $outputdir);
        $result->status = $predictorresult->status;
        $result->info = $predictorresult->info;
        $result->predictions = $this->format_predictor_predictions($predictorresult);
    }

    // Contexts of the stored predictions, stays empty if there was nothing to store.
    $samplecontexts = array();
    if ($result->predictions) {
        $samplecontexts = $this->execute_prediction_callbacks($result->predictions, $indicatorcalculations);
    }

    if (!empty($samplecontexts) && $this->uses_insights()) {
        $this->trigger_insights($samplecontexts);
    }

    $this->flag_file_as_used($samplesfile, 'predicted');

    return $result;
}
650
/**
 * Formats the predictor results.
 *
 * Each entry returned by the predictions processor is expected to be a list with:
 * - 1 value (sample id): the processor could not process the sample, it is skipped.
 * - 2 values (sample id, prediction): the maximum prediction score (1) is assumed.
 * - 3 values (sample id, prediction, prediction score).
 * Entries with any other number of values are skipped.
 *
 * @param \stdClass $predictorresult The predictions processor result, its predictions property is read.
 * @return \stdClass[] Objects with prediction and predictionscore properties, indexed by unique sample id.
 */
private function format_predictor_predictions($predictorresult) {

    $predictions = array();
    if (empty($predictorresult->predictions)) {
        return $predictions;
    }

    foreach ($predictorresult->predictions as $sampleinfo) {

        // We parse each prediction. Plain if/else instead of a switch: "continue" inside a switch
        // behaves like "break", which would let skipped samples reuse the previous sample values.
        $nvalues = count($sampleinfo);

        if ($nvalues === 1) {
            // For whatever reason the predictions processor could not process this sample, we
            // skip it and do nothing with it.
            debugging($this->model->id . ' model predictions processor could not process the sample with id ' .
                $sampleinfo[0], DEBUG_DEVELOPER);
            continue;
        }

        if ($nvalues === 2) {
            // Prediction processors that do not return a prediction score will have the maximum prediction
            // score.
            list($uniquesampleid, $prediction) = $sampleinfo;
            $predictionscore = 1;
        } else if ($nvalues === 3) {
            list($uniquesampleid, $prediction, $predictionscore) = $sampleinfo;
        } else {
            // Unknown format, there is no reliable way to read a prediction from it.
            debugging($this->model->id . ' model predictions processor returned a sample in an unexpected format',
                DEBUG_DEVELOPER);
            continue;
        }

        $predictions[$uniquesampleid] = (object)['prediction' => $prediction, 'predictionscore' => $predictionscore];
    }

    return $predictions;
}
689
/**
 * Stores the predictions that trigger the target callback and runs that callback.
 *
 * @param \stdClass[] $predictions Objects with prediction and predictionscore, indexed by unique sample id.
 * @param array $indicatorcalculations Indicator calculations indexed by unique sample id.
 * @return array The contexts of the stored predictions, indexed by context id.
 */
protected function execute_prediction_callbacks($predictions, $indicatorcalculations) {

    // Contexts of all stored predictions, used later to limit which users will see those predictions.
    $contexts = array();

    foreach ($predictions as $uniquesampleid => $item) {

        if (!$this->get_target()->triggers_callback($item->prediction, $item->predictionscore)) {
            // The target is not interested in this prediction.
            continue;
        }

        // The unique sample id contains both the sampleid and the rangeindex.
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        // Store the predicted values together with the calculations they are based on.
        $calculations = json_encode($indicatorcalculations[$uniquesampleid]);
        $context = $this->save_prediction($sampleid, $rangeindex, $item->prediction,
            $item->predictionscore, $calculations);

        // Keep the context to later generate insights or whatever action the target wants to perform.
        $contexts[$context->id] = $context;

        $this->get_target()->prediction_callback($this->model->id, $sampleid, $rangeindex, $context,
            $item->prediction, $item->predictionscore);
    }

    return $contexts;
}
369389c9 723
1611308b
DM
/**
 * Generates insights and updates the cache.
 *
 * The target is asked to generate the insight notifications and this model is then
 * registered in the 'contextwithinsights' cache for each of the provided contexts.
 *
 * @param \context[] $samplecontexts
 * @return void
 */
protected function trigger_insights($samplecontexts) {

    // Notify the target that all predictions have been processed.
    $this->get_target()->generate_insight_notifications($this->model->id, $samplecontexts);

    // Update cache.
    $cache = \cache::make('core', 'contextwithinsights');
    foreach ($samplecontexts as $context) {
        $cachedids = $cache->get($context->id);

        if (!$cachedids) {
            // Nothing cached. We can not tell whether this context has no insights or the cache was
            // purged, so we play safe and spend one db read to fill the cache up.
            $withinsights = \core_analytics\manager::get_models_with_insights($context);
            $cache->set($context->id, array_keys($withinsights));
            continue;
        }

        if (in_array($this->get_id(), $cachedids)) {
            // Already registered for this context.
            continue;
        }

        $cachedids[] = $this->get_id();
        $cache->set($context->id, $cachedids);
    }
}
751
/**
 * Get predictions from a static model.
 *
 * Samples are grouped by analysable class and range index and the target is asked to calculate
 * each group. Samples the target discards or can not calculate are removed from
 * $indicatorcalculations, so they are not stored later as if they had been calculated.
 *
 * @param array $indicatorcalculations Indicator calculations indexed by unique sample id (passed by reference).
 * @return \stdClass[] Objects with prediction and predictionscore properties, indexed by unique sample id.
 */
protected function get_static_predictions(&$indicatorcalculations) {

    // Group samples by analysable for \core_analytics\local\target::calculate.
    // NOTE(review): groups are keyed by analysable class, so samples of different analysables of the
    // same class share the first analysable found - confirm this is intended.
    $analysables = array();
    // List all sampleids together.
    $sampleids = array();

    foreach ($indicatorcalculations as $uniquesampleid => $indicators) {
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
        $analysableclass = get_class($analysable);
        if (empty($analysables[$analysableclass])) {
            $analysables[$analysableclass] = array();
        }
        if (empty($analysables[$analysableclass][$rangeindex])) {
            $analysables[$analysableclass][$rangeindex] = (object)[
                'analysable' => $analysable,
                'indicatorsdata' => array(),
                'sampleids' => array()
            ];
        }
        // Using the sampleid as a key so we can easily merge indicators data later.
        $analysables[$analysableclass][$rangeindex]->indicatorsdata[$sampleid] = $indicators;
        // We could use indicatorsdata keys but the amount of redundant data is not that big and leaves code below cleaner.
        $analysables[$analysableclass][$rangeindex]->sampleids[$sampleid] = $sampleid;

        // Accumulate sample ids to get all their associated data in 1 single db query (analyser::get_samples).
        $sampleids[$sampleid] = $sampleid;
    }

    // Get all samples data.
    list($sampleids, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Calculate the targets.
    $predictions = array();
    foreach ($analysables as $analysableclass => $rangedata) {
        foreach ($rangedata as $rangeindex => $data) {

            // Attach samples data and calculated indicators data.
            $this->get_target()->clear_sample_data();
            $this->get_target()->add_sample_data($samplesdata);
            $this->get_target()->add_sample_data($data->indicatorsdata);

            // Snapshot of the samples of this group, taken before the target gets the chance to discard any.
            $groupsampleids = $data->sampleids;

            // Append new elements (we can not get duplicates because sample-analysable relation is N-1).
            $range = $this->get_time_splitting()->get_range_by_index($rangeindex);
            $this->get_target()->filter_out_invalid_samples($data->sampleids, $data->analysable, false);
            $calculations = $this->get_target()->calculate($data->sampleids, $data->analysable, $range['start'], $range['end']);

            // Samples of this group without a calculation (discarded by the target or calculated as null)
            // must be removed from $indicatorcalculations, otherwise they would be stored as calculated by
            // self::save_prediction. Only this group's unique sample ids are touched: $calculations knows
            // nothing about the samples of other analysables or range indexes.
            foreach ($groupsampleids as $sampleid) {
                if (!isset($calculations[$sampleid])) {
                    $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);
                    unset($indicatorcalculations[$uniquesampleid]);
                }
            }

            foreach ($calculations as $sampleid => $value) {

                $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);

                // Null means that the target couldn't calculate the sample, we also remove them from $indicatorcalculations.
                if (is_null($value)) {
                    unset($indicatorcalculations[$uniquesampleid]);
                    continue;
                }

                // Even if static predictions are based on assumptions we flag them as 100% because they are 100%
                // true according to what the developer defined.
                $predictions[$uniquesampleid] = (object)['prediction' => $value, 'predictionscore' => 1];
            }
        }
    }
    return $predictions;
}
836
/**
 * Stores a prediction in the analytics_predictions table.
 *
 * @param int $sampleid
 * @param int $rangeindex
 * @param int $prediction
 * @param float $predictionscore
 * @param string $calculations
 * @return \context The context returned by the analyser for the sample, also stored with the prediction.
 */
protected function save_prediction($sampleid, $rangeindex, $prediction, $predictionscore, $calculations) {
    global $DB;

    $samplecontext = $this->get_analyser()->sample_access_context($sampleid);

    $row = (object)[
        'modelid' => $this->model->id,
        'contextid' => $samplecontext->id,
        'sampleid' => $sampleid,
        'rangeindex' => $rangeindex,
        'prediction' => $prediction,
        'predictionscore' => $predictionscore,
        'calculations' => $calculations,
        'timecreated' => time(),
    ];
    $DB->insert_record('analytics_predictions', $row);

    return $samplecontext;
}
865
/**
 * Enables the model, optionally switching to the provided time splitting method.
 *
 * A time splitting method different from the current one must be valid and its id must be a
 * fully qualified class name starting with a backslash; the model version is updated when it changes.
 *
 * @throws \moodle_exception If the provided time splitting method is not valid.
 * @param string $timesplittingid
 * @return void
 */
public function enable($timesplittingid = false) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $timestamp = time();

    if ($timesplittingid && $timesplittingid !== $this->model->timesplitting) {

        $isvalid = \core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base');
        if (!$isvalid || substr($timesplittingid, 0, 1) !== '\\') {
            throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
        }

        $this->model->timesplitting = $timesplittingid;
        $this->model->version = $timestamp;
    }

    // NOTE(review): the original comment here said timemodified is intentionally not always updated
    // (reserved for target, indicators or timesplitting updates), yet it is set unconditionally - confirm.
    $this->model->enabled = 1;
    $this->model->timemodified = $timestamp;

    $DB->update_record('analytics_models', $this->model);

    // Reset the cached unique id, it may have been generated with the previous model data.
    $this->uniqueid = null;
}
901
/**
 * Is this a static model (as defined by the target)?
 *
 * Static models rely on the assumptions defined by their target instead of on
 * machine learning backends results.
 *
 * @return bool
 */
public function is_static() {
    $target = $this->get_target();
    return (bool)$target->based_on_assumptions();
}
913
/**
 * Is this model enabled?
 *
 * @return bool The enabled field of the model record cast to bool.
 */
public function is_enabled() {
    $enabled = $this->model->enabled;
    return (bool)$enabled;
}
922
/**
 * Is this model already trained?
 *
 * Static models are always reported as trained.
 *
 * @return bool
 */
public function is_trained() {
    if ($this->model->trained) {
        return true;
    }

    // Models which targets are based on assumptions do not need training.
    return $this->is_static();
}
932
/**
 * Flags the model as trained and saves the model record.
 *
 * @return void
 */
public function mark_as_trained() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $record = $this->model;
    $record->trained = 1;
    $DB->update_record('analytics_models', $record);
}
946
947 /**
1cc2b4ba 948 * Get the contexts with predictions.
369389c9
DM
949 *
950 * @return \stdClass[]
951 */
public function get_predictions_contexts() {
    global $DB;

    // One row per distinct context id that has predictions for this model.
    $params = array($this->model->id);
    $sql = "SELECT DISTINCT contextid FROM {analytics_predictions} WHERE modelid = ?";

    return $DB->get_records_sql($sql, $params);
}
958
f9e7447f
DM
959 /**
960 * Has this model generated predictions?
961 *
962 * We don't check analytics_predictions table because targets have the ability to
963 * ignore some predicted values, if that is the case predictions are not even stored
964 * in db.
965 *
966 * @return bool
967 */
public function any_prediction_obtained() {
    global $DB;

    // Look for any predicted range recorded for this model with its current time splitting method.
    $conditions = array(
        'modelid' => $this->model->id,
        'timesplitting' => $this->model->timesplitting,
    );

    return $DB->record_exists('analytics_predict_ranges', $conditions);
}
973
974 /**
975 * Whether this model generates insights or not (defined by the model's target).
976 *
977 * @return bool
978 */
public function uses_insights() {
    // uses_insights() is invoked statically through the target returned by get_target().
    $modeltarget = $this->get_target();

    return $modeltarget::uses_insights();
}
983
369389c9
DM
984 /**
985 * Whether predictions exist for this context.
986 *
987 * @param \context $context
988 * @return bool
989 */
public function predictions_exist(\context $context) {
    global $DB;

    // Any prediction row for this model in the given context is enough.
    $params = array('modelid' => $this->model->id, 'contextid' => $context->id);
    $where = "modelid = :modelid AND contextid = :contextid";

    return $DB->record_exists_select('analytics_predictions', $where, $params);
}
998
999 /**
1000 * Gets the predictions for this context.
1001 *
1002 * @param \context $context
21d4ae93
DM
1003 * @param int $page The page of results to fetch. False for all results.
1004 * @param int $perpage The max number of results to fetch. Ignored if $page is false.
68bfe1de 1005 * @return array($total, \core_analytics\prediction[])
369389c9 1006 */
public function get_predictions(\context $context, $page = false, $perpage = 100) {
    global $DB;

    \core_analytics\manager::check_can_list_insights($context);

    // Filters out previous predictions keeping only the last time range one.
    $sql = "SELECT tip.*
              FROM {analytics_predictions} tip
              JOIN (
                SELECT sampleid, max(rangeindex) AS rangeindex
                  FROM {analytics_predictions}
                 WHERE modelid = ? and contextid = ?
                 GROUP BY sampleid
              ) tipsub
                ON tip.sampleid = tipsub.sampleid AND tip.rangeindex = tipsub.rangeindex
             WHERE tip.modelid = ? and tip.contextid = ?";
    $params = array($this->model->id, $context->id, $this->model->id, $context->id);
    if (!$predictions = $DB->get_records_sql($sql, $params)) {
        // Keep the same (total, predictions) shape as the regular return value so that
        // callers destructuring the result with list() do not hit undefined offsets.
        return array(0, array());
    }

    // Get predicted samples' ids.
    $sampleids = array_map(function($prediction) {
        return $prediction->sampleid;
    }, $predictions);

    list($unused, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Number of predictions whose sample is still available, counted across all pages.
    $current = 0;

    // Window of positions [offset, limit) to return; only used when a page was requested.
    $offset = 0;
    $limit = 0;
    if ($page !== false) {
        $offset = $page * $perpage;
        $limit = $offset + $perpage;
    }

    foreach ($predictions as $predictionid => $predictiondata) {

        $sampleid = $predictiondata->sampleid;

        // Filter out predictions which samples are not available anymore.
        if (empty($samplesdata[$sampleid])) {
            unset($predictions[$predictionid]);
            continue;
        }

        // Return paginated dataset - we cannot paginate in the DB because we post filter the list.
        if ($page === false || ($current >= $offset && $current < $limit)) {
            // Replace \stdClass object by \core_analytics\prediction objects.
            $prediction = new \core_analytics\prediction($predictiondata, $samplesdata[$sampleid]);
            $predictions[$predictionid] = $prediction;
        } else {
            unset($predictions[$predictionid]);
        }

        $current++;
    }

    return [$current, $predictions];
}
1067
1068 /**
1611308b 1069 * Returns the sample data of a prediction.
369389c9
DM
1070 *
1071 * @param \stdClass $predictionobj
1072 * @return array
1073 */
public function prediction_sample_data($predictionobj) {

    $sampleid = $predictionobj->sampleid;

    // Ask the analyser for this single sample; only the samples data part of the result is used.
    list($unused, $samplesdata) = $this->get_analyser()->get_samples(array($sampleid));

    if (!empty($samplesdata[$sampleid])) {
        return $samplesdata[$sampleid];
    }

    // No data came back for the sample.
    throw new \moodle_exception('errorsamplenotavailable', 'analytics');
}
1084
1085 /**
1611308b 1086 * Returns the description of a sample
369389c9
DM
1087 *
1088 * @param \core_analytics\prediction $prediction
1089 * @return array 2 elements: list(string, \renderable)
1090 */
public function prediction_sample_description(\core_analytics\prediction $prediction) {
    $analyser = $this->get_analyser();

    // Sample id and context id come from the prediction record, the sample data from the prediction object.
    $sampleid = $prediction->get_prediction_data()->sampleid;
    $contextid = $prediction->get_prediction_data()->contextid;
    $sampledata = $prediction->get_sample_data();

    return $analyser->sample_description($sampleid, $contextid, $sampledata);
}
1095
1096 /**
1097 * Returns the output directory for prediction processors.
1098 *
1099 * Directory structure as follows:
1100 * - Evaluation runs:
1101 * models/$model->id/$model->version/evaluation/$model->timesplitting
1102 * - Training & prediction runs:
1103 * models/$model->id/$model->version/execution
1104 *
1105 * @param array $subdirs
1106 * @return string
1107 */
protected function get_output_dir($subdirs = array()) {
    global $CFG;

    $basedir = get_config('analytics', 'modeloutputdir');
    if (empty($basedir)) {
        // Apply default value.
        $basedir = rtrim($CFG->dataroot, '/') . DIRECTORY_SEPARATOR . 'models';
    }

    // Path segments: base dir, model id, model version and then every requested subdir.
    $segments = array($basedir, $this->model->id, $this->model->version);
    foreach ($subdirs as $subdir) {
        $segments[] = $subdir;
    }
    $outputdir = implode(DIRECTORY_SEPARATOR, $segments);

    make_writable_directory($outputdir);

    return $outputdir;
}
1129
1130 /**
1cc2b4ba
DM
1131 * Returns a unique id for this model.
1132 *
1133 * This id should be unique for this site.
369389c9
DM
1134 *
1135 * @return string
1136 */
public function get_unique_id() {
    global $CFG;

    if (is_null($this->uniqueid)) {
        // Hash the site identifiers together with the model id and version, and keep
        // the result on the instance so it is only computed once until it is reset.
        $parts = array($CFG->wwwroot, $CFG->dirroot, $CFG->prefix, $this->model->id, $this->model->version);
        $this->uniqueid = sha1(implode('$$', $parts));
    }

    return $this->uniqueid;
}
1151
1152 /**
1153 * Exports the model data.
1154 *
1155 * @return \stdClass
1156 */
public function export() {

    \core_analytics\manager::check_can_manage_models();

    // Work on a copy so the in-memory model record is left untouched.
    $exported = clone $this->model;
    $exported->target = $this->get_target()->get_name();

    // Only replace the time splitting value when get_time_splitting() returns something.
    $timesplitting = $this->get_time_splitting();
    if ($timesplitting) {
        $exported->timesplitting = $timesplitting->get_name();
    }

    // Replace the stored indicators with the list of their names.
    $names = array();
    foreach ($this->get_indicators() as $indicator) {
        $names[] = $indicator->get_name();
    }
    $exported->indicators = $names;

    return $exported;
}
1174
584ffa4f
DM
1175 /**
1176 * Returns the model logs data.
1177 *
1178 * @param int $limitfrom
1179 * @param int $limitnum
1180 * @return \stdClass[]
1181 */
public function get_logs($limitfrom = 0, $limitnum = 0) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    // Log entries for this model, sorted by 'timecreated DESC'.
    $conditions = array('modelid' => $this->get_id());

    return $DB->get_records('analytics_models_log', $conditions, 'timecreated DESC', '*', $limitfrom, $limitnum);
}
1190
369389c9 1191 /**
1cc2b4ba 1192 * Flag the provided file as used for training or prediction.
369389c9
DM
1193 *
1194 * @param \stored_file $file
1195 * @param string $action
1196 * @return void
1197 */
protected function flag_file_as_used(\stored_file $file, $action) {
    global $DB;

    // One row per (model, file, action), stamped with the current time.
    $usedfile = (object)array(
        'modelid' => $this->model->id,
        'fileid' => $file->get_id(),
        'action' => $action,
        'time' => time(),
    );

    $DB->insert_record('analytics_used_files', $usedfile);
}
1208
1209 /**
1cc2b4ba 1210 * Log the evaluation results in the database.
369389c9
DM
1211 *
1212 * @param string $timesplittingid
1213 * @param float $score
1214 * @param string $dir
1215 * @param array $info
1216 * @return int The inserted log id
1217 */
protected function log_result($timesplittingid, $score, $dir = false, $info = false) {
    global $DB, $USER;

    // Snapshot of the model definition at the time the result was obtained.
    $fields = array(
        'modelid' => $this->get_id(),
        'version' => $this->model->version,
        'target' => $this->model->target,
        'indicators' => $this->model->indicators,
        'timesplitting' => $timesplittingid,
        'dir' => $dir,
    );

    if ($info) {
        // Ensure it is not an associative array.
        $fields['info'] = json_encode(array_values($info));
    }

    $fields['score'] = $score;
    $fields['timecreated'] = time();
    $fields['usermodified'] = $USER->id;

    return $DB->insert_record('analytics_models_log', (object)$fields);
}
1238
1239 /**
1240 * Utility method to return indicator class names from a list of indicator objects
1241 *
1242 * @param \core_analytics\local\indicator\base[] $indicators
1243 * @return string[]
1244 */
private static function indicator_classes($indicators) {

    // What we want to check and store are the indicator classes not the keys.
    $indicatorclasses = array();

    foreach ($indicators as $indicator) {
        $isvalid = \core_analytics\manager::is_valid($indicator, '\core_analytics\local\indicator\base');

        if ($isvalid) {
            $indicatorclasses[] = $indicator->get_id();
            continue;
        }

        // Invalid indicator: turn it into something printable for the exception message.
        if (is_object($indicator)) {
            $indicator = '\\' . get_class($indicator);
        } else if (!is_scalar($indicator)) {
            $indicator = strval($indicator);
        }

        throw new \moodle_exception('errorinvalidindicator', 'analytics', '', $indicator);
    }

    return $indicatorclasses;
}
1263
1264 /**
1265 * Clears the model training and prediction data.
1266 *
1267 * Executed after updating model critical elements like the time splitting method
1268 * or the indicators.
1269 *
1270 * @return void
1271 */
private function clear_model() {
    global $DB;

    // Every table below keeps per-model rows keyed by modelid; wipe them in this order.
    $tables = array(
        'analytics_predict_ranges',
        'analytics_predictions',
        'analytics_train_samples',
        'analytics_used_files',
    );
    $conditions = array('modelid' => $this->model->id);
    foreach ($tables as $table) {
        $DB->delete_records($table, $conditions);
    }

    // We don't expect people to clear models regularly and the cost of filling the cache is
    // 1 db read per context.
    \cache::make('core', 'contextwithinsights')->purge();
}
1285
1611308b
DM
1286 /**
1287 * Increases system memory and time limits.
1288 *
1289 * @return void
1290 */
private function heavy_duty_mode() {
    // A memory_limit of -1 is left alone; any other value is raised to MEMORY_HUGE.
    $nomemorylimit = (ini_get('memory_limit') == -1);
    if (!$nomemorylimit) {
        raise_memory_limit(MEMORY_HUGE);
    }

    \core_php_time_limit::raise();
}
369389c9 1297}