MDL-59211 analytics: Make cibot happy
[moodle.git] / analytics / classes / model.php
CommitLineData
369389c9
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
b94dbb55 18 * Prediction model representation.
369389c9
DM
19 *
20 * @package core_analytics
21 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace core_analytics;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
b94dbb55 30 * Prediction model representation.
369389c9
DM
31 *
32 * @package core_analytics
33 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36class model {
37
413f19bc
DM
38 /**
39 * All as expected.
40 */
369389c9 41 const OK = 0;
413f19bc
DM
42
43 /**
44 * There was a problem.
45 */
369389c9 46 const GENERAL_ERROR = 1;
413f19bc
DM
47
48 /**
49 * No dataset to analyse.
50 */
369389c9
DM
51 const NO_DATASET = 2;
52
413f19bc
DM
53 /**
54 * Model with low prediction accuracy.
55 */
369389c9 56 const EVALUATE_LOW_SCORE = 4;
413f19bc
DM
57
58 /**
59 * Not enough data to evaluate the model properly.
60 */
369389c9
DM
61 const EVALUATE_NOT_ENOUGH_DATA = 8;
62
413f19bc
DM
63 /**
64 * Invalid analysable for the time splitting method.
65 */
66 const ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD = 4;
67
68 /**
69 * Invalid analysable for all time splitting methods.
70 */
369389c9 71 const ANALYSABLE_STATUS_INVALID_FOR_RANGEPROCESSORS = 8;
413f19bc
DM
72
73 /**
74 * Invalid analysable for the target
75 */
369389c9
DM
76 const ANALYSABLE_STATUS_INVALID_FOR_TARGET = 16;
77
413f19bc
DM
78 /**
79 * Minimum score to consider a non-static prediction model as good.
80 */
369389c9 81 const MIN_SCORE = 0.7;
413f19bc
DM
82
83 /**
84 * Maximum standard deviation between different evaluation repetitions to consider that evaluation results are stable.
85 */
369389c9 86 const ACCEPTED_DEVIATION = 0.05;
413f19bc
DM
87
88 /**
89 * Number of evaluation repetitions.
90 */
369389c9
DM
91 const EVALUATION_ITERATIONS = 10;
92
93 /**
94 * @var \stdClass
95 */
96 protected $model = null;
97
98 /**
99 * @var \core_analytics\local\analyser\base
100 */
101 protected $analyser = null;
102
103 /**
104 * @var \core_analytics\local\target\base
105 */
106 protected $target = null;
107
108 /**
109 * @var \core_analytics\local\indicator\base[]
110 */
111 protected $indicators = null;
112
113 /**
114 * Unique Model id created from site info and last model modification.
115 *
116 * @var string
117 */
118 protected $uniqueid = null;
119
/**
 * Wraps an analytics model record.
 *
 * When a scalar is received it is treated as the model id and the matching
 * row is read from the analytics_models table.
 *
 * @param int|\stdClass $model Model id or the already loaded model record.
 */
public function __construct($model) {
    global $DB;

    if (is_scalar($model)) {
        // Keep the requested id in its own variable so it is still available for error reporting
        // once the record has been fetched.
        $modelid = $model;
        $model = $DB->get_record('analytics_models', array('id' => $modelid), '*', MUST_EXIST);
        if (!$model) {
            throw new \moodle_exception('errorunexistingmodel', 'analytics', '', $modelid);
        }
    }
    $this->model = $model;
}
137
/**
 * Returns the id stored in the wrapped model record.
 *
 * @return int
 */
public function get_id() {
    $record = $this->model;
    return $record->id;
}
146
/**
 * Returns the wrapped model record as it is currently held by this instance.
 *
 * @return \stdClass
 */
public function get_model_obj() {
    $record = $this->model;
    return $record;
}
155
/**
 * Returns the model target, asking the analytics manager for it the first time.
 *
 * The instance is kept in $this->target so later calls reuse it.
 *
 * @return \core_analytics\local\target\base
 */
public function get_target() {
    if ($this->target === null) {
        $this->target = \core_analytics\manager::get_target($this->model->target);
    }

    return $this->target;
}
170
/**
 * Returns the indicators used by this model, indexed by their full class name.
 *
 * The list is decoded from the JSON stored in the model record; classes the
 * analytics manager can not load are reported through debugging() and skipped.
 *
 * @throws \coding_exception If the stored indicators do not decode to an array.
 * @return \core_analytics\local\indicator\base[]
 */
public function get_indicators() {
    if ($this->indicators !== null) {
        return $this->indicators;
    }

    $classnames = json_decode($this->model->indicators);
    if (!is_array($classnames)) {
        throw new \coding_exception('Model ' . $this->model->id . ' indicators can not be read');
    }

    $this->indicators = [];
    foreach ($classnames as $classname) {
        $indicator = \core_analytics\manager::get_indicator($classname);
        if (!$indicator) {
            debugging('Can\'t load ' . $classname . ' indicator', DEBUG_DEVELOPER);
            continue;
        }
        $this->indicators[$classname] = $indicator;
    }

    return $this->indicators;
}
199
/**
 * Lists the indicators the model's analyser accepts.
 *
 * Starts from every indicator known to the analytics manager and keeps only
 * those whose requirements check returns exactly true.
 *
 * @return \core_analytics\local\indicator\base[]
 */
public function get_potential_indicators() {

    $candidates = \core_analytics\manager::get_all_indicators();

    // An analyser is needed for the requirements check, it is initialised in evaluation mode if there is none yet.
    if (empty($this->analyser)) {
        $this->init_analyser(['evaluation' => true]);
    }

    $accepted = [];
    foreach ($candidates as $classname => $indicator) {
        if ($this->analyser->check_indicator_requirements($indicator) === true) {
            $accepted[$classname] = $indicator;
        }
    }
    return $accepted;
}
222
/**
 * Returns the model analyser, initialising it with no options if it was not set yet.
 *
 * @return \core_analytics\local\analyser\base
 */
public function get_analyser() {
    if ($this->analyser === null) {
        // Default initialisation with no options.
        $this->init_analyser();
    }

    return $this->analyser;
}
238
/**
 * Instantiates the analyser class defined by the target and stores it in $this->analyser.
 *
 * With $options['evaluation'] set, the analyser receives either the time splitting method
 * named in $options['timesplitting'] or all the enabled ones. Otherwise it receives the
 * time splitting method stored in the model record.
 *
 * @throws \moodle_exception If there is no target, or no usable time splitting method.
 * @throws \coding_exception If the analyser class returned by the target does not exist.
 * @param array $options Supported keys read here: 'evaluation' and 'timesplitting'. The whole array is passed to the analyser.
 * @return void
 */
protected function init_analyser($options = array()) {

    $target = $this->get_target();
    $indicators = $this->get_indicators();

    if (empty($target)) {
        throw new \moodle_exception('errornotarget', 'analytics');
    }

    if (!empty($options['evaluation'])) {
        // The evaluation process will run using all available time splitting methods unless one is specified.
        if (!empty($options['timesplitting'])) {
            $timesplitting = \core_analytics\manager::get_time_splitting($options['timesplitting']);
            $timesplittings = array($timesplitting->get_id() => $timesplitting);
        } else {
            $timesplittings = \core_analytics\manager::get_enabled_time_splitting_methods();
        }
    } else {

        if (empty($this->model->timesplitting)) {
            throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
        }

        // Returned as an array as all actions (evaluation, training and prediction) go through the same process.
        $timesplittings = array($this->model->timesplitting => $this->get_time_splitting());
    }

    if (empty($timesplittings)) {
        throw new \moodle_exception('errornotimesplittings', 'analytics');
    }

    $classname = $target->get_analyser_class();
    if (!class_exists($classname)) {
        // The exception has to be instantiated, "throw \coding_exception(...)" would try to call a function.
        throw new \coding_exception($classname . ' class does not exists');
    }

    // Returns a \core_analytics\local\analyser\base class.
    $this->analyser = new $classname($this->model->id, $target, $indicators, $timesplittings, $options);
}
284
/**
 * Returns the time splitting method set in the model record.
 *
 * @return \core_analytics\local\time_splitting\base|false False when the model has no time splitting method.
 */
public function get_time_splitting() {
    if (!empty($this->model->timesplitting)) {
        return \core_analytics\manager::get_time_splitting($this->model->timesplitting);
    }
    return false;
}
296
/**
 * Inserts a new model record and returns its model instance.
 *
 * The model is enabled if $timesplittingid is provided, and it is flagged as
 * trained straight away when it is a static model.
 *
 * @param \core_analytics\local\target\base $target
 * @param \core_analytics\local\indicator\base[] $indicators
 * @param string $timesplittingid The time splitting method id (its fully qualified class name)
 * @return \core_analytics\model
 */
public static function create(\core_analytics\local\target\base $target, array $indicators, $timesplittingid = false) {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $indicatorclasses = self::indicator_classes($indicators);
    $timestamp = time();

    $record = new \stdClass();
    $record->target = $target->get_id();
    $record->indicators = json_encode($indicatorclasses);
    $record->version = $timestamp;
    $record->timecreated = $timestamp;
    $record->timemodified = $timestamp;
    $record->usermodified = $USER->id;

    $newid = $DB->insert_record('analytics_models', $record);

    // Read the record back so the columns not set above are also available.
    $record = $DB->get_record('analytics_models', ['id' => $newid], '*', MUST_EXIST);

    $instance = new static($record);

    if ($timesplittingid) {
        $instance->enable($timesplittingid);
    }

    if ($instance->is_static()) {
        $instance->mark_as_trained();
    }

    return $instance;
}
339
a40952d3
DM
/**
 * Updates the enabled flag, the indicators and the time splitting method of the model.
 *
 * If the indicators or the time splitting method change, the model gets a new version,
 * its stored predictions and generated files are cleared and it is flagged as not trained.
 *
 * @param int|bool $enabled
 * @param \core_analytics\local\indicator\base[] $indicators
 * @param string $timesplittingid
 * @return void
 */
public function update($enabled, $indicators, $timesplittingid = '') {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $timestamp = time();

    $encodedindicators = json_encode(self::indicator_classes($indicators));

    $timesplittingchanged = ($this->model->timesplitting !== $timesplittingid);
    $indicatorschanged = ($this->model->indicators !== $encodedindicators);

    if ($timesplittingchanged || $indicatorschanged) {
        // A new version prevents data generated with different settings from being mixed up.
        $this->model->version = $timestamp;

        // Remove what was generated using the previous settings: predictions first, files afterwards.
        $this->clear_model();
        \core_analytics\dataset_manager::clear_model_files($this->model->id);

        // The model has to be trained again.
        $this->model->trained = 0;
    }

    $this->model->enabled = intval($enabled);
    $this->model->indicators = $encodedindicators;
    $this->model->timesplitting = $timesplittingid;
    $this->model->timemodified = $timestamp;
    $this->model->usermodified = $USER->id;

    $DB->update_record('analytics_models', $this->model);

    // Drop the cached unique id, it may have been calculated before this update.
    $this->uniqueid = null;
}
383
d16cf374
DM
/**
 * Deletes the model record after clearing the model through clear_model().
 *
 * @return void
 */
public function delete() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $this->clear_model();

    $conditions = ['id' => $this->model->id];
    $DB->delete_records('analytics_models', $conditions);
}
397
369389c9
DM
/**
 * Evaluates the model datasets.
 *
 * Every dataset returned by the analyser (one per time splitting method) is sent to the
 * predictions processor, and the outcome is logged through log_result().
 *
 * The return value is always an array of result objects: it is indexed by time splitting
 * method id when datasets were evaluated, and it contains a single result otherwise
 * (static models and no datasets available).
 *
 * @throws \moodle_exception If the model has no indicators.
 * @param array $options Passed to the analyser; 'evaluation' is always forced to true.
 * @return \stdClass[]
 */
public function evaluate($options = array()) {

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('noevaluationbasedassumptions', 'analytics'));
        $result = new \stdClass();
        $result->status = self::OK;
        // Wrapped in an array like all other return paths so callers can always iterate over the results.
        return array($result);
    }

    $options['evaluation'] = true;
    $this->init_analyser($options);

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_labelled_data call so we get an early exception if it is not ready.
    $predictor = \core_analytics\manager::get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // No datasets generated.
    if (empty($datasets)) {
        $result = new \stdClass();
        $result->status = self::NO_DATASET;
        $result->info = $this->get_analyser()->get_logs();
        return array($result);
    }

    if (!PHPUNIT_TEST && CLI_SCRIPT) {
        echo PHP_EOL . get_string('processingsitecontents', 'analytics') . PHP_EOL;
    }

    $results = array();
    foreach ($datasets as $timesplittingid => $dataset) {

        $timesplitting = \core_analytics\manager::get_time_splitting($timesplittingid);

        $result = new \stdClass();

        // Backslashes are stripped from the class name before using it as part of the output directory.
        $dashestimesplittingid = str_replace('\\', '', $timesplittingid);
        $outputdir = $this->get_output_dir(array('evaluation', $dashestimesplittingid));

        // Evaluate the dataset, the deviation we accept in the results depends on the amount of iterations.
        $predictorresult = $predictor->evaluate($this->model->id, self::ACCEPTED_DEVIATION,
            self::EVALUATION_ITERATIONS, $dataset, $outputdir);

        $result->status = $predictorresult->status;
        $result->info = $predictorresult->info;

        if (isset($predictorresult->score)) {
            $result->score = $predictorresult->score;
        } else {
            // Prediction processors may return an error, default to 0 score in that case.
            $result->score = 0;
        }

        $dir = false;
        if (!empty($predictorresult->dir)) {
            $dir = $predictorresult->dir;
        }

        $result->logid = $this->log_result($timesplitting->get_id(), $result->score, $dir, $result->info);

        $results[$timesplitting->get_id()] = $result;
    }

    return $results;
}
479
/**
 * Trains the model using the labelled dataset of its time splitting method.
 *
 * Static models are not trained, a log entry is added and an OK result is returned.
 * When the analyser provides no dataset for the model's time splitting method the
 * result has NO_DATASET status and the analyser logs as info.
 *
 * @throws \moodle_exception If the model is disabled, has no time splitting method or has no indicators.
 * @return \stdClass Result object with status and, except for static models, info.
 */
public function train() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $result = new \stdClass();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('notrainingbasedassumptions', 'analytics'));
        $result->status = self::OK;
        return $result;
    }

    if (empty($this->model->timesplitting) || !$this->is_enabled()) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Both are requested before get_labelled_data so problems with the output directory or
    // with the predictions processor show up early.
    $outputdir = $this->get_output_dir(['execution']);
    $predictor = \core_analytics\manager::get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();
    $timesplittingid = $this->model->timesplitting;

    // Nothing to train with.
    if (empty($datasets) || empty($datasets[$timesplittingid])) {
        $result->status = self::NO_DATASET;
        $result->info = $this->get_analyser()->get_logs();
        return $result;
    }
    $trainingfile = $datasets[$timesplittingid];

    // Train using the dataset.
    $processorresult = $predictor->train($this->get_unique_id(), $trainingfile, $outputdir);

    $result->status = $processorresult->status;
    $result->info = $processorresult->info;

    $this->flag_file_as_used($trainingfile, 'trained');

    // Mark the model as trained if it wasn't.
    if ($this->model->trained == false) {
        $this->mark_as_trained();
    }

    return $result;
}
541
/**
 * Gets predictions for the unlabelled samples of the model's time splitting method.
 *
 * Static models calculate their predictions locally; other models send the samples file
 * to the predictions processor. The target callbacks are executed for the predictions
 * obtained, insights are triggered if the target uses them, and the samples file is
 * flagged as predicted.
 *
 * @throws \moodle_exception If the model is disabled, has no time splitting method or indicators,
 *                           or the samples file was already used for predictions.
 * @return \stdClass Result object with status, info and, when samples were available, predictions.
 */
public function predict() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    if (empty($this->model->timesplitting) || !$this->is_enabled()) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Requested before get_unlabelled_data so we get an early exception if it is not writable.
    $outputdir = $this->get_output_dir(['execution']);

    // Requested before get_unlabelled_data so we get an early exception if it is not ready.
    if (!$this->is_static()) {
        $predictor = \core_analytics\manager::get_predictions_processor();
    }

    $unlabelled = $this->get_analyser()->get_unlabelled_data();
    $timesplittingid = $this->model->timesplitting;

    $result = new \stdClass();

    // No samples file for this time splitting method.
    if (empty($unlabelled) || empty($unlabelled[$timesplittingid])) {
        $result->status = self::NO_DATASET;
        $result->info = $this->get_analyser()->get_logs();
        return $result;
    }
    $samplesfile = $unlabelled[$timesplittingid];

    // We need to throw an exception if we are trying to predict stuff that was already predicted.
    $conditions = [
        'modelid' => $this->model->id,
        'fileid' => $samplesfile->get_id(),
        'action' => 'predicted'
    ];
    $predicted = $DB->get_record('analytics_used_files', $conditions);
    if ($predicted) {
        throw new \moodle_exception('erroralreadypredict', 'analytics', '', $samplesfile->get_id());
    }

    $indicatorcalculations = \core_analytics\dataset_manager::get_structured_data($samplesfile);

    if ($this->is_static()) {
        // Prediction based on assumptions.
        $result->status = self::OK;
        $result->info = [];
        $result->predictions = $this->get_static_predictions($indicatorcalculations);
    } else {
        // Prediction process runs on the machine learning backend.
        $processorresult = $predictor->predict($this->get_unique_id(), $samplesfile, $outputdir);
        $result->status = $processorresult->status;
        $result->info = $processorresult->info;
        $result->predictions = $this->format_predictor_predictions($processorresult);
    }

    $samplecontexts = [];
    if ($result->predictions) {
        $samplecontexts = $this->execute_prediction_callbacks($result->predictions, $indicatorcalculations);
    }

    if (!empty($samplecontexts) && $this->uses_insights()) {
        $this->trigger_insights($samplecontexts);
    }

    $this->flag_file_as_used($samplesfile, 'predicted');

    return $result;
}
619
/**
 * Formats the predictor results.
 *
 * Each element of $predictorresult->predictions is expected to be a list with the unique
 * sample id, optionally followed by the prediction and the prediction score. Elements
 * without a prediction, or with an unexpected number of values, are skipped.
 *
 * @param \stdClass $predictorresult Predictions processor result, only its predictions property is read here.
 * @return \stdClass[] Objects with prediction and predictionscore properties, indexed by unique sample id.
 */
private function format_predictor_predictions($predictorresult) {

    $predictions = array();
    if (empty($predictorresult->predictions)) {
        return $predictions;
    }

    foreach ($predictorresult->predictions as $sampleinfo) {

        // We parse each prediction.
        switch (count($sampleinfo)) {
            case 1:
                // For whatever reason the predictions processor could not process this sample, we
                // skip it and do nothing with it.
                debugging($this->model->id . ' model predictions processor could not process the sample with id ' .
                    $sampleinfo[0], DEBUG_DEVELOPER);
                // A plain "continue" inside a switch behaves like "break", we need to target the foreach loop.
                continue 2;
            case 2:
                // Prediction processors that do not return a prediction score will have the maximum prediction
                // score.
                list($uniquesampleid, $prediction) = $sampleinfo;
                $predictionscore = 1;
                break;
            case 3:
                list($uniquesampleid, $prediction, $predictionscore) = $sampleinfo;
                break;
            default:
                // Unexpected format, there is nothing we can safely store for this element.
                debugging($this->model->id . ' model predictions processor returned a prediction in an unexpected format',
                    DEBUG_DEVELOPER);
                continue 2;
        }

        $predictiondata = (object)['prediction' => $prediction, 'predictionscore' => $predictionscore];
        $predictions[$uniquesampleid] = $predictiondata;
    }

    return $predictions;
}
658
/**
 * Stores the predictions accepted by the target and runs the target callback on each of them.
 *
 * Predictions for which the target's triggers_callback() returns a falsy value are ignored.
 *
 * @param \stdClass[] $predictions Indexed by unique sample id, with prediction and predictionscore properties.
 * @param array $indicatorcalculations Indexed by unique sample id.
 * @return array The contexts returned by save_prediction(), indexed by context id.
 */
protected function execute_prediction_callbacks($predictions, $indicatorcalculations) {

    // Contexts of all stored predictions, this will be used to limit which users will see those predictions.
    $contexts = [];

    foreach ($predictions as $uniquesampleid => $item) {

        $target = $this->get_target();
        if (!$target->triggers_callback($item->prediction, $item->predictionscore)) {
            continue;
        }

        // The unique sample id contains both the sampleid and the rangeindex.
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        // Store the predicted values.
        $calculations = json_encode($indicatorcalculations[$uniquesampleid]);
        $context = $this->save_prediction($sampleid, $rangeindex, $item->prediction,
            $item->predictionscore, $calculations);

        // Kept so insights, or any other action the target wants to perform, can be generated later.
        $contexts[$context->id] = $context;

        $this->get_target()->prediction_callback($this->model->id, $sampleid, $rangeindex, $context,
            $item->prediction, $item->predictionscore);
    }

    return $contexts;
}
369389c9 692
1611308b
DM
/**
 * Asks the target to generate insight notifications and refreshes the contextwithinsights cache.
 *
 * @param \context[] $samplecontexts
 * @return void
 */
protected function trigger_insights($samplecontexts) {

    // Notify the target that all predictions have been processed.
    $this->get_target()->generate_insight_notifications($this->model->id, $samplecontexts);

    // Update cache.
    $insightscache = \cache::make('core', 'contextwithinsights');
    foreach ($samplecontexts as $context) {
        $cachedids = $insightscache->get($context->id);

        if (!$cachedids) {
            // Nothing cached: either there are no insights in this context or the cache was purged.
            // We can not tell which one, so we "pay" 1 db read to fill up the cache.
            $withinsights = \core_analytics\manager::get_models_with_insights($context);
            $insightscache->set($context->id, array_keys($withinsights));
            continue;
        }

        if (!in_array($this->get_id(), $cachedids)) {
            $cachedids[] = $this->get_id();
            $insightscache->set($context->id, $cachedids);
        }
    }
}
720
/**
 * Get predictions from a static model.
 *
 * The target calculates the value of each sample. Samples that the target does not return
 * a value for, or returns null for, are removed from $indicatorcalculations, which is
 * received by reference.
 *
 * @param array $indicatorcalculations Indicators data indexed by unique sample id. Modified by this method.
 * @return \stdClass[] Objects with prediction and predictionscore properties, indexed by unique sample id.
 *                     An empty array if nothing could be calculated.
 */
protected function get_static_predictions(&$indicatorcalculations) {

    // Always return an array, even when there is nothing to calculate.
    $predictions = array();

    // Group samples by analysable for \core_analytics\local\target::calculate.
    $analysables = array();
    // List all sampleids together.
    $sampleids = array();

    foreach ($indicatorcalculations as $uniquesampleid => $indicators) {
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
        $analysableclass = get_class($analysable);
        if (empty($analysables[$analysableclass])) {
            $analysables[$analysableclass] = array();
        }
        // NOTE(review): groups are keyed by analysable class and range index, so only the first analysable
        // of each class is kept per range - confirm this is intended when there are several analysables.
        if (empty($analysables[$analysableclass][$rangeindex])) {
            $analysables[$analysableclass][$rangeindex] = (object)[
                'analysable' => $analysable,
                'indicatorsdata' => array(),
                'sampleids' => array()
            ];
        }
        // Using the sampleid as a key so we can easily merge indicators data later.
        $analysables[$analysableclass][$rangeindex]->indicatorsdata[$sampleid] = $indicators;
        // We could use indicatorsdata keys but the amount of redundant data is not that big and leaves code below cleaner.
        $analysables[$analysableclass][$rangeindex]->sampleids[$sampleid] = $sampleid;

        // Accumulate sample ids to get all their associated data in 1 single db query (analyser::get_samples).
        $sampleids[$sampleid] = $sampleid;
    }

    // Get all samples data.
    list($sampleids, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Calculate the targets.
    foreach ($analysables as $analysableclass => $rangedata) {
        foreach ($rangedata as $rangeindex => $data) {

            // Attach samples data and calculated indicators data.
            $this->get_target()->clear_sample_data();
            $this->get_target()->add_sample_data($samplesdata);
            $this->get_target()->add_sample_data($data->indicatorsdata);

            // Append new elements (we can not get duplicates because sample-analysable relation is N-1).
            $range = $this->get_time_splitting()->get_range_by_index($rangeindex);
            $this->get_target()->filter_out_invalid_samples($data->sampleids, $data->analysable, false);
            $calculations = $this->get_target()->calculate($data->sampleids, $data->analysable, $range['start'], $range['end']);

            // Missing $indicatorcalculations values in $calculations are caused by is_valid_sample. We need to remove
            // these $uniquesampleid from $indicatorcalculations because otherwise they will be stored as calculated
            // by self::save_prediction.
            // NOTE(review): the filter only looks at the sample id of the current group's calculations, entries
            // that belong to other groups are dropped as well - confirm against callers.
            $indicatorcalculations = array_filter($indicatorcalculations, function($indicators, $uniquesampleid) use ($calculations) {
                list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);
                if (!isset($calculations[$sampleid])) {
                    return false;
                }
                return true;
            }, ARRAY_FILTER_USE_BOTH);

            foreach ($calculations as $sampleid => $value) {

                $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);

                // Null means that the target couldn't calculate the sample, we also remove them from $indicatorcalculations.
                if (is_null($calculations[$sampleid])) {
                    unset($indicatorcalculations[$uniquesampleid]);
                    continue;
                }

                // Even if static predictions are based on assumptions we flag them as 100% because they are 100%
                // true according to what the developer defined.
                $predictions[$uniquesampleid] = (object)['prediction' => $value, 'predictionscore' => 1];
            }
        }
    }
    return $predictions;
}
805
369389c9
DM
/**
 * Inserts a prediction into the analytics_predictions table.
 *
 * The record is linked to the context the analyser returns for the sample.
 *
 * @param int $sampleid
 * @param int $rangeindex
 * @param int $prediction
 * @param float $predictionscore
 * @param string $calculations
 * @return \context The context the prediction was stored with.
 */
protected function save_prediction($sampleid, $rangeindex, $prediction, $predictionscore, $calculations) {
    global $DB;

    $context = $this->get_analyser()->sample_access_context($sampleid);

    $row = (object)[
        'modelid' => $this->model->id,
        'contextid' => $context->id,
        'sampleid' => $sampleid,
        'rangeindex' => $rangeindex,
        'prediction' => $prediction,
        'predictionscore' => $predictionscore,
        'calculations' => $calculations,
        'timecreated' => time()
    ];
    $DB->insert_record('analytics_predictions', $row);

    return $context;
}
834
/**
 * Enables the model, optionally switching it to another time splitting method.
 *
 * A new time splitting method is validated before it is stored, and storing it also
 * sets a new model version. The enabled flag and timemodified are always updated.
 *
 * @throws \moodle_exception If $timesplittingid is not valid or does not start with a backslash.
 * @param string $timesplittingid
 * @return void
 */
public function enable($timesplittingid = false) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $timestamp = time();

    $switchtimesplitting = $timesplittingid && $timesplittingid !== $this->model->timesplitting;
    if ($switchtimesplitting) {

        $isvalid = \core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base');
        if (!$isvalid) {
            throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
        }

        // Only fully qualified class names with a leading backslash are accepted.
        if (substr($timesplittingid, 0, 1) !== '\\') {
            throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
        }

        $this->model->timesplitting = $timesplittingid;
        $this->model->version = $timestamp;
    }

    $this->model->enabled = 1;
    $this->model->timemodified = $timestamp;

    $DB->update_record('analytics_models', $this->model);

    // Drop the cached unique id, it may have been calculated before this change.
    $this->uniqueid = null;
}
870
a40952d3
DM
/**
 * Whether the model target is based on assumptions.
 *
 * @return bool
 */
public function is_static() {
    $target = $this->get_target();
    return (bool)$target->based_on_assumptions();
}
879
369389c9
DM
/**
 * Whether the enabled flag of the model record is set.
 *
 * @return bool
 */
public function is_enabled() {
    $enabled = $this->model->enabled;
    return (bool)$enabled;
}
888
/**
 * Whether the model can be considered trained.
 *
 * True if the trained flag of the model record is set, or if the model is static.
 *
 * @return bool
 */
public function is_trained() {
    if ((bool)$this->model->trained) {
        return true;
    }
    // Models which targets are based on assumptions do not need training.
    return $this->is_static();
}
898
/**
 * Sets the trained flag of the model and saves the record.
 *
 * @return void
 */
public function mark_as_trained() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $record = $this->model;
    $record->trained = 1;
    $DB->update_record('analytics_models', $record);
}
912
/**
 * Returns the distinct context ids this model has stored predictions for.
 *
 * @return \stdClass[] Records with a contextid property.
 */
public function get_predictions_contexts() {
    global $DB;

    $params = [$this->model->id];
    return $DB->get_records_sql("SELECT DISTINCT contextid FROM {analytics_predictions} WHERE modelid = ?", $params);
}
924
f9e7447f
DM
/**
 * Has this model generated predictions?
 *
 * The check looks for analytics_predict_ranges records of this model and its current
 * time splitting method instead of looking at analytics_predictions: targets can ignore
 * predicted values, and ignored predictions are not stored in db.
 *
 * @return bool
 */
public function any_prediction_obtained() {
    global $DB;

    $conditions = [
        'modelid' => $this->model->id,
        'timesplitting' => $this->model->timesplitting
    ];
    return $DB->record_exists('analytics_predict_ranges', $conditions);
}
939
/**
 * Whether the model target declares that it uses insights.
 *
 * @return bool
 */
public function uses_insights() {
    $modeltarget = $this->get_target();
    // Static call on the target class, through the instance.
    return $modeltarget::uses_insights();
}
949
369389c9
DM
/**
 * Whether this model has any prediction stored for the provided context.
 *
 * @param \context $context
 * @return bool
 */
public function predictions_exist(\context $context) {
    global $DB;

    $params = [
        'modelid' => $this->model->id,
        'contextid' => $context->id
    ];
    return $DB->record_exists_select('analytics_predictions', "modelid = :modelid AND contextid = :contextid", $params);
}
964
/**
 * Gets the predictions for this context.
 *
 * Only the prediction with the highest range index of each sample is returned, and
 * predictions whose sample data is not available any more are left out.
 *
 * @param \context $context
 * @return \core_analytics\prediction[] Indexed by prediction id.
 */
public function get_predictions(\context $context) {
    global $DB;

    \core_analytics\manager::check_can_list_insights($context);

    // The subquery selects the last range index of each sample so previous predictions are filtered out.
    $sql = "SELECT tip.*
              FROM {analytics_predictions} tip
              JOIN (
                SELECT sampleid, max(rangeindex) AS rangeindex
                  FROM {analytics_predictions}
                 WHERE modelid = ? and contextid = ?
                GROUP BY sampleid
              ) tipsub
                ON tip.sampleid = tipsub.sampleid AND tip.rangeindex = tipsub.rangeindex
             WHERE tip.modelid = ? and tip.contextid = ?";
    $params = [$this->model->id, $context->id, $this->model->id, $context->id];

    $records = $DB->get_records_sql($sql, $params);
    if (!$records) {
        return [];
    }

    // Get predicted samples' ids, keeping the prediction ids as keys.
    $sampleids = [];
    foreach ($records as $recordid => $record) {
        $sampleids[$recordid] = $record->sampleid;
    }

    list($unused, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Replace each record by a \core_analytics\prediction object that includes the sample data.
    foreach ($records as $recordid => $record) {

        $sampleid = $record->sampleid;

        if (!empty($samplesdata[$sampleid])) {
            $records[$recordid] = new \core_analytics\prediction($record, $samplesdata[$sampleid]);
        } else {
            // The sample is not available any more.
            unset($records[$recordid]);
        }
    }

    return $records;
}
1018
/**
 * Returns the sample data of a prediction.
 *
 * The data is requested to the model analyser using the prediction's sample id.
 *
 * @throws \moodle_exception If the analyser returns no data for the predicted sample.
 * @param \stdClass $predictionobj Prediction record, its sampleid property is read.
 * @return array
 */
public function prediction_sample_data($predictionobj) {

    $sampleid = $predictionobj->sampleid;

    list($unused, $samplesdata) = $this->get_analyser()->get_samples(array($sampleid));

    if (!empty($samplesdata[$sampleid])) {
        return $samplesdata[$sampleid];
    }

    // The analyser did not return anything for this sample.
    throw new \moodle_exception('errorsamplenotavailable', 'analytics');
}
1035
/**
 * Returns the description of the sample a prediction refers to.
 *
 * The description is built by the model analyser from the prediction's sample id,
 * context id and sample data.
 *
 * @param \core_analytics\prediction $prediction
 * @return array 2 elements: list(string, \renderable)
 */
public function prediction_sample_description(\core_analytics\prediction $prediction) {
    $analyser = $this->get_analyser();
    $predictiondata = $prediction->get_prediction_data();

    return $analyser->sample_description($predictiondata->sampleid, $predictiondata->contextid,
        $prediction->get_sample_data());
}
1046
/**
 * Returns the output directory for prediction processors, creating it if required.
 *
 * The base directory is the 'modeloutputdir' setting of the 'analytics' component or,
 * when that setting is empty, a 'models' directory inside $CFG->dataroot.
 *
 * Directory structure as follows:
 * - Evaluation runs:
 *   models/$model->id/$model->version/evaluation/$model->timesplitting
 * - Training & prediction runs:
 *   models/$model->id/$model->version/execution
 *
 * @param array $subdirs Subdirectories to append after the model id and the model version.
 * @return string
 */
protected function get_output_dir($subdirs = array()) {
    global $CFG;

    $basedir = get_config('analytics', 'modeloutputdir');
    if (empty($basedir)) {
        // Nothing configured, apply the default value.
        $basedir = rtrim($CFG->dataroot, '/') . DIRECTORY_SEPARATOR . 'models';
    }

    // Model id and version go first, the requested subdirectories follow.
    $parts = array($basedir, $this->model->id, $this->model->version);
    foreach ($subdirs as $subdir) {
        $parts[] = $subdir;
    }
    $outputdir = implode(DIRECTORY_SEPARATOR, $parts);

    make_writable_directory($outputdir);

    return $outputdir;
}
1080
/**
 * Returns the unique id of this model, computing it the first time it is requested.
 *
 * The id is the sha1 hash of $CFG->wwwroot, $CFG->dirroot, $CFG->prefix, the model id
 * and the model version joined by '$$'. Once computed it is kept in $this->uniqueid.
 *
 * @return string
 */
public function get_unique_id() {
    global $CFG;

    if ($this->uniqueid === null) {
        // Site-specific values first, model-specific ones after them.
        $parts = array($CFG->wwwroot, $CFG->dirroot, $CFG->prefix, $this->model->id, $this->model->version);
        $this->uniqueid = sha1(implode('$$', $parts));
    }

    return $this->uniqueid;
}
1100
/**
 * Exports the model data.
 *
 * Returns a copy of the model record where the target, the time splitting method (if the
 * model has one) and the indicators are replaced by the values their get_name() methods return.
 *
 * \core_analytics\manager::check_can_manage_models() is called before exporting.
 *
 * @return \stdClass
 */
public function export() {

    \core_analytics\manager::check_can_manage_models();

    // Work on a copy so the model record itself stays untouched.
    $exported = clone $this->model;
    $exported->target = $this->get_target()->get_name();

    $timesplitting = $this->get_time_splitting();
    if ($timesplitting) {
        $exported->timesplitting = $timesplitting->get_name();
    }

    $indicatornames = array();
    foreach ($this->get_indicators() as $indicator) {
        $indicatornames[] = $indicator->get_name();
    }
    $exported->indicators = $indicatornames;

    return $exported;
}
1123
584ffa4f
DM
/**
 * Returns the model logs data, most recently created first.
 *
 * \core_analytics\manager::check_can_manage_models() is called before reading the logs.
 *
 * @param int $limitfrom Forwarded to $DB->get_records() as $limitfrom.
 * @param int $limitnum Forwarded to $DB->get_records() as $limitnum.
 * @return \stdClass[]
 */
public function get_logs($limitfrom = 0, $limitnum = 0) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $conditions = array('modelid' => $this->get_id());

    return $DB->get_records('analytics_models_log', $conditions, 'timecreated DESC', '*', $limitfrom, $limitnum);
}
1139
369389c9
DM
/**
 * Records that this model used a file for the provided action.
 *
 * A new analytics_used_files record is inserted with the model id, the file id,
 * the action and the current time.
 *
 * @param \stored_file $file
 * @param string $action
 * @return void
 */
protected function flag_file_as_used(\stored_file $file, $action) {
    global $DB;

    $record = (object) array(
        'modelid' => $this->model->id,
        'fileid' => $file->get_id(),
        'action' => $action,
        'time' => time(),
    );

    $DB->insert_record('analytics_used_files', $record);
}
1157
/**
 * Adds an entry to the model log.
 *
 * The entry stores the model id, version, target and indicators together with the provided
 * time splitting method id, score and directory, the current time and the current user id.
 *
 * @param string $timesplittingid
 * @param float $score
 * @param string|false $dir Stored as it is received.
 * @param array|false $info Only stored when not empty, JSON-encoded without its keys.
 * @return int The inserted log id
 */
protected function log_result($timesplittingid, $score, $dir = false, $info = false) {
    global $DB, $USER;

    $fields = array(
        'modelid' => $this->get_id(),
        'version' => $this->model->version,
        'target' => $this->model->target,
        'indicators' => $this->model->indicators,
        'timesplitting' => $timesplittingid,
        'dir' => $dir,
    );

    if ($info) {
        // Drop the keys to ensure it is not encoded as an associative array.
        $fields['info'] = json_encode(array_values($info));
    }

    $fields['score'] = $score;
    $fields['timecreated'] = time();
    $fields['usermodified'] = $USER->id;

    return $DB->insert_record('analytics_models_log', (object) $fields);
}
1187
/**
 * Utility method to return the ids of a list of indicator objects.
 *
 * Each element is validated with \core_analytics\manager::is_valid() against
 * \core_analytics\local\indicator\base; the value returned by its get_id() method is collected.
 *
 * @throws \moodle_exception If any of the elements is not a valid indicator.
 * @param \core_analytics\local\indicator\base[] $indicators
 * @return string[]
 */
private static function indicator_classes($indicators) {

    // What we want to check and store are the indicator classes not the keys.
    $indicatorclasses = array();
    foreach ($indicators as $indicator) {
        if (!\core_analytics\manager::is_valid($indicator, '\core_analytics\local\indicator\base')) {
            // Get something printable to identify the offending value in the error message.
            if (is_object($indicator)) {
                $indicator = get_class($indicator);
            } else if (!is_scalar($indicator)) {
                // Arrays, null and resources. Casting an array to string raises an
                // "Array to string conversion" error and only results in "Array", so
                // the type of the value is reported instead.
                $indicator = gettype($indicator);
            }
            throw new \moodle_exception('errorinvalidindicator', 'analytics', '', $indicator);
        }
        $indicatorclasses[] = $indicator->get_id();
    }

    return $indicatorclasses;
}
1212
/**
 * Clears the model training and prediction data.
 *
 * Executed after updating model critical elements like the time splitting method
 * or the indicators. This model's records are deleted from the predicted ranges,
 * predictions, training samples and used files tables, then the whole
 * 'contextwithinsights' cache is purged.
 *
 * @return void
 */
private function clear_model() {
    global $DB;

    $tables = array(
        'analytics_predict_ranges',
        'analytics_predictions',
        'analytics_train_samples',
        'analytics_used_files',
    );
    foreach ($tables as $table) {
        $DB->delete_records($table, array('modelid' => $this->model->id));
    }

    // We don't expect people to clear models regularly and the cost of filling the cache is
    // 1 db read per context.
    \cache::make('core', 'contextwithinsights')->purge();
}
1234
1611308b
DM
/**
 * Increases system memory and time limits.
 *
 * The memory limit is raised to MEMORY_HUGE unless the memory_limit ini setting is
 * already -1; the time limit is always raised through \core_php_time_limit::raise().
 *
 * @return void
 */
private function heavy_duty_mode() {
    // Loose comparison on purpose: ini_get() returns the setting as a string.
    $memoryunlimited = (ini_get('memory_limit') == -1);
    if (!$memoryunlimited) {
        raise_memory_limit(MEMORY_HUGE);
    }

    \core_php_time_limit::raise();
}
369389c9 1246}