MDL-57791 analytics: Changes after review
[moodle.git] / analytics / classes / model.php
CommitLineData
369389c9
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
b94dbb55 18 * Prediction model representation.
369389c9
DM
19 *
20 * @package core_analytics
21 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace core_analytics;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
b94dbb55 30 * Prediction model representation.
369389c9
DM
31 *
32 * @package core_analytics
33 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36class model {
37
38 const OK = 0;
39 const GENERAL_ERROR = 1;
40 const NO_DATASET = 2;
41
42 const EVALUATE_LOW_SCORE = 4;
43 const EVALUATE_NOT_ENOUGH_DATA = 8;
44
369389c9
DM
45 const ANALYSE_REJECTED_RANGE_PROCESSOR = 4;
46 const ANALYSABLE_STATUS_INVALID_FOR_RANGEPROCESSORS = 8;
47 const ANALYSABLE_STATUS_INVALID_FOR_TARGET = 16;
48
49 const MIN_SCORE = 0.7;
50 const ACCEPTED_DEVIATION = 0.05;
51 const EVALUATION_ITERATIONS = 10;
52
53 /**
54 * @var \stdClass
55 */
56 protected $model = null;
57
58 /**
59 * @var \core_analytics\local\analyser\base
60 */
61 protected $analyser = null;
62
63 /**
64 * @var \core_analytics\local\target\base
65 */
66 protected $target = null;
67
68 /**
69 * @var \core_analytics\local\indicator\base[]
70 */
71 protected $indicators = null;
72
73 /**
74 * Unique Model id created from site info and last model modification.
75 *
76 * @var string
77 */
78 protected $uniqueid = null;
79
/**
 * Builds the model wrapper from a model id or from an already loaded record.
 *
 * @param int|\stdClass $model Id of a row in analytics_models, or the row itself.
 * @return void
 */
public function __construct($model) {
    global $DB;

    if (is_scalar($model)) {
        // MUST_EXIST leaves the "record not found" error to the DML layer, so there is no
        // falsy return value left to check here.
        $model = $DB->get_record('analytics_models', array('id' => $model), '*', MUST_EXIST);
    }
    $this->model = $model;
}
97
/**
 * Returns the id of the wrapped model record.
 *
 * @return int
 */
public function get_id() {
    $record = $this->model;
    return $record->id;
}
106
/**
 * Returns the model record this instance wraps.
 *
 * @return \stdClass
 */
public function get_model_obj() {
    $record = $this->model;
    return $record;
}
115
/**
 * Returns the model target, asking the analytics manager for it on first use.
 *
 * @return \core_analytics\local\target\base
 */
public function get_target() {
    if ($this->target === null) {
        // Not resolved yet: ask the manager and keep the instance for later calls.
        $this->target = \core_analytics\manager::get_target($this->model->target);
    }

    return $this->target;
}
130
/**
 * Returns the model indicators, indexed by their full class name.
 *
 * The list is decoded from the JSON stored in the model record the first time it is requested.
 * Indicators the manager can not load are skipped with a developer debugging message.
 *
 * @throws \coding_exception If the stored indicators do not decode to an array.
 * @return \core_analytics\local\indicator\base[]
 */
public function get_indicators() {
    if ($this->indicators !== null) {
        return $this->indicators;
    }

    $classnames = json_decode($this->model->indicators);
    if (!is_array($classnames)) {
        throw new \coding_exception('Model ' . $this->model->id . ' indicators can not be read');
    }

    $this->indicators = array();
    foreach ($classnames as $classname) {
        $indicator = \core_analytics\manager::get_indicator($classname);
        if (!$indicator) {
            debugging('Can\'t load ' . $classname . ' indicator', DEBUG_DEVELOPER);
            continue;
        }
        $this->indicators[$classname] = $indicator;
    }

    return $this->indicators;
}
159
/**
 * Returns the indicators that the model analyser accepts.
 *
 * Starts from every indicator known to the analytics manager and drops the ones whose
 * requirements the analyser does not report as met.
 *
 * @return \core_analytics\local\indicator\base[]
 */
public function get_potential_indicators() {

    $candidates = \core_analytics\manager::get_all_indicators();

    // An analyser is required to check the requirements; create one in evaluation mode if none exists yet.
    if (empty($this->analyser)) {
        $this->init_analyser(array('evaluation' => true));
    }

    foreach ($candidates as $classname => $indicator) {
        $accepted = ($this->analyser->check_indicator_requirements($indicator) === true);
        if (!$accepted) {
            unset($candidates[$classname]);
        }
    }

    return $candidates;
}
182
/**
 * Returns the model analyser, initialising it with default options when it has not been set yet.
 *
 * @return \core_analytics\local\analyser\base
 */
public function get_analyser() {
    if ($this->analyser === null) {
        // Default initialisation with no options.
        $this->init_analyser();
    }

    return $this->analyser;
}
198
/**
 * Creates the analyser declared by the model target and stores it in $this->analyser.
 *
 * With the 'evaluation' option set, the analyser receives either the time splitting method given in the
 * 'timesplitting' option or all enabled time splitting methods. Otherwise it receives the model's own
 * time splitting method.
 *
 * @throws \moodle_exception If there is no target, the model has no time splitting method outside of
 *                           evaluation mode, or no time splitting methods are available.
 * @throws \coding_exception If the analyser class declared by the target does not exist.
 * @param array $options
 * @return void
 */
protected function init_analyser($options = array()) {

    $target = $this->get_target();
    $indicators = $this->get_indicators();

    if (empty($target)) {
        throw new \moodle_exception('errornotarget', 'analytics');
    }

    if (!empty($options['evaluation'])) {
        // The evaluation process will run using all available time splitting methods unless one is specified.
        if (!empty($options['timesplitting'])) {
            $timesplitting = \core_analytics\manager::get_time_splitting($options['timesplitting']);
            $timesplittings = array($timesplitting->get_id() => $timesplitting);
        } else {
            $timesplittings = \core_analytics\manager::get_enabled_time_splitting_methods();
        }
    } else {

        if (empty($this->model->timesplitting)) {
            throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
        }

        // Returned as an array as all actions (evaluation, training and prediction) go through the same process.
        $timesplittings = array($this->model->timesplitting => $this->get_time_splitting());
    }

    if (empty($timesplittings)) {
        throw new \moodle_exception('errornotimesplittings', 'analytics');
    }

    $classname = $target->get_analyser_class();
    if (!class_exists($classname)) {
        // The exception has to be instantiated; without "new" PHP looks for a function named coding_exception.
        throw new \coding_exception($classname . ' class does not exists');
    }

    // Returns a \core_analytics\local\analyser\base class.
    $this->analyser = new $classname($this->model->id, $target, $indicators, $timesplittings, $options);
}
244
/**
 * Returns the time splitting method of the model.
 *
 * @return \core_analytics\local\time_splitting\base|false False when the model has no time splitting method set.
 */
public function get_time_splitting() {
    if (!empty($this->model->timesplitting)) {
        return \core_analytics\manager::get_time_splitting($this->model->timesplitting);
    }

    return false;
}
256
/**
 * Creates a new model. Enables it if $timesplittingid is specified.
 *
 * The new record is read back after the insert so the returned model also carries the database defaults.
 * Models reported as static are flagged as trained straight away.
 *
 * @param \core_analytics\local\target\base $target
 * @param \core_analytics\local\indicator\base[] $indicators
 * @param string $timesplittingid The time splitting method id (its fully qualified class name)
 * @return \core_analytics\model
 */
public static function create(\core_analytics\local\target\base $target, array $indicators, $timesplittingid = false) {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $indicatorclasses = self::indicator_classes($indicators);
    $timestamp = time();

    $record = (object)[
        'target' => $target->get_id(),
        'indicators' => json_encode($indicatorclasses),
        'version' => $timestamp,
        'timecreated' => $timestamp,
        'timemodified' => $timestamp,
        'usermodified' => $USER->id,
    ];

    $newid = $DB->insert_record('analytics_models', $record);

    // Get db defaults.
    $record = $DB->get_record('analytics_models', array('id' => $newid), '*', MUST_EXIST);
    $model = new static($record);

    if ($timesplittingid) {
        $model->enable($timesplittingid);
    }

    if ($model->is_static()) {
        $model->mark_as_trained();
    }

    return $model;
}
299
a40952d3
DM
/**
 * Updates the enabled flag, the indicators and the time splitting method of the model.
 *
 * When the indicators or the time splitting method differ from the stored ones, the model gets a new
 * version, its predictions and generated files are removed and the trained flag is reset.
 *
 * @param int|bool $enabled
 * @param \core_analytics\local\indicator\base[] $indicators
 * @param string $timesplittingid
 * @return void
 */
public function update($enabled, $indicators, $timesplittingid = '') {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $timestamp = time();
    $encodedindicators = json_encode(self::indicator_classes($indicators));

    $sametimesplitting = ($this->model->timesplitting === $timesplittingid);
    $sameindicators = ($this->model->indicators === $encodedindicators);

    if (!$sametimesplitting || !$sameindicators) {
        // We update the version of the model so different time splittings are not mixed up.
        $this->model->version = $timestamp;

        // Delete generated predictions.
        $this->clear_model();

        // Purge all generated files.
        \core_analytics\dataset_manager::clear_model_files($this->model->id);

        // Reset trained flag.
        $this->model->trained = 0;
    }

    $this->model->enabled = intval($enabled);
    $this->model->indicators = $encodedindicators;
    $this->model->timesplitting = $timesplittingid;
    $this->model->timemodified = $timestamp;
    $this->model->usermodified = $USER->id;

    $DB->update_record('analytics_models', $this->model);

    // It needs to be reset (just in case, we may already used it).
    $this->uniqueid = null;
}
343
d16cf374
DM
/**
 * Removes the model: first whatever clear_model() cleans up, then the model record itself.
 *
 * @return void
 */
public function delete() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $this->clear_model();

    $conditions = array('id' => $this->model->id);
    $DB->delete_records('analytics_models', $conditions);
}
357
369389c9
DM
/**
 * Evaluates the model datasets.
 *
 * Model datasets should already be available in Moodle's filesystem.
 *
 * Every code path returns an array of result objects. Evaluated time splitting methods are indexed by
 * the time splitting id; the static-model and no-dataset cases return a single-element list.
 *
 * @throws \moodle_exception If the model has no indicators.
 * @param array $options
 * @return \stdClass[]
 */
public function evaluate($options = array()) {

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('noevaluationbasedassumptions', 'analytics'));
        $result = new \stdClass();
        $result->status = self::OK;
        // Wrapped in an array so callers can iterate over the results like in any other case.
        return array($result);
    }

    $options['evaluation'] = true;
    $this->init_analyser($options);

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_labelled_data call so we get an early exception if it is not ready.
    $predictor = \core_analytics\manager::get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // No datasets generated.
    if (empty($datasets)) {
        $result = new \stdClass();
        $result->status = self::NO_DATASET;
        $result->info = $this->get_analyser()->get_logs();
        return array($result);
    }

    if (!PHPUNIT_TEST && CLI_SCRIPT) {
        echo PHP_EOL . get_string('processingsitecontents', 'analytics') . PHP_EOL;
    }

    $results = array();
    foreach ($datasets as $timesplittingid => $dataset) {

        $timesplitting = \core_analytics\manager::get_time_splitting($timesplittingid);

        $result = new \stdClass();

        // Backslashes are stripped from the class name before it is used as a directory name.
        $dashestimesplittingid = str_replace('\\', '', $timesplittingid);
        $outputdir = $this->get_output_dir(array('evaluation', $dashestimesplittingid));

        // Evaluate the dataset, the deviation we accept in the results depends on the amount of iterations.
        $predictorresult = $predictor->evaluate($this->model->id, self::ACCEPTED_DEVIATION,
            self::EVALUATION_ITERATIONS, $dataset, $outputdir);

        $result->status = $predictorresult->status;
        $result->info = $predictorresult->info;

        if (isset($predictorresult->score)) {
            $result->score = $predictorresult->score;
        } else {
            // Prediction processors may return an error, default to 0 score in that case.
            $result->score = 0;
        }

        $dir = false;
        if (!empty($predictorresult->dir)) {
            $dir = $predictorresult->dir;
        }

        $result->logid = $this->log_result($timesplitting->get_id(), $result->score, $dir, $result->info);

        $results[$timesplitting->get_id()] = $result;
    }

    return $results;
}
439
/**
 * Trains the model with the labelled dataset of its time splitting method.
 *
 * Static models are not trained; an OK result is returned for them right after logging the reason.
 * When the analyser provides no dataset for the model's time splitting method the result carries the
 * NO_DATASET status and the analyser logs.
 *
 * @throws \moodle_exception If the model is disabled, has no time splitting method or has no indicators.
 * @return \stdClass Object with a status and, except for static models, an info attribute.
 */
public function train() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('notrainingbasedassumptions', 'analytics'));
        $staticresult = new \stdClass();
        $staticresult->status = self::OK;
        return $staticresult;
    }

    if (!$this->is_enabled() || empty($this->model->timesplitting)) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Both are requested before get_labelled_data so problems with the output directory
    // or the predictions processor show up early.
    $outputdir = $this->get_output_dir(array('execution'));
    $predictor = \core_analytics\manager::get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // No training if no files have been provided.
    if (empty($datasets) || empty($datasets[$this->model->timesplitting])) {
        $emptyresult = new \stdClass();
        $emptyresult->status = self::NO_DATASET;
        $emptyresult->info = $this->get_analyser()->get_logs();
        return $emptyresult;
    }

    $trainingfile = $datasets[$this->model->timesplitting];

    // Train using the dataset.
    $predictorresult = $predictor->train($this->get_unique_id(), $trainingfile, $outputdir);

    $result = new \stdClass();
    $result->status = $predictorresult->status;
    $result->info = $predictorresult->info;

    $this->flag_file_as_used($trainingfile, 'trained');

    // Mark the model as trained if it wasn't.
    if ($this->model->trained == false) {
        $this->mark_as_trained();
    }

    return $result;
}
501
/**
 * Gets predictions for the unlabelled dataset of the model's time splitting method.
 *
 * Static models calculate their predictions through the target; other models send the samples file to
 * the predictions processor. Prediction callbacks are then executed, insights are triggered when the
 * target uses them, and the samples file is flagged as predicted.
 *
 * @throws \moodle_exception If the model is disabled, has no time splitting method, has no indicators
 *                           or the samples file was already used for predictions.
 * @return \stdClass Object with status and info attributes, plus predictions when a dataset was available.
 */
public function predict() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    if (!$this->is_enabled() || empty($this->model->timesplitting)) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_unlabelled_data call so we get an early exception if it is not writable.
    $outputdir = $this->get_output_dir(array('execution'));

    // Before get_unlabelled_data call so we get an early exception if it is not ready.
    // Static models never use the predictions processor.
    if (!$this->is_static()) {
        $predictor = \core_analytics\manager::get_predictions_processor();
    }

    $unlabelled = $this->get_analyser()->get_unlabelled_data();

    // Nothing to predict if there is no samples file for the model's time splitting method.
    if (empty($unlabelled) || empty($unlabelled[$this->model->timesplitting])) {
        $emptyresult = new \stdClass();
        $emptyresult->status = self::NO_DATASET;
        $emptyresult->info = $this->get_analyser()->get_logs();
        return $emptyresult;
    }
    $samplesfile = $unlabelled[$this->model->timesplitting];

    // We need to throw an exception if we are trying to predict stuff that was already predicted.
    $usedfileconditions = array(
        'modelid' => $this->model->id,
        'fileid' => $samplesfile->get_id(),
        'action' => 'predicted'
    );
    if ($DB->get_record('analytics_used_files', $usedfileconditions)) {
        throw new \moodle_exception('erroralreadypredict', 'analytics', '', $samplesfile->get_id());
    }

    $indicatorcalculations = \core_analytics\dataset_manager::get_structured_data($samplesfile);

    // Prepare the results object.
    $result = new \stdClass();

    if (!$this->is_static()) {
        // Prediction process runs on the machine learning backend.
        $predictorresult = $predictor->predict($this->get_unique_id(), $samplesfile, $outputdir);
        $result->status = $predictorresult->status;
        $result->info = $predictorresult->info;
        $result->predictions = $this->format_predictor_predictions($predictorresult);
    } else {
        // Prediction based on assumptions.
        $result->status = \core_analytics\model::OK;
        $result->info = [];
        $result->predictions = $this->get_static_predictions($indicatorcalculations);
    }

    if ($result->predictions) {
        $samplecontexts = $this->execute_prediction_callbacks($result->predictions, $indicatorcalculations);
    }

    // $samplecontexts only exists when there were predictions, empty() covers both cases.
    if (!empty($samplecontexts) && $this->uses_insights()) {
        $this->trigger_insights($samplecontexts);
    }

    $this->flag_file_as_used($samplesfile, 'predicted');

    return $result;
}
579
/**
 * Formats the predictor results.
 *
 * Each row returned by the predictions processor is expected to hold 2 values (unique sample id and
 * prediction) or 3 values (plus the prediction score). Any other row is skipped with a developer
 * debugging message so it never ends up stored with values left over from a previous row.
 *
 * @param \stdClass $predictorresult Result returned by the predictions processor.
 * @return \stdClass[] Objects with prediction and predictionscore attributes, indexed by unique sample id.
 */
private function format_predictor_predictions($predictorresult) {

    $predictions = array();
    if (empty($predictorresult->predictions)) {
        return $predictions;
    }

    foreach ($predictorresult->predictions as $sampleinfo) {

        // We parse each prediction.
        switch (count($sampleinfo)) {
            case 1:
                // For whatever reason the predictions processor could not process this sample, we
                // skip it and do nothing with it.
                debugging($this->model->id . ' model predictions processor could not process the sample with id ' .
                    $sampleinfo[0], DEBUG_DEVELOPER);
                // "continue 2" is required: a plain "continue" inside a switch behaves like "break".
                continue 2;
            case 2:
                // Prediction processors that do not return a prediction score will have the maximum prediction
                // score.
                list($uniquesampleid, $prediction) = $sampleinfo;
                $predictionscore = 1;
                break;
            case 3:
                list($uniquesampleid, $prediction, $predictionscore) = $sampleinfo;
                break;
            default:
                // Unknown row format, there is no reliable sample id nor prediction to store.
                debugging($this->model->id . ' model predictions processor returned a sample with an unexpected ' .
                    'number of values', DEBUG_DEVELOPER);
                continue 2;
        }

        $predictions[$uniquesampleid] = (object)['prediction' => $prediction, 'predictionscore' => $predictionscore];
    }

    return $predictions;
}
618
/**
 * Execute the prediction callbacks defined by the target.
 *
 * Only the predictions for which the target reports that a callback should be triggered are stored
 * and passed to the target's prediction callback.
 *
 * @param \stdClass[] $predictions Objects with prediction and predictionscore attributes, indexed by unique sample id.
 * @param array $indicatorcalculations Indicator calculations indexed by unique sample id.
 * @return \context[] Contexts of the stored predictions, indexed by context id.
 */
protected function execute_prediction_callbacks($predictions, $indicatorcalculations) {

    // Contexts of all stored predictions; this will be used to limit which users will see those predictions.
    $contexts = array();

    foreach ($predictions as $uniquesampleid => $predicted) {

        if (!$this->get_target()->triggers_callback($predicted->prediction, $predicted->predictionscore)) {
            continue;
        }

        // The unique sample id contains both the sampleid and the rangeindex.
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        // Store the predicted values.
        $calculations = json_encode($indicatorcalculations[$uniquesampleid]);
        $context = $this->save_prediction($sampleid, $rangeindex, $predicted->prediction,
            $predicted->predictionscore, $calculations);

        // Also store all samples context to later generate insights or whatever action the target wants to perform.
        $contexts[$context->id] = $context;

        $this->get_target()->prediction_callback($this->model->id, $sampleid, $rangeindex, $context,
            $predicted->prediction, $predicted->predictionscore);
    }

    return $contexts;
}
369389c9 652
1611308b
DM
/**
 * Generates insights and updates the cache.
 *
 * The 'contextwithinsights' cache keeps, for each context id, the ids of the models with insights there.
 *
 * @param \context[] $samplecontexts
 * @return void
 */
protected function trigger_insights($samplecontexts) {

    // Notify the target that all predictions have been processed.
    $this->get_target()->generate_insight_notifications($this->model->id, $samplecontexts);

    // Update cache.
    $cache = \cache::make('core', 'contextwithinsights');
    foreach ($samplecontexts as $context) {
        $cachedmodelids = $cache->get($context->id);

        if (!$cachedmodelids) {
            // The cache is empty, but we don't know if it is empty because there are no insights
            // in this context or because cache/s have been purged, we need to be conservative and
            // "pay" 1 db read to fill up the cache.
            $models = \core_analytics\manager::get_models_with_insights($context);
            $cache->set($context->id, array_keys($models));
            continue;
        }

        if (in_array($this->get_id(), $cachedmodelids)) {
            // This model is already listed for the context.
            continue;
        }

        $cachedmodelids[] = $this->get_id();
        $cache->set($context->id, $cachedmodelids);
    }
}
680
/**
 * Get predictions from a static model.
 *
 * Samples are grouped by analysable class and range index, the target calculates each group and every
 * non-null calculation becomes a prediction with the maximum prediction score.
 *
 * $indicatorcalculations is received by reference: entries of samples the target discarded or could not
 * calculate are removed from it, so they are not stored later as calculated.
 *
 * @param array $indicatorcalculations Indicator calculations indexed by unique sample id.
 * @return \stdClass[] Objects with prediction and predictionscore attributes, indexed by unique sample id.
 */
protected function get_static_predictions(&$indicatorcalculations) {

    // Group samples by analysable for \core_analytics\local\target::calculate.
    $analysables = array();
    // List all sampleids together.
    $sampleids = array();

    foreach ($indicatorcalculations as $uniquesampleid => $indicators) {
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
        $analysableclass = get_class($analysable);
        if (empty($analysables[$analysableclass])) {
            $analysables[$analysableclass] = array();
        }
        if (empty($analysables[$analysableclass][$rangeindex])) {
            $analysables[$analysableclass][$rangeindex] = (object)[
                'analysable' => $analysable,
                'indicatorsdata' => array(),
                'sampleids' => array()
            ];
        }
        // Using the sampleid as a key so we can easily merge indicators data later.
        $analysables[$analysableclass][$rangeindex]->indicatorsdata[$sampleid] = $indicators;
        // We could use indicatorsdata keys but the amount of redundant data is not that big and leaves code below cleaner.
        $analysables[$analysableclass][$rangeindex]->sampleids[$sampleid] = $sampleid;

        // Accumulate sample ids to get all their associated data in 1 single db query (analyser::get_samples).
        $sampleids[$sampleid] = $sampleid;
    }

    // Get all samples data.
    list($sampleids, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Calculate the targets. Initialised here so an empty array is returned when nothing gets calculated.
    $predictions = array();
    foreach ($analysables as $analysableclass => $rangedata) {
        foreach ($rangedata as $rangeindex => $data) {

            // Keep the complete list of samples of this group, the target may drop some of them below.
            $groupsampleids = $data->sampleids;

            // Attach samples data and calculated indicators data.
            $this->get_target()->clear_sample_data();
            $this->get_target()->add_sample_data($samplesdata);
            $this->get_target()->add_sample_data($data->indicatorsdata);

            // Append new elements (we can not get duplicates because sample-analysable relation is N-1).
            $range = $this->get_time_splitting()->get_range_by_index($rangeindex);
            $this->get_target()->filter_out_invalid_samples($data->sampleids, $data->analysable, false);
            $calculations = $this->get_target()->calculate($data->sampleids, $data->analysable, $range['start'], $range['end']);

            // Samples of this group without a calculation were discarded by the target, or could not be
            // calculated (null). We need to remove them from $indicatorcalculations because otherwise they
            // will be stored as calculated by self::save_prediction. Only this group's samples are checked
            // so the calculations of other analysables and ranges are left untouched.
            foreach ($groupsampleids as $sampleid) {
                if (!isset($calculations[$sampleid])) {
                    $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);
                    unset($indicatorcalculations[$uniquesampleid]);
                }
            }

            foreach ($calculations as $sampleid => $value) {

                // Null means that the target couldn't calculate the sample, already removed above.
                if (is_null($value)) {
                    continue;
                }

                $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);

                // Even if static predictions are based on assumptions we flag them as 100% because they are 100%
                // true according to what the developer defined.
                $predictions[$uniquesampleid] = (object)['prediction' => $value, 'predictionscore' => 1];
            }
        }
    }
    return $predictions;
}
765
369389c9
DM
/**
 * Stores a prediction in the analytics_predictions table.
 *
 * @param int $sampleid
 * @param int $rangeindex
 * @param int $prediction
 * @param float $predictionscore
 * @param string $calculations The indicator calculations, already encoded by the caller.
 * @return \context The context the analyser reports for the sample.
 */
protected function save_prediction($sampleid, $rangeindex, $prediction, $predictionscore, $calculations) {
    global $DB;

    $context = $this->get_analyser()->sample_access_context($sampleid);

    $row = (object)[
        'modelid' => $this->model->id,
        'contextid' => $context->id,
        'sampleid' => $sampleid,
        'rangeindex' => $rangeindex,
        'prediction' => $prediction,
        'predictionscore' => $predictionscore,
        'calculations' => $calculations,
        'timecreated' => time(),
    ];
    $DB->insert_record('analytics_predictions', $row);

    return $context;
}
794
/**
 * Enables the model, optionally switching it to another time splitting method.
 *
 * A time splitting id different from the current one is validated and, once accepted, stored together
 * with a new model version.
 *
 * @throws \moodle_exception If $timesplittingid is not a valid time splitting class or does not start with a backslash.
 * @param string $timesplittingid
 * @return void
 */
public function enable($timesplittingid = false) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $timestamp = time();

    $switching = ($timesplittingid && $timesplittingid !== $this->model->timesplitting);
    if ($switching) {

        $valid = \core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base');
        if (!$valid || substr($timesplittingid, 0, 1) !== '\\') {
            throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
        }

        $this->model->timesplitting = $timesplittingid;
        $this->model->version = $timestamp;
    }

    // Note that timemodified is refreshed on every call, also when the time splitting method stays the same.
    $this->model->enabled = 1;
    $this->model->timemodified = $timestamp;

    $DB->update_record('analytics_models', $this->model);

    // It needs to be reset (just in case, we may already used it).
    $this->uniqueid = null;
}
830
a40952d3
DM
/**
 * Whether the model is static, which is the case when its target is based on assumptions.
 *
 * @return bool
 */
public function is_static() {
    $basedonassumptions = $this->get_target()->based_on_assumptions();
    return (bool)$basedonassumptions;
}
839
369389c9
DM
/**
 * Whether the model record is flagged as enabled.
 *
 * @return bool
 */
public function is_enabled() {
    $enabled = $this->model->enabled;
    return (bool)$enabled;
}
848
/**
 * Whether the model can be considered trained.
 *
 * @return bool True if the record is flagged as trained or the model is static.
 */
public function is_trained() {
    if ($this->model->trained) {
        return true;
    }

    // Models which targets are based on assumptions do not need training.
    return $this->is_static();
}
858
/**
 * Flags the model as trained and saves the model record.
 *
 * @return void
 */
public function mark_as_trained() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $record = $this->model;
    $record->trained = 1;
    $DB->update_record('analytics_models', $record);
}
872
/**
 * Returns the distinct context ids that have predictions of this model.
 *
 * @return \stdClass[] Records containing a contextid attribute.
 */
public function get_predictions_contexts() {
    global $DB;

    $params = array($this->model->id);
    return $DB->get_records_sql("SELECT DISTINCT contextid FROM {analytics_predictions} WHERE modelid = ?", $params);
}
884
f9e7447f
DM
/**
 * Has this model generated predictions?
 *
 * We don't check analytics_predictions table because targets have the ability to
 * ignore some predicted values, if that is the case predictions are not even stored
 * in db. The analytics_predict_ranges table is checked instead, for the model's
 * current time splitting method.
 *
 * @return bool
 */
public function any_prediction_obtained() {
    global $DB;

    $conditions = array(
        'modelid' => $this->model->id,
        'timesplitting' => $this->model->timesplitting
    );
    return $DB->record_exists('analytics_predict_ranges', $conditions);
}
899
/**
 * Whether this model generates insights or not (defined by the model's target).
 *
 * @return bool
 */
public function uses_insights() {
    // uses_insights is called statically on the class of the target instance.
    $target = $this->get_target();
    $usesinsights = $target::uses_insights();
    return $usesinsights;
}
909
369389c9
DM
/**
 * Whether predictions exist for this context.
 *
 * @param \context $context
 * @return bool
 */
public function predictions_exist(\context $context) {
    global $DB;

    // Any prediction of this model in the context counts, regardless of its time range.
    $params = array('modelid' => $this->model->id, 'contextid' => $context->id);
    return $DB->record_exists_select('analytics_predictions', "modelid = :modelid AND contextid = :contextid", $params);
}
924
/**
 * Gets the predictions for this context.
 *
 * Only the prediction with the highest range index of each sample is returned, and predictions whose
 * sample data is not available any more are left out.
 *
 * @param \context $context
 * @return \core_analytics\prediction[] Indexed by prediction id.
 */
public function get_predictions(\context $context) {
    global $DB;

    \core_analytics\manager::check_can_list_insights($context);

    // Filters out previous predictions keeping only the last time range one.
    $sql = "SELECT tip.*
              FROM {analytics_predictions} tip
              JOIN (
                SELECT sampleid, max(rangeindex) AS rangeindex
                  FROM {analytics_predictions}
                 WHERE modelid = ? and contextid = ?
                GROUP BY sampleid
              ) tipsub
                ON tip.sampleid = tipsub.sampleid AND tip.rangeindex = tipsub.rangeindex
             WHERE tip.modelid = ? and tip.contextid = ?";
    $params = array($this->model->id, $context->id, $this->model->id, $context->id);

    $records = $DB->get_records_sql($sql, $params);
    if (!$records) {
        return array();
    }

    // Get predicted samples' ids.
    $sampleids = array();
    foreach ($records as $predictionid => $record) {
        $sampleids[$predictionid] = $record->sampleid;
    }

    list($unused, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Add samples data as part of each prediction.
    foreach ($records as $predictionid => $record) {

        // Filter out predictions which samples are not available anymore.
        if (empty($samplesdata[$record->sampleid])) {
            unset($records[$predictionid]);
            continue;
        }

        // Replace stdClass object by \core_analytics\prediction objects.
        $records[$predictionid] = new \core_analytics\prediction($record, $samplesdata[$record->sampleid]);
    }

    return $records;
}
978
/**
 * Retrieves the sample data that belongs to a prediction record.
 *
 * @throws \moodle_exception If the analyser returns no data for the sample.
 * @param \stdClass $predictionobj Prediction record, only its sampleid property is read.
 * @return array
 */
public function prediction_sample_data($predictionobj) {

    $sampleid = $predictionobj->sampleid;

    list($unused, $samplesdata) = $this->get_analyser()->get_samples(array($sampleid));

    if (!empty($samplesdata[$sampleid])) {
        return $samplesdata[$sampleid];
    }

    throw new \moodle_exception('errorsamplenotavailable', 'analytics');
}
995
/**
 * Asks the model analyser to describe the sample a prediction refers to.
 *
 * The analyser receives the prediction's sample id, its context id and its sample data.
 *
 * @param \core_analytics\prediction $prediction
 * @return array 2 elements: list(string, \renderable)
 */
public function prediction_sample_description(\core_analytics\prediction $prediction) {
    $analyser = $this->get_analyser();
    $predictiondata = $prediction->get_prediction_data();

    return $analyser->sample_description($predictiondata->sampleid, $predictiondata->contextid,
        $prediction->get_sample_data());
}
1006
/**
 * Returns the directory where prediction processors write their output, creating it if needed.
 *
 * The path is composed as: base directory / model id / model version / each of $subdirs.
 * The base directory is the 'modeloutputdir' analytics setting, falling back to a 'models'
 * directory under $CFG->dataroot when the setting is empty.
 *
 * Directory structure as follows:
 * - Evaluation runs:
 *   models/$model->id/$model->version/evaluation/$model->timesplitting
 * - Training & prediction runs:
 *   models/$model->id/$model->version/execution
 *
 * @param array $subdirs Extra path segments appended, in order, after the model version.
 * @return string
 */
protected function get_output_dir($subdirs = array()) {
    global $CFG;

    $basedir = get_config('analytics', 'modeloutputdir');
    if (empty($basedir)) {
        // Apply default value.
        $basedir = rtrim($CFG->dataroot, '/') . DIRECTORY_SEPARATOR . 'models';
    }

    // Base dir, then model id and version, then the requested subdirs.
    $segments = array($basedir, $this->model->id, $this->model->version);
    foreach ($subdirs as $subdir) {
        $segments[] = $subdir;
    }
    $outputdir = implode(DIRECTORY_SEPARATOR, $segments);

    make_writable_directory($outputdir);

    return $outputdir;
}
1040
/**
 * Returns an identifier for this model on this site, computing it the first time it is requested.
 *
 * The identifier is the sha1 hash of $CFG->wwwroot, $CFG->dirroot, $CFG->prefix, the model id
 * and the model version joined by '$$'. Once computed it is kept in $this->uniqueid and reused.
 *
 * @return string
 */
public function get_unique_id() {
    global $CFG;

    if ($this->uniqueid === null) {
        // Site-specific values plus the model id and version, so the hash differs between sites,
        // models and model versions.
        $components = array($CFG->wwwroot, $CFG->dirroot, $CFG->prefix, $this->model->id, $this->model->version);
        $this->uniqueid = sha1(implode('$$', $components));
    }

    return $this->uniqueid;
}
1060
/**
 * Exports the model data.
 *
 * Works on a copy of the model record where the target, the time splitting method (only if
 * there is one) and the indicators are replaced by the values of their get_name() methods.
 * The manager's model management check is run first.
 *
 * @return \stdClass
 */
public function export() {

    \core_analytics\manager::check_can_manage_models();

    // Clone it so the changes below do not affect the record this object holds.
    $exported = clone $this->model;
    $exported->target = $this->get_target()->get_name();

    $timesplitting = $this->get_time_splitting();
    if ($timesplitting) {
        $exported->timesplitting = $timesplitting->get_name();
    }

    $indicatornames = array();
    foreach ($this->get_indicators() as $indicator) {
        $indicatornames[] = $indicator->get_name();
    }
    $exported->indicators = $indicatornames;

    return $exported;
}
1083
584ffa4f
DM
/**
 * Returns the log records of this model, most recently created first.
 *
 * The manager's model management check is run before reading the records.
 *
 * @param int $limitfrom Passed to the database query as is.
 * @param int $limitnum Passed to the database query as is.
 * @return \stdClass[]
 */
public function get_logs($limitfrom = 0, $limitnum = 0) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $conditions = array('modelid' => $this->get_id());

    return $DB->get_records('analytics_models_log', $conditions, 'timecreated DESC', '*', $limitfrom, $limitnum);
}
1099
369389c9
DM
/**
 * Records that this model has used a file for the provided action.
 *
 * Inserts a row in analytics_used_files with the model id, the file id, the action
 * and the current time.
 *
 * @param \stored_file $file
 * @param string $action
 * @return void
 */
protected function flag_file_as_used(\stored_file $file, $action) {
    global $DB;

    $usedfile = (object) array(
        'modelid' => $this->model->id,
        'fileid' => $file->get_id(),
        'action' => $action,
        'time' => time(),
    );

    $DB->insert_record('analytics_used_files', $usedfile);
}
1117
/**
 * Stores a result in the model log.
 *
 * The log row includes the current model version, target and indicators, the current
 * time and the id of the current user.
 *
 * @param string $timesplittingid
 * @param float $score
 * @param string|false $dir Stored as received.
 * @param array|false $info Only stored when not empty, JSON encoded after discarding its keys.
 * @return int The inserted log id
 */
protected function log_result($timesplittingid, $score, $dir = false, $info = false) {
    global $DB, $USER;

    $fields = array(
        'modelid' => $this->get_id(),
        'version' => $this->model->version,
        'target' => $this->model->target,
        'indicators' => $this->model->indicators,
        'timesplitting' => $timesplittingid,
        'dir' => $dir,
    );

    if ($info) {
        // Ensure it is not an associative array.
        $fields['info'] = json_encode(array_values($info));
    }

    $fields['score'] = $score;
    $fields['timecreated'] = time();
    $fields['usermodified'] = $USER->id;

    return $DB->insert_record('analytics_models_log', (object) $fields);
}
1147
/**
 * Utility method to return the ids of a list of indicator objects.
 *
 * Every element is checked with \core_analytics\manager::is_valid() against
 * \core_analytics\local\indicator\base before its get_id() value is collected.
 *
 * @throws \moodle_exception errorinvalidindicator as soon as an invalid element is found.
 * @param \core_analytics\local\indicator\base[] $indicators
 * @return string[] The get_id() value of each indicator, in the order they were received.
 */
private static function indicator_classes($indicators) {

    // What we want to check and store are the indicator ids not the keys.
    $indicatorclasses = array();
    foreach ($indicators as $indicator) {
        if (!\core_analytics\manager::is_valid($indicator, '\core_analytics\local\indicator\base')) {
            // Describe the offending value for the error message: objects are reported by class
            // name and scalars as they are. Arrays, null and resources are reported by type because
            // converting them to string is not safe (strval() on an array triggers an
            // "Array to string conversion" error and only returns "Array").
            if (is_object($indicator)) {
                $indicator = get_class($indicator);
            } else if (!is_scalar($indicator)) {
                $indicator = gettype($indicator);
            }
            throw new \moodle_exception('errorinvalidindicator', 'analytics', '', $indicator);
        }
        $indicatorclasses[] = $indicator->get_id();
    }

    return $indicatorclasses;
}
1172
/**
 * Clears the model training and prediction data.
 *
 * Executed after updating model critical elements like the time splitting method
 * or the indicators.
 *
 * Deletes this model's rows from analytics_predict_ranges, analytics_predictions,
 * analytics_train_samples and analytics_used_files, then purges the
 * core/contextwithinsights cache.
 *
 * @return void
 */
private function clear_model() {
    global $DB;

    $conditions = array('modelid' => $this->model->id);

    $DB->delete_records('analytics_predict_ranges', $conditions);
    $DB->delete_records('analytics_predictions', $conditions);
    $DB->delete_records('analytics_train_samples', $conditions);
    $DB->delete_records('analytics_used_files', $conditions);

    // We don't expect people to clear models regularly and the cost of filling the cache is
    // 1 db read per context.
    \cache::make('core', 'contextwithinsights')->purge();
}
1194
1611308b
DM
/**
 * Increases system memory and time limits.
 *
 * The memory limit is only raised (to MEMORY_HUGE) when the current memory_limit
 * setting is not -1; the time limit is always raised.
 *
 * @return void
 */
private function heavy_duty_mode() {
    $memorylimit = ini_get('memory_limit');

    // A memory_limit of -1 means PHP applies no limit, so there is nothing to raise.
    if ($memorylimit != -1) {
        raise_memory_limit(MEMORY_HUGE);
    }

    \core_php_time_limit::raise();
}
369389c9 1206}