on-demand release 3.6dev+
[moodle.git] / analytics / classes / model.php
CommitLineData
369389c9
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
b94dbb55 18 * Prediction model representation.
369389c9
DM
19 *
20 * @package core_analytics
21 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace core_analytics;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
b94dbb55 30 * Prediction model representation.
369389c9
DM
31 *
32 * @package core_analytics
33 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36class model {
37
413f19bc
DM
38 /**
39 * All as expected.
40 */
369389c9 41 const OK = 0;
413f19bc
DM
42
43 /**
44 * There was a problem.
45 */
369389c9 46 const GENERAL_ERROR = 1;
413f19bc
DM
47
48 /**
49 * No dataset to analyse.
50 */
369389c9
DM
51 const NO_DATASET = 2;
52
413f19bc
DM
53 /**
54 * Model with low prediction accuracy.
55 */
325b3bdd 56 const LOW_SCORE = 4;
413f19bc
DM
57
58 /**
59 * Not enough data to evaluate the model properly.
60 */
325b3bdd 61 const NOT_ENOUGH_DATA = 8;
369389c9 62
413f19bc
DM
63 /**
64 * Invalid analysable for the time splitting method.
65 */
66 const ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD = 4;
67
68 /**
69 * Invalid analysable for all time splitting methods.
70 */
369389c9 71 const ANALYSABLE_STATUS_INVALID_FOR_RANGEPROCESSORS = 8;
413f19bc
DM
72
73 /**
74 * Invalid analysable for the target.
75 */
369389c9
DM
76 const ANALYSABLE_STATUS_INVALID_FOR_TARGET = 16;
77
413f19bc
DM
78 /**
79 * Minimum score to consider a non-static prediction model as good.
80 */
369389c9 81 const MIN_SCORE = 0.7;
413f19bc 82
5c5cb3ee
DM
83 /**
84 * Minimum prediction confidence (from 0 to 1) to accept a prediction as reliable enough.
85 */
86 const PREDICTION_MIN_SCORE = 0.6;
87
413f19bc
DM
88 /**
89 * Maximum standard deviation between different evaluation repetitions to consider that evaluation results are stable.
90 */
369389c9 91 const ACCEPTED_DEVIATION = 0.05;
413f19bc
DM
92
93 /**
94 * Number of evaluation repetitions.
95 */
369389c9
DM
96 const EVALUATION_ITERATIONS = 10;
97
98 /**
99 * @var \stdClass
100 */
101 protected $model = null;
102
103 /**
104 * @var \core_analytics\local\analyser\base
105 */
106 protected $analyser = null;
107
108 /**
109 * @var \core_analytics\local\target\base
110 */
111 protected $target = null;
112
113 /**
114 * @var \core_analytics\local\indicator\base[]
115 */
116 protected $indicators = null;
117
118 /**
119 * Unique Model id created from site info and last model modification.
120 *
121 * @var string
122 */
123 protected $uniqueid = null;
124
/**
 * Constructor.
 *
 * Accepts either a model id, in which case the record is loaded from the
 * analytics_models table, or an already loaded model record.
 *
 * @throws \dml_exception When an id is provided and no matching record exists (raised by get_record with MUST_EXIST).
 * @param int|\stdClass $model The model id or the model record.
 */
public function __construct($model) {
    global $DB;

    if (is_scalar($model)) {
        // MUST_EXIST makes get_record() throw if the record is missing, so there is
        // no need for a separate "record not found" check after this call.
        $model = $DB->get_record('analytics_models', array('id' => $model), '*', MUST_EXIST);
    }
    $this->model = $model;
}
142
3a396286
DM
/**
 * Quick safety check to discard site models whose required components are not available anymore.
 *
 * The model is considered available when its target can be loaded and the
 * analyser class that the target declares exists.
 *
 * @return bool
 */
public function is_available() {
    $modeltarget = $this->get_target();

    // Short-circuit evaluation prevents calling get_analyser_class() on a missing target.
    return $modeltarget && class_exists($modeltarget->get_analyser_class());
}
161
/**
 * Returns the id of the wrapped model record.
 *
 * @return int
 */
public function get_id() {
    $record = $this->model;
    return $record->id;
}
170
/**
 * Returns the plain \stdClass record this object wraps.
 *
 * The same object instance held internally is returned, not a copy.
 *
 * @return \stdClass
 */
public function get_model_obj() {
    $record = $this->model;
    return $record;
}
179
/**
 * Returns the model target, loading it through the manager on first use.
 *
 * The instance is cached in $this->target; a null value means "not loaded yet".
 *
 * @return \core_analytics\local\target\base
 */
public function get_target() {
    if ($this->target === null) {
        $this->target = \core_analytics\manager::get_target($this->model->target);
    }

    return $this->target;
}
194
/**
 * Returns the model indicators, indexed by their full class name.
 *
 * The list is decoded from the JSON stored in the model record and cached.
 * Indicators that can not be loaded are skipped with a developer debugging message.
 *
 * @throws \coding_exception If the stored indicators do not decode to an array.
 * @return \core_analytics\local\indicator\base[]
 */
public function get_indicators() {
    if ($this->indicators !== null) {
        return $this->indicators;
    }

    $classnames = json_decode($this->model->indicators);
    if (!is_array($classnames)) {
        throw new \coding_exception('Model ' . $this->model->id . ' indicators can not be read');
    }

    $this->indicators = array();
    foreach ($classnames as $fullclassname) {
        $indicator = \core_analytics\manager::get_indicator($fullclassname);
        if (!$indicator) {
            debugging('Can\'t load ' . $fullclassname . ' indicator', DEBUG_DEVELOPER);
            continue;
        }
        $this->indicators[$fullclassname] = $indicator;
    }

    return $this->indicators;
}
223
/**
 * Returns the list of indicators that could potentially be used by the model target.
 *
 * Starts from every indicator known to the manager (which includes the ones that
 * are already part of the model) and keeps only those for which the analyser
 * reports that its requirements are met.
 *
 * @return \core_analytics\local\indicator\base[]
 */
public function get_potential_indicators() {

    $candidates = \core_analytics\manager::get_all_indicators();

    // An analyser is required to check the requirements; initialise one in evaluation mode if none is set.
    if (empty($this->analyser)) {
        $this->init_analyser(array('evaluation' => true));
    }

    // Only a strict true counts as "requirements met"; array_filter keeps the class name keys.
    return array_filter($candidates, function($indicator) {
        return $this->analyser->check_indicator_requirements($indicator) === true;
    });
}
246
/**
 * Returns the model analyser (defined by the model target).
 *
 * The analyser is initialised on first use; $options are ignored when an
 * analyser has already been initialised.
 *
 * @param array $options Default initialisation with no options.
 * @return \core_analytics\local\analyser\base
 */
public function get_analyser($options = array()) {
    if ($this->analyser === null) {
        $this->init_analyser($options);
    }

    return $this->analyser;
}
262
/**
 * Initialises the model analyser.
 *
 * Resolves the time splitting methods to use according to $options:
 * - 'notimesplitting': no time splitting method is passed to the analyser.
 * - 'evaluation': uses the method in $options['timesplitting'] if set, all enabled methods otherwise,
 *   and flags every selected method as evaluating.
 * - otherwise: uses the time splitting method stored in the model.
 *
 * @throws \moodle_exception If there is no target, the model has no time splitting method or none is available.
 * @throws \coding_exception If the analyser class defined by the target does not exist.
 * @param array $options
 * @return void
 */
protected function init_analyser($options = array()) {

    $modeltarget = $this->get_target();
    $modelindicators = $this->get_indicators();

    if (empty($modeltarget)) {
        throw new \moodle_exception('errornotarget', 'analytics');
    }

    $evaluating = !empty($options['evaluation']);

    $methods = array();
    if (empty($options['notimesplitting'])) {

        if (!$evaluating) {
            if (empty($this->model->timesplitting)) {
                throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
            }

            // Returned as an array as all actions (evaluation, training and prediction) go through the same process.
            $methods = array($this->model->timesplitting => $this->get_time_splitting());

        } else if (!empty($options['timesplitting'])) {
            // Evaluation restricted to the requested time splitting method.
            $requested = \core_analytics\manager::get_time_splitting($options['timesplitting']);
            $methods = array($requested->get_id() => $requested);

        } else {
            // Evaluation runs using all available time splitting methods when none is specified.
            $methods = \core_analytics\manager::get_enabled_time_splitting_methods();
        }

        if (empty($methods)) {
            throw new \moodle_exception('errornotimesplittings', 'analytics');
        }
    }

    if ($evaluating) {
        foreach ($methods as $method) {
            $method->set_evaluating(true);
        }
    }

    $analyserclass = $modeltarget->get_analyser_class();
    if (!class_exists($analyserclass)) {
        throw new \coding_exception($analyserclass . ' class does not exists');
    }

    // Returns a \core_analytics\local\analyser\base class.
    $this->analyser = new $analyserclass($this->model->id, $modeltarget, $modelindicators, $methods, $options);
}
318
/**
 * Returns the model time splitting method.
 *
 * @return \core_analytics\local\time_splitting\base|false Returns false if the model has no time splitting method set.
 */
public function get_time_splitting() {
    if (!empty($this->model->timesplitting)) {
        return \core_analytics\manager::get_time_splitting($this->model->timesplitting);
    }

    return false;
}
330
/**
 * Creates a new model. Enables it if $timesplittingid is specified.
 *
 * The record is inserted and then read back so database defaults are included
 * in the returned model. Static models are flagged as trained straight away.
 *
 * @param \core_analytics\local\target\base $target
 * @param \core_analytics\local\indicator\base[] $indicators
 * @param string $timesplittingid The time splitting method id (its fully qualified class name)
 * @return \core_analytics\model
 */
public static function create(\core_analytics\local\target\base $target, array $indicators, $timesplittingid = false) {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $indicatorclasses = self::indicator_classes($indicators);
    $timestamp = time();

    // The creation time doubles as the initial model version.
    $record = (object)[
        'target' => $target->get_id(),
        'indicators' => json_encode($indicatorclasses),
        'version' => $timestamp,
        'timecreated' => $timestamp,
        'timemodified' => $timestamp,
        'usermodified' => $USER->id,
    ];

    $newid = $DB->insert_record('analytics_models', $record);

    // Get db defaults.
    $model = new static($DB->get_record('analytics_models', array('id' => $newid), '*', MUST_EXIST));

    if ($timesplittingid) {
        $model->enable($timesplittingid);
    }

    if ($model->is_static()) {
        $model->mark_as_trained();
    }

    return $model;
}
373
e709e544
DM
/**
 * Does this model exist?
 *
 * If no indicators are provided it considers any model with the provided
 * target a match. Otherwise a model matches when it uses the provided target
 * and exactly the same set of indicators (compared by their array keys).
 *
 * @param \core_analytics\local\target\base $target
 * @param \core_analytics\local\indicator\base[]|false $indicators Indexed by indicator id, false to match on target only.
 * @return bool
 */
public static function exists(\core_analytics\local\target\base $target, $indicators = false) {
    global $DB;

    $existingmodels = $DB->get_records('analytics_models', array('target' => $target->get_id()));

    // Nothing can match without models for this target. Returning here also prevents
    // array_keys() from being called on a false $indicators value below.
    if (!$existingmodels) {
        return false;
    }

    if (!$indicators) {
        return true;
    }

    $indicatorids = array_keys($indicators);
    sort($indicatorids);

    foreach ($existingmodels as $modelobj) {
        $model = new \core_analytics\model($modelobj);
        $modelindicatorids = array_keys($model->get_indicators());
        sort($modelindicatorids);

        if ($indicatorids === $modelindicatorids) {
            return true;
        }
    }
    return false;
}
407
/**
 * Updates the model.
 *
 * If the indicators or the time splitting method change, the model data is
 * cleared, its version is bumped and non-static models are flagged as not trained.
 * If only the enabled state changes, the insights cache is purged.
 *
 * Note that the default value of $timesplittingid is an empty string, not false:
 * only an explicit false keeps the current time splitting method, omitting the
 * argument stores an empty time splitting method.
 *
 * @param int|bool $enabled
 * @param \core_analytics\local\indicator\base[]|false $indicators False to respect current indicators
 * @param string|false $timesplittingid False to respect current time splitting method
 * @return void
 */
public function update($enabled, $indicators = false, $timesplittingid = '') {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $timestamp = time();

    // False means "respect the current value" for both indicators and time splitting method.
    $indicatorsjson = ($indicators === false)
        ? $this->model->indicators
        : json_encode(self::indicator_classes($indicators));

    if ($timesplittingid === false) {
        $timesplittingid = $this->model->timesplitting;
    }

    $definitionchanged = $this->model->timesplitting !== $timesplittingid ||
        $this->model->indicators !== $indicatorsjson;

    if ($definitionchanged) {

        // Delete generated predictions before changing the model version.
        $this->clear();

        // It needs to be reset as the version changes.
        $this->uniqueid = null;

        // We update the version of the model so different time splittings are not mixed up.
        $this->model->version = $timestamp;

        // Reset trained flag.
        if (!$this->is_static()) {
            $this->model->trained = 0;
        }

    } else if ($this->model->enabled != $enabled) {
        // We purge the cached contexts with insights as some will not be visible anymore.
        $this->purge_insights_cache();
    }

    $this->model->enabled = intval($enabled);
    $this->model->indicators = $indicatorsjson;
    $this->model->timesplitting = $timesplittingid;
    $this->model->timemodified = $timestamp;
    $this->model->usermodified = $USER->id;

    $DB->update_record('analytics_models', $this->model);
}
466
d16cf374
DM
/**
 * Removes the model.
 *
 * Clears the model data, asks the predictions processor to delete the model
 * output directory and deletes the model record together with its log entries.
 *
 * @return void
 */
public function delete() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $this->clear();

    // Method self::clear is already clearing the current model version.
    $outputdir = $this->get_output_dir(array(), true);
    \core_analytics\manager::get_predictions_processor()->delete_output_dir($outputdir);

    $modelid = $this->model->id;
    $DB->delete_records('analytics_models', array('id' => $modelid));
    $DB->delete_records('analytics_models_log', array('modelid' => $modelid));
}
486
/**
 * Evaluates the model.
 *
 * This method gets the site contents (through the analyser) creates a .csv dataset
 * with them and evaluates the model prediction accuracy multiple times using the
 * machine learning backend. One result object is returned per evaluated time
 * splitting method, indexed by the time splitting method id.
 *
 * Static models are not evaluated: a single NO_DATASET result is returned for them.
 * When no dataset could be generated a single NO_DATASET result (index 0) is returned.
 *
 * @param array $options
 * @return \stdClass[]
 */
public function evaluate($options = array()) {

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('noevaluationbasedassumptions', 'analytics'));
        $skipped = new \stdClass();
        $skipped->status = self::NO_DATASET;
        return array($this->get_time_splitting()->get_id() => $skipped);
    }

    $options['evaluation'] = true;
    $this->init_analyser($options);

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_labelled_data call so we get an early exception if it is not ready.
    $processor = \core_analytics\manager::get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // No datasets generated.
    if (empty($datasets)) {
        $nodata = new \stdClass();
        $nodata->status = self::NO_DATASET;
        $nodata->info = $this->get_analyser()->get_logs();
        return array($nodata);
    }

    if (!PHPUNIT_TEST && CLI_SCRIPT) {
        echo PHP_EOL . get_string('processingsitecontents', 'analytics') . PHP_EOL;
    }

    $results = array();
    foreach ($datasets as $timesplittingid => $dataset) {

        $timesplitting = \core_analytics\manager::get_time_splitting($timesplittingid);

        // The class name without namespace separators is used as directory name.
        $outputdir = $this->get_output_dir(array('evaluation', str_replace('\\', '', $timesplittingid)));

        // Evaluate the dataset, the deviation we accept in the results depends on the amount of iterations.
        if ($this->get_target()->is_linear()) {
            $evaluation = $processor->evaluate_regression($this->get_unique_id(), self::ACCEPTED_DEVIATION,
                self::EVALUATION_ITERATIONS, $dataset, $outputdir);
        } else {
            $evaluation = $processor->evaluate_classification($this->get_unique_id(), self::ACCEPTED_DEVIATION,
                self::EVALUATION_ITERATIONS, $dataset, $outputdir);
        }

        $result = new \stdClass();
        $result->status = $evaluation->status;
        $result->info = $evaluation->info;
        // Prediction processors may return an error, default to 0 score in that case.
        $result->score = isset($evaluation->score) ? $evaluation->score : 0;

        $dir = !empty($evaluation->dir) ? $evaluation->dir : false;

        $result->logid = $this->log_result($timesplitting->get_id(), $result->score, $dir, $result->info);

        $results[$timesplitting->get_id()] = $result;
    }

    return $results;
}
576
/**
 * Trains the model using the site contents.
 *
 * This method prepares a dataset from the site contents (through the analyser)
 * and passes it to the machine learning backends. Static models are skipped as
 * they do not require training.
 *
 * The returned object contains a status and, except for static models, an info
 * attribute. The dataset is flagged as used and the model is marked as trained
 * only when the backend reports an OK status.
 *
 * @throws \moodle_exception If the model is not enabled, has no time splitting method or has no indicators.
 * @return \stdClass
 */
public function train() {

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('notrainingbasedassumptions', 'analytics'));
        $skipped = new \stdClass();
        $skipped->status = self::OK;
        return $skipped;
    }

    if (!($this->is_enabled() && !empty($this->model->timesplitting))) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_labelled_data call so we get an early exception if it is not writable.
    $outputdir = $this->get_output_dir(array('execution'));

    // Before get_labelled_data call so we get an early exception if it is not ready.
    $processor = \core_analytics\manager::get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // No training if no files have been provided.
    if (empty($datasets) || empty($datasets[$this->model->timesplitting])) {
        $nodata = new \stdClass();
        $nodata->status = self::NO_DATASET;
        $nodata->info = $this->get_analyser()->get_logs();
        return $nodata;
    }
    $trainingfile = $datasets[$this->model->timesplitting];

    // Train using the dataset.
    if ($this->get_target()->is_linear()) {
        $training = $processor->train_regression($this->get_unique_id(), $trainingfile, $outputdir);
    } else {
        $training = $processor->train_classification($this->get_unique_id(), $trainingfile, $outputdir);
    }

    $result = new \stdClass();
    $result->status = $training->status;
    $result->info = $training->info;

    if ($result->status === self::OK) {
        $this->flag_file_as_used($trainingfile, 'trained');

        // Mark the model as trained if it wasn't.
        if ($this->model->trained == false) {
            $this->mark_as_trained();
        }
    }

    return $result;
}
649
/**
 * Get predictions from the site contents.
 *
 * It analyses the site contents (through analyser classes) looking for samples
 * ready to receive predictions. It generates a dataset with all samples ready to
 * get predictions and it passes it to the machine learning backends or to the
 * targets based on assumptions to get the predictions.
 *
 * On success the prediction callbacks are executed, insights are triggered if the
 * model uses them and the samples file is flagged as predicted.
 *
 * @throws \moodle_exception If the model is not enabled, has no time splitting method, has no indicators or
 *                           the samples file was already used to get predictions.
 * @return \stdClass Object with status and info attributes, plus predictions unless no dataset was available.
 */
public function predict() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    if (!($this->is_enabled() && !empty($this->model->timesplitting))) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_unlabelled_data call so we get an early exception if it is not writable.
    $outputdir = $this->get_output_dir(array('execution'));

    // Before get_unlabelled_data call so we get an early exception if it is not ready.
    // Static models do not need a predictions processor.
    if (!$this->is_static()) {
        $processor = \core_analytics\manager::get_predictions_processor();
    }

    $samplesdata = $this->get_analyser()->get_unlabelled_data();

    // Get the prediction samples file.
    if (empty($samplesdata) || empty($samplesdata[$this->model->timesplitting])) {
        $nodata = new \stdClass();
        $nodata->status = self::NO_DATASET;
        $nodata->info = $this->get_analyser()->get_logs();
        return $nodata;
    }
    $samplesfile = $samplesdata[$this->model->timesplitting];

    // We need to throw an exception if we are trying to predict stuff that was already predicted.
    $usedfileparams = array(
        'modelid' => $this->model->id,
        'action' => 'predicted',
        'fileid' => $samplesfile->get_id()
    );
    if ($DB->get_record('analytics_used_files', $usedfileparams)) {
        throw new \moodle_exception('erroralreadypredict', 'analytics', '', $samplesfile->get_id());
    }

    $indicatorcalculations = \core_analytics\dataset_manager::get_structured_data($samplesfile);

    // Prepare the results object.
    $result = new \stdClass();

    if (!$this->is_static()) {
        // Estimation and classification processes run on the machine learning backend side.
        if ($this->get_target()->is_linear()) {
            $backendresult = $processor->estimate($this->get_unique_id(), $samplesfile, $outputdir);
        } else {
            $backendresult = $processor->classify($this->get_unique_id(), $samplesfile, $outputdir);
        }
        $result->status = $backendresult->status;
        $result->info = $backendresult->info;
        $result->predictions = $this->format_predictor_predictions($backendresult);

    } else {
        // Prediction based on assumptions.
        $result->status = self::OK;
        $result->info = [];
        $result->predictions = $this->get_static_predictions($indicatorcalculations);
    }

    if ($result->status !== self::OK) {
        return $result;
    }

    if ($result->predictions) {
        $samplecontexts = $this->execute_prediction_callbacks($result->predictions, $indicatorcalculations);
    }

    // No contexts are available if there were no predictions or none of them triggered the callback.
    if (!empty($samplecontexts) && $this->uses_insights()) {
        $this->trigger_insights($samplecontexts);
    }

    $this->flag_file_as_used($samplesfile, 'predicted');

    return $result;
}
740
/**
 * Formats the predictor results.
 *
 * Each element of $predictorresult->predictions is expected to be a list with
 * the unique sample id, the prediction and, optionally, the prediction score.
 * Elements with any other number of items are skipped.
 *
 * @param \stdClass $predictorresult The predictions processor result, its predictions attribute is read if not empty.
 * @return \stdClass[] Objects with prediction and predictionscore attributes, indexed by unique sample id.
 */
private function format_predictor_predictions($predictorresult) {

    $predictions = array();
    if (empty($predictorresult->predictions)) {
        return $predictions;
    }

    foreach ($predictorresult->predictions as $sampleinfo) {

        // We parse each prediction.
        switch (count($sampleinfo)) {
            case 1:
                // For whatever reason the predictions processor could not process this sample, we
                // skip it and do nothing with it.
                debugging($this->model->id . ' model predictions processor could not process the sample with id ' .
                    $sampleinfo[0], DEBUG_DEVELOPER);
                // A plain "continue" inside a switch behaves like "break", "continue 2" is required
                // to move on to the next sample.
                continue 2;
            case 2:
                // Prediction processors that do not return a prediction score will have the maximum prediction
                // score.
                list($uniquesampleid, $prediction) = $sampleinfo;
                $predictionscore = 1;
                break;
            case 3:
                list($uniquesampleid, $prediction, $predictionscore) = $sampleinfo;
                break;
            default:
                // Unexpected format, skip it instead of reusing the values of a previous sample.
                continue 2;
        }

        $predictions[$uniquesampleid] = (object)['prediction' => $prediction, 'predictionscore' => $predictionscore];
    }

    return $predictions;
}
779
/**
 * Execute the prediction callbacks defined by the target.
 *
 * For every prediction that triggers the target callback a prediction record is
 * prepared, the target prediction_callback is executed and the sample context is
 * collected. All prepared records are saved in one go at the end.
 *
 * @param \stdClass[] $predictions Objects with prediction and predictionscore attributes, indexed by unique sample id.
 * @param array $indicatorcalculations Indicator calculations indexed by unique sample id.
 * @return \context[] The contexts of the samples that triggered the callback, indexed by context id.
 */
protected function execute_prediction_callbacks($predictions, $indicatorcalculations) {

    // Here we will store all predictions' contexts, this will be used to limit which users will see those predictions.
    $contexts = array();
    $pendingrecords = array();

    foreach ($predictions as $uniquesampleid => $item) {

        // The unique sample id contains both the sampleid and the rangeindex.
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        if (!$this->get_target()->triggers_callback($item->prediction, $item->predictionscore)) {
            continue;
        }

        // Prepare the record to store the predicted values.
        $calculations = json_encode($indicatorcalculations[$uniquesampleid]);
        list($record, $context) = $this->prepare_prediction_record($sampleid, $rangeindex, $item->prediction,
            $item->predictionscore, $calculations);

        // We will later bulk-insert them all.
        $pendingrecords[$uniquesampleid] = $record;

        // Also store all samples context to later generate insights or whatever action the target wants to perform.
        $contexts[$context->id] = $context;

        $this->get_target()->prediction_callback($this->model->id, $sampleid, $rangeindex, $context,
            $item->prediction, $item->predictionscore);
    }

    if (!empty($pendingrecords)) {
        $this->save_predictions($pendingrecords);
    }

    return $contexts;
}
369389c9 821
1611308b
DM
/**
 * Generates insights and updates the cache.
 *
 * Asks the target to generate the insight notifications and then makes sure this
 * model is listed in the 'contextwithinsights' cache entry of every context.
 *
 * @param \context[] $samplecontexts
 * @return void
 */
protected function trigger_insights($samplecontexts) {

    // Notify the target that all predictions have been processed.
    $this->get_target()->generate_insight_notifications($this->model->id, $samplecontexts);

    // Update cache.
    $cache = \cache::make('core', 'contextwithinsights');
    foreach ($samplecontexts as $context) {
        $cachedmodelids = $cache->get($context->id);

        if (!$cachedmodelids) {
            // The cache is empty, but we don't know if it is empty because there are no insights
            // in this context or because cache/s have been purged, we need to be conservative and
            // "pay" 1 db read to fill up the cache.
            $models = \core_analytics\manager::get_models_with_insights($context);
            $cache->set($context->id, array_keys($models));
            continue;
        }

        if (in_array($this->get_id(), $cachedmodelids)) {
            // Already listed, nothing to update.
            continue;
        }

        $cachedmodelids[] = $this->get_id();
        $cache->set($context->id, $cachedmodelids);
    }
}
849
/**
 * Get predictions from a static model.
 *
 * Samples are grouped by analysable class and range index, the target calculates
 * each group and every non-null calculation becomes a prediction with the maximum
 * prediction score.
 *
 * $indicatorcalculations is received by reference: samples the target did not
 * calculate (missing or null calculation) are removed from it so they are not
 * stored later as calculated.
 *
 * @param array $indicatorcalculations Indicator calculations indexed by unique sample id, updated by reference.
 * @return \stdClass[] Objects with prediction and predictionscore attributes, indexed by unique sample id.
 */
protected function get_static_predictions(&$indicatorcalculations) {

    // Group samples by analysable for \core_analytics\local\target::calculate.
    $analysables = array();
    // List all sampleids together.
    $sampleids = array();

    foreach ($indicatorcalculations as $uniquesampleid => $indicators) {
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
        $analysableclass = get_class($analysable);
        if (empty($analysables[$analysableclass])) {
            $analysables[$analysableclass] = array();
        }
        if (empty($analysables[$analysableclass][$rangeindex])) {
            $analysables[$analysableclass][$rangeindex] = (object)[
                'analysable' => $analysable,
                'indicatorsdata' => array(),
                'sampleids' => array()
            ];
        }
        // Using the sampleid as a key so we can easily merge indicators data later.
        $analysables[$analysableclass][$rangeindex]->indicatorsdata[$sampleid] = $indicators;
        // We could use indicatorsdata keys but the amount of redundant data is not that big and leaves code below cleaner.
        $analysables[$analysableclass][$rangeindex]->sampleids[$sampleid] = $sampleid;

        // Accumulate sample ids to get all their associated data in 1 single db query (analyser::get_samples).
        $sampleids[$sampleid] = $sampleid;
    }

    // Get all samples data.
    list($sampleids, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Calculate the targets.
    $predictions = array();
    foreach ($analysables as $analysableclass => $rangedata) {
        foreach ($rangedata as $rangeindex => $data) {

            // Snapshot of the samples that belong to this group (keyed by sample id), taken before
            // the target gets a chance to filter anything out.
            $groupsamples = $data->indicatorsdata;

            // Attach samples data and calculated indicators data.
            $this->get_target()->clear_sample_data();
            $this->get_target()->add_sample_data($samplesdata);
            $this->get_target()->add_sample_data($data->indicatorsdata);

            // Append new elements (we can not get duplicates because sample-analysable relation is N-1).
            $range = $this->get_time_splitting()->get_range_by_index($rangeindex);
            $this->get_target()->filter_out_invalid_samples($data->sampleids, $data->analysable, false);
            $calculations = $this->get_target()->calculate($data->sampleids, $data->analysable, $range['start'], $range['end']);

            // Missing $indicatorcalculations values in $calculations are caused by is_valid_sample. We need to remove
            // these $uniquesampleid from $indicatorcalculations because otherwise they will be stored as calculated
            // by self::save_prediction. Only the samples of the group being processed can be judged using
            // $calculations, entries that belong to other ranges or groups must be left untouched.
            $indicatorcalculations = array_filter($indicatorcalculations,
                function($indicators, $uniquesampleid) use ($calculations, $rangeindex, $groupsamples) {
                    list($sampleid, $samplerangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);
                    $ingroup = ($samplerangeindex == $rangeindex) && array_key_exists($sampleid, $groupsamples);
                    if ($ingroup && !isset($calculations[$sampleid])) {
                        return false;
                    }
                    return true;
                }, ARRAY_FILTER_USE_BOTH);

            foreach ($calculations as $sampleid => $value) {

                $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);

                // Null means that the target couldn't calculate the sample, we also remove them from $indicatorcalculations.
                if (is_null($value)) {
                    unset($indicatorcalculations[$uniquesampleid]);
                    continue;
                }

                // Even if static predictions are based on assumptions we flag them as 100% because they are 100%
                // true according to what the developer defined.
                $predictions[$uniquesampleid] = (object)['prediction' => $value, 'predictionscore' => 1];
            }
        }
    }
    return $predictions;
}
/**
 * Builds the prediction record for a sample together with the sample's access context.
 *
 * Nothing is written to the database here, the record is only assembled. The time range
 * boundaries are added to the record when the time splitting method returns a range for
 * the provided range index.
 *
 * @param int $sampleid
 * @param int $rangeindex
 * @param int $prediction
 * @param float $predictionscore
 * @param string $calculations
 * @return array 2 elements: list(\stdClass $record, \context $context)
 */
protected function prepare_prediction_record($sampleid, $rangeindex, $prediction, $predictionscore, $calculations) {
    $context = $this->get_analyser()->sample_access_context($sampleid);

    $record = (object)[
        'modelid' => $this->model->id,
        'contextid' => $context->id,
        'sampleid' => $sampleid,
        'rangeindex' => $rangeindex,
        'prediction' => $prediction,
        'predictionscore' => $predictionscore,
        'calculations' => $calculations,
        'timecreated' => time(),
    ];

    // The time splitting method needs to know the analysable before it can resolve the range.
    $sampleanalysable = $this->get_analyser()->get_sample_analysable($sampleid);
    $splitter = $this->get_time_splitting();
    $splitter->set_analysable($sampleanalysable);

    $timerange = $splitter->get_range_by_index($rangeindex);
    if ($timerange) {
        $record->timestart = $timerange['start'];
        $record->timeend = $timerange['end'];
    }

    return array($record, $context);
}
969
/**
 * Inserts the provided prediction records into the analytics_predictions table.
 *
 * @param \stdClass[] $records Prediction records
 * @return void
 */
protected function save_predictions($records) {
    global $DB;

    // All records go through a single bulk insert call.
    $DB->insert_records('analytics_predictions', $records);
}
979
/**
 * Enables the model, optionally switching it to the provided time splitting method.
 *
 * Switching to a different time splitting method clears the data generated by the model,
 * bumps the model version and, for non-static models, resets the trained flag.
 *
 * @throws \moodle_exception If the provided time splitting method is not valid or if the model has none set.
 * @param string|false $timesplittingid False to respect the current time splitting method.
 * @return void
 */
public function enable($timesplittingid = false) {
    global $DB, $USER;

    \core_analytics\manager::check_can_manage_models();

    $now = time();
    $switching = $timesplittingid && $timesplittingid !== $this->model->timesplitting;

    if (!$switching && empty($this->model->timesplitting)) {
        // A valid timesplitting method needs to be supplied before a model can be enabled.
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if ($switching) {

        // It has to be a known time splitting class referenced by its fully qualified name (leading backslash).
        $isknownclass = \core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base');
        if (!$isknownclass || substr($timesplittingid, 0, 1) !== '\\') {
            throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
        }

        // Delete generated predictions before changing the model version.
        $this->clear();

        // The unique id depends on the model version so it has to be regenerated.
        $this->uniqueid = null;

        $this->model->timesplitting = $timesplittingid;
        $this->model->version = $now;

        // Models that are not static need to be trained again.
        if (!$this->is_static()) {
            $this->model->trained = 0;
        }
    }

    // Pages with insights may change once a model that was not enabled gets enabled.
    if ($this->model->enabled != 1) {
        $this->purge_insights_cache();
    }

    $this->model->enabled = 1;
    $this->model->timemodified = $now;
    $this->model->usermodified = $USER->id;

    $DB->update_record('analytics_models', $this->model);
}
1034
/**
 * Whether the model is static, which is something that the model's target defines.
 *
 * Static models rely on assumptions rather than on the results of a
 * machine learning backend.
 *
 * @return bool
 */
public function is_static() {
    $target = $this->get_target();
    return (bool)$target->based_on_assumptions();
}
1046
/**
 * Whether the model is flagged as enabled.
 *
 * @return bool
 */
public function is_enabled() {
    $enabled = $this->model->enabled;
    return $enabled ? true : false;
}
1055
/**
 * Whether the model can be considered trained.
 *
 * @return bool
 */
public function is_trained() {
    if ($this->model->trained) {
        return true;
    }

    // Models which targets are based on assumptions do not need training.
    return $this->is_static();
}
1065
/**
 * Flags the model as trained and stores the change in the database.
 *
 * @return void
 */
public function mark_as_trained() {
    global $DB;

    // Only users that can manage models are allowed to change the flag.
    \core_analytics\manager::check_can_manage_models();

    $model = $this->model;
    $model->trained = 1;
    $DB->update_record('analytics_models', $model);
}
1079
/**
 * Returns the ids of the contexts that have predictions for this model.
 *
 * Only contexts that still have a record in the context table are returned.
 *
 * @param bool $skiphidden Skip the predictions that the current user flagged as fixed or as not useful
 * @return \stdClass[]
 */
public function get_predictions_contexts($skiphidden = true) {
    global $DB, $USER;

    $params = array('modelid' => $this->model->id);

    $hiddenfilter = '';
    if ($skiphidden) {
        $hiddenfilter = " AND NOT EXISTS (
              SELECT 1
                FROM {analytics_prediction_actions} apa
               WHERE apa.predictionid = ap.id AND apa.userid = :userid AND (apa.actionname = :fixed OR apa.actionname = :notuseful)
            )";
        $params['userid'] = $USER->id;
        $params['fixed'] = \core_analytics\prediction::ACTION_FIXED;
        $params['notuseful'] = \core_analytics\prediction::ACTION_NOT_USEFUL;
    }

    $sql = "SELECT DISTINCT ap.contextid FROM {analytics_predictions} ap
              JOIN {context} ctx ON ctx.id = ap.contextid
             WHERE ap.modelid = :modelid" . $hiddenfilter;

    return $DB->get_records_sql($sql, $params);
}
1107
f9e7447f
DM
/**
 * Whether the model, using its current time splitting method, has already generated predictions.
 *
 * The analytics_predict_samples table is checked instead of analytics_predictions because
 * targets are able to ignore some predicted values and, when they do, the predictions do
 * not get stored in the database.
 *
 * @return bool
 */
public function any_prediction_obtained() {
    global $DB;

    $conditions = array(
        'modelid' => $this->model->id,
        'timesplitting' => $this->model->timesplitting,
    );
    return $DB->record_exists('analytics_predict_samples', $conditions);
}
1122
/**
 * Whether the model generates insights, which is something that the model's target defines.
 *
 * @return bool
 */
public function uses_insights() {
    // The method is static so it is called on the class of the target returned by get_target().
    return $this->get_target()::uses_insights();
}
1132
369389c9
DM
/**
 * Whether this model has any prediction stored for the provided context.
 *
 * @param \context $context
 * @return bool
 */
public function predictions_exist(\context $context) {
    global $DB;

    // Any prediction of this model in the context counts, regardless of its time range.
    $params = array(
        'modelid' => $this->model->id,
        'contextid' => $context->id,
    );
    $select = "modelid = :modelid AND contextid = :contextid";

    return $DB->record_exists_select('analytics_predictions', $select, $params);
}
1147
/**
 * Returns the predictions that this model has for the provided context.
 *
 * For each sample only the prediction with the highest range index is considered.
 * Predictions whose sample data is not available any more are discarded.
 *
 * Note that an empty array is returned when the database query returns no predictions at
 * all; in any other case the return value contains 2 elements: the number of predictions
 * with available sample data (before pagination) and the requested page of predictions.
 *
 * @param \context $context
 * @param bool $skiphidden Skip the predictions that the current user flagged as fixed or as not useful
 * @param int $page The page of results to fetch. False for all results.
 * @param int $perpage The max number of results to fetch. Ignored if $page is false.
 * @return array Empty array or array($total, \core_analytics\prediction[])
 */
public function get_predictions(\context $context, $skiphidden = true, $page = false, $perpage = 100) {
    global $DB, $USER;

    \core_analytics\manager::check_can_list_insights($context);

    $modelid = $this->model->id;
    $params = array(
        'modelid' => $modelid,
        'contextid' => $context->id,
        'modelidsubap' => $modelid,
        'contextidsubap' => $context->id,
    );

    $hiddenfilter = '';
    if ($skiphidden) {
        $hiddenfilter = " AND NOT EXISTS (
              SELECT 1
                FROM {analytics_prediction_actions} apa
               WHERE apa.predictionid = ap.id AND apa.userid = :userid AND (apa.actionname = :fixed OR apa.actionname = :notuseful)
            )";
        $params['userid'] = $USER->id;
        $params['fixed'] = \core_analytics\prediction::ACTION_FIXED;
        $params['notuseful'] = \core_analytics\prediction::ACTION_NOT_USEFUL;
    }

    // The subquery filters out previous predictions keeping only the last time range one.
    $sql = "SELECT ap.*
              FROM {analytics_predictions} ap
              JOIN (
                SELECT sampleid, max(rangeindex) AS rangeindex
                  FROM {analytics_predictions}
                 WHERE modelid = :modelidsubap and contextid = :contextidsubap
              GROUP BY sampleid
              ) apsub
                ON ap.sampleid = apsub.sampleid AND ap.rangeindex = apsub.rangeindex
             WHERE ap.modelid = :modelid and ap.contextid = :contextid" . $hiddenfilter . " ORDER BY ap.timecreated DESC";

    $predictions = $DB->get_records_sql($sql, $params);
    if (!$predictions) {
        return array();
    }

    // Collect the predicted samples' ids (indexed by prediction id) and fetch their data at once.
    $sampleids = array();
    foreach ($predictions as $id => $record) {
        $sampleids[$id] = $record->sampleid;
    }
    list($unused, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    $paginate = ($page !== false);
    if ($paginate) {
        $offset = $page * $perpage;
        $limit = $offset + $perpage;
    }

    // Number of predictions with available sample data that we have gone through.
    $current = 0;

    foreach ($predictions as $predictionid => $predictiondata) {

        $sampleid = $predictiondata->sampleid;

        // Predictions which samples are not available anymore are neither returned nor counted.
        if (empty($samplesdata[$sampleid])) {
            unset($predictions[$predictionid]);
            continue;
        }

        // We cannot paginate in the DB because the list is filtered after querying it.
        $inpage = !$paginate || ($current >= $offset && $current < $limit);
        if ($inpage) {
            // The \stdClass record is replaced by a \core_analytics\prediction object.
            $predictions[$predictionid] = new \core_analytics\prediction($predictiondata, $samplesdata[$sampleid]);
        } else {
            unset($predictions[$predictionid]);
        }

        $current++;
    }

    return [$current, $predictions];
}
1231
/**
 * Returns the data of the sample that a prediction refers to.
 *
 * @throws \moodle_exception If the analyser does not return data for the sample.
 * @param \stdClass $predictionobj
 * @return array
 */
public function prediction_sample_data($predictionobj) {

    $sampleid = $predictionobj->sampleid;
    list($unused, $samplesdata) = $this->get_analyser()->get_samples(array($sampleid));

    if (!empty($samplesdata[$sampleid])) {
        return $samplesdata[$sampleid];
    }

    throw new \moodle_exception('errorsamplenotavailable', 'analytics');
}
1248
/**
 * Returns the description that the analyser provides for the sample of a prediction.
 *
 * @param \core_analytics\prediction $prediction
 * @return array 2 elements: list(string, \renderable)
 */
public function prediction_sample_description(\core_analytics\prediction $prediction) {
    $analyser = $this->get_analyser();
    $predictiondata = $prediction->get_prediction_data();

    return $analyser->sample_description($predictiondata->sampleid, $predictiondata->contextid,
        $prediction->get_sample_data());
}
1259
/**
 * Returns the directory that prediction processors use for their output, creating it if needed.
 *
 * The base directory is the 'modeloutputdir' analytics setting, falling back to a 'models'
 * directory inside the site dataroot. The structure below it is:
 * - Evaluation runs:
 *   models/$model->id/$model->version/evaluation/$model->timesplitting
 * - Training & prediction runs:
 *   models/$model->id/$model->version/execution
 *
 * @param array $subdirs Directories to append after the model version
 * @param bool $onlymodelid Return the model directory, ignoring the version and $subdirs
 * @return string
 */
protected function get_output_dir($subdirs = array(), $onlymodelid = false) {
    global $CFG;

    $basedir = get_config('analytics', 'modeloutputdir');
    if (empty($basedir)) {
        // Apply default value.
        $basedir = rtrim($CFG->dataroot, '/') . DIRECTORY_SEPARATOR . 'models';
    }

    // The model id always follows the base directory.
    $segments = array($basedir, $this->model->id);

    if (!$onlymodelid) {
        // Append version + subdirs.
        $segments[] = $this->model->version;
        foreach ($subdirs as $subdir) {
            $segments[] = $subdir;
        }
    }

    $outputdir = implode(DIRECTORY_SEPARATOR, $segments);

    make_writable_directory($outputdir);

    return $outputdir;
}
1298
/**
 * Returns an identifier for the current version of this model.
 *
 * The id is a hash of the site URL, the database prefix, the model id and the model
 * version. It is calculated once and kept in the instance until it is reset.
 *
 * @return string
 */
public function get_unique_id() {
    global $CFG;

    if ($this->uniqueid === null) {
        // The site URL and the db prefix identify the site, the model id and its version identify
        // the model considering the last time that its critical elements were updated.
        $parts = array($CFG->wwwroot, $CFG->prefix, $this->model->id, $this->model->version);
        $this->uniqueid = sha1(implode('$$', $parts));
    }

    return $this->uniqueid;
}
1320
/**
 * Exports the model data replacing target, time splitting method and indicators by their names.
 *
 * The returned object is a copy of the model record, the model itself is not modified.
 *
 * @return \stdClass
 */
public function export() {

    \core_analytics\manager::check_can_manage_models();

    $exported = clone $this->model;
    $exported->target = $this->get_target()->get_name();

    // The time splitting value is only replaced if the model has a time splitting method.
    $timesplitting = $this->get_time_splitting();
    if ($timesplitting) {
        $exported->timesplitting = $timesplitting->get_name();
    }

    $names = array();
    foreach ($this->get_indicators() as $indicator) {
        $names[] = $indicator->get_name();
    }
    $exported->indicators = $names;

    return $exported;
}
1343
584ffa4f
DM
/**
 * Returns the log records of this model, the most recently created first.
 *
 * @param int $limitfrom
 * @param int $limitnum
 * @return \stdClass[]
 */
public function get_logs($limitfrom = 0, $limitnum = 0) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $conditions = array('modelid' => $this->get_id());
    $sort = 'timecreated DESC';

    return $DB->get_records('analytics_models_log', $conditions, $sort, '*', $limitfrom, $limitnum);
}
1359
d126f838
DM
/**
 * Returns the training data of this model, for its current time splitting method, as one file.
 *
 * The export itself is delegated to \core_analytics\dataset_manager::export_training_data.
 *
 * @return \stored_file|false
 */
public function get_training_data() {

    \core_analytics\manager::check_can_manage_models();

    $timesplitting = $this->get_time_splitting();
    $timesplittingid = $timesplitting->get_id();

    return \core_analytics\dataset_manager::export_training_data($this->get_id(), $timesplittingid);
}
1372
/**
 * Records in analytics_used_files that this model has used the provided file.
 *
 * @param \stored_file $file
 * @param string $action The action the file was used for
 * @return void
 */
protected function flag_file_as_used(\stored_file $file, $action) {
    global $DB;

    $usedfile = (object)[
        'modelid' => $this->model->id,
        'fileid' => $file->get_id(),
        'action' => $action,
        'time' => time(),
    ];
    $DB->insert_record('analytics_used_files', $usedfile);
}
1390
/**
 * Stores a log entry with the results obtained using the provided time splitting method.
 *
 * The entry also records the model version, target and indicators at the time of logging.
 *
 * @param string $timesplittingid
 * @param float $score
 * @param string $dir
 * @param array $info Only stored if it is not empty
 * @return int The inserted log id
 */
protected function log_result($timesplittingid, $score, $dir = false, $info = false) {
    global $DB, $USER;

    $log = (object)[
        'modelid' => $this->get_id(),
        'version' => $this->model->version,
        'target' => $this->model->target,
        'indicators' => $this->model->indicators,
        'timesplitting' => $timesplittingid,
        'dir' => $dir,
    ];

    if ($info) {
        // The keys are dropped so it is always encoded as a JSON list.
        $log->info = json_encode(array_values($info));
    }

    $log->score = $score;
    $log->timecreated = time();
    $log->usermodified = $USER->id;

    return $DB->insert_record('analytics_models_log', $log);
}
1420
/**
 * Utility method to return the indicator ids from a list of indicator objects.
 *
 * Each element has to be accepted by \core_analytics\manager::is_valid as a
 * \core_analytics\local\indicator\base, the ids are obtained through get_id().
 *
 * @throws \moodle_exception If any of the provided elements is not a valid indicator.
 * @param \core_analytics\local\indicator\base[] $indicators
 * @return string[]
 */
private static function indicator_classes($indicators) {

    // What we want to check and store are the indicator classes not the keys.
    $indicatorclasses = array();
    foreach ($indicators as $indicator) {
        if (!\core_analytics\manager::is_valid($indicator, '\core_analytics\local\indicator\base')) {
            // Convert the invalid value to something that can be displayed in the error message.
            if (is_object($indicator)) {
                $indicator = '\\' . get_class($indicator);
            } else if (is_array($indicator)) {
                // strval() on an array raises an "Array to string conversion" PHP error; use the
                // text that the conversion results in without triggering it.
                $indicator = 'Array';
            } else if (!is_scalar($indicator)) {
                // Nulls and resources can be safely converted. Scalars are passed through untouched.
                $indicator = strval($indicator);
            }
            throw new \moodle_exception('errorinvalidindicator', 'analytics', '', $indicator);
        }
        $indicatorclasses[] = $indicator->get_id();
    }

    return $indicatorclasses;
}
1445
/**
 * Clears the model training and prediction data.
 *
 * Executed after updating model critical elements like the time splitting method
 * or the indicators.
 *
 * It deletes what the predictions processor stored for the current model version, the
 * predictions and the actions taken on them, the records of analysed samples, used files
 * and used analysables and the generated dataset files. The model is flagged as not trained.
 *
 * @return void
 */
public function clear() {
    global $DB, $USER;

    \core_analytics\manager::check_can_manage_models();

    // Delete current model version stored stuff.
    $predictor = \core_analytics\manager::get_predictions_processor();
    $predictor->clear_model($this->get_unique_id(), $this->get_output_dir());

    // The actions have to be deleted before the predictions they reference. A subquery is used so
    // the prediction ids do not have to be loaded into memory nor sent back as an unbounded list of
    // query parameters, which may hit database driver limits in models with many predictions.
    $DB->delete_records_select('analytics_prediction_actions',
        "predictionid IN (SELECT id FROM {analytics_predictions} WHERE modelid = :modelid)",
        array('modelid' => $this->get_id()));

    $DB->delete_records('analytics_predictions', array('modelid' => $this->model->id));
    $DB->delete_records('analytics_predict_samples', array('modelid' => $this->model->id));
    $DB->delete_records('analytics_train_samples', array('modelid' => $this->model->id));
    $DB->delete_records('analytics_used_files', array('modelid' => $this->model->id));
    $DB->delete_records('analytics_used_analysables', array('modelid' => $this->model->id));

    // Purge all generated files.
    \core_analytics\dataset_manager::clear_model_files($this->model->id);

    // We don't expect people to clear models regularly and the cost of filling the cache is
    // 1 db read per context.
    $this->purge_insights_cache();

    // All the training data is gone so the model needs to be trained again.
    $this->model->trained = 0;
    $this->model->timemodified = time();
    $this->model->usermodified = $USER->id;
    $DB->update_record('analytics_models', $this->model);
}
1488
/**
 * Purges the core 'contextwithinsights' cache.
 *
 * @return void
 */
private function purge_insights_cache() {
    \cache::make('core', 'contextwithinsights')->purge();
}
1496
1611308b
DM
/**
 * Raises the PHP memory limit (unless it is already set to -1) and the time limit.
 *
 * @return void
 */
private function heavy_duty_mode() {
    // A memory limit of -1 is left untouched.
    $nomemorylimit = (ini_get('memory_limit') == -1);
    if (!$nomemorylimit) {
        raise_memory_limit(MEMORY_HUGE);
    }

    \core_php_time_limit::raise();
}
369389c9 1508}