MDL-61667 analytics: Fix checking that a given model does not exist
[moodle.git] / analytics / classes / model.php
CommitLineData
369389c9
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
b94dbb55 18 * Prediction model representation.
369389c9
DM
19 *
20 * @package core_analytics
21 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace core_analytics;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
b94dbb55 30 * Prediction model representation.
369389c9
DM
31 *
32 * @package core_analytics
33 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36class model {
37
413f19bc
DM
38 /**
39 * All as expected.
40 */
369389c9 41 const OK = 0;
413f19bc
DM
42
43 /**
44 * There was a problem.
45 */
369389c9 46 const GENERAL_ERROR = 1;
413f19bc
DM
47
48 /**
49 * No dataset to analyse.
50 */
369389c9
DM
51 const NO_DATASET = 2;
52
413f19bc
DM
53 /**
54 * Model with low prediction accuracy.
55 */
325b3bdd 56 const LOW_SCORE = 4;
413f19bc
DM
57
58 /**
59 * Not enough data to evaluate the model properly.
60 */
325b3bdd 61 const NOT_ENOUGH_DATA = 8;
369389c9 62
413f19bc
DM
63 /**
64 * Invalid analysable for the time splitting method.
65 */
66 const ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD = 4;
67
68 /**
69 * Invalid analysable for all time splitting methods.
70 */
369389c9 71 const ANALYSABLE_STATUS_INVALID_FOR_RANGEPROCESSORS = 8;
413f19bc
DM
72
73 /**
74 * Invalid analysable for the target
75 */
369389c9
DM
76 const ANALYSABLE_STATUS_INVALID_FOR_TARGET = 16;
77
413f19bc
DM
78 /**
79 * Minimum score to consider a non-static prediction model as good.
80 */
369389c9 81 const MIN_SCORE = 0.7;
413f19bc 82
5c5cb3ee
DM
83 /**
84 * Minimum prediction confidence (from 0 to 1) to accept a prediction as reliable enough.
85 */
86 const PREDICTION_MIN_SCORE = 0.6;
87
413f19bc
DM
88 /**
89 * Maximum standard deviation between different evaluation repetitions to consider that evaluation results are stable.
90 */
369389c9 91 const ACCEPTED_DEVIATION = 0.05;
413f19bc
DM
92
93 /**
94 * Number of evaluation repetitions.
95 */
369389c9
DM
96 const EVALUATION_ITERATIONS = 10;
97
98 /**
99 * @var \stdClass
100 */
101 protected $model = null;
102
103 /**
104 * @var \core_analytics\local\analyser\base
105 */
106 protected $analyser = null;
107
108 /**
109 * @var \core_analytics\local\target\base
110 */
111 protected $target = null;
112
ed12ba6b
DM
113 /**
114 * @var \core_analytics\predictor
115 */
116 protected $predictionsprocessor = null;
117
369389c9
DM
118 /**
119 * @var \core_analytics\local\indicator\base[]
120 */
121 protected $indicators = null;
122
123 /**
124 * Unique Model id created from site info and last model modification.
125 *
126 * @var string
127 */
128 protected $uniqueid = null;
129
/**
 * Constructor.
 *
 * Accepts either the id of a record in the analytics_models table or an already
 * loaded model record. A scalar value is treated as an id and the record is fetched
 * from the database; anything else is stored as provided.
 *
 * @throws \dml_missing_record_exception If a scalar id is given and no matching record exists (MUST_EXIST).
 * @param int|\stdClass $model Model id or model record.
 * @return void
 */
public function __construct($model) {
    global $DB;

    if (is_scalar($model)) {
        // MUST_EXIST makes get_record() throw when the record is missing, so it never
        // returns false here and no extra "model not found" check is needed.
        $model = $DB->get_record('analytics_models', array('id' => $model), '*', MUST_EXIST);
    }
    $this->model = $model;
}
147
3a396286
DM
/**
 * Quick safety check to discard site models whose required components are no longer available.
 *
 * The model is reported as available only when get_target() returns a usable target
 * and the analyser class named by that target exists.
 *
 * @return bool
 */
public function is_available() {
    $modeltarget = $this->get_target();

    // Short-circuits to false when there is no target, so the analyser class is only looked up when it can be.
    return $modeltarget && class_exists($modeltarget->get_analyser_class());
}
166
/**
 * Returns the id of the wrapped model record.
 *
 * @return int
 */
public function get_id() {
    $record = $this->model;
    return $record->id;
}
175
/**
 * Returns the plain \stdClass record holding the model data.
 *
 * The object held by this instance is returned as is, not a copy.
 *
 * @return \stdClass
 */
public function get_model_obj() {
    $record = $this->model;
    return $record;
}
184
/**
 * Returns the model target.
 *
 * The target instance is requested from the analytics manager on first use and
 * kept in $this->target for later calls.
 *
 * @return \core_analytics\local\target\base
 */
public function get_target() {
    if ($this->target === null) {
        // Not resolved yet: ask the manager for the target named in the model record.
        $this->target = \core_analytics\manager::get_target($this->model->target);
    }

    return $this->target;
}
199
/**
 * Returns the model indicators.
 *
 * The indicator class names are read from the JSON-encoded 'indicators' field of the
 * model record. Indicators the manager can not instantiate are skipped with a
 * developer debugging message. The resulting list is kept for later calls.
 *
 * @throws \coding_exception If the stored indicators do not decode to an array.
 * @return \core_analytics\local\indicator\base[] Indexed by indicator class name.
 */
public function get_indicators() {
    if ($this->indicators === null) {
        $classnames = json_decode($this->model->indicators);
        if (!is_array($classnames)) {
            throw new \coding_exception('Model ' . $this->model->id . ' indicators can not be read');
        }

        $this->indicators = array();
        foreach ($classnames as $classname) {
            $indicator = \core_analytics\manager::get_indicator($classname);
            if (!$indicator) {
                debugging('Can\'t load ' . $classname . ' indicator', DEBUG_DEVELOPER);
                continue;
            }
            $this->indicators[$classname] = $indicator;
        }
    }

    return $this->indicators;
}
228
/**
 * Returns the list of indicators that could potentially be used by the model target.
 *
 * Starts from every indicator known to the manager and keeps only those for which
 * the analyser's requirements check returns exactly true. Indicators that are
 * already part of the model are included as well.
 *
 * @return \core_analytics\local\indicator\base[]
 */
public function get_potential_indicators() {

    $candidates = \core_analytics\manager::get_all_indicators();

    if (empty($this->analyser)) {
        // No analyser yet: initialise one in evaluation mode.
        $this->init_analyser(array('evaluation' => true));
    }

    // Function array_filter() preserves the original keys.
    return array_filter($candidates, function($indicator) {
        return $this->analyser->check_indicator_requirements($indicator) === true;
    });
}
251
/**
 * Returns the model analyser (defined by the model target).
 *
 * The analyser is initialised on the first call only; $options is ignored when an
 * analyser has already been initialised.
 *
 * @param array $options Initialisation options, none by default.
 * @return \core_analytics\local\analyser\base
 */
public function get_analyser($options = array()) {
    if ($this->analyser === null) {
        $this->init_analyser($options);
    }

    return $this->analyser;
}
267
/**
 * Initialises the model analyser.
 *
 * Options read by this method:
 * - 'notimesplitting': when not empty, no time splitting method is resolved.
 * - 'evaluation': when not empty, the time splitting methods are flagged as evaluating
 *   and either the one in 'timesplitting' or all methods available for evaluation are used.
 * - 'timesplitting': time splitting method id, only read in evaluation mode.
 *
 * The full $options array is also passed through to the analyser constructor.
 *
 * @throws \moodle_exception If there is no target or no usable time splitting method.
 * @throws \coding_exception If the analyser class does not exist or the indicators can not be read.
 * @param array $options
 * @return void
 */
protected function init_analyser($options = array()) {

    $target = $this->get_target();
    $indicators = $this->get_indicators();

    if (empty($target)) {
        throw new \moodle_exception('errornotarget', 'analytics');
    }

    $evaluating = !empty($options['evaluation']);

    $timesplittings = array();
    if (empty($options['notimesplitting'])) {
        if ($evaluating && !empty($options['timesplitting'])) {
            // Evaluation restricted to the requested time splitting method.
            $requested = \core_analytics\manager::get_time_splitting($options['timesplitting']);
            $timesplittings = array($requested->get_id() => $requested);

        } else if ($evaluating) {
            // The evaluation process runs using all available time splitting methods unless one is specified.
            $timesplittings = \core_analytics\manager::get_time_splitting_methods_for_evaluation();

        } else {
            if (empty($this->model->timesplitting)) {
                throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
            }

            // Returned as an array as all actions (evaluation, training and prediction) go through the same process.
            $timesplittings = array($this->model->timesplitting => $this->get_time_splitting());
        }

        if (empty($timesplittings)) {
            throw new \moodle_exception('errornotimesplittings', 'analytics');
        }
    }

    if ($evaluating) {
        foreach ($timesplittings as $method) {
            $method->set_evaluating(true);
        }
    }

    $analyserclass = $target->get_analyser_class();
    if (!class_exists($analyserclass)) {
        throw new \coding_exception($analyserclass . ' class does not exists');
    }

    // Instantiates a \core_analytics\local\analyser\base class.
    $this->analyser = new $analyserclass($this->model->id, $target, $indicators, $timesplittings, $options);
}
323
/**
 * Returns the model time splitting method.
 *
 * @return \core_analytics\local\time_splitting\base|false False if the model has no time splitting method set.
 */
public function get_time_splitting() {
    return empty($this->model->timesplitting)
        ? false
        : \core_analytics\manager::get_time_splitting($this->model->timesplitting);
}
335
/**
 * Creates a new model and stores it in the analytics_models table.
 *
 * The time splitting method is only stored when $timesplittingid is provided. Static
 * models are flagged as trained right after being created.
 *
 * @throws \moodle_exception If $timesplittingid is not a valid time splitting class name starting with a backslash.
 * @throws \coding_exception If $processor is neither a valid classifier nor a valid regressor.
 * @param \core_analytics\local\target\base $target
 * @param \core_analytics\local\indicator\base[] $indicators
 * @param string|false $timesplittingid The time splitting method id (its fully qualified class name)
 * @param string|null $processor The machine learning backend this model will use.
 * @return \core_analytics\model
 */
public static function create(\core_analytics\local\target\base $target, array $indicators,
        $timesplittingid = false, $processor = null) {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $indicatorclasses = self::indicator_classes($indicators);

    $now = time();

    $record = new \stdClass();
    $record->target = $target->get_id();
    $record->indicators = json_encode($indicatorclasses);
    $record->version = $now;
    $record->timecreated = $now;
    $record->timemodified = $now;
    $record->usermodified = $USER->id;

    if ($timesplittingid) {
        // Must be a known time splitting class, given as a fully qualified name with a leading backslash.
        $isvalid = \core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base');
        if (!$isvalid || substr($timesplittingid, 0, 1) !== '\\') {
            throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
        }
        $record->timesplitting = $timesplittingid;
    }

    if ($processor &&
            !manager::is_valid($processor, '\core_analytics\classifier') &&
            !manager::is_valid($processor, '\core_analytics\regressor')) {
        throw new \coding_exception('The provided predictions processor \\' . $processor . '\processor is not valid');
    }
    $record->predictionsprocessor = $processor;

    $newid = $DB->insert_record('analytics_models', $record);

    // Read the record back so database defaults are included.
    $record = $DB->get_record('analytics_models', array('id' => $newid), '*', MUST_EXIST);

    $model = new static($record);

    if ($model->is_static()) {
        $model->mark_as_trained();
    }

    return $model;
}
394
e709e544
DM
/**
 * Does this model exist?
 *
 * If no indicators are provided, any model using the provided target is a match.
 * Otherwise a model matches when its set of indicator keys is the same as the keys
 * of $indicators, regardless of their order.
 *
 * @param \core_analytics\local\target\base $target
 * @param \core_analytics\local\indicator\base[]|false $indicators
 * @return bool
 */
public static function exists(\core_analytics\local\target\base $target, $indicators = false) {
    global $DB;

    $candidates = $DB->get_records('analytics_models', array('target' => $target->get_id()));

    if (!$candidates) {
        // No model uses this target at all.
        return false;
    }

    if (!$indicators) {
        // At least one model uses the target and the caller does not care about the indicators.
        return true;
    }

    $wantedids = array_keys($indicators);
    sort($wantedids);

    foreach ($candidates as $candidate) {
        $model = new \core_analytics\model($candidate);
        $candidateids = array_keys($model->get_indicators());
        sort($candidateids);

        if ($wantedids === $candidateids) {
            return true;
        }
    }

    return false;
}
432
/**
 * Updates the model.
 *
 * When the indicators, the time splitting method or the predictions processor change,
 * the model is cleared, its version is bumped and, for non-static models, the trained
 * flag is reset. When only the enabled flag changes, the insights cache is purged.
 *
 * NOTE(review): $timesplittingid defaults to an empty string rather than false, so
 * omitting it does not keep the current time splitting method - confirm callers rely on this.
 *
 * @param int|bool $enabled
 * @param \core_analytics\local\indicator\base[]|false $indicators False to respect current indicators
 * @param string|false $timesplittingid False to respect current time splitting method
 * @param string|false $predictionsprocessor False to respect current predictors processor value
 * @return void
 */
public function update($enabled, $indicators = false, $timesplittingid = '', $predictionsprocessor = false) {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $now = time();

    // A false argument means "respect the current value".
    $indicatorsstr = ($indicators !== false)
        ? json_encode(self::indicator_classes($indicators))
        : $this->model->indicators;

    if ($timesplittingid === false) {
        $timesplittingid = $this->model->timesplitting;
    }

    if ($predictionsprocessor === false) {
        $predictionsprocessor = $this->model->predictionsprocessor;
    }

    $configchanged = $this->model->timesplitting !== $timesplittingid ||
        $this->model->indicators !== $indicatorsstr ||
        $this->model->predictionsprocessor !== $predictionsprocessor;

    if ($configchanged) {

        // Delete generated predictions before changing the model version.
        $this->clear();

        // These need to be reset as the version changes.
        $this->uniqueid = null;
        $this->indicators = null;

        // The version is updated so different time splittings are not mixed up.
        $this->model->version = $now;

        // Reset the trained flag.
        if (!$this->is_static()) {
            $this->model->trained = 0;
        }

    } else if ($this->model->enabled != $enabled) {
        // Purge the cached contexts with insights as some will not be visible anymore.
        $this->purge_insights_cache();
    }

    $this->model->enabled = intval($enabled);
    $this->model->indicators = $indicatorsstr;
    $this->model->timesplitting = $timesplittingid;
    $this->model->predictionsprocessor = $predictionsprocessor;
    $this->model->timemodified = $now;
    $this->model->usermodified = $USER->id;

    $DB->update_record('analytics_models', $this->model);
}
500
d16cf374
DM
/**
 * Removes the model.
 *
 * Clears the model, asks the predictions processor to delete the model output
 * directory when the processor is ready, and deletes the model record and its log records.
 * The database records are deleted even when the processor is not ready.
 *
 * @return void
 */
public function delete() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $this->clear();

    // Method self::clear is already clearing the current model version.
    $processor = $this->get_predictions_processor(false);
    if ($processor->is_ready() === true) {
        $processor->delete_output_dir($this->get_output_dir(array(), true));
    } else {
        $processorname = \core_analytics\manager::get_predictions_processor_name($processor);
        debugging('Prediction processor ' . $processorname . ' is not ready to be used. Model ' .
            $this->model->id . ' could not be deleted.');
    }

    $DB->delete_records('analytics_models', array('id' => $this->model->id));
    $DB->delete_records('analytics_models_log', array('modelid' => $this->model->id));
}
526
/**
 * Evaluates the model.
 *
 * This method gets the site contents (through the analyser), creates a dataset with
 * them and evaluates the model prediction accuracy using the machine learning backend.
 * One result object is returned for each evaluated time splitting method.
 *
 * Options read by this method:
 * - 'mode': 'configuration' (default) or 'trainedmodel'. In 'trainedmodel' mode only the
 *   model's time splitting method is used and the execution output directory is passed
 *   to the backend as the trained model directory.
 *
 * Static models are not evaluated; a single NO_DATASET result is returned for them.
 *
 * @throws \moodle_exception If the mode is unknown or the model has no indicators.
 * @param array $options
 * @return \stdClass[]
 */
public function evaluate($options = array()) {

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('noevaluationbasedassumptions', 'analytics'));
        $staticresult = new \stdClass();
        $staticresult->status = self::NO_DATASET;
        return array($this->get_time_splitting()->get_id() => $staticresult);
    }

    $options['evaluation'] = true;

    if (empty($options['mode'])) {
        $options['mode'] = 'configuration';
    }

    switch ($options['mode']) {
        case 'trainedmodel':
            // Only the time splitting method used by the trained model is of interest.
            $options['timesplitting'] = $this->model->timesplitting;

            // The ML backend receives the trained model directory as that is what gets evaluated.
            $trainedmodeldir = $this->get_output_dir(['execution']);
            break;

        case 'configuration':
            $trainedmodeldir = false;
            break;

        default:
            throw new \moodle_exception('errorunknownaction', 'analytics');
    }

    $this->init_analyser($options);

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_labelled_data call so we get an early exception if it is not ready.
    $processor = $this->get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    if (empty($datasets)) {
        // No datasets generated.
        $nodataresult = new \stdClass();
        $nodataresult->status = self::NO_DATASET;
        $nodataresult->info = $this->get_analyser()->get_logs();
        return array($nodataresult);
    }

    if (!PHPUNIT_TEST && CLI_SCRIPT) {
        echo PHP_EOL . get_string('processingsitecontents', 'analytics') . PHP_EOL;
    }

    $results = array();
    foreach ($datasets as $timesplittingid => $dataset) {

        $timesplitting = \core_analytics\manager::get_time_splitting($timesplittingid);

        $result = new \stdClass();

        // The class name without backslashes is used as the output subdirectory name.
        $dirname = str_replace('\\', '', $timesplittingid);
        $outputdir = $this->get_output_dir(array('evaluation', $dirname));

        // Evaluate the dataset, the deviation we accept in the results depends on the amount of iterations.
        if ($this->get_target()->is_linear()) {
            $backendresult = $processor->evaluate_regression($this->get_unique_id(), self::ACCEPTED_DEVIATION,
                self::EVALUATION_ITERATIONS, $dataset, $outputdir, $trainedmodeldir);
        } else {
            $backendresult = $processor->evaluate_classification($this->get_unique_id(), self::ACCEPTED_DEVIATION,
                self::EVALUATION_ITERATIONS, $dataset, $outputdir, $trainedmodeldir);
        }

        $result->status = $backendresult->status;
        $result->info = $backendresult->info;

        // Prediction processors may return an error, default to 0 score in that case.
        $result->score = isset($backendresult->score) ? $backendresult->score : 0;

        $dir = !empty($backendresult->dir) ? $backendresult->dir : false;

        $result->logid = $this->log_result($timesplitting->get_id(), $result->score, $dir, $result->info, $options['mode']);

        $results[$timesplitting->get_id()] = $result;
    }

    return $results;
}
639
/**
 * Trains the model using the site contents.
 *
 * This method prepares a dataset from the site contents (through the analyser)
 * and passes it to the machine learning backend. Static models are skipped as
 * they do not require training.
 *
 * The dataset is flagged as used and the model marked as trained only when the
 * backend reports an OK status.
 *
 * @throws \moodle_exception If the model is disabled, has no time splitting method or has no indicators.
 * @return \stdClass Object with 'status' and, except for static models, 'info'.
 */
public function train() {

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('notrainingbasedassumptions', 'analytics'));
        $staticresult = new \stdClass();
        $staticresult->status = self::OK;
        return $staticresult;
    }

    if (!$this->is_enabled() || empty($this->model->timesplitting)) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_labelled_data call so we get an early exception if it is not writable.
    $outputdir = $this->get_output_dir(array('execution'));

    // Before get_labelled_data call so we get an early exception if it is not ready.
    $processor = $this->get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // No training if no files have been provided.
    if (empty($datasets) || empty($datasets[$this->model->timesplitting])) {
        $nodataresult = new \stdClass();
        $nodataresult->status = self::NO_DATASET;
        $nodataresult->info = $this->get_analyser()->get_logs();
        return $nodataresult;
    }
    $trainingfile = $datasets[$this->model->timesplitting];

    // Train using the dataset.
    if ($this->get_target()->is_linear()) {
        $backendresult = $processor->train_regression($this->get_unique_id(), $trainingfile, $outputdir);
    } else {
        $backendresult = $processor->train_classification($this->get_unique_id(), $trainingfile, $outputdir);
    }

    $result = new \stdClass();
    $result->status = $backendresult->status;
    $result->info = $backendresult->info;

    if ($result->status === self::OK) {
        $this->flag_file_as_used($trainingfile, 'trained');

        // Mark the model as trained if it wasn't.
        if (!$this->model->trained) {
            $this->mark_as_trained();
        }
    }

    return $result;
}
712
/**
 * Get predictions from the site contents.
 *
 * It analyses the site contents (through analyser classes) looking for samples
 * ready to receive predictions. It generates a dataset with all samples ready to
 * get predictions and passes it to the machine learning backend, or to the
 * target for static models, to get the predictions.
 *
 * When the status is OK the prediction callbacks are executed, insights are triggered
 * if the model uses them, and the samples file is flagged as predicted.
 *
 * @throws \moodle_exception If the model is disabled, has no time splitting method, has no
 *                           indicators or the samples file was already used for predictions.
 * @return \stdClass Object with 'status', 'info' and, when a dataset was available, 'predictions'.
 */
public function predict() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    if (!$this->is_enabled() || empty($this->model->timesplitting)) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_unlabelled_data call so we get an early exception if it is not writable.
    $outputdir = $this->get_output_dir(array('execution'));

    // Before get_unlabelled_data call so we get an early exception if it is not ready.
    if (!$this->is_static()) {
        $processor = $this->get_predictions_processor();
    }

    $samplesdata = $this->get_analyser()->get_unlabelled_data();

    // Get the prediction samples file.
    if (empty($samplesdata) || empty($samplesdata[$this->model->timesplitting])) {
        $nodataresult = new \stdClass();
        $nodataresult->status = self::NO_DATASET;
        $nodataresult->info = $this->get_analyser()->get_logs();
        return $nodataresult;
    }
    $samplesfile = $samplesdata[$this->model->timesplitting];

    // We need to throw an exception if we are trying to predict stuff that was already predicted.
    $usedfileparams = array('modelid' => $this->model->id, 'action' => 'predicted', 'fileid' => $samplesfile->get_id());
    if ($DB->get_record('analytics_used_files', $usedfileparams)) {
        throw new \moodle_exception('erroralreadypredict', 'analytics', '', $samplesfile->get_id());
    }

    $indicatorcalculations = \core_analytics\dataset_manager::get_structured_data($samplesfile);

    // Prepare the results object.
    $result = new \stdClass();

    if ($this->is_static()) {
        // Prediction based on assumptions.
        $result->status = self::OK;
        $result->info = [];
        $result->predictions = $this->get_static_predictions($indicatorcalculations);

    } else {
        // Estimation and classification processes run on the machine learning backend side.
        if ($this->get_target()->is_linear()) {
            $backendresult = $processor->estimate($this->get_unique_id(), $samplesfile, $outputdir);
        } else {
            $backendresult = $processor->classify($this->get_unique_id(), $samplesfile, $outputdir);
        }
        $result->status = $backendresult->status;
        $result->info = $backendresult->info;
        $result->predictions = $this->format_predictor_predictions($backendresult);
    }

    if ($result->status !== self::OK) {
        return $result;
    }

    if ($result->predictions) {
        $samplecontexts = $this->execute_prediction_callbacks($result->predictions, $indicatorcalculations);
    }

    // Variable $samplecontexts is only set when there were predictions, empty() copes with it being undefined.
    if (!empty($samplecontexts) && $this->uses_insights()) {
        $this->trigger_insights($samplecontexts);
    }

    $this->flag_file_as_used($samplesfile, 'predicted');

    return $result;
}
803
ed12ba6b
DM
/**
 * Returns the model predictions processor.
 *
 * Delegates to the analytics manager using the processor stored in the model record.
 *
 * @param bool $checkisready Passed through to the manager.
 * @return \core_analytics\predictor
 */
public function get_predictions_processor($checkisready = true) {
    $processorname = $this->model->predictionsprocessor;
    return manager::get_predictions_processor($processorname, $checkisready);
}
813
1611308b
DM
/**
 * Formats the predictor results.
 *
 * Each element of $predictorresult->predictions is expected to be a list with:
 * - 1 element (unique sample id): the sample could not be processed, it is skipped.
 * - 2 elements (unique sample id, prediction): the prediction score defaults to 1.
 * - 3 elements (unique sample id, prediction, prediction score).
 *
 * Rows with any other number of elements are malformed and are skipped as well, so
 * they can never produce a prediction built from undefined values or from values
 * left over from the previous row.
 *
 * @param \stdClass $predictorresult Predictions processor result, its 'predictions' property is read if set.
 * @return \stdClass[] Objects with 'prediction' and 'predictionscore', indexed by unique sample id.
 */
private function format_predictor_predictions($predictorresult) {

    $predictions = array();
    if (empty($predictorresult->predictions)) {
        return $predictions;
    }

    foreach ($predictorresult->predictions as $sampleinfo) {

        // We parse each prediction.
        switch (count($sampleinfo)) {
            case 1:
                // For whatever reason the predictions processor could not process this sample, we
                // skip it and do nothing with it.
                debugging($this->model->id . ' model predictions processor could not process the sample with id ' .
                    $sampleinfo[0], DEBUG_DEVELOPER);
                continue 2;
            case 2:
                // Prediction processors that do not return a prediction score will have the maximum prediction
                // score.
                list($uniquesampleid, $prediction) = $sampleinfo;
                $predictionscore = 1;
                break;
            case 3:
                list($uniquesampleid, $prediction, $predictionscore) = $sampleinfo;
                break;
            default:
                // Unexpected row shape: skip it instead of reusing stale or undefined values.
                debugging($this->model->id . ' model predictions processor returned a malformed prediction with ' .
                    count($sampleinfo) . ' elements', DEBUG_DEVELOPER);
                continue 2;
        }

        $predictions[$uniquesampleid] = (object)['prediction' => $prediction, 'predictionscore' => $predictionscore];
    }

    return $predictions;
}
852
/**
 * Execute the prediction callbacks defined by the target.
 *
 * For every prediction the target wants a callback for, a prediction record is
 * prepared, the sample context is collected and the target's prediction callback is
 * called. The prepared records are saved together once all predictions are processed.
 *
 * @param \stdClass[] $predictions Indexed by unique sample id.
 * @param array $indicatorcalculations Indexed by unique sample id.
 * @return array Sample contexts indexed by context id.
 */
protected function execute_prediction_callbacks($predictions, $indicatorcalculations) {

    // All predictions' contexts are stored here, they are used to limit which users will see those predictions.
    $samplecontexts = array();
    $records = array();

    foreach ($predictions as $uniquesampleid => $prediction) {

        // The unique sample id contains both the sampleid and the rangeindex.
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        if (!$this->get_target()->triggers_callback($prediction->prediction, $prediction->predictionscore)) {
            // The target is not interested in this prediction.
            continue;
        }

        // Prepare the record to store the predicted values.
        $calculations = json_encode($indicatorcalculations[$uniquesampleid]);
        list($record, $samplecontext) = $this->prepare_prediction_record($sampleid, $rangeindex, $prediction->prediction,
            $prediction->predictionscore, $calculations);

        // They are all bulk-inserted later.
        $records[$uniquesampleid] = $record;

        // Also store all samples context to later generate insights or whatever action the target wants to perform.
        $samplecontexts[$samplecontext->id] = $samplecontext;

        $this->get_target()->prediction_callback($this->model->id, $sampleid, $rangeindex, $samplecontext,
            $prediction->prediction, $prediction->predictionscore);
    }

    if (!empty($records)) {
        $this->save_predictions($records);
    }

    return $samplecontexts;
}
369389c9 894
1611308b
DM
/**
 * Generates insights and updates the cache.
 *
 * The target is asked to generate the insight notifications and the
 * 'contextwithinsights' cache is then updated for each of the sample contexts.
 *
 * @param \context[] $samplecontexts
 * @return void
 */
protected function trigger_insights($samplecontexts) {

    // Notify the target that all predictions have been processed.
    $this->get_target()->generate_insight_notifications($this->model->id, $samplecontexts);

    // Update cache.
    $cache = \cache::make('core', 'contextwithinsights');
    foreach ($samplecontexts as $context) {
        $cachedmodelids = $cache->get($context->id);

        if (!$cachedmodelids) {
            // The cache is empty, but we don't know if it is empty because there are no insights
            // in this context or because cache/s have been purged, we need to be conservative and
            // "pay" 1 db read to fill up the cache.
            $models = \core_analytics\manager::get_models_with_insights($context);
            $cache->set($context->id, array_keys($models));
            continue;
        }

        if (!in_array($this->get_id(), $cachedmodelids)) {
            // This model is not listed for the context yet, append it.
            $cachedmodelids[] = $this->get_id();
            $cache->set($context->id, $cachedmodelids);
        }
    }
}
922
/**
 * Get predictions from a static model.
 *
 * Samples are grouped by analysable class and range index, the target calculates a value
 * for each group and every non-null value becomes a prediction with a score of 1.
 *
 * Samples that the target discards (missing or null calculation) are removed from
 * $indicatorcalculations, which is received by reference, so they are not stored later
 * as calculated samples.
 *
 * @param array $indicatorcalculations Indicators values indexed by unique sample id (modified by reference).
 * @return \stdClass[] Objects with 'prediction' and 'predictionscore' indexed by unique sample id.
 */
protected function get_static_predictions(&$indicatorcalculations) {

    // Group samples by analysable for \core_analytics\local\target::calculate.
    $analysables = array();
    // List all sampleids together.
    $sampleids = array();

    foreach ($indicatorcalculations as $uniquesampleid => $indicators) {
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
        $analysableclass = get_class($analysable);
        if (empty($analysables[$analysableclass])) {
            $analysables[$analysableclass] = array();
        }
        if (empty($analysables[$analysableclass][$rangeindex])) {
            $analysables[$analysableclass][$rangeindex] = (object)[
                'analysable' => $analysable,
                'indicatorsdata' => array(),
                'sampleids' => array()
            ];
        }
        // Using the sampleid as a key so we can easily merge indicators data later.
        $analysables[$analysableclass][$rangeindex]->indicatorsdata[$sampleid] = $indicators;
        // We could use indicatorsdata keys but the amount of redundant data is not that big and leaves code below cleaner.
        $analysables[$analysableclass][$rangeindex]->sampleids[$sampleid] = $sampleid;

        // Accumulate sample ids to get all their associated data in 1 single db query (analyser::get_samples).
        $sampleids[$sampleid] = $sampleid;
    }

    // Get all samples data.
    list($sampleids, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Calculate the targets.
    $predictions = array();
    foreach ($analysables as $analysableclass => $rangedata) {
        foreach ($rangedata as $rangeindex => $data) {

            // Attach samples data and calculated indicators data.
            $this->get_target()->clear_sample_data();
            $this->get_target()->add_sample_data($samplesdata);
            $this->get_target()->add_sample_data($data->indicatorsdata);

            // Append new elements (we can not get duplicates because sample-analysable relation is N-1).
            $range = $this->get_time_splitting()->get_range_by_index($rangeindex);
            $this->get_target()->filter_out_invalid_samples($data->sampleids, $data->analysable, false);
            $calculations = $this->get_target()->calculate($data->sampleids, $data->analysable, $range['start'], $range['end']);

            // Missing $indicatorcalculations values in $calculations are caused by is_valid_sample. We need to remove
            // these $uniquesampleid from $indicatorcalculations because otherwise they will be stored as calculated
            // by self::save_prediction.
            // Only the entries that belong to the group we are processing can be judged against $calculations;
            // entries from other range indexes or analysables must be kept untouched for their own iteration.
            $groupindicators = $data->indicatorsdata;
            $indicatorcalculations = array_filter($indicatorcalculations,
                function($indicators, $uniquesampleid) use ($calculations, $rangeindex, $groupindicators) {
                    list($sampleid, $samplerangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);
                    // Loose comparison on purpose, range indexes may come back as numeric strings.
                    if ($samplerangeindex != $rangeindex || !array_key_exists($sampleid, $groupindicators)) {
                        return true;
                    }
                    return isset($calculations[$sampleid]);
                }, ARRAY_FILTER_USE_BOTH);

            foreach ($calculations as $sampleid => $value) {

                $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);

                // Null means that the target couldn't calculate the sample, we also remove them from $indicatorcalculations.
                if (is_null($value)) {
                    unset($indicatorcalculations[$uniquesampleid]);
                    continue;
                }

                // Even if static predictions are based on assumptions we flag them as 100% because they are 100%
                // true according to what the developer defined.
                $predictions[$uniquesampleid] = (object)['prediction' => $value, 'predictionscore' => 1];
            }
        }
    }
    return $predictions;
}
1007
/**
 * Builds the prediction record for a sample, along with the sample context.
 *
 * This method does not write to the database, it only prepares the record.
 * The 'timestart' and 'timeend' fields are only set when the time splitting method
 * returns a range for $rangeindex.
 *
 * @param int $sampleid
 * @param int $rangeindex
 * @param int $prediction
 * @param float $predictionscore
 * @param string $calculations
 * @return array 2 elements: list(\stdClass $record, \context $context)
 */
protected function prepare_prediction_record($sampleid, $rangeindex, $prediction, $predictionscore, $calculations) {
    $analyser = $this->get_analyser();
    $samplecontext = $analyser->sample_access_context($sampleid);

    $predictionrecord = (object)[
        'modelid' => $this->model->id,
        'contextid' => $samplecontext->id,
        'sampleid' => $sampleid,
        'rangeindex' => $rangeindex,
        'prediction' => $prediction,
        'predictionscore' => $predictionscore,
        'calculations' => $calculations,
        'timecreated' => time(),
    ];

    // The range limits depend on the analysable the sample belongs to.
    $timesplitting = $this->get_time_splitting();
    $timesplitting->set_analysable($this->get_analyser()->get_sample_analysable($sampleid));
    $timerange = $timesplitting->get_range_by_index($rangeindex);
    if ($timerange) {
        $predictionrecord->timestart = $timerange['start'];
        $predictionrecord->timeend = $timerange['end'];
    }

    return array($predictionrecord, $samplecontext);
}
1042
/**
 * Inserts the provided prediction records into the analytics_predictions table.
 *
 * @param \stdClass[] $records Prediction records to insert.
 */
protected function save_predictions($records) {
    global $DB;

    // All records are handed over to the database layer in a single call.
    $table = 'analytics_predictions';
    $DB->insert_records($table, $records);
}
1052
/**
 * Enables the model, optionally switching to the provided time splitting method.
 *
 * Switching the time splitting method clears the data generated by the model, bumps
 * the model version and, for non-static models, resets the trained flag.
 *
 * @throws \moodle_exception If the time splitting method is invalid or the model has none set.
 * @param string|false $timesplittingid False to respect the current time splitting method.
 * @return void
 */
public function enable($timesplittingid = false) {
    global $DB, $USER;

    \core_analytics\manager::check_can_manage_models();

    $now = time();

    $switching = $timesplittingid && $timesplittingid !== $this->model->timesplitting;

    if ($switching) {

        // The id must be a valid time splitting class name and it must be fully qualified.
        $isvalid = \core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base');
        if (!$isvalid || substr($timesplittingid, 0, 1) !== '\\') {
            throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
        }

        // Generated predictions must be deleted before the model version changes.
        $this->clear();

        // The cached unique id depends on the version, force it to be regenerated.
        $this->uniqueid = null;

        $this->model->timesplitting = $timesplittingid;
        $this->model->version = $now;

        // Models that require training need to be trained again.
        if (!$this->is_static()) {
            $this->model->trained = 0;
        }
    } else if (empty($this->model->timesplitting)) {
        // A valid timesplitting method needs to be supplied before a model can be enabled.
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    // Purge pages with insights if the model was not already enabled, as this may change things.
    if ($this->model->enabled != 1) {
        $this->purge_insights_cache();
    }

    $this->model->enabled = 1;
    $this->model->timemodified = $now;
    $this->model->usermodified = $USER->id;

    // Persist the updated model record.
    $DB->update_record('analytics_models', $this->model);
}
1107
/**
 * Is this a static model (as defined by the target)?
 *
 * Static models rely on assumptions rather than on the results of a
 * machine learning backend.
 *
 * @return bool
 */
public function is_static() {
    $target = $this->get_target();
    return (bool)$target->based_on_assumptions();
}
1119
/**
 * Whether the model is flagged as enabled.
 *
 * @return bool
 */
public function is_enabled() {
    $enabled = $this->model->enabled;
    return (bool)$enabled;
}
1128
/**
 * Whether the model can be considered trained.
 *
 * @return bool True if the trained flag is set or if the model is static.
 */
public function is_trained() {
    if ($this->model->trained) {
        return true;
    }

    // Models which targets are based on assumptions do not need training.
    return $this->is_static();
}
1138
/**
 * Sets the trained flag and saves the model record.
 *
 * @return void
 */
public function mark_as_trained() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $model = $this->model;
    $model->trained = 1;

    $DB->update_record('analytics_models', $model);
}
1152
/**
 * Returns the ids of the contexts where this model has predictions.
 *
 * Only contexts that still exist in the context table are returned. When $skiphidden is set,
 * predictions that the current user flagged as fixed or as not useful are ignored.
 *
 * @param bool $skiphidden Skip hidden predictions
 * @return \stdClass[] Records containing a 'contextid' field.
 */
public function get_predictions_contexts($skiphidden = true) {
    global $DB, $USER;

    $params = array('modelid' => $this->model->id);

    $sql = "SELECT DISTINCT ap.contextid FROM {analytics_predictions} ap
              JOIN {context} ctx ON ctx.id = ap.contextid
             WHERE ap.modelid = :modelid";

    if ($skiphidden) {
        // Leave out the predictions that the current user already acted on.
        $params['userid'] = $USER->id;
        $params['fixed'] = \core_analytics\prediction::ACTION_FIXED;
        $params['notuseful'] = \core_analytics\prediction::ACTION_NOT_USEFUL;

        $sql .= " AND NOT EXISTS (
              SELECT 1
                FROM {analytics_prediction_actions} apa
               WHERE apa.predictionid = ap.id AND apa.userid = :userid AND (apa.actionname = :fixed OR apa.actionname = :notuseful)
            )";
    }

    return $DB->get_records_sql($sql, $params);
}
1180
f9e7447f
DM
/**
 * Has this model generated predictions?
 *
 * The analytics_predict_samples table is checked instead of analytics_predictions
 * because targets are able to ignore some predicted values, in which case the
 * predictions are not even stored in the database.
 *
 * @return bool
 */
public function any_prediction_obtained() {
    global $DB;

    $conditions = array(
        'modelid' => $this->model->id,
        'timesplitting' => $this->model->timesplitting,
    );

    return $DB->record_exists('analytics_predict_samples', $conditions);
}
1195
/**
 * Does this model generate insights? The answer is delegated to the model's target.
 *
 * @return bool
 */
public function uses_insights() {
    $modeltarget = $this->get_target();

    // Static call on the target instance class.
    return $modeltarget::uses_insights();
}
1205
369389c9
DM
/**
 * Whether this model has any prediction stored for the provided context.
 *
 * @param \context $context
 * @return bool
 */
public function predictions_exist(\context $context) {
    global $DB;

    // Any prediction of this model in the context counts, regardless of its time range.
    $conditions = array(
        'modelid' => $this->model->id,
        'contextid' => $context->id,
    );

    return $DB->record_exists('analytics_predictions', $conditions);
}
1220
/**
 * Gets the predictions for this context.
 *
 * Only the prediction with the highest range index of each sample is considered. Predictions
 * whose samples are not available any more are discarded. The returned total counts all the
 * remaining predictions, not only the ones included in the requested page.
 *
 * @param \context $context
 * @param bool $skiphidden Skip hidden predictions
 * @param int $page The page of results to fetch. False for all results.
 * @param int $perpage The max number of results to fetch. Ignored if $page is false.
 * @return array Empty array if no records are found, array($total, \core_analytics\prediction[]) otherwise.
 */
public function get_predictions(\context $context, $skiphidden = true, $page = false, $perpage = 100) {
    global $DB, $USER;

    \core_analytics\manager::check_can_list_insights($context);

    $params = array(
        'modelid' => $this->model->id,
        'contextid' => $context->id,
        'modelidsubap' => $this->model->id,
        'contextidsubap' => $context->id,
    );

    // Filters out previous predictions keeping only the last time range one.
    $sql = "SELECT ap.*
              FROM {analytics_predictions} ap
              JOIN (
                SELECT sampleid, max(rangeindex) AS rangeindex
                  FROM {analytics_predictions}
                 WHERE modelid = :modelidsubap and contextid = :contextidsubap
              GROUP BY sampleid
              ) apsub
                ON ap.sampleid = apsub.sampleid AND ap.rangeindex = apsub.rangeindex
             WHERE ap.modelid = :modelid and ap.contextid = :contextid";

    if ($skiphidden) {
        // Leave out the predictions that the current user flagged as fixed or as not useful.
        $params['userid'] = $USER->id;
        $params['fixed'] = \core_analytics\prediction::ACTION_FIXED;
        $params['notuseful'] = \core_analytics\prediction::ACTION_NOT_USEFUL;

        $sql .= " AND NOT EXISTS (
              SELECT 1
                FROM {analytics_prediction_actions} apa
               WHERE apa.predictionid = ap.id AND apa.userid = :userid AND (apa.actionname = :fixed OR apa.actionname = :notuseful)
            )";
    }

    $sql .= " ORDER BY ap.timecreated DESC";

    $records = $DB->get_records_sql($sql, $params);
    if (!$records) {
        // Callers receive a plain empty array in this case, not a (total, list) pair.
        return array();
    }

    // Get predicted samples' ids, indexed by prediction id.
    $sampleids = array();
    foreach ($records as $recordid => $record) {
        $sampleids[$recordid] = $record->sampleid;
    }

    list($unused, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    $paginate = ($page !== false);
    $offset = 0;
    $limit = 0;
    if ($paginate) {
        $offset = $page * $perpage;
        $limit = $offset + $perpage;
    }

    // We cannot paginate in the DB because the list is post filtered here.
    $total = 0;
    $predictions = array();
    foreach ($records as $recordid => $record) {

        $sampleid = $record->sampleid;

        // Filter out predictions which samples are not available anymore.
        if (empty($samplesdata[$sampleid])) {
            continue;
        }

        $inpage = !$paginate || ($total >= $offset && $total < $limit);
        if ($inpage) {
            // \stdClass records are converted to \core_analytics\prediction objects.
            $predictions[$recordid] = new \core_analytics\prediction($record, $samplesdata[$sampleid]);
        }

        $total++;
    }

    return [$total, $predictions];
}
1304
/**
 * Returns the data of the sample a prediction refers to.
 *
 * @throws \moodle_exception If the analyser does not return data for the sample.
 * @param \stdClass $predictionobj Prediction record, its 'sampleid' field is used.
 * @return array
 */
public function prediction_sample_data($predictionobj) {

    $sampleid = $predictionobj->sampleid;

    list($unused, $samplesdata) = $this->get_analyser()->get_samples(array($sampleid));

    if (!empty($samplesdata[$sampleid])) {
        return $samplesdata[$sampleid];
    }

    throw new \moodle_exception('errorsamplenotavailable', 'analytics');
}
1321
/**
 * Returns the description of the sample a prediction refers to, as provided by the analyser.
 *
 * @param \core_analytics\prediction $prediction
 * @return array 2 elements: list(string, \renderable)
 */
public function prediction_sample_description(\core_analytics\prediction $prediction) {
    $sampleid = $prediction->get_prediction_data()->sampleid;
    $contextid = $prediction->get_prediction_data()->contextid;
    $sampledata = $prediction->get_sample_data();

    return $this->get_analyser()->sample_description($sampleid, $contextid, $sampledata);
}
1332
/**
 * Returns the output directory for prediction processors, creating it if needed.
 *
 * Directory structure as follows:
 * - Evaluation runs:
 *   models/$model->id/$model->version/evaluation/$model->timesplitting
 * - Training & prediction runs:
 *   models/$model->id/$model->version/execution
 *
 * The base directory is the 'modeloutputdir' analytics setting, falling back to a
 * 'models' directory inside $CFG->dataroot when the setting is empty.
 *
 * @param array $subdirs Subdirectories to append after the model version.
 * @param bool $onlymodelid Preference over $subdirs
 * @return string
 */
public function get_output_dir($subdirs = array(), $onlymodelid = false) {
    global $CFG;

    // Each subdirectory is prefixed with a separator.
    $subdirstr = empty($subdirs) ? '' : DIRECTORY_SEPARATOR . implode(DIRECTORY_SEPARATOR, $subdirs);

    $basedir = get_config('analytics', 'modeloutputdir');
    if (empty($basedir)) {
        // Apply default value.
        $basedir = rtrim($CFG->dataroot, '/') . DIRECTORY_SEPARATOR . 'models';
    }

    // The model id is always part of the path.
    $outputdir = $basedir . DIRECTORY_SEPARATOR . $this->model->id;
    if (!$onlymodelid) {
        // Append version + subdirs.
        $outputdir = $outputdir . DIRECTORY_SEPARATOR . $this->model->version . $subdirstr;
    }

    make_writable_directory($outputdir);

    return $outputdir;
}
1371
/**
 * Returns a unique id for this model.
 *
 * The id is a sha1 hash of the site wwwroot, the database prefix, the model id and
 * the model version. It is generated once and then reused until it is reset.
 *
 * @return string
 */
public function get_unique_id() {
    global $CFG;

    if (is_null($this->uniqueid)) {
        // Not generated yet; the model version changes when the model is updated,
        // so it is part of the hashed values.
        $parts = array($CFG->wwwroot, $CFG->prefix, $this->model->id, $this->model->version);
        $this->uniqueid = sha1(implode('$$', $parts));
    }

    return $this->uniqueid;
}
1393
/**
 * Exports the model data for displaying it in a template.
 *
 * The returned object is a copy of the model record where the target, the time
 * splitting method (if there is one) and the indicators are replaced by their names.
 *
 * @return \stdClass
 */
public function export() {

    \core_analytics\manager::check_can_manage_models();

    $exported = clone $this->model;
    $exported->target = $this->get_target()->get_name();

    $timesplitting = $this->get_time_splitting();
    if ($timesplitting) {
        $exported->timesplitting = $timesplitting->get_name();
    }

    $indicatornames = array();
    foreach ($this->get_indicators() as $indicator) {
        $indicatornames[] = $indicator->get_name();
    }
    $exported->indicators = $indicatornames;

    return $exported;
}
1416
/**
 * Exports the model to a zip file, delegating the work to model_config.
 *
 * @param string $zipfilename
 * @return string Zip file path
 */
public function export_model(string $zipfilename) : string {

    \core_analytics\manager::check_can_manage_models();

    $exporter = new model_config($this);
    $zipfilepath = $exporter->export($zipfilename);

    return $zipfilepath;
}
1430
/**
 * Imports a model from the provided zip file, delegating the work to model_config.
 *
 * Note that this method assumes that model_config::check_dependencies has already been called.
 *
 * @throws \moodle_exception
 * @param string $zipfilepath Zip file path
 * @return \core_analytics\model
 */
public static function import_model(string $zipfilepath) : \core_analytics\model {

    \core_analytics\manager::check_can_manage_models();

    $importer = new \core_analytics\model_config();
    $importedmodel = $importer->import($zipfilepath);

    return $importedmodel;
}
349c4412 1447
e4453adc
DM
/**
 * Can this model be exported?
 *
 * A model can be exported when it has a time splitting method, it has indicators
 * and it is not a static model.
 *
 * @return bool
 */
public function can_export_configuration() : bool {

    $hastimesplitting = !empty($this->model->timesplitting);

    // Conditions are evaluated lazily, in the same order they are listed above.
    return $hastimesplitting && $this->get_indicators() && !$this->is_static();
}
1468
584ffa4f
DM
/**
 * Returns the log records of this model, most recently created first.
 *
 * @param int $limitfrom
 * @param int $limitnum
 * @return \stdClass[]
 */
public function get_logs($limitfrom = 0, $limitnum = 0) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $conditions = array('modelid' => $this->get_id());
    $sort = 'timecreated DESC';

    return $DB->get_records('analytics_models_log', $conditions, $sort, '*', $limitfrom, $limitnum);
}
1484
d126f838
DM
/**
 * Merges all training data files into one and returns it.
 *
 * The export is done by the dataset manager using the id of this model and the id
 * of its time splitting method.
 *
 * @return \stored_file|false
 */
public function get_training_data() {

    \core_analytics\manager::check_can_manage_models();

    $modelid = $this->get_id();
    $timesplittingid = $this->get_time_splitting()->get_id();

    return \core_analytics\dataset_manager::export_training_data($modelid, $timesplittingid);
}
1497
74823933
DM
/**
 * Has the model been trained using data from this site?
 *
 * Useful to find out if a trained model can be evaluated, as the same data
 * can not be used both for training and for evaluation.
 *
 * @return bool False for untrained and static models, otherwise whether the model has training samples stored.
 */
public function trained_locally() : bool {
    global $DB;

    $needscheck = $this->is_trained() && !$this->is_static();
    if (!$needscheck) {
        // Early exit.
        return false;
    }

    // Training samples are only recorded for models trained in this site.
    return $DB->record_exists('analytics_train_samples', ['modelid' => $this->model->id]);
}
1520
/**
 * Records that the provided file has been used by this model for the given action.
 *
 * @param \stored_file $file
 * @param string $action
 * @return void
 */
protected function flag_file_as_used(\stored_file $file, $action) {
    global $DB;

    $usedfile = (object)[
        'modelid' => $this->model->id,
        'fileid' => $file->get_id(),
        'action' => $action,
        'time' => time(),
    ];

    $DB->insert_record('analytics_used_files', $usedfile);
}
1539 /**
1cc2b4ba 1540 * Log the evaluation results in the database.
369389c9
DM
1541 *
1542 * @param string $timesplittingid
1543 * @param float $score
1544 * @param string $dir
1545 * @param array $info
e97dfff7 1546 * @param string $evaluationmode
369389c9
DM
1547 * @return int The inserted log id
1548 */
e97dfff7 1549 protected function log_result($timesplittingid, $score, $dir = false, $info = false, $evaluationmode = 'configuration') {
369389c9
DM
1550 global $DB, $USER;
1551
1552 $log = new \stdClass();
1553 $log->modelid = $this->get_id();
1554 $log->version = $this->model->version;
e97dfff7 1555 $log->evaluationmode = $evaluationmode;
369389c9
DM
1556 $log->target = $this->model->target;
1557 $log->indicators = $this->model->indicators;
1558 $log->timesplitting = $timesplittingid;
1559 $log->dir = $dir;
1560 if ($info) {
1561 // Ensure it is not an associative array.
1562 $log->info = json_encode(array_values($info));
1563 }
1564 $log->score = $score;
1565 $log->timecreated = time();
1566 $log->usermodified = $USER->id;
1567
1568 return $DB->insert_record('analytics_models_log', $log);
1569 }
1570
/**
 * Utility method to return indicator class names from a list of indicator objects
 *
 * @throws \moodle_exception If any of the elements is not a valid indicator.
 * @param \core_analytics\local\indicator\base[] $indicators
 * @return string[] The ids returned by each indicator.
 */
private static function indicator_classes($indicators) {

    // What we want to check and store are the indicator classes not the keys.
    $indicatorclasses = array();
    foreach ($indicators as $indicator) {
        if (!\core_analytics\manager::is_valid($indicator, '\core_analytics\local\indicator\base')) {
            // Build something printable to identify the offending element in the error.
            if (is_object($indicator)) {
                $indicator = '\\' . get_class($indicator);
            } else if (!is_scalar($indicator)) {
                // Arrays, resources and null can not be safely converted with strval() (arrays raise
                // an "Array to string conversion" error), so we report the type of the value instead.
                $indicator = gettype($indicator);
            }
            throw new \moodle_exception('errorinvalidindicator', 'analytics', '', $indicator);
        }
        $indicatorclasses[] = $indicator->get_id();
    }

    return $indicatorclasses;
}
1595
/**
 * Clears the model training and prediction data.
 *
 * Executed after updating model critical elements like the time splitting method
 * or the indicators.
 *
 * Apart from the prediction processor data, this removes the model predictions and the
 * actions on them, the records of analysed samples, used files and used analysables and
 * the generated dataset files. The insights cache is purged and, for non-static models,
 * the trained flag is reset.
 *
 * @return void
 */
public function clear() {
    global $DB, $USER;

    \core_analytics\manager::check_can_manage_models();

    // Delete current model version stored stuff.
    $predictor = $this->get_predictions_processor(false);
    if ($predictor->is_ready() !== true) {
        $predictorname = \core_analytics\manager::get_predictions_processor_name($predictor);
        debugging('Prediction processor ' . $predictorname . ' is not ready to be used. Model ' .
            $this->model->id . ' could not be cleared.');
    } else {
        $predictor->clear_model($this->get_unique_id(), $this->get_output_dir());
    }

    // Actions must go before the predictions they reference. A subquery is used so we do not need to
    // load all prediction ids in memory nor to bind one query placeholder for each of them.
    $DB->delete_records_select('analytics_prediction_actions',
        "predictionid IN (SELECT ap.id FROM {analytics_predictions} ap WHERE ap.modelid = :modelid)",
        array('modelid' => $this->get_id()));

    $DB->delete_records('analytics_predictions', array('modelid' => $this->model->id));
    $DB->delete_records('analytics_predict_samples', array('modelid' => $this->model->id));
    $DB->delete_records('analytics_train_samples', array('modelid' => $this->model->id));
    $DB->delete_records('analytics_used_files', array('modelid' => $this->model->id));
    $DB->delete_records('analytics_used_analysables', array('modelid' => $this->model->id));

    // Purge all generated files.
    \core_analytics\dataset_manager::clear_model_files($this->model->id);

    // We don't expect people to clear models regularly and the cost of filling the cache is
    // 1 db read per context.
    $this->purge_insights_cache();

    // The training data is gone, so models that require training are not trained any more.
    if (!$this->is_static()) {
        $this->model->trained = 0;
    }

    $this->model->timemodified = time();
    $this->model->usermodified = $USER->id;
    $DB->update_record('analytics_models', $this->model);
}
1647
/**
 * Purges the whole 'contextwithinsights' cache.
 */
private function purge_insights_cache() {
    \cache::make('core', 'contextwithinsights')->purge();
}
1655
1611308b
DM
/**
 * Increases system memory and time limits.
 *
 * The memory limit is left untouched when it is already set to -1.
 *
 * @return void
 */
private function heavy_duty_mode() {
    $memorylimit = ini_get('memory_limit');
    if ($memorylimit != -1) {
        raise_memory_limit(MEMORY_HUGE);
    }

    \core_php_time_limit::raise();
}
369389c9 1667}