MDL-60944 tool_analytics: Adding create and delete features
[moodle.git] / analytics / classes / model.php
CommitLineData
369389c9
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
b94dbb55 18 * Prediction model representation.
369389c9
DM
19 *
20 * @package core_analytics
21 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace core_analytics;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
b94dbb55 30 * Prediction model representation.
369389c9
DM
31 *
32 * @package core_analytics
33 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36class model {
37
413f19bc
DM
/**
 * Result status: everything went as expected.
 */
const OK = 0;

/**
 * Result status: there was a problem.
 */
const GENERAL_ERROR = 1;

/**
 * Result status: there is no dataset to analyse.
 */
const NO_DATASET = 2;

/**
 * The model has a low prediction accuracy.
 */
const LOW_SCORE = 4;

/**
 * There is not enough data to evaluate the model properly.
 */
const NOT_ENOUGH_DATA = 8;

/**
 * The analysable is not valid for the time splitting method.
 */
const ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD = 4;

/**
 * The analysable is not valid for any of the time splitting methods.
 */
const ANALYSABLE_STATUS_INVALID_FOR_RANGEPROCESSORS = 8;

/**
 * The analysable is not valid for the target.
 */
const ANALYSABLE_STATUS_INVALID_FOR_TARGET = 16;

/**
 * Minimum score required to consider a non-static prediction model as good.
 */
const MIN_SCORE = 0.7;

/**
 * Minimum prediction confidence (from 0 to 1) required to accept a prediction as reliable enough.
 */
const PREDICTION_MIN_SCORE = 0.6;

/**
 * Maximum standard deviation between evaluation repetitions to consider the evaluation results stable.
 */
const ACCEPTED_DEVIATION = 0.05;

/**
 * Number of times an evaluation is repeated.
 */
const EVALUATION_ITERATIONS = 10;

/**
 * @var \stdClass The model data.
 */
protected $model = null;

/**
 * @var \core_analytics\local\analyser\base The analyser, set by init_analyser().
 */
protected $analyser = null;

/**
 * @var \core_analytics\local\target\base The model target, lazily loaded by get_target().
 */
protected $target = null;

/**
 * @var \core_analytics\predictor
 */
protected $predictionsprocessor = null;

/**
 * @var \core_analytics\local\indicator\base[] The model indicators, lazily loaded by get_indicators().
 */
protected $indicators = null;

/**
 * Unique model id created from site info and last model modification.
 *
 * @var string
 */
protected $uniqueid = null;
129
/**
 * Constructor.
 *
 * Accepts either a model id, in which case the record is read from the
 * analytics_models table, or an already loaded model record.
 *
 * @throws \dml_exception If a model id is provided and there is no matching record.
 * @param int|\stdClass $model The model id or the model record.
 */
public function __construct($model) {
    global $DB;

    if (is_scalar($model)) {
        // MUST_EXIST makes get_record() throw an exception when the record is missing,
        // so there is no need to check the returned value afterwards.
        $model = $DB->get_record('analytics_models', array('id' => $model), '*', MUST_EXIST);
    }
    $this->model = $model;
}
147
3a396286
DM
/**
 * Quick safety check to discard models whose required components are not available any more.
 *
 * A model is considered available when its target can be loaded and the
 * analyser class that target points to exists.
 *
 * @return bool
 */
public function is_available() {
    if (!$target = $this->get_target()) {
        // No target, nothing this model can work with.
        return false;
    }

    return class_exists($target->get_analyser_class());
}
166
/**
 * Returns the id of this model.
 *
 * @return int The id stored in the model data.
 */
public function get_id() {
    return $this->model->id;
}
175
/**
 * Returns the model data as a plain \stdClass.
 *
 * The returned object is the one held by this instance, not a copy.
 *
 * @return \stdClass
 */
public function get_model_obj() {
    return $this->model;
}
184
/**
 * Returns the model target.
 *
 * The target instance is requested from the analytics manager the first time
 * and reused from then on.
 *
 * @return \core_analytics\local\target\base
 */
public function get_target() {
    if ($this->target === null) {
        $this->target = \core_analytics\manager::get_target($this->model->target);
    }

    return $this->target;
}
199
/**
 * Returns the model indicators.
 *
 * The list is decoded from the JSON stored in the model data and cached in this
 * instance. Indicators that can not be loaded are skipped with a developer
 * debugging message.
 *
 * @throws \coding_exception If the stored indicators do not decode to an array.
 * @return \core_analytics\local\indicator\base[] Indexed by the indicator full class name.
 */
public function get_indicators() {
    if ($this->indicators !== null) {
        return $this->indicators;
    }

    $classnames = json_decode($this->model->indicators);
    if (!is_array($classnames)) {
        throw new \coding_exception('Model ' . $this->model->id . ' indicators can not be read');
    }

    $this->indicators = array();
    foreach ($classnames as $classname) {
        $indicator = \core_analytics\manager::get_indicator($classname);
        if (!$indicator) {
            debugging('Can\'t load ' . $classname . ' indicator', DEBUG_DEVELOPER);
            continue;
        }
        $this->indicators[$classname] = $indicator;
    }

    return $this->indicators;
}
228
/**
 * Returns the list of indicators that could potentially be used by the model target.
 *
 * It starts from all the indicators returned by the analytics manager and keeps
 * the ones whose requirements are accepted by the model analyser, so it also
 * includes the indicators that are part of the model.
 *
 * @return \core_analytics\local\indicator\base[]
 */
public function get_potential_indicators() {

    $candidates = \core_analytics\manager::get_all_indicators();

    if (empty($this->analyser)) {
        $this->init_analyser(array('evaluation' => true));
    }

    $analyser = $this->analyser;

    // Keys (class names) are preserved by array_filter.
    return array_filter($candidates, function($indicator) use ($analyser) {
        return $analyser->check_indicator_requirements($indicator) === true;
    });
}
251
/**
 * Returns the model analyser (defined by the model target).
 *
 * The analyser is only initialised if there is none yet; once there is one,
 * it is returned as it is and the provided options are not used.
 *
 * @param array $options Initialisation options, none by default.
 * @return \core_analytics\local\analyser\base
 */
public function get_analyser($options = array()) {
    if ($this->analyser === null) {
        $this->init_analyser($options);
    }

    return $this->analyser;
}
267
/**
 * Initialises the model analyser.
 *
 * Options read by this method:
 * - notimesplitting: if not empty no time splitting method is passed to the analyser.
 * - evaluation: if not empty the time splitting methods are flagged as evaluating.
 * - timesplitting: when evaluating, the only time splitting method to use.
 *
 * The whole options array is also passed to the analyser constructor.
 *
 * @throws \moodle_exception If there is no target or no usable time splitting method.
 * @throws \coding_exception If the analyser class defined by the target does not exist.
 * @param array $options
 * @return void
 */
protected function init_analyser($options = array()) {

    $target = $this->get_target();
    $indicators = $this->get_indicators();

    if (empty($target)) {
        throw new \moodle_exception('errornotarget', 'analytics');
    }

    $evaluating = !empty($options['evaluation']);

    $timesplittings = array();
    if (empty($options['notimesplitting'])) {

        if (!$evaluating) {
            // Training and prediction always use the time splitting method of the model.
            if (empty($this->model->timesplitting)) {
                throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
            }

            // Returned as an array as all actions (evaluation, training and prediction) go through the same process.
            $timesplittings = array($this->model->timesplitting => $this->get_time_splitting());

        } else if (!empty($options['timesplitting'])) {
            // The evaluation is limited to the specified time splitting method.
            $requested = \core_analytics\manager::get_time_splitting($options['timesplitting']);
            $timesplittings = array($requested->get_id() => $requested);

        } else {
            // The evaluation process runs using all available time splitting methods.
            $timesplittings = \core_analytics\manager::get_enabled_time_splitting_methods();
        }

        if (empty($timesplittings)) {
            throw new \moodle_exception('errornotimesplittings', 'analytics');
        }
    }

    if ($evaluating) {
        foreach ($timesplittings as $timesplitting) {
            $timesplitting->set_evaluating(true);
        }
    }

    $analyserclass = $target->get_analyser_class();
    if (!class_exists($analyserclass)) {
        throw new \coding_exception($analyserclass . ' class does not exists');
    }

    // Returns a \core_analytics\local\analyser\base class.
    $this->analyser = new $analyserclass($this->model->id, $target, $indicators, $timesplittings, $options);
}
323
/**
 * Returns the model time splitting method.
 *
 * @return \core_analytics\local\time_splitting\base|false False if the model has no time splitting method set.
 */
public function get_time_splitting() {
    if (!empty($this->model->timesplitting)) {
        return \core_analytics\manager::get_time_splitting($this->model->timesplitting);
    }

    return false;
}
335
/**
 * Creates a new model. Enables it if $timesplittingid is specified.
 *
 * The new record is inserted into analytics_models and read back so the
 * database defaults are available. Static models are marked as trained
 * straight away.
 *
 * @throws \coding_exception If the provided predictions processor is not valid.
 * @param \core_analytics\local\target\base $target
 * @param \core_analytics\local\indicator\base[] $indicators
 * @param string $timesplittingid The time splitting method id (its fully qualified class name)
 * @param string $processor The machine learning backend this model will use.
 * @return \core_analytics\model
 */
public static function create(\core_analytics\local\target\base $target, array $indicators,
        $timesplittingid = false, $processor = false) {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $indicatorclasses = self::indicator_classes($indicators);

    $now = time();

    $record = new \stdClass();
    $record->target = $target->get_id();
    $record->indicators = json_encode($indicatorclasses);
    $record->version = $now;
    $record->timecreated = $now;
    $record->timemodified = $now;
    $record->usermodified = $USER->id;

    // No processor specified is fine, otherwise it has to be a valid classifier or a valid regressor.
    $processorisfine = !$processor ||
        manager::is_valid($processor, '\core_analytics\classifier') ||
        manager::is_valid($processor, '\core_analytics\regressor');
    if (!$processorisfine) {
        throw new \coding_exception('The provided predictions processor \\' . $processor . '\processor is not valid');
    }
    $record->predictionsprocessor = $processor;

    $id = $DB->insert_record('analytics_models', $record);

    // Get db defaults.
    $record = $DB->get_record('analytics_models', array('id' => $id), '*', MUST_EXIST);

    $model = new static($record);

    if ($timesplittingid) {
        $model->enable($timesplittingid);
    }

    if ($model->is_static()) {
        $model->mark_as_trained();
    }

    return $model;
}
388
/**
 * Creates a new model from import configuration.
 *
 * It is recommended to call \core_analytics\model_config::check_dependencies first so the error message can be retrieved.
 *
 * The provided $modeldata object is not modified.
 *
 * @throws \coding_exception If the target, the time splitting method, an indicator or the machine
 *                           learning backend referenced by the model data is not available.
 * @param \stdClass $modeldata Model data.
 * @param bool $skipcheckdependencies Useful if you already checked the dependencies.
 * @return \core_analytics\model|null Null if the dependencies check finds errors in the provided model data.
 */
public static function create_from_import(\stdClass $modeldata, ?bool $skipcheckdependencies = false) : ?\core_analytics\model {

    \core_analytics\manager::check_can_manage_models();

    if (!$skipcheckdependencies) {
        $modelconfig = new model_config();
        if ($modelconfig->check_dependencies($modeldata, false)) {
            // The check returned an error, the caller can get the message calling check_dependencies itself.
            return null;
        }
    }

    // At this stage we should be 100% sure that the model data is safe and can be imported.
    // If the caller explicitly set $skipcheckdependencies to true and there is a problem
    // in this process we trigger a coding exception.
    if (!$target = \core_analytics\manager::get_target($modeldata->target)) {
        throw new \coding_exception('The provided target is not available. Ensure that ' .
            'model_config::check_dependencies is called before importing the model.');
    }
    if (!\core_analytics\manager::get_time_splitting($modeldata->timesplitting)) {
        throw new \coding_exception('The provided time splitting method is not available. Ensure that ' .
            'model_config::check_dependencies is called before importing the model.');
    }

    // Indicators.
    $indicators = [];
    foreach ($modeldata->indicators as $indicatorclass) {
        if (!$indicator = \core_analytics\manager::get_indicator($indicatorclass)) {
            throw new \coding_exception('The provided indicator is not available. Ensure that ' .
                'model_config::check_dependencies is called before importing the model.');
        }
        $indicators[] = $indicator;
    }

    // No machine learning backend specified means that self::create receives false.
    $processor = false;
    if (!empty($modeldata->processor)) {
        if (!\core_analytics\manager::get_predictions_processor($modeldata->processor, false)) {
            throw new \coding_exception('The provided machine learning backend is not available. Ensure that ' .
                'model_config::check_dependencies is called before importing the model.');
        }
        $processor = $modeldata->processor;
    }

    return self::create($target, $indicators, $modeldata->timesplitting, $processor);
}
441
e709e544
DM
/**
 * Does this model exist?
 *
 * If no indicators are provided it considers any model with the provided
 * target a match. Otherwise a model matches if it uses the provided target
 * and exactly the same set of indicators, compared by the array keys of
 * $indicators and of the model indicators.
 *
 * @param \core_analytics\local\target\base $target
 * @param \core_analytics\local\indicator\base[]|false $indicators
 * @return bool
 */
public static function exists(\core_analytics\local\target\base $target, $indicators = false) {
    global $DB;

    $existingmodels = $DB->get_records('analytics_models', array('target' => $target->get_id()));

    if (!$existingmodels) {
        // Nothing to compare against. Returning here also prevents array_keys()
        // from being called with false when no indicators are provided.
        return false;
    }

    if (!$indicators) {
        // Any model using this target is a match.
        return true;
    }

    $indicatorids = array_keys($indicators);
    sort($indicatorids);

    foreach ($existingmodels as $modelobj) {
        $model = new \core_analytics\model($modelobj);
        $modelindicatorids = array_keys($model->get_indicators());
        sort($modelindicatorids);

        if ($indicatorids === $modelindicatorids) {
            return true;
        }
    }
    return false;
}
475
/**
 * Updates the model.
 *
 * If the time splitting method, the indicators or the predictions processor
 * change, the model is cleared, it gets a new version and, unless it is a
 * static model, it is flagged as not trained. If only the enabled status
 * changes the insights cache is purged.
 *
 * @param int|bool $enabled
 * @param \core_analytics\local\indicator\base[]|false $indicators False to respect current indicators
 * @param string|false $timesplittingid False to respect current time splitting method. Note that the
 *                                      default value is an empty string, which is not treated as false.
 * @param string|false $predictionsprocessor False to respect current predictors processor value
 * @return void
 */
public function update($enabled, $indicators = false, $timesplittingid = '', $predictionsprocessor = false) {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $now = time();

    // A false value means that the current value should be respected.
    $indicatorsstr = $this->model->indicators;
    if ($indicators !== false) {
        $indicatorsstr = json_encode(self::indicator_classes($indicators));
    }

    if ($timesplittingid === false) {
        $timesplittingid = $this->model->timesplitting;
    }

    if ($predictionsprocessor === false) {
        $predictionsprocessor = $this->model->predictionsprocessor;
    }

    $definitionchanged = $this->model->timesplitting !== $timesplittingid ||
        $this->model->indicators !== $indicatorsstr ||
        $this->model->predictionsprocessor !== $predictionsprocessor;

    if ($definitionchanged) {

        // Delete generated predictions before changing the model version.
        $this->clear();

        // Both depend on the model definition, they need to be reset as the version changes.
        $this->uniqueid = null;
        $this->indicators = null;

        // We update the version of the model so different time splittings are not mixed up.
        $this->model->version = $now;

        // Reset trained flag.
        if (!$this->is_static()) {
            $this->model->trained = 0;
        }

    } else if ($this->model->enabled != $enabled) {
        // We purge the cached contexts with insights as some will not be visible anymore.
        $this->purge_insights_cache();
    }

    $this->model->enabled = intval($enabled);
    $this->model->indicators = $indicatorsstr;
    $this->model->timesplitting = $timesplittingid;
    $this->model->predictionsprocessor = $predictionsprocessor;
    $this->model->timemodified = $now;
    $this->model->usermodified = $USER->id;

    $DB->update_record('analytics_models', $this->model);
}
543
d16cf374
DM
/**
 * Removes the model.
 *
 * It clears the model, asks the predictions processor to delete the model
 * output directory (only if the processor is ready to be used) and deletes
 * the model record and its log records.
 *
 * @return void
 */
public function delete() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $this->clear();

    // Method self::clear is already clearing the current model version.
    $predictor = $this->get_predictions_processor(false);
    if ($predictor->is_ready() === true) {
        $predictor->delete_output_dir($this->get_output_dir(array(), true));
    } else {
        // The output directory is left as it is, the database records are deleted anyway.
        $predictorname = \core_analytics\manager::get_predictions_processor_name($predictor);
        debugging('Prediction processor ' . $predictorname . ' is not ready to be used. Model ' .
            $this->model->id . ' could not be deleted.');
    }

    $modelid = $this->model->id;
    $DB->delete_records('analytics_models', array('id' => $modelid));
    $DB->delete_records('analytics_models_log', array('modelid' => $modelid));
}
569
/**
 * Evaluates the model.
 *
 * This method gets the site contents (through the analyser) creates a .csv dataset
 * with them and evaluates the model prediction accuracy multiple times using the
 * machine learning backend. It returns an object where the model score is the average
 * prediction accuracy of all executed evaluations.
 *
 * Static models are not evaluated, a NO_DATASET result is returned for them.
 *
 * @throws \moodle_exception If the model has no indicators.
 * @param array $options Passed to the analyser, the 'evaluation' option is always set.
 * @return \stdClass[] One result per evaluated time splitting method, indexed by its id. A single
 *                     non-indexed NO_DATASET result if no datasets could be generated.
 */
public function evaluate($options = array()) {

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('noevaluationbasedassumptions', 'analytics'));
        $staticresult = new \stdClass();
        $staticresult->status = self::NO_DATASET;
        return array($this->get_time_splitting()->get_id() => $staticresult);
    }

    $options['evaluation'] = true;
    $this->init_analyser($options);

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_labelled_data call so we get an early exception if it is not ready.
    $predictor = $this->get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // No datasets generated.
    if (empty($datasets)) {
        $nodataresult = new \stdClass();
        $nodataresult->status = self::NO_DATASET;
        $nodataresult->info = $this->get_analyser()->get_logs();
        return array($nodataresult);
    }

    if (!PHPUNIT_TEST && CLI_SCRIPT) {
        echo PHP_EOL . get_string('processingsitecontents', 'analytics') . PHP_EOL;
    }

    $results = array();
    foreach ($datasets as $timesplittingid => $dataset) {

        $timesplitting = \core_analytics\manager::get_time_splitting($timesplittingid);

        // The time splitting id is a class name, backslashes are removed to get the directory name.
        $dirname = str_replace('\\', '', $timesplittingid);
        $outputdir = $this->get_output_dir(array('evaluation', $dirname));

        // Evaluate the dataset, the deviation we accept in the results depends on the amount of iterations.
        if ($this->get_target()->is_linear()) {
            $predictorresult = $predictor->evaluate_regression($this->get_unique_id(), self::ACCEPTED_DEVIATION,
                self::EVALUATION_ITERATIONS, $dataset, $outputdir);
        } else {
            $predictorresult = $predictor->evaluate_classification($this->get_unique_id(), self::ACCEPTED_DEVIATION,
                self::EVALUATION_ITERATIONS, $dataset, $outputdir);
        }

        $result = new \stdClass();
        $result->status = $predictorresult->status;
        $result->info = $predictorresult->info;

        // Prediction processors may return an error, default to 0 score in that case.
        $result->score = $predictorresult->score ?? 0;

        $dir = !empty($predictorresult->dir) ? $predictorresult->dir : false;

        $result->logid = $this->log_result($timesplitting->get_id(), $result->score, $dir, $result->info);

        $results[$timesplitting->get_id()] = $result;
    }

    return $results;
}
659
/**
 * Trains the model using the site contents.
 *
 * This method prepares a dataset from the site contents (through the analyser)
 * and passes it to the machine learning backends. Static models are skipped as
 * they do not require training.
 *
 * @throws \moodle_exception If the model is not enabled, has no time splitting method or has no indicators.
 * @return \stdClass Object with a status attribute and, but for static models, an info attribute.
 */
public function train() {

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('notrainingbasedassumptions', 'analytics'));
        $result = new \stdClass();
        $result->status = self::OK;
        return $result;
    }

    if (!$this->is_enabled() || empty($this->model->timesplitting)) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_labelled_data call so we get an early exception if it is not writable.
    $outputdir = $this->get_output_dir(array('execution'));

    // Before get_labelled_data call so we get an early exception if it is not ready.
    $predictor = $this->get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    $timesplittingid = $this->model->timesplitting;

    // No training if no files have been provided.
    if (empty($datasets) || empty($datasets[$timesplittingid])) {
        $result = new \stdClass();
        $result->status = self::NO_DATASET;
        $result->info = $this->get_analyser()->get_logs();
        return $result;
    }
    $samplesfile = $datasets[$timesplittingid];

    // Train using the dataset.
    if ($this->get_target()->is_linear()) {
        $predictorresult = $predictor->train_regression($this->get_unique_id(), $samplesfile, $outputdir);
    } else {
        $predictorresult = $predictor->train_classification($this->get_unique_id(), $samplesfile, $outputdir);
    }

    $result = new \stdClass();
    $result->status = $predictorresult->status;
    $result->info = $predictorresult->info;

    if ($result->status === self::OK) {
        // The file is only flagged as used if the training succeeded.
        $this->flag_file_as_used($samplesfile, 'trained');

        // Mark the model as trained if it wasn't.
        if ($this->model->trained == false) {
            $this->mark_as_trained();
        }
    }

    return $result;
}
732
/**
 * Get predictions from the site contents.
 *
 * It analyses the site contents (through analyser classes) looking for samples
 * ready to receive predictions. It generates a dataset with all samples ready to
 * get predictions and it passes it to the machine learning backends or to the
 * targets based on assumptions to get the predictions.
 *
 * @throws \moodle_exception If the model is not enabled, has no time splitting method, has no
 *                           indicators or the samples file was already used to get predictions.
 * @return \stdClass Object with status and info attributes, and a predictions attribute if
 *                   there was a dataset to get predictions from.
 */
public function predict() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    if (!$this->is_enabled() || empty($this->model->timesplitting)) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_unlabelled_data call so we get an early exception if it is not writable.
    $outputdir = $this->get_output_dir(array('execution'));

    // Before get_unlabelled_data call so we get an early exception if it is not ready.
    if (!$this->is_static()) {
        $predictor = $this->get_predictions_processor();
    }

    $samplesdata = $this->get_analyser()->get_unlabelled_data();

    $timesplittingid = $this->model->timesplitting;

    // Get the prediction samples file.
    if (empty($samplesdata) || empty($samplesdata[$timesplittingid])) {
        $result = new \stdClass();
        $result->status = self::NO_DATASET;
        $result->info = $this->get_analyser()->get_logs();
        return $result;
    }
    $samplesfile = $samplesdata[$timesplittingid];

    // We need to throw an exception if we are trying to predict stuff that was already predicted.
    $usedfileparams = array('modelid' => $this->model->id, 'action' => 'predicted', 'fileid' => $samplesfile->get_id());
    if ($DB->get_record('analytics_used_files', $usedfileparams)) {
        throw new \moodle_exception('erroralreadypredict', 'analytics', '', $samplesfile->get_id());
    }

    $indicatorcalculations = \core_analytics\dataset_manager::get_structured_data($samplesfile);

    // Prepare the results object.
    $result = new \stdClass();

    if (!$this->is_static()) {
        // Estimation and classification processes run on the machine learning backend side.
        if ($this->get_target()->is_linear()) {
            $predictorresult = $predictor->estimate($this->get_unique_id(), $samplesfile, $outputdir);
        } else {
            $predictorresult = $predictor->classify($this->get_unique_id(), $samplesfile, $outputdir);
        }
        $result->status = $predictorresult->status;
        $result->info = $predictorresult->info;
        $result->predictions = $this->format_predictor_predictions($predictorresult);

    } else {
        // Prediction based on assumptions.
        $result->status = self::OK;
        $result->info = [];
        $result->predictions = $this->get_static_predictions($indicatorcalculations);
    }

    if ($result->status !== self::OK) {
        return $result;
    }

    $samplecontexts = array();
    if ($result->predictions) {
        $samplecontexts = $this->execute_prediction_callbacks($result->predictions, $indicatorcalculations);
    }

    if (!empty($samplecontexts) && $this->uses_insights()) {
        $this->trigger_insights($samplecontexts);
    }

    $this->flag_file_as_used($samplesfile, 'predicted');

    return $result;
}
823
ed12ba6b
DM
/**
 * Returns the model predictions processor.
 *
 * The processor set in the model data is requested from the analytics manager.
 *
 * @param bool $checkisready Passed through to the analytics manager.
 * @return \core_analytics\predictor
 */
public function get_predictions_processor($checkisready = true) {
    $processorname = $this->model->predictionsprocessor;

    return manager::get_predictions_processor($processorname, $checkisready);
}
833
1611308b
DM
/**
 * Formats the predictor results.
 *
 * Each element of $predictorresult->predictions is expected to contain the
 * unique sample id, the prediction and, optionally, the prediction score.
 * Elements with any other number of values can not be interpreted and are
 * skipped with a developer debugging message.
 *
 * @param \stdClass $predictorresult The predictions processor result, its predictions attribute is read.
 * @return \stdClass[] Objects with prediction and predictionscore attributes, indexed by unique sample id.
 */
private function format_predictor_predictions($predictorresult) {

    $predictions = array();
    if (empty($predictorresult->predictions)) {
        return $predictions;
    }

    foreach ($predictorresult->predictions as $sampleinfo) {

        // We parse each prediction.
        switch (count($sampleinfo)) {
            case 1:
                // For whatever reason the predictions processor could not process this sample, we
                // skip it and do nothing with it.
                debugging($this->model->id . ' model predictions processor could not process the sample with id ' .
                    $sampleinfo[0], DEBUG_DEVELOPER);
                continue 2;
            case 2:
                // Prediction processors that do not return a prediction score will have the maximum prediction
                // score.
                list($uniquesampleid, $prediction) = $sampleinfo;
                $predictionscore = 1;
                break;
            case 3:
                list($uniquesampleid, $prediction, $predictionscore) = $sampleinfo;
                break;
            default:
                // Skip it, otherwise the values of the previous sample (or undefined variables
                // if this is the first one) would be used below.
                debugging($this->model->id . ' model predictions processor returned a prediction with an unexpected ' .
                    'number of values (' . count($sampleinfo) . ')', DEBUG_DEVELOPER);
                continue 2;
        }

        $predictions[$uniquesampleid] = (object)['prediction' => $prediction, 'predictionscore' => $predictionscore];
    }

    return $predictions;
}
872
/**
 * Execute the prediction callbacks defined by the target.
 *
 * For each prediction that, according to the target, triggers the callback, a
 * prediction record is prepared and the target prediction callback is called.
 * All the prepared records are saved together at the end.
 *
 * @param \stdClass[] $predictions Indexed by unique sample id.
 * @param array $indicatorcalculations Indexed by unique sample id.
 * @return array The contexts of the samples that triggered the callback, indexed by context id.
 */
protected function execute_prediction_callbacks($predictions, $indicatorcalculations) {

    // Here we will store all predictions' contexts, this will be used to limit which users will see those predictions.
    $samplecontexts = array();
    $records = array();

    foreach ($predictions as $uniquesampleid => $prediction) {

        // The unique sample id contains both the sampleid and the rangeindex.
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        if (!$this->get_target()->triggers_callback($prediction->prediction, $prediction->predictionscore)) {
            // The target is not interested in this prediction.
            continue;
        }

        // Prepare the record to store the predicted values.
        list($record, $samplecontext) = $this->prepare_prediction_record($sampleid, $rangeindex, $prediction->prediction,
            $prediction->predictionscore, json_encode($indicatorcalculations[$uniquesampleid]));

        // We will later bulk-insert them all.
        $records[$uniquesampleid] = $record;

        // Also store all samples context to later generate insights or whatever action the target wants to perform.
        $samplecontexts[$samplecontext->id] = $samplecontext;

        $this->get_target()->prediction_callback($this->model->id, $sampleid, $rangeindex, $samplecontext,
            $prediction->prediction, $prediction->predictionscore);
    }

    if (!empty($records)) {
        $this->save_predictions($records);
    }

    return $samplecontexts;
}
369389c9 914
1611308b
DM
/**
 * Asks the target to generate the insight notifications and refreshes the 'contextwithinsights' cache.
 *
 * @param \context[] $samplecontexts Contexts of the samples that got predictions.
 * @return void
 */
protected function trigger_insights($samplecontexts) {

    // Let the target know that all predictions have been processed.
    $this->get_target()->generate_insight_notifications($this->model->id, $samplecontexts);

    // Make sure that this model is listed in the cache entry of each context.
    $cache = \cache::make('core', 'contextwithinsights');
    foreach ($samplecontexts as $context) {
        $cachedmodelids = $cache->get($context->id);

        if (!$cachedmodelids) {
            // An empty value can mean both "no insights in this context" and "purged cache", we can not
            // tell them apart so we "pay" 1 db read to fill the cache with reliable data.
            $modelswithinsights = \core_analytics\manager::get_models_with_insights($context);
            $cache->set($context->id, array_keys($modelswithinsights));
            continue;
        }

        if (in_array($this->get_id(), $cachedmodelids)) {
            // Already listed, nothing to update.
            continue;
        }

        $cachedmodelids[] = $this->get_id();
        $cache->set($context->id, $cachedmodelids);
    }
}
942
/**
 * Get predictions from a static model.
 *
 * Samples are grouped by analysable class and range index and the target calculates each group
 * separately. Samples that the target does not return a value for (or returns null for) are removed
 * from $indicatorcalculations, which is received by reference, so that they are not stored as calculated.
 *
 * @param array $indicatorcalculations Indicators calculations indexed by unique sample id. Modified in place.
 * @return \stdClass[] Objects with 'prediction' and 'predictionscore' attributes, indexed by unique sample id.
 */
protected function get_static_predictions(&$indicatorcalculations) {

    // Group samples by analysable for \core_analytics\local\target::calculate.
    $analysables = array();
    // List all sampleids together.
    $sampleids = array();

    foreach ($indicatorcalculations as $uniquesampleid => $indicators) {
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
        $analysableclass = get_class($analysable);
        if (empty($analysables[$analysableclass])) {
            $analysables[$analysableclass] = array();
        }
        if (empty($analysables[$analysableclass][$rangeindex])) {
            $analysables[$analysableclass][$rangeindex] = (object)[
                'analysable' => $analysable,
                'indicatorsdata' => array(),
                'sampleids' => array()
            ];
        }
        // Using the sampleid as a key so we can easily merge indicators data later.
        $analysables[$analysableclass][$rangeindex]->indicatorsdata[$sampleid] = $indicators;
        // We could use indicatorsdata keys but the amount of redundant data is not that big and leaves code below cleaner.
        $analysables[$analysableclass][$rangeindex]->sampleids[$sampleid] = $sampleid;

        // Accumulate sample ids to get all their associated data in 1 single db query (analyser::get_samples).
        $sampleids[$sampleid] = $sampleid;
    }

    // Get all samples data.
    list($sampleids, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Calculate the targets.
    $predictions = array();
    foreach ($analysables as $analysableclass => $rangedata) {
        foreach ($rangedata as $rangeindex => $data) {

            // Samples that belong to this analysable class + range group. Taken from indicatorsdata so the
            // list is not affected by whatever filter_out_invalid_samples does to $data->sampleids.
            $groupsampleids = array_keys($data->indicatorsdata);

            // Attach samples data and calculated indicators data.
            $this->get_target()->clear_sample_data();
            $this->get_target()->add_sample_data($samplesdata);
            $this->get_target()->add_sample_data($data->indicatorsdata);

            // Append new elements (we can not get duplicates because sample-analysable relation is N-1).
            $range = $this->get_time_splitting()->get_range_by_index($rangeindex);
            $this->get_target()->filter_out_invalid_samples($data->sampleids, $data->analysable, false);
            $calculations = $this->get_target()->calculate($data->sampleids, $data->analysable, $range['start'], $range['end']);

            // Samples of this group that are missing in $calculations were discarded by the target (is_valid_sample).
            // We need to remove them from $indicatorcalculations because otherwise they would be stored as calculated.
            // Only this group's samples are checked: $calculations knows nothing about the samples of other
            // analysable classes or ranges, so checking those against it would wrongly discard them.
            foreach ($groupsampleids as $sampleid) {
                if (!isset($calculations[$sampleid])) {
                    $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);
                    unset($indicatorcalculations[$uniquesampleid]);
                }
            }

            foreach ($calculations as $sampleid => $value) {

                $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);

                // Null means that the target couldn't calculate the sample, we also remove them from $indicatorcalculations.
                if (is_null($value)) {
                    unset($indicatorcalculations[$uniquesampleid]);
                    continue;
                }

                // Even if static predictions are based on assumptions we flag them as 100% because they are 100%
                // true according to what the developer defined.
                $predictions[$uniquesampleid] = (object)['prediction' => $value, 'predictionscore' => 1];
            }
        }
    }
    return $predictions;
}
1027
/**
 * Builds the record that represents a prediction, without inserting it into the database.
 *
 * Note that the analysable of the sample is set on the model's time splitting method in order
 * to resolve the start and end times of the range.
 *
 * @param int $sampleid
 * @param int $rangeindex
 * @param int $prediction
 * @param float $predictionscore
 * @param string $calculations Indicators calculations, already encoded as a string.
 * @return array 2 elements: list(\stdClass the record, \context the sample access context)
 */
protected function prepare_prediction_record($sampleid, $rangeindex, $prediction, $predictionscore, $calculations) {
    $samplecontext = $this->get_analyser()->sample_access_context($sampleid);

    $predictionrecord = (object)[
        'modelid' => $this->model->id,
        'contextid' => $samplecontext->id,
        'sampleid' => $sampleid,
        'rangeindex' => $rangeindex,
        'prediction' => $prediction,
        'predictionscore' => $predictionscore,
        'calculations' => $calculations,
        'timecreated' => time(),
    ];

    // The time range attributes are only added when the range index can be resolved.
    $sampleanalysable = $this->get_analyser()->get_sample_analysable($sampleid);
    $splitter = $this->get_time_splitting();
    $splitter->set_analysable($sampleanalysable);
    $timerange = $splitter->get_range_by_index($rangeindex);
    if ($timerange) {
        $predictionrecord->timestart = $timerange['start'];
        $predictionrecord->timeend = $timerange['end'];
    }

    return array($predictionrecord, $samplecontext);
}
1062
/**
 * Inserts the provided prediction records into the analytics_predictions table.
 *
 * @param \stdClass[] $records Prediction records, as built by self::prepare_prediction_record.
 */
protected function save_predictions($records) {
    global $DB;

    $table = 'analytics_predictions';
    $DB->insert_records($table, $records);
}
1072
/**
 * Enables the model, optionally switching it to the provided time splitting method.
 *
 * Switching to a different time splitting method clears the model, bumps its version and
 * resets its trained flag (unless the model is static).
 *
 * @throws \moodle_exception If the provided time splitting method is not valid or if the model has none.
 * @param string|false $timesplittingid False to respect the current time splitting method.
 * @return void
 */
public function enable($timesplittingid = false) {
    global $DB, $USER;

    \core_analytics\manager::check_can_manage_models();

    $now = time();

    $switching = $timesplittingid && $timesplittingid !== $this->model->timesplitting;

    if (!$switching && empty($this->model->timesplitting)) {
        // A valid timesplitting method needs to be supplied before a model can be enabled.
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if ($switching) {

        // It must be a valid time splitting class, referenced by its fully qualified name (leading backslash).
        $isvalidclass = \core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base');
        if (!$isvalidclass || substr($timesplittingid, 0, 1) !== '\\') {
            throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
        }

        // Generated predictions have to be deleted before the model version changes.
        $this->clear();

        // The unique id depends on the version, so it has to be regenerated.
        $this->uniqueid = null;

        $this->model->timesplitting = $timesplittingid;
        $this->model->version = $now;

        // Non-static models need to be trained again.
        if (!$this->is_static()) {
            $this->model->trained = 0;
        }
    }

    // Purge pages with insights when the model was not enabled, as this may change things.
    if ($this->model->enabled != 1) {
        $this->purge_insights_cache();
    }

    $this->model->enabled = 1;
    $this->model->timemodified = $now;
    $this->model->usermodified = $USER->id;

    // Persist the new model state.
    $DB->update_record('analytics_models', $this->model);
}
1127
/**
 * Whether this is a static model or not; the model's target is the one that decides.
 *
 * Static models rely on assumptions rather than on the results of a machine learning backend.
 *
 * @return bool
 */
public function is_static() {
    $basedonassumptions = $this->get_target()->based_on_assumptions();
    return (bool)$basedonassumptions;
}
1139
/**
 * Whether the model is flagged as enabled in its record.
 *
 * @return bool
 */
public function is_enabled() {
    $enabled = $this->model->enabled;
    return (bool)$enabled;
}
1148
/**
 * Whether the model can be considered trained.
 *
 * @return bool True if the model is flagged as trained or if it is a static model.
 */
public function is_trained() {
    if ($this->model->trained) {
        return true;
    }

    // Models which targets are based on assumptions do not need training.
    return $this->is_static();
}
1158
/**
 * Flags the model as trained and stores the change in the database.
 *
 * Requires permissions to manage models.
 *
 * @return void
 */
public function mark_as_trained() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $model = $this->model;
    $model->trained = 1;
    $DB->update_record('analytics_models', $model);
}
1172
/**
 * Returns the contexts where this model has predictions.
 *
 * @param bool $skiphidden Skip the predictions that the current user flagged as fixed or as not useful.
 * @return \stdClass[] Records containing a contextid attribute.
 */
public function get_predictions_contexts($skiphidden = true) {
    global $DB, $USER;

    $params = array('modelid' => $this->model->id);
    $sql = "SELECT DISTINCT ap.contextid FROM {analytics_predictions} ap
              JOIN {context} ctx ON ctx.id = ap.contextid
             WHERE ap.modelid = :modelid";

    if (!$skiphidden) {
        return $DB->get_records_sql($sql, $params);
    }

    // Leave out the predictions that the current user already acted on.
    $params['userid'] = $USER->id;
    $params['fixed'] = \core_analytics\prediction::ACTION_FIXED;
    $params['notuseful'] = \core_analytics\prediction::ACTION_NOT_USEFUL;
    $sql .= " AND NOT EXISTS (
          SELECT 1
            FROM {analytics_prediction_actions} apa
           WHERE apa.predictionid = ap.id AND apa.userid = :userid AND (apa.actionname = :fixed OR apa.actionname = :notuseful)
        )";

    return $DB->get_records_sql($sql, $params);
}
1200
f9e7447f
DM
/**
 * Whether this model, using its current time splitting method, has already obtained predictions.
 *
 * The analytics_predict_samples table is checked instead of analytics_predictions because
 * targets are able to ignore some of the predicted values and, when they do, the predictions
 * are not stored in the database at all.
 *
 * @return bool
 */
public function any_prediction_obtained() {
    global $DB;

    $conditions = array(
        'modelid' => $this->model->id,
        'timesplitting' => $this->model->timesplitting,
    );
    return $DB->record_exists('analytics_predict_samples', $conditions);
}
1215
/**
 * Whether the model generates insights; this is decided by the model's target.
 *
 * @return bool
 */
public function uses_insights() {
    $modeltarget = $this->get_target();

    // Static call on the class of the target instance.
    return $modeltarget::uses_insights();
}
1225
369389c9
DM
1226 /**
1227 * Whether predictions exist for this context.
1228 *
1229 * @param \context $context
1230 * @return bool
1231 */
1232 public function predictions_exist(\context $context) {
1233 global $DB;
1234
1235 // Filters out previous predictions keeping only the last time range one.
1236 $select = "modelid = :modelid AND contextid = :contextid";
6ec2ae0f 1237 $params = array('modelid' => $this->model->id, 'contextid' => $context->id);
369389c9
DM
1238 return $DB->record_exists_select('analytics_predictions', $select, $params);
1239 }
1240
/**
 * Returns this model's predictions for the provided context.
 *
 * For each sample only the prediction with the highest range index is considered. Predictions
 * which samples are no longer available are discarded and do not count towards the total.
 *
 * Be aware that a plain empty array (not a 2 elements array) is returned when the database
 * query returns no predictions at all.
 *
 * @param \context $context
 * @param bool $skiphidden Skip the predictions that the current user flagged as fixed or as not useful.
 * @param int $page The page of results to fetch. False for all results.
 * @param int $perpage The max number of results to fetch. Ignored if $page is false.
 * @return array($total, \core_analytics\prediction[])
 */
public function get_predictions(\context $context, $skiphidden = true, $page = false, $perpage = 100) {
    global $DB, $USER;

    \core_analytics\manager::check_can_list_insights($context);

    $params = array(
        'modelid' => $this->model->id,
        'contextid' => $context->id,
        'modelidsubap' => $this->model->id,
        'contextidsubap' => $context->id,
    );

    // The subquery keeps only the last time range of each sample, filtering out previous predictions.
    $sql = "SELECT ap.*
              FROM {analytics_predictions} ap
              JOIN (
                SELECT sampleid, max(rangeindex) AS rangeindex
                  FROM {analytics_predictions}
                 WHERE modelid = :modelidsubap and contextid = :contextidsubap
              GROUP BY sampleid
              ) apsub
                ON ap.sampleid = apsub.sampleid AND ap.rangeindex = apsub.rangeindex
             WHERE ap.modelid = :modelid and ap.contextid = :contextid";

    if ($skiphidden) {
        $params['userid'] = $USER->id;
        $params['fixed'] = \core_analytics\prediction::ACTION_FIXED;
        $params['notuseful'] = \core_analytics\prediction::ACTION_NOT_USEFUL;
        $sql .= " AND NOT EXISTS (
          SELECT 1
            FROM {analytics_prediction_actions} apa
           WHERE apa.predictionid = ap.id AND apa.userid = :userid AND (apa.actionname = :fixed OR apa.actionname = :notuseful)
        )";
    }

    $sql .= " ORDER BY ap.timecreated DESC";

    $predictions = $DB->get_records_sql($sql, $params);
    if (!$predictions) {
        return array();
    }

    // Collect the ids of the predicted samples, keeping the prediction ids as keys.
    $sampleids = array();
    foreach ($predictions as $key => $record) {
        $sampleids[$key] = $record->sampleid;
    }

    list($unused, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Window of positions that belong to the requested page.
    $paginated = ($page !== false);
    if ($paginated) {
        $firstposition = $page * $perpage;
        $endposition = $firstposition + $perpage;
    }

    // Number of predictions with available samples seen so far; it ends up being the total.
    $position = 0;

    foreach ($predictions as $predictionid => $predictiondata) {

        $sampleid = $predictiondata->sampleid;

        // Predictions which samples are not available anymore are discarded.
        if (empty($samplesdata[$sampleid])) {
            unset($predictions[$predictionid]);
            continue;
        }

        // We cannot paginate in the DB because the list is post filtered, so we paginate here.
        $inpage = !$paginated || ($position >= $firstposition && $position < $endposition);
        if ($inpage) {
            // The \stdClass record is replaced by a \core_analytics\prediction object.
            $predictions[$predictionid] = new \core_analytics\prediction($predictiondata, $samplesdata[$sampleid]);
        } else {
            unset($predictions[$predictionid]);
        }

        $position++;
    }

    return [$position, $predictions];
}
1324
/**
 * Returns the data of the sample that a prediction refers to.
 *
 * @throws \moodle_exception If the analyser returns no data for the sample.
 * @param \stdClass $predictionobj Prediction record, only its sampleid attribute is read.
 * @return array
 */
public function prediction_sample_data($predictionobj) {

    $sampleid = $predictionobj->sampleid;

    list(, $samplesdata) = $this->get_analyser()->get_samples(array($sampleid));

    if (empty($samplesdata[$sampleid])) {
        throw new \moodle_exception('errorsamplenotavailable', 'analytics');
    }

    return $samplesdata[$sampleid];
}
1341
/**
 * Returns the description of the sample that a prediction refers to, as provided by the analyser.
 *
 * @param \core_analytics\prediction $prediction
 * @return array 2 elements: list(string, \renderable)
 */
public function prediction_sample_description(\core_analytics\prediction $prediction) {
    $sampleid = $prediction->get_prediction_data()->sampleid;
    $contextid = $prediction->get_prediction_data()->contextid;
    $sampledata = $prediction->get_sample_data();

    return $this->get_analyser()->sample_description($sampleid, $contextid, $sampledata);
}
1352
/**
 * Returns the output directory for prediction processors, making sure that it is a writable directory.
 *
 * The base directory is the 'modeloutputdir' analytics setting, falling back to a 'models'
 * directory inside $CFG->dataroot when the setting is empty.
 *
 * Directory structure as follows:
 * - Evaluation runs:
 *   models/$model->id/$model->version/evaluation/$model->timesplitting
 * - Training  & prediction runs:
 *   models/$model->id/$model->version/execution
 *
 * @param array $subdirs Directories to append after the model version.
 * @param bool $onlymodelid Preference over $subdirs, the path ends at the model id directory.
 * @return string
 */
protected function get_output_dir($subdirs = array(), $onlymodelid = false) {
    global $CFG;

    $basedir = get_config('analytics', 'modeloutputdir');
    if (empty($basedir)) {
        // Apply default value.
        $basedir = rtrim($CFG->dataroot, '/') . DIRECTORY_SEPARATOR . 'models';
    }

    // The model id directory is always part of the path.
    $pathparts = array($basedir, $this->model->id);

    if (!$onlymodelid) {
        // Version + subdirs.
        $pathparts[] = $this->model->version;
        foreach ($subdirs as $subdir) {
            $pathparts[] = $subdir;
        }
    }

    $outputdir = implode(DIRECTORY_SEPARATOR, $pathparts);

    make_writable_directory($outputdir);

    return $outputdir;
}
1391
/**
 * Returns a unique id for this model, generating and storing it in the instance the first time.
 *
 * The id is a sha1 hash of the site wwwroot, the database prefix, the model id and the model
 * version, so it should be unique for this site.
 *
 * @return string
 */
public function get_unique_id() {
    global $CFG;

    if (is_null($this->uniqueid)) {
        // Site + model + model version (the version is the part that changes over time).
        $components = array($CFG->wwwroot, $CFG->prefix, $this->model->id, $this->model->version);
        $this->uniqueid = sha1(implode('$$', $components));
    }

    return $this->uniqueid;
}
1413
/**
 * Exports the model data, replacing the target, the time splitting method and the indicators by their names.
 *
 * Requires permissions to manage models.
 *
 * @return \stdClass A copy of the model record.
 */
public function export() {

    \core_analytics\manager::check_can_manage_models();

    // Work on a copy so the model record is not altered.
    $exported = clone $this->model;
    $exported->target = $this->get_target()->get_name();

    // The time splitting method attribute is only replaced when the model has one.
    $timesplitting = $this->get_time_splitting();
    if ($timesplitting) {
        $exported->timesplitting = $timesplitting->get_name();
    }

    $indicatornames = array();
    foreach ($this->get_indicators() as $indicator) {
        $indicatornames[] = $indicator->get_name();
    }
    $exported->indicators = $indicatornames;

    return $exported;
}
1436
/**
 * Exports the model configuration to a file through \core_analytics\model_config.
 *
 * Requires permissions to manage models.
 *
 * @param string $downloadfilename Download file name.
 * @return string The filepath
 */
public function export_config(string $downloadfilename) : string {

    \core_analytics\manager::check_can_manage_models();

    $exporter = new model_config($this);
    return $exporter->export_to_file($exporter->export(), $downloadfilename);
}
349c4412 1452
e4453adc
DM
/**
 * Whether the configuration of this model can be exported.
 *
 * The model needs a time splitting method and indicators, and it can not be a static model.
 *
 * @return bool
 */
public function can_export_configuration() : bool {
    return !empty($this->model->timesplitting)
        && $this->get_indicators()
        && !$this->is_static();
}
1473
584ffa4f
DM
/**
 * Returns the log records of this model, the most recently created ones first.
 *
 * Requires permissions to manage models.
 *
 * @param int $limitfrom Passed through to the database query.
 * @param int $limitnum Passed through to the database query.
 * @return \stdClass[]
 */
public function get_logs($limitfrom = 0, $limitnum = 0) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $conditions = array('modelid' => $this->get_id());
    $sort = 'timecreated DESC';

    return $DB->get_records('analytics_models_log', $conditions, $sort, '*', $limitfrom, $limitnum);
}
1489
d126f838
DM
/**
 * Returns the training data of this model for its current time splitting method.
 *
 * The export itself is delegated to \core_analytics\dataset_manager::export_training_data.
 * Requires permissions to manage models.
 *
 * @return \stored_file|false
 */
public function get_training_data() {

    \core_analytics\manager::check_can_manage_models();

    $splitterid = $this->get_time_splitting()->get_id();
    $modelid = $this->get_id();

    return \core_analytics\dataset_manager::export_training_data($modelid, $splitterid);
}
1502
/**
 * Records in analytics_used_files that this model used the provided file for the given action.
 *
 * @param \stored_file $file
 * @param string $action What the file was used for.
 * @return void
 */
protected function flag_file_as_used(\stored_file $file, $action) {
    global $DB;

    $record = (object)[
        'modelid' => $this->model->id,
        'fileid' => $file->get_id(),
        'action' => $action,
        'time' => time(),
    ];
    $DB->insert_record('analytics_used_files', $record);
}
1520
/**
 * Stores a log entry in analytics_models_log with the provided results and the current model attributes.
 *
 * @param string $timesplittingid
 * @param float $score
 * @param string $dir
 * @param array $info Only logged when not empty; stored as a JSON encoded list.
 * @return int The inserted log id
 */
protected function log_result($timesplittingid, $score, $dir = false, $info = false) {
    global $DB, $USER;

    $entry = new \stdClass();

    // Snapshot of the model at the time the results were obtained.
    $entry->modelid = $this->get_id();
    $entry->version = $this->model->version;
    $entry->target = $this->model->target;
    $entry->indicators = $this->model->indicators;

    $entry->timesplitting = $timesplittingid;
    $entry->dir = $dir;
    if ($info) {
        // array_values ensures that it is not encoded as an associative array.
        $infolist = array_values($info);
        $entry->info = json_encode($infolist);
    }
    $entry->score = $score;
    $entry->timecreated = time();
    $entry->usermodified = $USER->id;

    return $DB->insert_record('analytics_models_log', $entry);
}
1550
/**
 * Utility method to return indicator class names from a list of indicator objects
 *
 * @throws \moodle_exception If any of the provided elements is not a valid indicator.
 * @param \core_analytics\local\indicator\base[] $indicators
 * @return string[] The ids returned by each indicator's get_id().
 */
private static function indicator_classes($indicators) {

    // What we want to check and store are the indicator classes not the keys.
    $indicatorclasses = array();
    foreach ($indicators as $indicator) {
        if (!\core_analytics\manager::is_valid($indicator, '\core_analytics\local\indicator\base')) {
            // Describe the invalid element in the error message.
            if (is_object($indicator)) {
                $invalid = '\\' . get_class($indicator);
            } else if (is_scalar($indicator)) {
                $invalid = $indicator;
            } else {
                // Arrays, nulls and resources: report the type, converting an array to a string
                // would raise an "Array to string conversion" error and result in a useless value.
                $invalid = gettype($indicator);
            }
            throw new \moodle_exception('errorinvalidindicator', 'analytics', '', $invalid);
        }
        $indicatorclasses[] = $indicator->get_id();
    }

    return $indicatorclasses;
}
1575
/**
 * Clears the model training and prediction data.
 *
 * Executed after updating model critical elements like the time splitting method
 * or the indicators.
 *
 * Apart from deleting the stored data it also flags the model as not trained and updates
 * its modification time and user. Requires permissions to manage models.
 *
 * @return void
 */
public function clear() {
    global $DB, $USER;

    \core_analytics\manager::check_can_manage_models();

    // Delete current model version stored stuff.
    $predictor = $this->get_predictions_processor(false);
    if ($predictor->is_ready() !== true) {
        // Best-effort: the rest of the data is cleared even if the predictions processor can not clear its part.
        $predictorname = \core_analytics\manager::get_predictions_processor_name($predictor);
        debugging('Prediction processor ' . $predictorname . ' is not ready to be used. Model ' .
            $this->model->id . ' could not be cleared.');
    } else {
        $predictor->clear_model($this->get_unique_id(), $this->get_output_dir());
    }

    // Delete the actions over this model's predictions. A subquery is used instead of fetching all the
    // prediction ids and passing them back as query parameters, as models with a large number of predictions
    // would hit memory and database bound parameters limits.
    $DB->delete_records_select('analytics_prediction_actions',
        "predictionid IN (SELECT ap.id FROM {analytics_predictions} ap WHERE ap.modelid = :modelid)",
        array('modelid' => $this->get_id()));

    $DB->delete_records('analytics_predictions', array('modelid' => $this->model->id));
    $DB->delete_records('analytics_predict_samples', array('modelid' => $this->model->id));
    $DB->delete_records('analytics_train_samples', array('modelid' => $this->model->id));
    $DB->delete_records('analytics_used_files', array('modelid' => $this->model->id));
    $DB->delete_records('analytics_used_analysables', array('modelid' => $this->model->id));

    // Purge all generated files.
    \core_analytics\dataset_manager::clear_model_files($this->model->id);

    // We don't expect people to clear models regularly and the cost of filling the cache is
    // 1 db read per context.
    $this->purge_insights_cache();

    $this->model->trained = 0;
    $this->model->timemodified = time();
    $this->model->usermodified = $USER->id;
    $DB->update_record('analytics_models', $this->model);
}
1624
/**
 * Purges the whole 'contextwithinsights' cache.
 */
private function purge_insights_cache() {
    \cache::make('core', 'contextwithinsights')->purge();
}
1632
1611308b
DM
/**
 * Raises the memory limit (unless it is already set to -1) and the PHP time limit.
 *
 * @return void
 */
private function heavy_duty_mode() {
    $memorylimit = ini_get('memory_limit');
    if ($memorylimit != -1) {
        raise_memory_limit(MEMORY_HUGE);
    }

    \core_php_time_limit::raise();
}
369389c9 1644}