MDL-60944 analytics: Add base support for import / export
[moodle.git] / analytics / classes / model.php
CommitLineData
369389c9
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
b94dbb55 18 * Prediction model representation.
369389c9
DM
19 *
20 * @package core_analytics
21 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace core_analytics;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
b94dbb55 30 * Prediction model representation.
369389c9
DM
31 *
32 * @package core_analytics
33 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36class model {
37
413f19bc
DM
38 /**
39 * All as expected.
40 */
369389c9 41 const OK = 0;
413f19bc
DM
42
43 /**
44 * There was a problem.
45 */
369389c9 46 const GENERAL_ERROR = 1;
413f19bc
DM
47
48 /**
49 * No dataset to analyse.
50 */
369389c9
DM
51 const NO_DATASET = 2;
52
413f19bc
DM
53 /**
54 * Model with low prediction accuracy.
55 */
325b3bdd 56 const LOW_SCORE = 4;
413f19bc
DM
57
58 /**
59 * Not enough data to evaluate the model properly.
60 */
325b3bdd 61 const NOT_ENOUGH_DATA = 8;
369389c9 62
413f19bc
DM
63 /**
64 * Invalid analysable for the time splitting method.
65 */
66 const ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD = 4;
67
68 /**
69 * Invalid analysable for all time splitting methods.
70 */
369389c9 71 const ANALYSABLE_STATUS_INVALID_FOR_RANGEPROCESSORS = 8;
413f19bc
DM
72
73 /**
74 * Invalid analysable for the target
75 */
369389c9
DM
76 const ANALYSABLE_STATUS_INVALID_FOR_TARGET = 16;
77
413f19bc
DM
78 /**
79 * Minimum score to consider a non-static prediction model as good.
80 */
369389c9 81 const MIN_SCORE = 0.7;
413f19bc 82
5c5cb3ee
DM
83 /**
84 * Minimum prediction confidence (from 0 to 1) to accept a prediction as reliable enough.
85 */
86 const PREDICTION_MIN_SCORE = 0.6;
87
413f19bc
DM
88 /**
89 * Maximum standard deviation between different evaluation repetitions to consider that evaluation results are stable.
90 */
369389c9 91 const ACCEPTED_DEVIATION = 0.05;
413f19bc
DM
92
93 /**
94 * Number of evaluation repetitions.
95 */
369389c9
DM
96 const EVALUATION_ITERATIONS = 10;
97
98 /**
99 * @var \stdClass
100 */
101 protected $model = null;
102
103 /**
104 * @var \core_analytics\local\analyser\base
105 */
106 protected $analyser = null;
107
108 /**
109 * @var \core_analytics\local\target\base
110 */
111 protected $target = null;
112
ed12ba6b
DM
113 /**
114 * @var \core_analytics\predictor
115 */
116 protected $predictionsprocessor = null;
117
369389c9
DM
118 /**
119 * @var \core_analytics\local\indicator\base[]
120 */
121 protected $indicators = null;
122
123 /**
124 * Unique Model id created from site info and last model modification.
125 *
126 * @var string
127 */
128 protected $uniqueid = null;
129
/**
 * Constructor.
 *
 * Accepts either a model id or an already loaded model record. When a scalar
 * is provided, the record is read from the analytics_models table with
 * MUST_EXIST strictness, so the database layer is the one reporting a
 * missing model.
 *
 * @param int|\stdClass $model Model id or analytics_models record.
 */
public function __construct($model) {
    global $DB;

    if (is_scalar($model)) {
        // MUST_EXIST makes get_record() throw when the record is missing, so a
        // "not found" check after this call would never be reached.
        $model = $DB->get_record('analytics_models', array('id' => $model), '*', MUST_EXIST);
    }

    $this->model = $model;
}
147
3a396286
DM
/**
 * Quick safety check to discard models whose required components are not available anymore.
 *
 * The model is considered available when its target can be loaded and the
 * analyser class reported by that target exists.
 *
 * @return bool
 */
public function is_available() {
    $target = $this->get_target();

    // Short-circuits before asking for the analyser class when there is no target.
    return $target && class_exists($target->get_analyser_class());
}
166
/**
 * Returns the id of the wrapped model record.
 *
 * @return int
 */
public function get_id() {
    $modelobj = $this->model;

    return $modelobj->id;
}
175
/**
 * Returns the plain \stdClass record this instance wraps.
 *
 * The object is returned as is (not cloned).
 *
 * @return \stdClass
 */
public function get_model_obj() {
    $modelobj = $this->model;

    return $modelobj;
}
184
/**
 * Returns the model target.
 *
 * The instance is resolved through the analytics manager on first use and
 * kept in $this->target for later calls.
 *
 * @return \core_analytics\local\target\base
 */
public function get_target() {
    if ($this->target === null) {
        $this->target = \core_analytics\manager::get_target($this->model->target);
    }

    return $this->target;
}
199
/**
 * Returns the model indicators.
 *
 * The indicator class names are read from the JSON stored in the model
 * record. Indicators that can not be loaded are skipped with a developer
 * debugging message. The resulting list is kept in $this->indicators.
 *
 * @throws \coding_exception If the stored indicators do not decode to an array.
 * @return \core_analytics\local\indicator\base[] Indexed by indicator class name.
 */
public function get_indicators() {
    if ($this->indicators === null) {

        $classnames = json_decode($this->model->indicators);
        if (!is_array($classnames)) {
            throw new \coding_exception('Model ' . $this->model->id . ' indicators can not be read');
        }

        $this->indicators = array();
        foreach ($classnames as $classname) {
            $indicator = \core_analytics\manager::get_indicator($classname);
            if (!$indicator) {
                debugging('Can\'t load ' . $classname . ' indicator', DEBUG_DEVELOPER);
                continue;
            }
            $this->indicators[$classname] = $indicator;
        }
    }

    return $this->indicators;
}
228
/**
 * Returns the list of indicators that could potentially be used by the model target.
 *
 * Starts from all the indicators known to the analytics manager and keeps
 * the ones whose requirements are satisfied by the model analyser. If no
 * analyser has been initialised yet, one is initialised in evaluation mode.
 *
 * @return \core_analytics\local\indicator\base[] Keys as returned by the manager.
 */
public function get_potential_indicators() {

    $candidates = \core_analytics\manager::get_all_indicators();

    if (empty($this->analyser)) {
        $this->init_analyser(array('evaluation' => true));
    }

    $analyser = $this->analyser;

    // Function array_filter() preserves the original keys.
    return array_filter($candidates, function($indicator) use ($analyser) {
        return $analyser->check_indicator_requirements($indicator) === true;
    });
}
251
/**
 * Returns the model analyser (defined by the model target).
 *
 * The analyser is initialised with $options only when there is none yet;
 * an existing analyser is returned untouched and $options are ignored.
 *
 * @param array $options Default initialisation with no options.
 * @return \core_analytics\local\analyser\base
 */
public function get_analyser($options = array()) {
    if ($this->analyser === null) {
        $this->init_analyser($options);
    }

    return $this->analyser;
}
267
/**
 * Initialises the model analyser.
 *
 * Options read here:
 * - notimesplitting: when set, no time splitting method is passed to the analyser.
 * - evaluation: when set, all enabled time splitting methods are used (or only
 *   the one in 'timesplitting') and they are flagged as evaluating.
 * - timesplitting: time splitting method id, only looked at when evaluating.
 *
 * The full $options array is also passed through to the analyser constructor.
 *
 * @throws \moodle_exception If there is no target or no usable time splitting method.
 * @throws \coding_exception If the analyser class defined by the target does not exist.
 * @param array $options
 * @return void
 */
protected function init_analyser($options = array()) {

    $target = $this->get_target();
    $indicators = $this->get_indicators();

    if (empty($target)) {
        throw new \moodle_exception('errornotarget', 'analytics');
    }

    $evaluating = !empty($options['evaluation']);

    $timesplittings = array();
    if (empty($options['notimesplitting'])) {

        if ($evaluating && !empty($options['timesplitting'])) {
            // Evaluation limited to the specified time splitting method.
            $requested = \core_analytics\manager::get_time_splitting($options['timesplitting']);
            $timesplittings = array($requested->get_id() => $requested);

        } else if ($evaluating) {
            // The evaluation process runs using all available time splitting methods.
            $timesplittings = \core_analytics\manager::get_enabled_time_splitting_methods();

        } else {
            if (empty($this->model->timesplitting)) {
                throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
            }

            // Returned as an array as all actions (evaluation, training and prediction) go through the same process.
            $timesplittings = array($this->model->timesplitting => $this->get_time_splitting());
        }

        if (empty($timesplittings)) {
            throw new \moodle_exception('errornotimesplittings', 'analytics');
        }
    }

    if ($evaluating) {
        foreach ($timesplittings as $timesplitting) {
            $timesplitting->set_evaluating(true);
        }
    }

    $analyserclass = $target->get_analyser_class();
    if (!class_exists($analyserclass)) {
        throw new \coding_exception($analyserclass . ' class does not exists');
    }

    // Returns a \core_analytics\local\analyser\base class.
    $this->analyser = new $analyserclass($this->model->id, $target, $indicators, $timesplittings, $options);
}
323
/**
 * Returns the model time splitting method.
 *
 * @return \core_analytics\local\time_splitting\base|false False if the model has no time splitting method set.
 */
public function get_time_splitting() {
    if (!empty($this->model->timesplitting)) {
        return \core_analytics\manager::get_time_splitting($this->model->timesplitting);
    }

    return false;
}
335
/**
 * Creates a new model. Enables it if $timesplittingid is specified.
 *
 * The new record is inserted into analytics_models and read back so database
 * defaults are included. Static models are marked as trained straight away.
 *
 * @throws \coding_exception If $processor is provided and it is neither a valid classifier nor a valid regressor.
 * @param \core_analytics\local\target\base $target
 * @param \core_analytics\local\indicator\base[] $indicators
 * @param string|false $timesplittingid The time splitting method id (its fully qualified class name)
 * @param string|false $processor The predictions processor, stored as provided (including false).
 * @return \core_analytics\model
 */
public static function create(\core_analytics\local\target\base $target, array $indicators,
        $timesplittingid = false, $processor = false) {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $indicatorclasses = self::indicator_classes($indicators);

    $now = time();

    $record = (object)[
        'target' => $target->get_id(),
        'indicators' => json_encode($indicatorclasses),
        'version' => $now,
        'timecreated' => $now,
        'timemodified' => $now,
        'usermodified' => $USER->id,
    ];

    $invalidprocessor = $processor &&
        !self::is_valid($processor, '\core_analytics\classifier') &&
        !self::is_valid($processor, '\core_analytics\regressor');
    if ($invalidprocessor) {
        throw new \coding_exception('The provided predictions processor \\' . $processor . '\processor is not valid');
    }
    $record->predictionsprocessor = $processor;

    $id = $DB->insert_record('analytics_models', $record);

    // Read it back to get db defaults.
    $record = $DB->get_record('analytics_models', array('id' => $id), '*', MUST_EXIST);

    $model = new static($record);

    if ($timesplittingid) {
        $model->enable($timesplittingid);
    }

    if ($model->is_static()) {
        $model->mark_as_trained();
    }

    return $model;
}
387
349c4412
AA
/**
 * Creates a new model from json configuration.
 *
 * The configuration is expected already decoded: target, indicators and
 * timesplitting are read as object properties, each of them holding class
 * names that must exist in this site.
 *
 * @throws \coding_exception If any of the required properties is missing.
 * @throws \moodle_exception If any of the referenced classes does not exist.
 * @param \stdClass $jsondata Decoded json data with target, indicators and timesplitting.
 * @return \core_analytics\model
 */
public static function create_from_json($jsondata) {

    \core_analytics\manager::check_can_manage_models();

    if (empty($jsondata) || !isset($jsondata->target) || !isset($jsondata->indicators) ||
            !isset($jsondata->timesplitting)) {
        throw new \coding_exception("invalid json data");
    }

    // Target.
    $targetclass = $jsondata->target;
    if (!class_exists($targetclass)) {
        // The class name is the string placeholder ($a), the 3rd argument is the link.
        throw new \moodle_exception('classdoesnotexist', 'tool_analytics', '', $targetclass);
    }
    $target = \core_analytics\manager::get_target($targetclass);

    // Indicators.
    $indicators = [];
    foreach ($jsondata->indicators as $indicatorclass) {
        if (!class_exists($indicatorclass)) {
            throw new \moodle_exception('classdoesnotexist', 'tool_analytics', '', $indicatorclass);
        }
        $indicators[] = \core_analytics\manager::get_indicator($indicatorclass);
    }

    // Timesplitting.
    $timesplitting = $jsondata->timesplitting;
    if (!class_exists($timesplitting)) {
        throw new \moodle_exception('classdoesnotexist', 'tool_analytics', '', $timesplitting);
    }

    return self::create($target, $indicators, $timesplitting);
}
425
e709e544
DM
/**
 * Does this model exist?
 *
 * If no indicators are provided it considers any model with the provided
 * target a match. Otherwise a model matches when it uses the provided target
 * and exactly the same set of indicator ids (keys of $indicators).
 *
 * @param \core_analytics\local\target\base $target
 * @param \core_analytics\local\indicator\base[]|false $indicators Indexed by indicator id, false to match on target only.
 * @return bool
 */
public static function exists(\core_analytics\local\target\base $target, $indicators = false) {
    global $DB;

    $existingmodels = $DB->get_records('analytics_models', array('target' => $target->get_id()));

    if (!$existingmodels) {
        // Nothing can match. Returning here also prevents array_keys() below from
        // receiving false when no indicators were provided.
        return false;
    }

    if (!$indicators) {
        return true;
    }

    $indicatorids = array_keys($indicators);
    sort($indicatorids);

    foreach ($existingmodels as $modelobj) {
        $model = new \core_analytics\model($modelobj);
        $modelindicatorids = array_keys($model->get_indicators());
        sort($modelindicatorids);

        if ($indicatorids === $modelindicatorids) {
            return true;
        }
    }

    return false;
}
459
/**
 * Updates the model.
 *
 * When the indicators, the time splitting method or the predictions processor
 * change, the model is cleared, its version is bumped and, for non-static
 * models, the trained flag is reset. When only the enabled status changes the
 * insights cache is purged instead.
 *
 * @param int|bool $enabled
 * @param \core_analytics\local\indicator\base[]|false $indicators False to respect current indicators
 * @param string|false $timesplittingid False to respect current time splitting method. Note that the
 *                                      default value is an empty string, which is stored as provided.
 * @param string|false $predictionsprocessor False to respect current predictors processor value
 * @return void
 */
public function update($enabled, $indicators = false, $timesplittingid = '', $predictionsprocessor = false) {
    global $USER, $DB;

    \core_analytics\manager::check_can_manage_models();

    $now = time();

    // A false value means "respect the current value" for the three of them.
    if ($indicators === false) {
        $indicatorsstr = $this->model->indicators;
    } else {
        $indicatorsstr = json_encode(self::indicator_classes($indicators));
    }

    if ($timesplittingid === false) {
        $timesplittingid = $this->model->timesplitting;
    }

    if ($predictionsprocessor === false) {
        $predictionsprocessor = $this->model->predictionsprocessor;
    }

    $definitionchanged = $this->model->timesplitting !== $timesplittingid ||
        $this->model->indicators !== $indicatorsstr ||
        $this->model->predictionsprocessor !== $predictionsprocessor;

    if ($definitionchanged) {

        // Delete generated predictions before changing the model version.
        $this->clear();

        // It needs to be reset as the version changes.
        $this->uniqueid = null;

        // We update the version of the model so different time splittings are not mixed up.
        $this->model->version = $now;

        // Reset trained flag.
        if (!$this->is_static()) {
            $this->model->trained = 0;
        }

    } else if ($this->model->enabled != $enabled) {
        // We purge the cached contexts with insights as some will not be visible anymore.
        $this->purge_insights_cache();
    }

    $this->model->enabled = intval($enabled);
    $this->model->indicators = $indicatorsstr;
    $this->model->timesplitting = $timesplittingid;
    $this->model->predictionsprocessor = $predictionsprocessor;
    $this->model->timemodified = $now;
    $this->model->usermodified = $USER->id;

    $DB->update_record('analytics_models', $this->model);
}
526
d16cf374
DM
/**
 * Removes the model.
 *
 * Clears the model, asks the predictions processor to delete the model output
 * directory (only if the processor is ready, otherwise a debugging message is
 * shown) and deletes the model and its log records.
 *
 * @return void
 */
public function delete() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $this->clear();

    // Method self::clear is already clearing the current model version.
    $predictor = $this->get_predictions_processor(false);
    if ($predictor->is_ready() === true) {
        $predictor->delete_output_dir($this->get_output_dir(array(), true));
    } else {
        $predictorname = \core_analytics\manager::get_predictions_processor_name($predictor);
        debugging('Prediction processor ' . $predictorname . ' is not ready to be used. Model ' .
            $this->model->id . ' could not be deleted.');
    }

    // The records are deleted regardless of the processor status.
    $DB->delete_records('analytics_models', array('id' => $this->model->id));
    $DB->delete_records('analytics_models_log', array('modelid' => $this->model->id));
}
552
/**
 * Evaluates the model.
 *
 * This method gets the site contents (through the analyser) creates a .csv dataset
 * with them and evaluates the model prediction accuracy multiple times using the
 * machine learning backend. Every evaluated time splitting method gets its own
 * result object with status, info, score and logid.
 *
 * Static models are not evaluated: a single NO_DATASET result indexed by the model
 * time splitting method id is returned. When the analyser generates no datasets a
 * single NO_DATASET result is returned in a plain list.
 *
 * @throws \moodle_exception If the model has no indicators.
 * @param array $options
 * @return \stdClass[]
 */
public function evaluate($options = array()) {

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('noevaluationbasedassumptions', 'analytics'));
        $staticresult = new \stdClass();
        $staticresult->status = self::NO_DATASET;
        return array($this->get_time_splitting()->get_id() => $staticresult);
    }

    // The analyser is always re-initialised in evaluation mode.
    $options['evaluation'] = true;
    $this->init_analyser($options);

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_labelled_data call so we get an early exception if it is not ready.
    $predictor = $this->get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // No datasets generated.
    if (empty($datasets)) {
        $nodataset = new \stdClass();
        $nodataset->status = self::NO_DATASET;
        $nodataset->info = $this->get_analyser()->get_logs();
        return array($nodataset);
    }

    if (!PHPUNIT_TEST && CLI_SCRIPT) {
        echo PHP_EOL . get_string('processingsitecontents', 'analytics') . PHP_EOL;
    }

    $results = array();
    foreach ($datasets as $timesplittingid => $dataset) {

        $timesplitting = \core_analytics\manager::get_time_splitting($timesplittingid);

        // The output directory name is the time splitting id without backslashes.
        $dirname = str_replace('\\', '', $timesplittingid);
        $outputdir = $this->get_output_dir(array('evaluation', $dirname));

        // Evaluate the dataset, the deviation we accept in the results depends on the amount of iterations.
        if ($this->get_target()->is_linear()) {
            $evaluation = $predictor->evaluate_regression($this->get_unique_id(), self::ACCEPTED_DEVIATION,
                self::EVALUATION_ITERATIONS, $dataset, $outputdir);
        } else {
            $evaluation = $predictor->evaluate_classification($this->get_unique_id(), self::ACCEPTED_DEVIATION,
                self::EVALUATION_ITERATIONS, $dataset, $outputdir);
        }

        $result = new \stdClass();
        $result->status = $evaluation->status;
        $result->info = $evaluation->info;

        // Prediction processors may return an error, default to 0 score in that case.
        $result->score = isset($evaluation->score) ? $evaluation->score : 0;

        $dir = !empty($evaluation->dir) ? $evaluation->dir : false;

        $result->logid = $this->log_result($timesplitting->get_id(), $result->score, $dir, $result->info);

        $results[$timesplitting->get_id()] = $result;
    }

    return $results;
}
642
/**
 * Trains the model using the site contents.
 *
 * This method prepares a dataset from the site contents (through the analyser)
 * and passes it to the machine learning backends. Static models are skipped as
 * they do not require training.
 *
 * The returned object contains a status and, except for static models, an info
 * property. The dataset is only flagged as used, and the model marked as trained,
 * when the predictions processor reports self::OK.
 *
 * @throws \moodle_exception If the model is not enabled, has no time splitting method or has no indicators.
 * @return \stdClass
 */
public function train() {

    \core_analytics\manager::check_can_manage_models();

    if ($this->is_static()) {
        $this->get_analyser()->add_log(get_string('notrainingbasedassumptions', 'analytics'));
        $staticresult = new \stdClass();
        $staticresult->status = self::OK;
        return $staticresult;
    }

    if (!$this->is_enabled() || empty($this->model->timesplitting)) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_labelled_data call so we get an early exception if it is not writable.
    $outputdir = $this->get_output_dir(array('execution'));

    // Before get_labelled_data call so we get an early exception if it is not ready.
    $predictor = $this->get_predictions_processor();

    $datasets = $this->get_analyser()->get_labelled_data();

    // No training if no files have been provided.
    if (empty($datasets) || empty($datasets[$this->model->timesplitting])) {
        $nodataset = new \stdClass();
        $nodataset->status = self::NO_DATASET;
        $nodataset->info = $this->get_analyser()->get_logs();
        return $nodataset;
    }
    $trainingfile = $datasets[$this->model->timesplitting];

    // Train using the dataset.
    if ($this->get_target()->is_linear()) {
        $training = $predictor->train_regression($this->get_unique_id(), $trainingfile, $outputdir);
    } else {
        $training = $predictor->train_classification($this->get_unique_id(), $trainingfile, $outputdir);
    }

    $result = new \stdClass();
    $result->status = $training->status;
    $result->info = $training->info;

    if ($result->status === self::OK) {
        $this->flag_file_as_used($trainingfile, 'trained');

        // Mark the model as trained if it wasn't.
        if (!$this->model->trained) {
            $this->mark_as_trained();
        }
    }

    return $result;
}
715
/**
 * Get predictions from the site contents.
 *
 * It analyses the site contents (through analyser classes) looking for samples
 * ready to receive predictions. It generates a dataset with all samples ready to
 * get predictions and it passes it to the machine learning backends or to the
 * targets based on assumptions to get the predictions.
 *
 * The returned object contains status and info and, once predictions have been
 * calculated, a predictions property. Prediction callbacks, insights and the
 * "predicted" file flag are only processed when the status is self::OK.
 *
 * @throws \moodle_exception If the model is not enabled, has no time splitting method, has no
 *                           indicators or the samples file was already used to predict.
 * @return \stdClass
 */
public function predict() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    if (!$this->is_enabled() || empty($this->model->timesplitting)) {
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    if (empty($this->get_indicators())) {
        throw new \moodle_exception('errornoindicators', 'analytics');
    }

    $this->heavy_duty_mode();

    // Before get_unlabelled_data call so we get an early exception if it is not writable.
    $outputdir = $this->get_output_dir(array('execution'));

    // Before get_unlabelled_data call so we get an early exception if it is not ready.
    // Static models do not use a predictions processor.
    if (!$this->is_static()) {
        $predictor = $this->get_predictions_processor();
    }

    $samplesdata = $this->get_analyser()->get_unlabelled_data();

    // Get the prediction samples file.
    if (empty($samplesdata) || empty($samplesdata[$this->model->timesplitting])) {
        $nodataset = new \stdClass();
        $nodataset->status = self::NO_DATASET;
        $nodataset->info = $this->get_analyser()->get_logs();
        return $nodataset;
    }
    $samplesfile = $samplesdata[$this->model->timesplitting];

    // We need to throw an exception if we are trying to predict stuff that was already predicted.
    $usedfileconditions = array(
        'modelid' => $this->model->id,
        'action' => 'predicted',
        'fileid' => $samplesfile->get_id()
    );
    if ($DB->get_record('analytics_used_files', $usedfileconditions)) {
        throw new \moodle_exception('erroralreadypredict', 'analytics', '', $samplesfile->get_id());
    }

    $indicatorcalculations = \core_analytics\dataset_manager::get_structured_data($samplesfile);

    // Prepare the results object.
    $result = new \stdClass();

    if ($this->is_static()) {
        // Prediction based on assumptions.
        $result->status = self::OK;
        $result->info = [];
        $result->predictions = $this->get_static_predictions($indicatorcalculations);

    } else {
        // Estimation and classification processes run on the machine learning backend side.
        if ($this->get_target()->is_linear()) {
            $backendresult = $predictor->estimate($this->get_unique_id(), $samplesfile, $outputdir);
        } else {
            $backendresult = $predictor->classify($this->get_unique_id(), $samplesfile, $outputdir);
        }
        $result->status = $backendresult->status;
        $result->info = $backendresult->info;
        $result->predictions = $this->format_predictor_predictions($backendresult);
    }

    if ($result->status !== self::OK) {
        return $result;
    }

    $samplecontexts = array();
    if ($result->predictions) {
        $samplecontexts = $this->execute_prediction_callbacks($result->predictions, $indicatorcalculations);
    }

    if (!empty($samplecontexts) && $this->uses_insights()) {
        $this->trigger_insights($samplecontexts);
    }

    $this->flag_file_as_used($samplesfile, 'predicted');

    return $result;
}
806
ed12ba6b
DM
/**
 * Returns the model predictions processor.
 *
 * Delegates to the analytics manager using the predictions processor stored
 * in the model record.
 *
 * @param bool $checkisready Passed through to the manager.
 * @return \core_analytics\predictor
 */
public function get_predictions_processor($checkisready = true) {
    $processorname = $this->model->predictionsprocessor;

    return \core_analytics\manager::get_predictions_processor($processorname, $checkisready);
}
816
1611308b
DM
/**
 * Formats the predictor results.
 *
 * Each element of $predictorresult->predictions is expected to be a list with:
 * - 1 value: only the sample id, the sample could not be processed and is skipped.
 * - 2 values: sample id and prediction, the prediction score defaults to 1.
 * - 3 values: sample id, prediction and prediction score.
 *
 * Elements with any other number of values are skipped, so they can not be
 * stored using the values of a previously parsed sample.
 *
 * @param \stdClass $predictorresult Predictions processor result, its predictions property is read.
 * @return \stdClass[] Objects with prediction and predictionscore, indexed by unique sample id.
 */
private function format_predictor_predictions($predictorresult) {

    $predictions = array();
    if (empty($predictorresult->predictions)) {
        return $predictions;
    }

    foreach ($predictorresult->predictions as $sampleinfo) {

        // We parse each prediction.
        switch (count($sampleinfo)) {
            case 1:
                // For whatever reason the predictions processor could not process this sample, we
                // skip it and do nothing with it.
                debugging($this->model->id . ' model predictions processor could not process the sample with id ' .
                    $sampleinfo[0], DEBUG_DEVELOPER);
                continue 2;
            case 2:
                // Prediction processors that do not return a prediction score will have the maximum prediction
                // score.
                list($uniquesampleid, $prediction) = $sampleinfo;
                $predictionscore = 1;
                break;
            case 3:
                list($uniquesampleid, $prediction, $predictionscore) = $sampleinfo;
                break;
            default:
                // Unexpected format. Skip it, otherwise the values parsed for the previous
                // sample (or undefined variables) would be used below.
                debugging($this->model->id . ' model predictions processor returned a prediction with an ' .
                    'unexpected number of values', DEBUG_DEVELOPER);
                continue 2;
        }

        $predictions[$uniquesampleid] = (object)['prediction' => $prediction, 'predictionscore' => $predictionscore];
    }

    return $predictions;
}
855
/**
 * Execute the prediction callbacks defined by the target.
 *
 * For every prediction that triggers the target callback, a prediction record
 * is prepared and the target prediction_callback is executed. All the prepared
 * records are saved together once all predictions have been processed.
 *
 * @param \stdClass[] $predictions Objects with prediction and predictionscore, indexed by unique sample id.
 * @param array $indicatorcalculations Indicator calculations indexed by unique sample id.
 * @return array The contexts of the samples that triggered the callback, indexed by context id.
 */
protected function execute_prediction_callbacks($predictions, $indicatorcalculations) {

    // Here we will store all predictions' contexts, this will be used to limit which users will see those predictions.
    $samplecontexts = array();
    $records = array();

    foreach ($predictions as $uniquesampleid => $prediction) {

        // The unique sample id contains both the sampleid and the rangeindex.
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        if (!$this->get_target()->triggers_callback($prediction->prediction, $prediction->predictionscore)) {
            // Nothing to store nor to notify for this sample.
            continue;
        }

        // Prepare the record to store the predicted values.
        $calculations = json_encode($indicatorcalculations[$uniquesampleid]);
        list($record, $samplecontext) = $this->prepare_prediction_record($sampleid, $rangeindex, $prediction->prediction,
            $prediction->predictionscore, $calculations);

        // We will later bulk-insert them all.
        $records[$uniquesampleid] = $record;

        // Also store all samples context to later generate insights or whatever action the target wants to perform.
        $samplecontexts[$samplecontext->id] = $samplecontext;

        $this->get_target()->prediction_callback($this->model->id, $sampleid, $rangeindex, $samplecontext,
            $prediction->prediction, $prediction->predictionscore);
    }

    if (!empty($records)) {
        $this->save_predictions($records);
    }

    return $samplecontexts;
}
369389c9 897
1611308b
DM
/**
 * Generates insights and updates the cache.
 *
 * Asks the target to generate the insight notifications and then makes sure
 * this model id is listed in the 'contextwithinsights' cache entry of each
 * of the provided contexts.
 *
 * @param \context[] $samplecontexts
 * @return void
 */
protected function trigger_insights($samplecontexts) {

    // Notify the target that all predictions have been processed.
    $this->get_target()->generate_insight_notifications($this->model->id, $samplecontexts);

    // Update cache.
    $cache = \cache::make('core', 'contextwithinsights');
    foreach ($samplecontexts as $context) {
        $modelids = $cache->get($context->id);

        if (!$modelids) {
            // The cache is empty, but we don't know if it is empty because there are no insights
            // in this context or because cache/s have been purged, we need to be conservative and
            // "pay" 1 db read to fill up the cache.
            $models = \core_analytics\manager::get_models_with_insights($context);
            $cache->set($context->id, array_keys($models));
            continue;
        }

        if (!in_array($this->get_id(), $modelids)) {
            $modelids[] = $this->get_id();
            $cache->set($context->id, $modelids);
        }
    }
}
925
/**
 * Get predictions from a static model.
 *
 * The target value of each sample is calculated directly by the target (no predictions processor is
 * involved) and reported with a prediction score of 1.
 *
 * @param array $indicatorcalculations Indicators calculations indexed by unique sample id (sample id + range index).
 *                                     Passed by reference: entries of samples without a calculated target value
 *                                     are removed from it.
 * @return \stdClass[] Objects with 'prediction' and 'predictionscore' properties, indexed by unique sample id.
 */
protected function get_static_predictions(&$indicatorcalculations) {

    // Group samples by analysable for \core_analytics\local\target::calculate.
    $analysables = array();
    // List all sampleids together.
    $sampleids = array();

    foreach ($indicatorcalculations as $uniquesampleid => $indicators) {
        list($sampleid, $rangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);

        $analysable = $this->get_analyser()->get_sample_analysable($sampleid);
        $analysableclass = get_class($analysable);
        if (empty($analysables[$analysableclass])) {
            $analysables[$analysableclass] = array();
        }
        if (empty($analysables[$analysableclass][$rangeindex])) {
            // Note that the group keeps the first analysable found for this class and range.
            $analysables[$analysableclass][$rangeindex] = (object)[
                'analysable' => $analysable,
                'indicatorsdata' => array(),
                'sampleids' => array()
            ];
        }
        // Using the sampleid as a key so we can easily merge indicators data later.
        $analysables[$analysableclass][$rangeindex]->indicatorsdata[$sampleid] = $indicators;
        // We could use indicatorsdata keys but the amount of redundant data is not that big and leaves code below cleaner.
        $analysables[$analysableclass][$rangeindex]->sampleids[$sampleid] = $sampleid;

        // Accumulate sample ids to get all their associated data in 1 single db query (analyser::get_samples).
        $sampleids[$sampleid] = $sampleid;
    }

    // Get all samples data.
    list($sampleids, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Calculate the targets.
    $predictions = array();
    foreach ($analysables as $analysableclass => $rangedata) {
        foreach ($rangedata as $rangeindex => $data) {

            // Attach samples data and calculated indicators data.
            $this->get_target()->clear_sample_data();
            $this->get_target()->add_sample_data($samplesdata);
            $this->get_target()->add_sample_data($data->indicatorsdata);

            // Append new elements (we can not get duplicates because sample-analysable relation is N-1).
            $range = $this->get_time_splitting()->get_range_by_index($rangeindex);
            $this->get_target()->filter_out_invalid_samples($data->sampleids, $data->analysable, false);
            $calculations = $this->get_target()->calculate($data->sampleids, $data->analysable,
                $range['start'], $range['end']);

            // Missing $indicatorcalculations values in $calculations are caused by is_valid_sample. We need to remove
            // these $uniquesampleid from $indicatorcalculations because otherwise they will be stored as calculated
            // by self::save_prediction.
            // $calculations only covers the range we are iterating through, so entries that belong to any other
            // range must be left untouched here; they are evaluated when their own range is processed.
            $indicatorcalculations = array_filter($indicatorcalculations,
                function($indicators, $uniquesampleid) use ($calculations, $rangeindex) {
                    list($sampleid, $samplerangeindex) = $this->get_time_splitting()->infer_sample_info($uniquesampleid);
                    // Loose comparison on purpose, the inferred range index may come as a string.
                    if ($samplerangeindex != $rangeindex) {
                        return true;
                    }
                    return isset($calculations[$sampleid]);
                }, ARRAY_FILTER_USE_BOTH);

            foreach ($calculations as $sampleid => $value) {

                $uniquesampleid = $this->get_time_splitting()->append_rangeindex($sampleid, $rangeindex);

                // Null means that the target couldn't calculate the sample, we also remove them from $indicatorcalculations.
                if (is_null($value)) {
                    unset($indicatorcalculations[$uniquesampleid]);
                    continue;
                }

                // Even if static predictions are based on assumptions we flag them as 100% because they are 100%
                // true according to what the developer defined.
                $predictions[$uniquesampleid] = (object)['prediction' => $value, 'predictionscore' => 1];
            }
        }
    }
    return $predictions;
}
1010
/**
 * Builds the record of a prediction, ready to be inserted into the database.
 *
 * Nothing is stored by this method, see self::save_predictions.
 *
 * @param int $sampleid
 * @param int $rangeindex
 * @param int $prediction
 * @param float $predictionscore
 * @param string $calculations
 * @return array 2 elements: the prediction record (\stdClass) and the sample access context (\context)
 */
protected function prepare_prediction_record($sampleid, $rangeindex, $prediction, $predictionscore, $calculations) {
    $context = $this->get_analyser()->sample_access_context($sampleid);

    $record = (object)[
        'modelid' => $this->model->id,
        'contextid' => $context->id,
        'sampleid' => $sampleid,
        'rangeindex' => $rangeindex,
        'prediction' => $prediction,
        'predictionscore' => $predictionscore,
        'calculations' => $calculations,
        'timecreated' => time(),
    ];

    // The time range is resolved against the analysable this sample belongs to.
    $timesplitting = $this->get_time_splitting();
    $timesplitting->set_analysable($this->get_analyser()->get_sample_analysable($sampleid));
    $range = $timesplitting->get_range_by_index($rangeindex);
    if ($range) {
        // Time limits are only set when the range could be found.
        $record->timestart = $range['start'];
        $record->timeend = $range['end'];
    }

    return array($record, $context);
}
1045
/**
 * Inserts the provided prediction records into the analytics_predictions table.
 *
 * @param \stdClass[] $records Prediction records.
 */
protected function save_predictions($records) {
    global $DB;

    $table = 'analytics_predictions';
    $DB->insert_records($table, $records);
}
1055
/**
 * Enables the model, optionally switching it to the provided time splitting method.
 *
 * Switching to a different time splitting method clears the model data and creates a new model version.
 *
 * @throws \moodle_exception If the provided time splitting method is not valid or if the model has none.
 * @param string|false $timesplittingid False to respect the current time splitting method.
 * @return void
 */
public function enable($timesplittingid = false) {
    global $DB, $USER;

    \core_analytics\manager::check_can_manage_models();

    $now = time();

    $newtimesplitting = $timesplittingid && $timesplittingid !== $this->model->timesplitting;

    if ($newtimesplitting) {

        // The id must be a valid time splitting method, specified as a fully qualified class name.
        if (!\core_analytics\manager::is_valid($timesplittingid, '\core_analytics\local\time_splitting\base') ||
                substr($timesplittingid, 0, 1) !== '\\') {
            throw new \moodle_exception('errorinvalidtimesplitting', 'analytics');
        }

        // Predictions generated by the previous version have to go before the version changes.
        $this->clear();

        // The unique id depends on the version so it has to be regenerated.
        $this->uniqueid = null;

        $this->model->timesplitting = $timesplittingid;
        $this->model->version = $now;

        // Non-static models need to be trained again.
        if (!$this->is_static()) {
            $this->model->trained = 0;
        }
    } else if (empty($this->model->timesplitting)) {
        // A model can not be enabled without a time splitting method.
        throw new \moodle_exception('invalidtimesplitting', 'analytics', '', $this->model->id);
    }

    // Enabling a model that was not enabled may change the pages with insights.
    if ($this->model->enabled != 1) {
        $this->purge_insights_cache();
    }

    $this->model->enabled = 1;
    $this->model->timemodified = $now;
    $this->model->usermodified = $USER->id;

    // Persist the new model state.
    $DB->update_record('analytics_models', $this->model);
}
1110
/**
 * Is this a static model?
 *
 * This is decided by the model's target: static models are based on assumptions
 * instead of machine learning backends results.
 *
 * @return bool
 */
public function is_static() {
    $target = $this->get_target();
    return (bool)$target->based_on_assumptions();
}
1122
/**
 * Returns whether the model is flagged as enabled.
 *
 * @return bool
 */
public function is_enabled() {
    $enabled = $this->model->enabled;
    return (bool)$enabled;
}
1131
/**
 * Returns whether the model can be considered trained.
 *
 * @return bool
 */
public function is_trained() {
    if ($this->model->trained) {
        return true;
    }

    // Static models (targets based on assumptions) do not need any training.
    return $this->is_static();
}
1141
/**
 * Flags the model as trained and stores the change in the database.
 *
 * @return void
 */
public function mark_as_trained() {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $model = $this->model;
    $model->trained = 1;
    $DB->update_record('analytics_models', $model);
}
1155
/**
 * Returns the ids of the contexts where this model has predictions.
 *
 * @param bool $skiphidden Skip the predictions that the current user flagged as fixed or as not useful.
 * @return \stdClass[] Records with a 'contextid' property.
 */
public function get_predictions_contexts($skiphidden = true) {
    global $DB, $USER;

    $params = array('modelid' => $this->model->id);
    $sql = "SELECT DISTINCT ap.contextid FROM {analytics_predictions} ap
              JOIN {context} ctx ON ctx.id = ap.contextid
             WHERE ap.modelid = :modelid";

    if (!$skiphidden) {
        return $DB->get_records_sql($sql, $params);
    }

    // Leave out the predictions on which the current user already acted.
    $sql .= " AND NOT EXISTS (
          SELECT 1
            FROM {analytics_prediction_actions} apa
           WHERE apa.predictionid = ap.id AND apa.userid = :userid AND (apa.actionname = :fixed OR apa.actionname = :notuseful)
        )";
    $params['userid'] = $USER->id;
    $params['fixed'] = \core_analytics\prediction::ACTION_FIXED;
    $params['notuseful'] = \core_analytics\prediction::ACTION_NOT_USEFUL;

    return $DB->get_records_sql($sql, $params);
}
1183
f9e7447f
DM
/**
 * Has this model generated predictions?
 *
 * The check is done against analytics_predict_samples rather than against analytics_predictions
 * because targets can ignore some predicted values, and ignored predictions are not stored in db.
 *
 * @return bool
 */
public function any_prediction_obtained() {
    global $DB;

    $conditions = array(
        'modelid' => $this->model->id,
        'timesplitting' => $this->model->timesplitting
    );
    return $DB->record_exists('analytics_predict_samples', $conditions);
}
1198
/**
 * Does this model generate insights?
 *
 * The answer is delegated to the model's target.
 *
 * @return bool
 */
public function uses_insights() {
    $modeltarget = $this->get_target();
    return $modeltarget::uses_insights();
}
1208
369389c9
DM
/**
 * Whether this model has any prediction stored for the provided context.
 *
 * @param \context $context
 * @return bool
 */
public function predictions_exist(\context $context) {
    global $DB;

    $params = array(
        'modelid' => $this->model->id,
        'contextid' => $context->id
    );
    $where = "modelid = :modelid AND contextid = :contextid";

    return $DB->record_exists_select('analytics_predictions', $where, $params);
}
1223
/**
 * Gets the predictions for this context.
 *
 * Only the prediction with the highest range index of each sample is returned, and predictions
 * whose sample is not available anymore are discarded.
 *
 * @param \context $context
 * @param bool $skiphidden Skip the predictions that the current user flagged as fixed or as not useful.
 * @param int|false $page The page of results to fetch. False for all results.
 * @param int $perpage The max number of results to fetch. Ignored if $page is false.
 * @return array 2 elements: the total number of available predictions (before pagination) and the
 *               \core_analytics\prediction[] of the requested page, indexed by prediction id.
 */
public function get_predictions(\context $context, $skiphidden = true, $page = false, $perpage = 100) {
    global $DB, $USER;

    \core_analytics\manager::check_can_list_insights($context);

    // Filters out previous predictions keeping only the last time range one.
    $sql = "SELECT ap.*
              FROM {analytics_predictions} ap
              JOIN (
                SELECT sampleid, max(rangeindex) AS rangeindex
                  FROM {analytics_predictions}
                 WHERE modelid = :modelidsubap and contextid = :contextidsubap
              GROUP BY sampleid
              ) apsub
                ON ap.sampleid = apsub.sampleid AND ap.rangeindex = apsub.rangeindex
             WHERE ap.modelid = :modelid and ap.contextid = :contextid";

    $params = array('modelid' => $this->model->id, 'contextid' => $context->id,
        'modelidsubap' => $this->model->id, 'contextidsubap' => $context->id);

    if ($skiphidden) {
        $sql .= " AND NOT EXISTS (
              SELECT 1
                FROM {analytics_prediction_actions} apa
               WHERE apa.predictionid = ap.id AND apa.userid = :userid AND (apa.actionname = :fixed OR apa.actionname = :notuseful)
            )";
        $params['userid'] = $USER->id;
        $params['fixed'] = \core_analytics\prediction::ACTION_FIXED;
        $params['notuseful'] = \core_analytics\prediction::ACTION_NOT_USEFUL;
    }

    $sql .= " ORDER BY ap.timecreated DESC";
    if (!$predictions = $DB->get_records_sql($sql, $params)) {
        // Stick to the documented (total, predictions) structure so callers can always unpack the result.
        return array(0, array());
    }

    // Get predicted samples' ids.
    $sampleids = array_map(function($prediction) {
        return $prediction->sampleid;
    }, $predictions);

    list($unused, $samplesdata) = $this->get_analyser()->get_samples($sampleids);

    // Number of predictions with an available sample found so far; it ends up being the total.
    $current = 0;

    // Window of positions [$offset, $limit) that belong to the requested page.
    $paginate = ($page !== false);
    $offset = $paginate ? $page * $perpage : 0;
    $limit = $offset + $perpage;

    foreach ($predictions as $predictionid => $predictiondata) {

        $sampleid = $predictiondata->sampleid;

        // Filter out predictions which samples are not available anymore.
        if (empty($samplesdata[$sampleid])) {
            unset($predictions[$predictionid]);
            continue;
        }

        // Return paginated dataset - we cannot paginate in the DB because we post filter the list.
        if (!$paginate || ($current >= $offset && $current < $limit)) {
            // Replace \stdClass object by \core_analytics\prediction objects.
            $predictions[$predictionid] = new \core_analytics\prediction($predictiondata, $samplesdata[$sampleid]);
        } else {
            unset($predictions[$predictionid]);
        }

        $current++;
    }

    return [$current, $predictions];
}
1307
/**
 * Returns the data of the sample a prediction refers to.
 *
 * @throws \moodle_exception If the sample is not available.
 * @param \stdClass $predictionobj Prediction record, only its sampleid is used.
 * @return array
 */
public function prediction_sample_data($predictionobj) {

    $sampleid = $predictionobj->sampleid;

    list($unused, $samplesdata) = $this->get_analyser()->get_samples(array($sampleid));

    if (!empty($samplesdata[$sampleid])) {
        return $samplesdata[$sampleid];
    }

    throw new \moodle_exception('errorsamplenotavailable', 'analytics');
}
1324
/**
 * Returns the description of the sample a prediction refers to.
 *
 * The description is provided by the model's analyser.
 *
 * @param \core_analytics\prediction $prediction
 * @return array 2 elements: list(string, \renderable)
 */
public function prediction_sample_description(\core_analytics\prediction $prediction) {
    $analyser = $this->get_analyser();

    $sampleid = $prediction->get_prediction_data()->sampleid;
    $contextid = $prediction->get_prediction_data()->contextid;
    $sampledata = $prediction->get_sample_data();

    return $analyser->sample_description($sampleid, $contextid, $sampledata);
}
1335
/**
 * Returns the output directory for prediction processors, creating it if necessary.
 *
 * Directory structure as follows:
 * - Evaluation runs:
 *   models/$model->id/$model->version/evaluation/$model->timesplitting
 * - Training & prediction runs:
 *   models/$model->id/$model->version/execution
 *
 * @param array $subdirs Directories appended after the model version.
 * @param bool $onlymodelid Preference over $subdirs, the returned path ends in the model id.
 * @return string
 */
protected function get_output_dir($subdirs = array(), $onlymodelid = false) {
    global $CFG;

    // Each subdirectory is prefixed by a separator.
    $subdirstr = '';
    if (!empty($subdirs)) {
        $subdirstr = DIRECTORY_SEPARATOR . implode(DIRECTORY_SEPARATOR, $subdirs);
    }

    // Fall back to a 'models' directory inside dataroot if there is no configured directory.
    $basedir = get_config('analytics', 'modeloutputdir');
    if (empty($basedir)) {
        $basedir = rtrim($CFG->dataroot, '/') . DIRECTORY_SEPARATOR . 'models';
    }

    // The model id always comes first.
    $path = $basedir . DIRECTORY_SEPARATOR . $this->model->id;
    if (!$onlymodelid) {
        // Followed by the version and the subdirs.
        $path .= DIRECTORY_SEPARATOR . $this->model->version . $subdirstr;
    }

    make_writable_directory($path);

    return $path;
}
1374
/**
 * Returns a unique id for this model.
 *
 * The id is a hash of the site wwwroot, the database prefix, the model id and the model version.
 * It is calculated once and kept in $this->uniqueid.
 *
 * @return string
 */
public function get_unique_id() {
    global $CFG;

    if ($this->uniqueid === null) {
        // Site + model + model version.
        $parts = array($CFG->wwwroot, $CFG->prefix, $this->model->id, $this->model->version);
        $this->uniqueid = sha1(implode('$$', $parts));
    }

    return $this->uniqueid;
}
1396
/**
 * Exports the model data.
 *
 * The returned object is a copy of the model record where the target, the time splitting method
 * (if any) and the indicators are replaced by their names.
 *
 * @return \stdClass
 */
public function export() {

    \core_analytics\manager::check_can_manage_models();

    $exported = clone $this->model;
    $exported->target = $this->get_target()->get_name();

    $timesplitting = $this->get_time_splitting();
    if ($timesplitting) {
        $exported->timesplitting = $timesplitting->get_name();
    }

    $indicatornames = array();
    foreach ($this->get_indicators() as $indicator) {
        $indicatornames[] = $indicator->get_name();
    }
    $exported->indicators = $indicatornames;

    return $exported;
}
1419
349c4412
AA
/**
 * Exports the model definition as JSON.
 *
 * The exported data contains the ids of the target, the time splitting method and the indicators,
 * along with the Moodle version of this site.
 *
 * @throws \moodle_exception If the model has no time splitting method.
 * @return string JSON encoded data.
 */
public function export_as_json() {
    global $CFG;

    $definition = new \stdClass();
    $definition->target = $this->get_target()->get_id();

    $timesplitting = $this->get_time_splitting();
    if (!$timesplitting) {
        // Models without a time splitting method can not be exported.
        throw new \moodle_exception('errornotimesplittings', 'analytics');
    }
    $definition->timesplitting = $timesplitting->get_id();

    $indicatorids = [];
    foreach ($this->get_indicators() as $indicator) {
        $indicatorids[] = $indicator->get_id();
    }
    $definition->indicators = $indicatorids;

    $definition->moodleversion = $CFG->version;

    return json_encode($definition);
}
1446
584ffa4f
DM
/**
 * Returns the log records of this model, most recent first.
 *
 * @param int $limitfrom
 * @param int $limitnum
 * @return \stdClass[]
 */
public function get_logs($limitfrom = 0, $limitnum = 0) {
    global $DB;

    \core_analytics\manager::check_can_manage_models();

    $conditions = array('modelid' => $this->get_id());
    $sort = 'timecreated DESC';

    return $DB->get_records('analytics_models_log', $conditions, $sort, '*', $limitfrom, $limitnum);
}
1462
d126f838
DM
/**
 * Returns the training data exported by the dataset manager for this model and its time splitting method.
 *
 * @return \stored_file|false
 */
public function get_training_data() {

    \core_analytics\manager::check_can_manage_models();

    $modelid = $this->get_id();
    $timesplitting = $this->get_time_splitting();

    return \core_analytics\dataset_manager::export_training_data($modelid, $timesplitting->get_id());
}
1475
/**
 * Records in analytics_used_files that the provided file has been used by this model.
 *
 * @param \stored_file $file
 * @param string $action The action the file was used for.
 * @return void
 */
protected function flag_file_as_used(\stored_file $file, $action) {
    global $DB;

    $usedfile = (object)[
        'modelid' => $this->model->id,
        'fileid' => $file->get_id(),
        'action' => $action,
        'time' => time(),
    ];
    $DB->insert_record('analytics_used_files', $usedfile);
}
1493
/**
 * Adds an entry with the provided results to the model log.
 *
 * @param string $timesplittingid
 * @param float $score
 * @param string $dir
 * @param array $info Only logged if not empty.
 * @return int The inserted log id
 */
protected function log_result($timesplittingid, $score, $dir = false, $info = false) {
    global $DB, $USER;

    $fields = [
        'modelid' => $this->get_id(),
        'version' => $this->model->version,
        'target' => $this->model->target,
        'indicators' => $this->model->indicators,
        'timesplitting' => $timesplittingid,
        'dir' => $dir,
    ];
    if ($info) {
        // The keys are dropped so it is always encoded as a JSON list.
        $fields['info'] = json_encode(array_values($info));
    }
    $fields['score'] = $score;
    $fields['timecreated'] = time();
    $fields['usermodified'] = $USER->id;

    return $DB->insert_record('analytics_models_log', (object)$fields);
}
1523
/**
 * Utility method to return the ids of a list of indicator objects.
 *
 * @throws \moodle_exception If any of the elements is not a valid indicator.
 * @param \core_analytics\local\indicator\base[] $indicators
 * @return string[]
 */
private static function indicator_classes($indicators) {

    // We are interested in the indicators themselves, the keys of the list are ignored.
    $classes = array();
    foreach ($indicators as $candidate) {

        if (\core_analytics\manager::is_valid($candidate, '\core_analytics\local\indicator\base')) {
            $classes[] = $candidate->get_id();
            continue;
        }

        // Not a valid indicator, get something printable for the error message.
        if (is_object($candidate)) {
            $candidate = '\\' . get_class($candidate);
        } else if (!is_scalar($candidate)) {
            $candidate = strval($candidate);
        }
        throw new \moodle_exception('errorinvalidindicator', 'analytics', '', $candidate);
    }

    return $classes;
}
1548
/**
 * Clears the model training and prediction data.
 *
 * Executed after updating model critical elements like the time splitting method
 * or the indicators. The model is flagged as not trained once its data is deleted.
 *
 * @return void
 */
public function clear() {
    global $DB, $USER;

    \core_analytics\manager::check_can_manage_models();

    // Let the predictions processor delete what it stored for the current model version.
    $predictor = $this->get_predictions_processor(false);
    if ($predictor->is_ready() === true) {
        $predictor->clear_model($this->get_unique_id(), $this->get_output_dir());
    } else {
        $predictorname = \core_analytics\manager::get_predictions_processor_name($predictor);
        debugging('Prediction processor ' . $predictorname . ' is not ready to be used. Model ' .
            $this->model->id . ' could not be cleared.');
    }

    // Actions hang from predictions, so they are deleted while the predictions ids are still available.
    $predictionids = $DB->get_fieldset_select('analytics_predictions', 'id', 'modelid = :modelid',
        array('modelid' => $this->get_id()));
    if ($predictionids) {
        list($insql, $inparams) = $DB->get_in_or_equal($predictionids);
        $DB->delete_records_select('analytics_prediction_actions', "predictionid $insql", $inparams);
    }

    // Everything else is directly linked to the model id.
    $modeltables = array(
        'analytics_predictions',
        'analytics_predict_samples',
        'analytics_train_samples',
        'analytics_used_files',
        'analytics_used_analysables'
    );
    foreach ($modeltables as $table) {
        $DB->delete_records($table, array('modelid' => $this->model->id));
    }

    // Purge all generated files.
    \core_analytics\dataset_manager::clear_model_files($this->model->id);

    // We don't expect people to clear models regularly and the cost of filling the cache is
    // 1 db read per context.
    $this->purge_insights_cache();

    // The model has to be trained again.
    $this->model->trained = 0;
    $this->model->timemodified = time();
    $this->model->usermodified = $USER->id;
    $DB->update_record('analytics_models', $this->model);
}
1597
/**
 * Purges the whole 'contextwithinsights' cache.
 */
private function purge_insights_cache() {
    \cache::make('core', 'contextwithinsights')->purge();
}
1605
1611308b
DM
/**
 * Raises the memory limit (unless it is already unlimited) and the time limit.
 *
 * @return void
 */
private function heavy_duty_mode() {
    // A memory_limit of -1 means that there is nothing to raise.
    $unlimitedmemory = (ini_get('memory_limit') == -1);
    if (!$unlimitedmemory) {
        raise_memory_limit(MEMORY_HUGE);
    }

    \core_php_time_limit::raise();
}
369389c9 1617}