MDL-59694 analytics: Track processed analysables
[moodle.git] / analytics / classes / local / analyser / base.php
CommitLineData
369389c9
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
413f19bc 18 * Analysers base class.
369389c9
DM
19 *
20 * @package core_analytics
21 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25namespace core_analytics\local\analyser;
26
27defined('MOODLE_INTERNAL') || die();
28
29/**
413f19bc 30 * Analysers base class.
369389c9
DM
31 *
32 * @package core_analytics
33 * @copyright 2016 David Monllao {@link http://www.davidmonllao.com}
34 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
35 */
36abstract class base {
37
413f19bc
DM
38 /**
39 * @var int
40 */
369389c9
DM
41 protected $modelid;
42
413f19bc
DM
43 /**
44 * The model target.
45 *
46 * @var \core_analytics\local\target\base
47 */
369389c9 48 protected $target;
413f19bc 49
0690a271
DM
50 /**
51 * A $this->target copy loaded with the ongoing analysis analysable.
52 *
53 * @var \core_analytics\local\target\base
54 */
55 protected $analysabletarget;
56
413f19bc
DM
57 /**
58 * The model indicators.
59 *
60 * @var \core_analytics\local\indicator\base[]
61 */
369389c9 62 protected $indicators;
413f19bc
DM
63
64 /**
65 * Time splitting methods to use.
66 *
67 * Multiple time splitting methods during evaluation and 1 single
68 * time splitting method once the model is enabled.
69 *
70 * @var \core_analytics\local\time_splitting\base[]
71 */
369389c9
DM
72 protected $timesplittings;
73
413f19bc
DM
74 /**
75 * Execution options.
76 *
77 * @var array
78 */
369389c9
DM
79 protected $options;
80
413f19bc
DM
81 /**
82 * Simple log array.
83 *
84 * @var string[]
85 */
369389c9
DM
86 protected $log;
87
413f19bc
DM
/**
 * Constructor method.
 *
 * Stores the model definition (target, indicators and time splitting methods) together with the
 * execution options and checks that this analyser provides the sample data the indicators require.
 *
 * @throws \core_analytics\requirements_exception If an indicator requires sample data this analyser does not provide.
 * @param int $modelid
 * @param \core_analytics\local\target\base $target
 * @param \core_analytics\local\indicator\base[] $indicators
 * @param \core_analytics\local\time_splitting\base[] $timesplittings
 * @param array $options Execution options. The 'evaluation' option is stored as a strict boolean.
 */
public function __construct($modelid, \core_analytics\local\target\base $target, $indicators, $timesplittings, $options) {
    $this->modelid = $modelid;
    $this->target = $target;
    $this->indicators = $indicators;
    $this->timesplittings = $timesplittings;

    // Other methods of this class compare this flag using === true and === false, a truthy value that
    // is not a boolean (1, '1'...) would match none of them, so we always store a real boolean.
    $options['evaluation'] = !empty($options['evaluation']);
    $this->options = $options;

    // The log is initialised before running any check so nothing logged by them can be wiped afterwards.
    $this->log = array();

    // Checks if the analyser satisfies the indicators requirements.
    $this->check_indicators_requirements();
}
114
a8ccc5f2
DM
115 /**
116 * Returns the list of analysable elements available on the site.
117 *
118 * \core_analytics\local\analyser\by_course and \core_analytics\local\analyser\sitewide are implementing
119 * this method returning site courses (by_course) and the whole system (sitewide) as analysables.
120 *
dd13fc22 121 * @return \core_analytics\analysable[] Array of analysable elements using the analysable id as array key.
a8ccc5f2
DM
122 */
123 abstract public function get_analysables();
124
369389c9 125 /**
413f19bc 126 * This function returns this analysable list of samples.
369389c9
DM
127 *
128 * @param \core_analytics\analysable $analysable
a40952d3 129 * @return array array[0] = int[] (sampleids) and array[1] = array (samplesdata)
369389c9
DM
130 */
131 abstract protected function get_all_samples(\core_analytics\analysable $analysable);
132
a40952d3 133 /**
413f19bc 134 * This function returns the samples data from a list of sample ids.
a40952d3
DM
135 *
136 * @param int[] $sampleids
137 * @return array array[0] = int[] (sampleids) and array[1] = array (samplesdata)
138 */
369389c9
DM
139 abstract public function get_samples($sampleids);
140
a40952d3 141 /**
413f19bc 142 * Returns the analysable of a sample.
a40952d3
DM
143 *
144 * @param int $sampleid
145 * @return \core_analytics\analysable
146 */
147 abstract public function get_sample_analysable($sampleid);
148
149 /**
413f19bc 150 * Returns the sample's origin in moodle database.
a40952d3
DM
151 *
152 * @return string
153 */
a8ccc5f2 154 abstract public function get_samples_origin();
369389c9
DM
155
156 /**
413f19bc
DM
157 * Returns the context of a sample.
158 *
369389c9
DM
159 * moodle/analytics:listinsights will be required at this level to access the sample predictions.
160 *
161 * @param int $sampleid
162 * @return \context
163 */
164 abstract public function sample_access_context($sampleid);
165
a40952d3 166 /**
413f19bc 167 * Describes a sample with a description summary and a \renderable (an image for example)
a40952d3
DM
168 *
169 * @param int $sampleid
170 * @param int $contextid
171 * @param array $sampledata
172 * @return array array(string, \renderable)
173 */
369389c9
DM
174 abstract public function sample_description($sampleid, $contextid, $sampledata);
175
369389c9
DM
/**
 * Main analyser method which processes the site analysables.
 *
 * Analysables are processed in the order returned by get_sorted_analysables(), the time they were
 * analysed is recorded and, once all of them have been processed (or the time limit has been reached),
 * the files generated for each time splitting method are merged into one.
 *
 * @param bool $includetarget
 * @return \stored_file[]
 */
public function get_analysable_data($includetarget) {

    // Time limit control.
    $timelimit = intval(get_config('analytics', 'modeltimelimit'));

    list($analysables, $processedanalysables) = $this->get_sorted_analysables($includetarget);

    // Files indexed by time splitting method id first and by analysable id later.
    $datasets = array();

    $starttime = time();
    foreach ($analysables as $analysable) {

        $analysableid = $analysable->get_id();

        // Later we will need to aggregate data by time splitting method.
        foreach ($this->process_analysable($analysable, $includetarget) as $timesplittingid => $file) {
            $datasets[$timesplittingid][$analysableid] = $file;
        }

        $this->update_analysable_analysed_time($processedanalysables, $analysableid, $includetarget);

        // The time limit only applies when we are not evaluating the model.
        if (!$this->options['evaluation'] && $timelimit <= (time() - $starttime)) {
            break;
        }
    }

    // We join the datasets by time splitting method.
    return $this->merge_analysable_files($datasets, $includetarget);
}
369389c9 218
413f19bc
DM
/**
 * Lists the sample data this analyser provides.
 *
 * The base implementation only provides the samples origin returned by get_samples_origin().
 *
 * @return string[]
 */
protected function provided_sample_data() {
    $origin = $this->get_samples_origin();
    return array($origin);
}
227
/**
 * Returns labelled data (training and evaluation).
 *
 * Shortcut for get_analysable_data() including the target.
 *
 * @return \stored_file[]
 */
public function get_labelled_data() {
    $includetarget = true;
    return $this->get_analysable_data($includetarget);
}
236
413f19bc
DM
/**
 * Returns unlabelled data (prediction).
 *
 * Shortcut for get_analysable_data() without including the target.
 *
 * @return \stored_file[]
 */
public function get_unlabelled_data() {
    $includetarget = false;
    return $this->get_analysable_data($includetarget);
}
245
/**
 * Checks that this analyser satisfies the requirements of all the model indicators.
 *
 * @throws \core_analytics\requirements_exception On the first indicator with missing sample data.
 * @return void
 */
protected function check_indicators_requirements() {

    foreach ($this->indicators as $indicator) {

        $missing = $this->check_indicator_requirements($indicator);
        if ($missing === true) {
            // All this indicator needs is provided.
            continue;
        }

        $message = get_class($indicator) . ' indicator requires ' . json_encode($missing) .
            ' sample data which is not provided by ' . get_class($this);
        throw new \core_analytics\requirements_exception($message);
    }
}
262
a8ccc5f2
DM
/**
 * Merges the dataset files of all analysables into one file per time splitting method.
 *
 * @param array $filesbytimesplitting Files indexed by time splitting method id and analysable id.
 * @param bool $includetarget
 * @return \stored_file[] The merged files indexed by time splitting method id.
 */
protected function merge_analysable_files($filesbytimesplitting, $includetarget) {

    $merged = array();
    foreach ($filesbytimesplitting as $timesplittingid => $files) {

        // Delete the previous copy. Only when evaluating.
        if ($this->options['evaluation'] === true) {
            \core_analytics\dataset_manager::delete_previous_evaluation_file($this->modelid, $timesplittingid);
        }

        // The file area depends on whether the datasets are labelled or not.
        $filearea = $includetarget ?
            \core_analytics\dataset_manager::LABELLED_FILEAREA :
            \core_analytics\dataset_manager::UNLABELLED_FILEAREA;

        // Merge all the analysables files into one.
        $merged[$timesplittingid] = \core_analytics\dataset_manager::merge_datasets($files,
            $this->modelid, $timesplittingid, $filearea, $this->options['evaluation']);
    }

    return $merged;
}
292
/**
 * Checks that this analyser satisfies the provided indicator requirements.
 *
 * The sample data required by the indicator is compared against the sample data this analyser provides.
 *
 * @param \core_analytics\local\indicator\base $indicator
 * @return true|string[] True if all good, missing requirements list otherwise
 */
public function check_indicator_requirements(\core_analytics\local\indicator\base $indicator) {

    $provided = $this->provided_sample_data();
    $required = $indicator::required_sample_data();

    // An indicator that does not need any sample data is always satisfied.
    if (!empty($required)) {
        $missing = array_diff($required, $provided);
        if (!empty($missing)) {
            return $missing;
        }
    }

    return true;
}
316
/**
 * Processes an analysable.
 *
 * Runs all the time splitting methods on the analysable and returns the files they generate. When the
 * analysable is not valid for the target, or no time splitting method generates a file, the reason is
 * added to the analysis log (see get_logs()) and an empty array is returned.
 *
 * @param \core_analytics\analysable $analysable
 * @param bool $includetarget
 * @return \stored_file[] Files by time splitting method
 */
public function process_analysable($analysable, $includetarget) {

    // Default return.
    $files = array();

    // Target instances scope is per-analysable (it can't be lower as calculations run once per
    // analysable, not time splitting method nor time range).
    $this->analysabletarget = call_user_func(array($this->target, 'instance'));

    // We need to check that the analysable is valid for the target even if we don't include targets
    // as we still need to discard invalid analysables for the target.
    $result = $this->analysabletarget->is_valid_analysable($analysable, $includetarget);
    if ($result !== true) {
        $a = new \stdClass();
        $a->analysableid = $analysable->get_id();
        $a->result = $result;
        $this->add_log(get_string('analysablenotvalidfortarget', 'analytics', $a));
        return array();
    }

    // Process all provided time splitting methods.
    $results = array();
    foreach ($this->timesplittings as $timesplitting) {

        $timesplittingid = $timesplitting->get_id();

        // For evaluation purposes we don't need to be that strict about how updated the data is,
        // if this analysable was analysed less than 1 week ago we skip generating a new file. This
        // helps scale the evaluation process as sites with tons of courses may take a lot of time to
        // complete an evaluation.
        if (!empty($this->options['evaluation']) && !empty($this->options['reuseprevanalysed'])) {

            $previousanalysis = \core_analytics\dataset_manager::get_evaluation_analysable_file($this->modelid,
                $analysable->get_id(), $timesplittingid);
            // 1 week is a partly random time interval, no need to worry about DST.
            $boundary = time() - WEEKSECS;
            if ($previousanalysis && $previousanalysis->get_timecreated() > $boundary) {
                // Recover the previous analysed file and avoid generating a new one.

                // Don't bother filling a result object as it is only useful when there are no files generated.
                $files[$timesplittingid] = $previousanalysis;
                continue;
            }
        }

        $result = $this->process_time_splitting($timesplitting, $analysable, $includetarget);

        if (!empty($result->file)) {
            $files[$timesplittingid] = $result->file;
        }

        // Indexed by time splitting method id so the errors below can say which method they belong to.
        $results[$timesplittingid] = $result;
    }

    if (empty($files)) {
        // No time splitting method generated a file, log why.
        $errors = array();
        foreach ($results as $timesplittingid => $result) {
            $errors[] = $timesplittingid . ': ' . $result->message;
        }

        $a = new \stdClass();
        $a->analysableid = $analysable->get_id();
        $a->errors = implode(', ', $errors);
        $this->add_log(get_string('analysablenotused', 'analytics', $a));
    }

    return $files;
}
393
/**
 * Appends an entry to the analysis log.
 *
 * @param string $string The log entry
 * @return void
 */
public function add_log($string) {
    $this->log[] = $string;
}
403
/**
 * Returns all the entries added to the analysis log so far.
 *
 * @return string[]
 */
public function get_logs() {
    return $this->log;
}
412
413f19bc
DM
/**
 * Processes the analysable samples using the provided time splitting method.
 *
 * The returned object always contains a status and a message; it also contains the generated
 * file when the analysable samples could be analysed.
 *
 * @param \core_analytics\local\time_splitting\base $timesplitting
 * @param \core_analytics\analysable $analysable
 * @param bool $includetarget
 * @return \stdClass Results object.
 */
protected function process_time_splitting($timesplitting, $analysable, $includetarget = false) {

    $result = new \stdClass();

    if (!$timesplitting->is_valid_analysable($analysable)) {
        $result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
        $result->message = get_string('invalidanalysablefortimesplitting', 'analytics',
            $timesplitting->get_name());
        return $result;
    }
    $timesplitting->set_analysable($analysable);

    if (CLI_SCRIPT && !PHPUNIT_TEST) {
        mtrace('Analysing id "' . $analysable->get_id() . '" with "' . $timesplitting->get_name() .
            '" time splitting method...');
    }

    // What is a sample is defined by the analyser, it can be an enrolment, a course, a user, a question
    // attempt... it is on what we will base indicators calculations.
    list($sampleids, $samplesdata) = $this->get_all_samples($analysable);

    if (count($sampleids) === 0) {
        $result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
        $result->message = get_string('nodata', 'analytics');
        return $result;
    }

    if ($includetarget) {
        // All ranges are used when we are calculating data for training.
        $ranges = $timesplitting->get_all_ranges();
    } else {
        // The latest range that has not yet been used for prediction (it depends on the time range where we are right now).
        $ranges = $this->get_most_recent_prediction_range($timesplitting);
    }

    // There is no need to keep track of the evaluated samples and ranges as we always evaluate the whole dataset.
    if ($this->options['evaluation'] === false) {

        if (empty($ranges)) {
            $result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
            $result->message = get_string('noranges', 'analytics');
            return $result;
        }

        // We skip all samples that are already part of a training dataset, even if they have not been used for prediction.
        $this->filter_out_train_samples($sampleids, $timesplitting);

        if (count($sampleids) === 0) {
            $result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
            $result->message = get_string('nonewdata', 'analytics');
            return $result;
        }

        // Only when processing data for predictions.
        if (!$includetarget) {
            // We also filter out samples and ranges that have already been used for predictions.
            $this->filter_out_prediction_samples_and_ranges($sampleids, $ranges, $timesplitting);
        }

        if (count($sampleids) === 0) {
            $result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
            $result->message = get_string('nonewdata', 'analytics');
            return $result;
        }

        if (count($ranges) === 0) {
            $result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
            $result->message = get_string('nonewranges', 'analytics');
            return $result;
        }
    }

    // Labelled datasets (the ones including the target) and unlabelled datasets live in different
    // file areas. The flag to look at is $includetarget, there is no $target variable in this scope.
    if ($includetarget) {
        $filearea = \core_analytics\dataset_manager::LABELLED_FILEAREA;
    } else {
        $filearea = \core_analytics\dataset_manager::UNLABELLED_FILEAREA;
    }
    $dataset = new \core_analytics\dataset_manager($this->modelid, $analysable->get_id(), $timesplitting->get_id(),
        $filearea, $this->options['evaluation']);

    // Flag the model + analysable + timesplitting as being analysed (prevent concurrent executions).
    if (!$dataset->init_process()) {
        // If this model + analysable + timesplitting combination is being analysed we skip this process.
        $result->status = \core_analytics\model::NO_DATASET;
        $result->message = get_string('analysisinprogress', 'analytics');
        return $result;
    }

    // Remove samples the target consider invalid.
    $this->analysabletarget->add_sample_data($samplesdata);
    $this->analysabletarget->filter_out_invalid_samples($sampleids, $analysable, $includetarget);

    if (!$sampleids) {
        $result->status = \core_analytics\model::NO_DATASET;
        $result->message = get_string('novalidsamples', 'analytics');
        $dataset->close_process();
        return $result;
    }

    foreach ($this->indicators as $key => $indicator) {
        // The analyser attaches the main entities the sample depends on and are provided to the
        // indicator to calculate the sample.
        $this->indicators[$key]->add_sample_data($samplesdata);
    }

    // Here we start the memory intensive process that will last until $data var is
    // unset (until the method is finished basically).
    if ($includetarget) {
        $data = $timesplitting->calculate($sampleids, $this->get_samples_origin(), $this->indicators, $ranges,
            $this->analysabletarget);
    } else {
        $data = $timesplitting->calculate($sampleids, $this->get_samples_origin(), $this->indicators, $ranges);
    }

    if (!$data) {
        $result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
        $result->message = get_string('novaliddata', 'analytics');
        $dataset->close_process();
        return $result;
    }

    // Add extra metadata.
    $this->add_model_metadata($data);

    // Write all calculated data to a file.
    $file = $dataset->store($data);

    // Flag the model + analysable + timesplitting as analysed.
    $dataset->close_process();

    // No need to keep track of analysed stuff when evaluating.
    if ($this->options['evaluation'] === false) {
        // Save the samples that have been already analysed so they are not analysed again in future.

        if ($includetarget) {
            $this->save_train_samples($sampleids, $timesplitting, $file);
        } else {
            $this->save_prediction_samples($sampleids, $ranges, $timesplitting);
        }
    }

    $result->status = \core_analytics\model::OK;
    $result->message = get_string('successfullyanalysed', 'analytics');
    $result->file = $file;
    return $result;
}
567
/**
 * Returns the most recent range that can be used to predict.
 *
 * Ranges are checked from the highest index to the lowest one and the first one the time splitting
 * method reports as ready to predict is returned.
 *
 * @param \core_analytics\local\time_splitting\base $timesplitting
 * @return array The selected range indexed by its original range index, an empty array if no range is ready.
 */
protected function get_most_recent_prediction_range($timesplitting) {

    $ranges = $timesplitting->get_all_ranges();

    // Opposite order as we are interested in the last range that can be used for prediction.
    krsort($ranges);

    // We already provided the analysable to the time splitting method, there is no need to feed it back.
    foreach ($ranges as $rangeindex => $range) {
        if ($timesplitting->ready_to_predict($range)) {
            // We need to maintain the same indexes.
            return array($rangeindex => $range);
        }
    }

    return array();
}
592
413f19bc
DM
/**
 * Filters out samples that have already been used for training.
 *
 * The samples stored in analytics_train_samples for this model, analysable and time splitting
 * method are removed (matching by array key) from the provided list.
 *
 * @param int[] $sampleids Passed by reference, it is updated in place.
 * @param \core_analytics\local\time_splitting\base $timesplitting
 */
protected function filter_out_train_samples(&$sampleids, $timesplitting) {
    global $DB;

    $conditions = array(
        'modelid' => $this->modelid,
        'analysableid' => $timesplitting->get_analysable()->get_id(),
        'timesplitting' => $timesplitting->get_id()
    );

    // There is one record for each training file.
    foreach ($DB->get_records('analytics_train_samples', $conditions) as $record) {

        $alreadyused = json_decode($record->sampleids, true);
        if (empty($alreadyused)) {
            continue;
        }

        // Keep only the samples that were not part of this file.
        $sampleids = array_diff_key($sampleids, $alreadyused);
    }
}
618
413f19bc
DM
/**
 * Filters out samples that have already been used for prediction.
 *
 * If all the samples already got predictions for the provided range the range is removed from $ranges,
 * otherwise $sampleids is reduced to the samples that are still pending.
 *
 * @throws \coding_exception If more than one range is provided.
 * @param int[] $sampleids Passed by reference, it is updated in place.
 * @param array $ranges Passed by reference, it should contain one single range.
 * @param \core_analytics\local\time_splitting\base $timesplitting
 */
protected function filter_out_prediction_samples_and_ranges(&$sampleids, &$ranges, $timesplitting) {
    global $DB;

    if (count($ranges) > 1) {
        throw new \coding_exception('$ranges argument should only contain one range');
    }

    $rangeindex = key($ranges);

    $conditions = array(
        'modelid' => $this->modelid,
        'analysableid' => $timesplitting->get_analysable()->get_id(),
        'timesplitting' => $timesplitting->get_id(),
        'rangeindex' => $rangeindex
    );
    $record = $DB->get_record('analytics_predict_samples', $conditions);
    if (!$record) {
        // Nothing to filter out.
        return;
    }

    $predicted = json_decode($record->sampleids, true);
    $pending = array_diff_key($sampleids, $predicted);

    if (count($pending) > 0) {
        // Replace the list of samples by the one excluding samples that already got predictions at this range.
        $sampleids = $pending;
    } else {
        // All samples already calculated.
        unset($ranges[$rangeindex]);
    }
}
655
413f19bc
DM
/**
 * Saves samples that have just been used for training.
 *
 * A new analytics_train_samples record linked to the dataset file is inserted.
 *
 * @param int[] $sampleids
 * @param \core_analytics\local\time_splitting\base $timesplitting
 * @param \stored_file $file
 * @return void
 */
protected function save_train_samples($sampleids, $timesplitting, $file) {
    global $DB;

    $record = (object)array(
        'modelid' => $this->modelid,
        'analysableid' => $timesplitting->get_analysable()->get_id(),
        'timesplitting' => $timesplitting->get_id(),
        'fileid' => $file->get_id(),
        'sampleids' => json_encode($sampleids),
        'timecreated' => time()
    );

    $DB->insert_record('analytics_train_samples', $record);
}
678
413f19bc
DM
/**
 * Saves samples that have just been used for prediction.
 *
 * The samples are appended to the existing analytics_predict_samples record for this model, analysable,
 * time splitting method and range; a new record is inserted if there is none yet.
 *
 * @throws \coding_exception If more than one range is provided.
 * @param int[] $sampleids
 * @param array $ranges It should contain one single range.
 * @param \core_analytics\local\time_splitting\base $timesplitting
 * @return void
 */
protected function save_prediction_samples($sampleids, $ranges, $timesplitting) {
    global $DB;

    if (count($ranges) > 1) {
        throw new \coding_exception('$ranges argument should only contain one range');
    }

    $rangeindex = key($ranges);

    $conditions = array(
        'modelid' => $this->modelid,
        'analysableid' => $timesplitting->get_analysable()->get_id(),
        'timesplitting' => $timesplitting->get_id(),
        'rangeindex' => $rangeindex
    );

    $existing = $DB->get_record('analytics_predict_samples', $conditions);
    if (!$existing) {
        // First time we predict using this range.
        $record = (object)$conditions;
        $record->sampleids = json_encode($sampleids);
        $record->timecreated = time();
        $record->timemodified = $record->timecreated;
        $DB->insert_record('analytics_predict_samples', $record);
        return;
    }

    // Append the new samples used for prediction.
    $previous = json_decode($existing->sampleids, true);
    $existing->sampleids = json_encode($previous + $sampleids);
    $existing->timemodified = time();
    $DB->update_record('analytics_predict_samples', $existing);
}
5c5cb3ee
DM
712
/**
 * Adds the model metadata to the dataset.
 *
 * The moodle version and the target description (column, type and range or classes) are appended
 * to the first two rows of the dataset: variable names go to row 0 and their values to row 1.
 *
 * @param array $data Passed by reference, it is updated in place.
 * @return void
 */
protected function add_model_metadata(&$data) {
    global $CFG;

    $target = $this->analysabletarget;

    $metadata = array(
        'moodleversion' => $CFG->version,
        'targetcolumn' => $target->get_id()
    );

    if ($target->is_linear()) {
        $metadata += array(
            'targettype' => 'linear',
            'targetmin' => $target::get_min_value(),
            'targetmax' => $target::get_max_value()
        );
    } else {
        $metadata += array(
            'targettype' => 'discrete',
            'targetclasses' => json_encode($target::get_classes())
        );
    }

    foreach ($metadata as $name => $value) {
        $data[0][] = $name;
        $data[1][] = $value;
    }
}
dd13fc22
DM
740
/**
 * Returns the list of analysables sorted in processing priority order.
 *
 * Analysables that have never been analysed before are returned first, followed by the
 * ones we have already seen in the order get_processed_analysables() returns them.
 *
 * @param bool $includetarget
 * @return array(0 => \core_analytics\analysable[], 1 => \stdClass[])
 */
protected function get_sorted_analysables($includetarget) {

    $analysables = $this->get_analysables();

    // Get the list of analysables that have been already processed.
    $processedanalysables = $this->get_processed_analysables($includetarget);

    // We want to start processing analysables we have not yet processed and later continue
    // with analysables that we already processed.
    $unseen = array_diff_key($analysables, $processedanalysables);

    // We iterate through the processed records (not through $analysables) to respect their order, replacing
    // each analytics_used_analysables record by its analysable object. Records whose analysable is not
    // available any more are skipped.
    // NOTE(review): get_processed_analysables() sorts by timeanalysed DESC, so the most recently analysed
    // analysables come first among the seen ones - confirm this is the intended priority.
    $seen = array();
    foreach ($processedanalysables as $analysableid => $unused) {
        if (array_key_exists($analysableid, $analysables)) {
            $seen[$analysableid] = $analysables[$analysableid];
        }
    }

    return array($unseen + $seen, $processedanalysables);
}
772
/**
 * Get analysables that have been already processed.
 *
 * Returns this model analytics_used_analysables records for the requested action ('training' if the
 * target is included, 'prediction' otherwise) sorted by timeanalysed DESC.
 *
 * @param bool $includetarget
 * @return \stdClass[] Records indexed by analysable id, the record id is available as 'primarykey'.
 */
protected function get_processed_analysables($includetarget) {
    global $DB;

    $action = $includetarget ? 'training' : 'prediction';
    $params = array('modelid' => $this->modelid, 'action' => $action);

    // Weird select fields ordering for performance (analysableid key matching, analysableid is also unique by modelid).
    $fields = 'analysableid, modelid, action, timeanalysed, id AS primarykey';

    return $DB->get_records_select('analytics_used_analysables', 'modelid = :modelid and action = :action',
        $params, 'timeanalysed DESC', $fields);
}
790
/**
 * Updates the analysable analysis time.
 *
 * The existing analytics_used_analysables record is updated if the analysable is part of
 * $processedanalysables, otherwise a new record is inserted.
 *
 * @param array $processedanalysables Records returned by get_processed_analysables(), indexed by analysable id.
 * @param int $analysableid
 * @param bool $includetarget
 * @return void
 */
protected function update_analysable_analysed_time($processedanalysables, $analysableid, $includetarget) {
    global $DB;

    if (!empty($processedanalysables[$analysableid])) {
        // We work on a copy as objects are passed by handle: changing the original record would remove
        // its primarykey and a later call for the same analysable would not find the record id.
        $obj = clone $processedanalysables[$analysableid];

        // The records were fetched with the real id aliased as primarykey, restore it.
        $obj->id = $obj->primarykey;
        unset($obj->primarykey);

        $obj->timeanalysed = time();
        $DB->update_record('analytics_used_analysables', $obj);

    } else {

        $obj = new \stdClass();
        $obj->modelid = $this->modelid;
        $obj->action = ($includetarget) ? 'training' : 'prediction';
        $obj->analysableid = $analysableid;
        $obj->timeanalysed = time();

        $DB->insert_record('analytics_used_analysables', $obj);
    }
}
369389c9 822}