MDL-59030 analytics: Social breadth accepting more than level 2
[moodle.git] / analytics / tests / prediction_test.php
CommitLineData
ff656bae
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
18 * Unit tests for evaluation, training and prediction.
19 *
20 * @package core_analytics
21 * @copyright 2017 David Monllaó {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25defined('MOODLE_INTERNAL') || die();
26
27require_once(__DIR__ . '/fixtures/test_indicator_max.php');
28require_once(__DIR__ . '/fixtures/test_indicator_min.php');
29require_once(__DIR__ . '/fixtures/test_indicator_fullname.php');
30require_once(__DIR__ . '/fixtures/test_indicator_random.php');
31require_once(__DIR__ . '/fixtures/test_target_shortname.php');
b3d68794 32require_once(__DIR__ . '/fixtures/test_static_target_shortname.php');
ff656bae 33
00da1e60
DM
34require_once(__DIR__ . '/../../course/lib.php');
35
ff656bae
DM
36/**
37 * Unit tests for evaluation, training and prediction.
38 *
39 * @package core_analytics
40 * @copyright 2017 David Monllaó {@link http://www.davidmonllao.com}
41 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
42 */
43class core_analytics_prediction_testcase extends advanced_testcase {
44
b3d68794
DM
/**
 * Checks that a model built on the static target predicts without being trained.
 *
 * The test enables the model, confirms that train() stores nothing, creates two
 * hidden courses and verifies both the returned predictions and the records
 * written by predict(), including that a second predict() run adds nothing.
 *
 * @return void
 */
public function test_static_prediction() {
    global $DB;

    $this->resetAfterTest(true);
    $this->setAdminuser();

    $model = $this->add_perfect_model('test_static_target_shortname');
    $model->enable('\core\analytics\time_splitting\no_splitting');
    $this->assertEquals(1, $model->is_enabled());
    $this->assertEquals(1, $model->is_trained());

    // Record filters shared by the database checks below.
    $bymodel = array('modelid' => $model->get_id());
    $trainedfiles = $bymodel + array('action' => 'trained');
    $predictedfiles = $bymodel + array('action' => 'predicted');

    // No training for static models: neither samples nor files are recorded.
    $model->train();
    $this->assertEmpty($DB->get_records('analytics_train_samples', $bymodel));
    $this->assertEmpty($DB->count_records('analytics_used_files', $trainedfiles));

    // Now we create 2 hidden courses (only hidden courses are getting predictions).
    $generator = $this->getDataGenerator();
    $course1 = $generator->create_course(array('shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0));
    $course2 = $generator->create_course(array('shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0));

    $result = $model->predict();

    // Var $course1 predictions should be 1 == 'a', $course2 predictions should be 0 == 'b'.
    $expectedbycourse = array($course1->id => 1, $course2->id => 0);
    foreach ($result->predictions as $uniquesampleid => $predictiondata) {
        // Only the sample id matters here; the range index is not checked.
        list($sampleid, $ignoredrangeindex) = $model->get_time_splitting()->infer_sample_info($uniquesampleid);
        $this->assertEquals($expectedbycourse[$sampleid], $predictiondata->prediction);
    }

    // Two predicted sample records (original note: 1 range for each analysable),
    // a single file used for prediction and two stored predictions.
    $this->assertCount(2, $DB->get_records('analytics_predict_samples', $bymodel));
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $predictedfiles));
    $this->assertEquals(2, $DB->count_records('analytics_predictions', $bymodel));

    // No new generated files nor records as there are no new courses available.
    $model->predict();
    $this->assertCount(2, $DB->get_records('analytics_predict_samples', $bymodel));
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $predictedfiles));
    $this->assertEquals(2, $DB->count_records('analytics_predictions', $bymodel));
}
103
/**
 * Trains a model and checks the predictions it returns and the records stored along the way.
 *
 * The scenario is: generate visible courses whose names start with 'a' or 'b', train,
 * create hidden courses and predict, predict again without new data, add more hidden
 * courses and predict, and finally train again and hide one of the newly trained courses.
 *
 * @dataProvider provider_ml_training_and_prediction
 * @param string $timesplittingid Time splitting class the model is enabled with.
 * @param int $predictedrangeindex Range index expected in the stored analytics_predict_samples record.
 * @param int $nranges Number of time ranges used to work out the expected indicator calculations.
 * @param string $predictionsprocessorclass Predictions processor class under test.
 * @return void
 */
public function test_ml_training_and_prediction($timesplittingid, $predictedrangeindex, $nranges, $predictionsprocessorclass) {
    global $DB;

    $this->resetAfterTest(true);
    $this->setAdminuser();
    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    // We repeat the test for all prediction processors. The readiness check runs before
    // any course is generated so an unavailable processor skips without building fixtures.
    $predictionsprocessor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);
    if ($predictionsprocessor->is_ready() !== true) {
        $this->markTestSkipped('Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready.');
    }

    // Number of courses generated for each name prefix; two prefixes are used below.
    $ncourses = 10;
    $ntrainingcourses = $ncourses * 2;

    // Generate training data.
    $params = array(
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    );
    foreach (array('a', 'b') as $prefix) {
        for ($i = 0; $i < $ncourses; $i++) {
            $name = $prefix . random_string(10);
            $courseparams = array('shortname' => $name, 'fullname' => $name) + $params;
            $this->getDataGenerator()->create_course($courseparams);
        }
    }

    set_config('predictionsprocessor', $predictionsprocessorclass, 'analytics');

    $model = $this->add_perfect_model();
    $model->enable($timesplittingid);

    // Record filters shared by the database checks below.
    $bymodel = array('modelid' => $model->get_id());
    $trainedfiles = $bymodel + array('action' => 'trained');
    $predictedfiles = $bymodel + array('action' => 'predicted');

    // No samples trained yet.
    $this->assertEquals(0, $DB->count_records('analytics_train_samples', $bymodel));

    $model->train();
    $this->assertEquals(1, $model->is_enabled());
    $this->assertEquals(1, $model->is_trained());

    // All training courses * the 3 model indicators * the number of time ranges of this time splitting method.
    $indicatorcalc = $ntrainingcourses * 3 * $nranges;
    $this->assertEquals($indicatorcalc, $DB->count_records('analytics_indicator_calc'));

    // 1 training file was created.
    $trainedsamples = $DB->get_records('analytics_train_samples', $bymodel);
    $this->assertCount(1, $trainedsamples);
    $samples = json_decode(reset($trainedsamples)->sampleids, true);
    $this->assertCount($ntrainingcourses, $samples);
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $trainedfiles));

    // Hidden courses: these are the ones expected to receive predictions.
    $courseparams = $params + array('shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0);
    $course1 = $this->getDataGenerator()->create_course($courseparams);
    $courseparams = $params + array('shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0);
    $course2 = $this->getDataGenerator()->create_course($courseparams);

    // They will not be skipped for prediction though.
    $result = $model->predict();

    // Var $course1 predictions should be 1 == 'a', $course2 predictions should be 0 == 'b'.
    $correct = array($course1->id => 1, $course2->id => 0);
    foreach ($result->predictions as $uniquesampleid => $predictiondata) {
        list($sampleid, $rangeindex) = $model->get_time_splitting()->infer_sample_info($uniquesampleid);

        // The range index is not important here, both ranges prediction will be the same.
        $this->assertEquals($correct[$sampleid], $predictiondata->prediction);
    }

    // 1 range will be predicted.
    $predictedranges = $DB->get_records('analytics_predict_samples', $bymodel);
    $this->assertCount(1, $predictedranges);
    foreach ($predictedranges as $predictedrange) {
        $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
        $sampleids = json_decode($predictedrange->sampleids, true);
        $this->assertCount(2, $sampleids);
        $this->assertContains($course1->id, $sampleids);
        $this->assertContains($course2->id, $sampleids);
    }
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $predictedfiles));
    // 2 predictions.
    $this->assertEquals(2, $DB->count_records('analytics_predictions', $bymodel));

    // No new generated files nor records as there are no new courses available.
    $model->predict();
    $predictedranges = $DB->get_records('analytics_predict_samples', $bymodel);
    $this->assertCount(1, $predictedranges);
    foreach ($predictedranges as $predictedrange) {
        $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
    }
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $predictedfiles));
    $this->assertEquals(2, $DB->count_records('analytics_predictions', $bymodel));

    // New samples that can be used for prediction.
    $courseparams = $params + array('shortname' => 'cccccc', 'fullname' => 'cccccc', 'visible' => 0);
    $course3 = $this->getDataGenerator()->create_course($courseparams);
    $courseparams = $params + array('shortname' => 'dddddd', 'fullname' => 'dddddd', 'visible' => 0);
    $course4 = $this->getDataGenerator()->create_course($courseparams);

    $model->predict();

    // Still a single predicted range record, now listing the four hidden courses.
    $predictedranges = $DB->get_records('analytics_predict_samples', $bymodel);
    $this->assertCount(1, $predictedranges);
    foreach ($predictedranges as $predictedrange) {
        $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
        $sampleids = json_decode($predictedrange->sampleids, true);
        $this->assertCount(4, $sampleids);
        $this->assertContains($course1->id, $sampleids);
        $this->assertContains($course2->id, $sampleids);
        $this->assertContains($course3->id, $sampleids);
        $this->assertContains($course4->id, $sampleids);
    }
    $this->assertEquals(2, $DB->count_records('analytics_used_files', $predictedfiles));
    $this->assertEquals(4, $DB->count_records('analytics_predictions', $bymodel));

    // New visible courses (for training); the second one uses the generator defaults.
    $course5 = $this->getDataGenerator()->create_course(array('shortname' => 'aaa', 'fullname' => 'aa'));
    $this->getDataGenerator()->create_course();
    $model->train();
    $this->assertEquals(2, $DB->count_records('analytics_used_files', $trainedfiles));

    // Update one of the courses to not visible, it should be used again for prediction.
    // NOTE(review): the counts asserted below are unchanged from the previous predict() run - confirm this is intended.
    $course5->visible = 0;
    update_course($course5);

    $model->predict();
    $this->assertEquals(1, $DB->count_records('analytics_predict_samples', $bymodel));
    $this->assertEquals(2, $DB->count_records('analytics_used_files', $predictedfiles));
    $this->assertEquals(4, $DB->count_records('analytics_predictions', $bymodel));

    // Disable the log store again and refresh the log manager.
    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
260
413f19bc
DM
/**
 * Data provider for test_ml_training_and_prediction.
 *
 * Each base case lists, in order: the time splitting class, the expected predicted
 * range index and the number of ranges. add_prediction_processors() then appends
 * the predictions processor class to every case.
 *
 * @return array
 */
public function provider_ml_training_and_prediction() {
    $basecases = [
        'no_splitting' => ['\core\analytics\time_splitting\no_splitting', 0, 1],
        'quarters' => ['\core\analytics\time_splitting\quarters', 3, 4],
    ];

    // We need to test all system prediction processors.
    $expandedcases = $this->add_prediction_processors($basecases);

    return $expandedcases;
}
275
276
/**
 * Basic test to check that prediction processors work as expected.
 *
 * Evaluates a 'perfect' or a 'random' model against generated courses and checks
 * the status code returned for each time splitting method.
 *
 * @dataProvider provider_ml_test_evaluation
 * @param string $modelquality Either 'perfect' or 'random'.
 * @param int $ncourses Number of courses generated for each of the two name prefixes.
 * @param array $expected Expected status code, keyed by time splitting class.
 * @param string $predictionsprocessorclass Predictions processor class under test.
 * @return void
 * @throws \coding_exception If $modelquality is neither 'perfect' nor 'random'.
 */
public function test_ml_evaluation($modelquality, $ncourses, $expected, $predictionsprocessorclass) {
    $this->resetAfterTest(true);
    $this->setAdminuser();
    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    $sometimesplittings = '\core\analytics\time_splitting\weekly,' .
        '\core\analytics\time_splitting\single_range,' .
        '\core\analytics\time_splitting\quarters';
    set_config('timesplittings', $sometimesplittings, 'analytics');

    if ($modelquality === 'perfect') {
        $model = $this->add_perfect_model();
    } else if ($modelquality === 'random') {
        $model = $this->add_random_model();
    } else {
        throw new \coding_exception('Only perfect and random accepted as $modelquality values');
    }

    // We repeat the test for all prediction processors. The readiness check runs before
    // the courses are generated so an unavailable processor skips without building fixtures.
    $predictionsprocessor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);
    if ($predictionsprocessor->is_ready() !== true) {
        $this->markTestSkipped('Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready.');
    }

    // Generate training data.
    $params = array(
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    );
    foreach (array('a', 'b') as $prefix) {
        for ($i = 0; $i < $ncourses; $i++) {
            $name = $prefix . random_string(10);
            // A separate per-course array keeps the shared $params untouched between iterations.
            $courseparams = array('shortname' => $name, 'fullname' => $name) + $params;
            $this->getDataGenerator()->create_course($courseparams);
        }
    }

    set_config('predictionsprocessor', $predictionsprocessorclass, 'analytics');

    $results = $model->evaluate();

    // We check that the returned status includes at least the code listed in $expected.
    // NOTE(review): if an expected code is 0 this bitmask check passes for any status -
    // confirm the values of the \core_analytics\model status constants.
    foreach ($results as $timesplitting => $result) {
        $message = 'The returned status code ' . $result->status . ' should include ' . $expected[$timesplitting];
        $filtered = $result->status & $expected[$timesplitting];
        $this->assertEquals($expected[$timesplitting], $filtered, $message);
    }

    // Disable the log store again and refresh the log manager.
    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
341
0690a271
DM
/**
 * Checks that calculate() does not call calculate_sample() when every sample
 * already has an existing calculation passed in.
 *
 * The only verification is the mock expectation that calculate_sample() is never called.
 *
 * @return void
 */
public function test_read_indicator_calculations() {
    $this->resetAfterTest(true);

    // Indicator mock whose calculate_sample() must never be invoked.
    $indicator = $this->getMockBuilder('test_indicator_max')
        ->setMethods(['calculate_sample'])
        ->getMock();
    $indicator->expects($this->never())->method('calculate_sample');

    // Both sample ids come with an existing calculation.
    $sampleids = [111 => 111, 222 => 222];
    $existingcalcs = [111 => 1, 222 => 0.5];

    $sampleorigin = 'whatever';
    $starttime = 123;
    $endtime = 321;

    // The returned pair is destructured but not inspected.
    list($calculated, $ignored) = $indicator->calculate($sampleids, $sampleorigin, $starttime, $endtime, $existingcalcs);
}
363
413f19bc
DM
/**
 * Data provider for test_ml_evaluation.
 *
 * Each base case lists, in order: the model quality, the number of courses per name
 * prefix and the expected status code for each time splitting class.
 * add_prediction_processors() then appends the predictions processor class to every case.
 *
 * @return array
 */
public function provider_ml_test_evaluation() {
    $weekly = '\core\analytics\time_splitting\weekly';
    $singlerange = '\core\analytics\time_splitting\single_range';
    $quarters = '\core\analytics\time_splitting\quarters';

    $bad = [
        'modelquality' => 'random',
        'ncourses' => 50,
        'expectedresults' => [
            // The course duration is too long to be processed on a weekly basis.
            $weekly => \core_analytics\model::NO_DATASET,
            $singlerange => \core_analytics\model::EVALUATE_LOW_SCORE,
            $quarters => \core_analytics\model::EVALUATE_LOW_SCORE,
        ],
    ];

    $good = [
        'modelquality' => 'perfect',
        'ncourses' => 50,
        'expectedresults' => [
            // The course duration is too long to be processed on a weekly basis.
            $weekly => \core_analytics\model::NO_DATASET,
            $singlerange => \core_analytics\model::OK,
            $quarters => \core_analytics\model::OK,
        ],
    ];

    return $this->add_prediction_processors(['bad' => $bad, 'good' => $good]);
}
395
413f19bc
DM
/**
 * Creates a model for test_target_shortname using the max, min and random test indicators.
 *
 * @return \core_analytics\model
 */
protected function add_random_model() {
    $target = \core_analytics\manager::get_target('test_target_shortname');

    // Turn each indicator class name into its indicator instance.
    $indicators = array_map(
        function($indicatorclass) {
            return \core_analytics\manager::get_indicator($indicatorclass);
        },
        ['test_indicator_max', 'test_indicator_min', 'test_indicator_random']
    );

    $created = \core_analytics\model::create($target, $indicators);

    // To load db defaults as well.
    return new \core_analytics\model($created->get_id());
}
414
413f19bc
DM
/**
 * Creates a model for the given target using the max, min and fullname test indicators.
 *
 * @param string $targetclass Target class name, test_target_shortname by default.
 * @return \core_analytics\model
 */
protected function add_perfect_model($targetclass = 'test_target_shortname') {
    $target = \core_analytics\manager::get_target($targetclass);

    // Turn each indicator class name into its indicator instance.
    $indicators = array_map(
        function($indicatorclass) {
            return \core_analytics\manager::get_indicator($indicatorclass);
        },
        ['test_indicator_max', 'test_indicator_min', 'test_indicator_fullname']
    );

    $created = \core_analytics\model::create($target, $indicators);

    // To load db defaults as well.
    return new \core_analytics\model($created->get_id());
}
434
413f19bc
DM
/**
 * Expands the provided cases so that each one is repeated for every prediction processor.
 *
 * The new case keys are '<original key>-<processor class>' and each case gets an extra
 * 'predictionsprocessorclass' element, unless the case already defines that key.
 *
 * @param array $cases Cases keyed by name.
 * @return array Expanded cases.
 */
protected function add_prediction_processors($cases) {
    $expanded = [];

    // We need to test all system prediction processors.
    $processorclasses = array_keys(\core_analytics\manager::get_all_prediction_processors());

    foreach ($processorclasses as $processorclass) {
        $extra = ['predictionsprocessorclass' => $processorclass];
        foreach ($cases as $casename => $casedata) {
            $expanded[$casename . '-' . $processorclass] = $casedata + $extra;
        }
    }

    return $expanded;
}
456}