MDL-57791 analytics: Compatibility with oracle
[moodle.git] / analytics / tests / prediction_test.php
CommitLineData
ff656bae
DM
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
18 * Unit tests for evaluation, training and prediction.
19 *
20 * @package core_analytics
21 * @copyright 2017 David Monllaó {@link http://www.davidmonllao.com}
22 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
23 */
24
25defined('MOODLE_INTERNAL') || die();
26
27require_once(__DIR__ . '/fixtures/test_indicator_max.php');
28require_once(__DIR__ . '/fixtures/test_indicator_min.php');
29require_once(__DIR__ . '/fixtures/test_indicator_fullname.php');
30require_once(__DIR__ . '/fixtures/test_indicator_random.php');
31require_once(__DIR__ . '/fixtures/test_target_shortname.php');
b3d68794 32require_once(__DIR__ . '/fixtures/test_static_target_shortname.php');
ff656bae
DM
33
34/**
35 * Unit tests for evaluation, training and prediction.
36 *
37 * @package core_analytics
38 * @copyright 2017 David Monllaó {@link http://www.davidmonllao.com}
39 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
40 */
41class core_analytics_prediction_testcase extends advanced_testcase {
42
b3d68794
DM
/**
 * Checks that a static model returns predictions without going through any training.
 *
 * @return void
 */
public function test_static_prediction() {
    global $DB;

    $this->resetAfterTest(true);
    $this->setAdminUser();

    $model = $this->add_perfect_model('test_static_target_shortname');
    $model->enable('\core\analytics\time_splitting\no_splitting');
    $this->assertEquals(1, $model->is_enabled());
    $this->assertEquals(1, $model->is_trained());

    // Static models are not trained: train() must leave no samples and no 'trained' files behind.
    $model->train();
    $modelfilter = array('modelid' => $model->get_id());
    $this->assertEmpty($DB->get_records('analytics_train_samples', $modelfilter));
    $trainedfilter = array('modelid' => $model->get_id(), 'action' => 'trained');
    $this->assertEmpty($DB->count_records('analytics_used_files', $trainedfilter));

    // Create the 2 hidden courses we want predictions for (only hidden courses are getting predictions).
    $generator = $this->getDataGenerator();
    $course1 = $generator->create_course(array('shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0));
    $course2 = $generator->create_course(array('shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0));

    $result = $model->predict();

    // Expected values: 1 for $course1 (name starting with 'a') and 0 for $course2 (name starting with 'b').
    $expectedbycourse = array($course1->id => 1, $course2->id => 0);
    $timesplitting = null;
    foreach ($result->predictions as $uniquesampleid => $predictiondata) {
        // The unique sample id carries both the sample id and the range index; only the former matters here.
        $timesplitting = $model->get_time_splitting();
        list($sampleid, $unusedrangeindex) = $timesplitting->infer_sample_info($uniquesampleid);

        $this->assertEquals($expectedbycourse[$sampleid], $predictiondata->prediction);
    }

    $predictedfilter = array('modelid' => $model->get_id(), 'action' => 'predicted');

    // After the first run: 2 predicted ranges, 1 'predicted' file and 2 prediction records.
    $this->assertCount(2, $DB->get_records('analytics_predict_ranges', $modelfilter));
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $predictedfilter));
    $this->assertEquals(2, $DB->count_records('analytics_predictions', $modelfilter));

    // Predicting again must not add files nor records as no new courses were created in between.
    $model->predict();
    $this->assertCount(2, $DB->get_records('analytics_predict_ranges', $modelfilter));
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $predictedfilter));
    $this->assertEquals(2, $DB->count_records('analytics_predictions', $modelfilter));
}
101
/**
 * Trains a model with a machine learning backend and checks the predictions it returns afterwards.
 *
 * The test is skipped, before any data is generated, when the predictions processor is not ready.
 *
 * @dataProvider provider_ml_training_and_prediction
 * @param string $timesplittingid Time splitting method the model gets enabled with.
 * @param int $npredictedranges Number of analytics_predict_ranges records expected after predicting.
 * @param string $predictionsprocessorclass Predictions processor class to run the test with.
 * @return void
 */
public function test_ml_training_and_prediction($timesplittingid, $npredictedranges, $predictionsprocessorclass) {
    global $DB;

    $this->resetAfterTest(true);
    $this->setAdminUser();

    // Check the processor first: there is no point in generating courses for a test that will be skipped.
    $predictionsprocessor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);
    if ($predictionsprocessor->is_ready() !== true) {
        $this->markTestSkipped('Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready.');
    }

    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    $ncourses = 10;

    // Generate training data: $ncourses courses named 'a...' followed by $ncourses courses named 'b...'.
    $params = array(
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    );
    foreach (array('a', 'b') as $prefix) {
        for ($i = 0; $i < $ncourses; $i++) {
            $name = $prefix . random_string(10);
            $courseparams = array('shortname' => $name, 'fullname' => $name) + $params;
            $this->getDataGenerator()->create_course($courseparams);
        }
    }

    set_config('predictionsprocessor', $predictionsprocessorclass, 'analytics');

    $model = $this->add_perfect_model();
    $model->enable($timesplittingid);

    $modelfilter = array('modelid' => $model->get_id());
    $trainedfilter = array('modelid' => $model->get_id(), 'action' => 'trained');
    $predictedfilter = array('modelid' => $model->get_id(), 'action' => 'predicted');

    // No samples trained yet.
    $this->assertEquals(0, $DB->count_records('analytics_train_samples', $modelfilter));

    $model->train();
    $this->assertEquals(1, $model->is_enabled());
    $this->assertEquals(1, $model->is_trained());

    // 1 training record was created and it lists all the courses generated above.
    $trainedsamples = $DB->get_records('analytics_train_samples', $modelfilter);
    $this->assertCount(1, $trainedsamples);
    $samples = json_decode(reset($trainedsamples)->sampleids, true);
    $this->assertCount($ncourses * 2, $samples);
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $trainedfilter));

    // Two hidden courses created after the training; every prediction returned below must belong to one of them.
    $courseparams = $params + array('shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0);
    $course1 = $this->getDataGenerator()->create_course($courseparams);
    $courseparams = $params + array('shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0);
    $course2 = $this->getDataGenerator()->create_course($courseparams);

    $result = $model->predict();

    // Var $course1 predictions should be 1 == 'a', $course2 predictions should be 0 == 'b'.
    $correct = array($course1->id => 1, $course2->id => 0);
    foreach ($result->predictions as $uniquesampleid => $predictiondata) {
        // The range index is not important here, the prediction is expected to be the same for every range.
        list($sampleid) = $model->get_time_splitting()->infer_sample_info($uniquesampleid);

        $this->assertEquals($correct[$sampleid], $predictiondata->prediction);
    }

    // As many predicted ranges as the data provider expects for this time splitting method.
    $predictedranges = $DB->get_records('analytics_predict_ranges', $modelfilter);
    $this->assertCount($npredictedranges, $predictedranges);
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $predictedfilter));
    // 2 predictions for each range.
    $this->assertEquals(2 * $npredictedranges, $DB->count_records('analytics_predictions', $modelfilter));

    // No new generated files nor records as there are no new courses available.
    $model->predict();
    $predictedranges = $DB->get_records('analytics_predict_ranges', $modelfilter);
    $this->assertCount($npredictedranges, $predictedranges);
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $predictedfilter));
    $this->assertEquals(2 * $npredictedranges, $DB->count_records('analytics_predictions', $modelfilter));

    // Disable the log store again and reload the log manager so it picks the change up.
    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
200
413f19bc
DM
/**
 * Data provider for test_ml_training_and_prediction.
 *
 * Every base case is a time splitting method class name followed by the number of predicted
 * ranges expected for it. The cases are then multiplied by add_prediction_processors(), which
 * appends a predictions processor class to each of them.
 *
 * @return array
 */
public function provider_ml_training_and_prediction() {
    $nosplitting = array('\core\analytics\time_splitting\no_splitting', 1);
    $quarters = array('\core\analytics\time_splitting\quarters', 4);

    // One copy of each base case per system predictions processor.
    return $this->add_prediction_processors(array(
        'no_splitting' => $nosplitting,
        'quarters' => $quarters,
    ));
}
215
216
/**
 * Basic test to check that prediction processors work as expected.
 *
 * Evaluates a model against generated courses and checks the status code returned for each
 * time splitting method. The test is skipped, before any data is generated, when the
 * predictions processor is not ready.
 *
 * @dataProvider provider_ml_test_evaluation
 * @param string $modelquality Either 'perfect' or 'random'.
 * @param int $ncourses Number of courses to generate for each of the two name prefixes ('a' and 'b').
 * @param array $expected Expected status code indexed by time splitting method class name.
 * @param string $predictionsprocessorclass Predictions processor class to run the test with.
 * @return void
 */
public function test_ml_evaluation($modelquality, $ncourses, $expected, $predictionsprocessorclass) {
    $this->resetAfterTest(true);
    $this->setAdminUser();

    // Check the processor first: there is no point in generating courses for a test that will be skipped.
    $predictionsprocessor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);
    if ($predictionsprocessor->is_ready() !== true) {
        $this->markTestSkipped('Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready.');
    }

    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    // Restrict the evaluation to the time splitting methods the data provider has expectations for.
    $sometimesplittings = '\core\analytics\time_splitting\weekly,' .
        '\core\analytics\time_splitting\single_range,' .
        '\core\analytics\time_splitting\quarters';
    set_config('timesplittings', $sometimesplittings, 'analytics');

    if ($modelquality === 'perfect') {
        $model = $this->add_perfect_model();
    } else if ($modelquality === 'random') {
        $model = $this->add_random_model();
    } else {
        throw new \coding_exception('Only perfect and random accepted as $modelquality values');
    }

    // Generate training data: $ncourses courses named 'a...' followed by $ncourses courses named 'b...'.
    $params = array(
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    );
    foreach (array('a', 'b') as $prefix) {
        for ($i = 0; $i < $ncourses; $i++) {
            $name = $prefix . random_string(10);
            // Keep $params untouched so every course starts from the same base values.
            $courseparams = array('shortname' => $name, 'fullname' => $name) + $params;
            $this->getDataGenerator()->create_course($courseparams);
        }
    }

    set_config('predictionsprocessor', $predictionsprocessorclass, 'analytics');

    $results = $model->evaluate();

    // We check that the returned status includes, at least, the code expected for each time splitting method.
    // NOTE(review): the status is checked as a bitmask, so if an expected constant evaluates to 0 this
    // assertion passes for any returned status - confirm against the \core_analytics\model constants.
    foreach ($results as $timesplitting => $result) {
        $message = 'The returned status code ' . $result->status . ' should include ' . $expected[$timesplitting];
        $filtered = $result->status & $expected[$timesplitting];
        $this->assertEquals($expected[$timesplitting], $filtered, $message);
    }

    // Disable the log store again and reload the log manager so it picks the change up.
    set_config('enabled_stores', '', 'tool_log');
    get_log_manager(true);
}
281
413f19bc
DM
/**
 * Data provider for test_ml_evaluation.
 *
 * Provides a 'bad' case (random model) and a 'good' case (perfect model), each with the status
 * code expected for every time splitting method. The cases are then multiplied by
 * add_prediction_processors(), which appends a predictions processor class to each of them.
 *
 * @return array
 */
public function provider_ml_test_evaluation() {

    $weekly = '\core\analytics\time_splitting\weekly';
    $singlerange = '\core\analytics\time_splitting\single_range';
    $quarters = '\core\analytics\time_splitting\quarters';

    // In both cases the course duration is too long to be processed on a weekly basis.
    $randomexpectations = array(
        $weekly => \core_analytics\model::NO_DATASET,
        $singlerange => \core_analytics\model::EVALUATE_LOW_SCORE,
        $quarters => \core_analytics\model::EVALUATE_LOW_SCORE,
    );
    $perfectexpectations = array(
        $weekly => \core_analytics\model::NO_DATASET,
        $singlerange => \core_analytics\model::OK,
        $quarters => \core_analytics\model::OK,
    );

    $basecases = array(
        'bad' => array(
            'modelquality' => 'random',
            'ncourses' => 50,
            'expectedresults' => $randomexpectations,
        ),
        'good' => array(
            'modelquality' => 'perfect',
            'ncourses' => 50,
            'expectedresults' => $perfectexpectations,
        ),
    );

    return $this->add_prediction_processors($basecases);
}
313
413f19bc
DM
/**
 * Creates a model for test_target_shortname that includes the test_indicator_random indicator.
 *
 * @return \core_analytics\model
 */
protected function add_random_model() {

    $target = \core_analytics\manager::get_target('test_target_shortname');

    // Swap each indicator class name for the instance the analytics manager returns for it.
    $indicators = array();
    foreach (array('test_indicator_max', 'test_indicator_min', 'test_indicator_random') as $classname) {
        $indicators[] = \core_analytics\manager::get_indicator($classname);
    }

    $created = \core_analytics\model::create($target, $indicators);

    // Return a new instance built from the id so db defaults are loaded as well.
    return new \core_analytics\model($created->get_id());
}
332
413f19bc
DM
/**
 * Creates a model for the provided target that includes the test_indicator_fullname indicator.
 *
 * @param string $targetclass Target class name, test_target_shortname by default.
 * @return \core_analytics\model
 */
protected function add_perfect_model($targetclass = 'test_target_shortname') {

    $target = \core_analytics\manager::get_target($targetclass);

    // Swap each indicator class name for the instance the analytics manager returns for it.
    $indicators = array();
    foreach (array('test_indicator_max', 'test_indicator_min', 'test_indicator_fullname') as $classname) {
        $indicators[] = \core_analytics\manager::get_indicator($classname);
    }

    $created = \core_analytics\model::create($target, $indicators);

    // Return a new instance built from the id so db defaults are loaded as well.
    return new \core_analytics\model($created->get_id());
}
352
413f19bc
DM
/**
 * Multiplies the provided cases by all the system prediction processors.
 *
 * Each returned case is a copy of the original one plus a 'predictionsprocessorclass' element,
 * and it is keyed by the original case name and the processor class name joined by a dash.
 *
 * @param array $cases Base cases indexed by case name.
 * @return array
 */
protected function add_prediction_processors($cases) {

    $expanded = array();

    // Only the processor class names (the array keys) are needed here.
    $processors = \core_analytics\manager::get_all_prediction_processors();
    foreach ($processors as $processorclass => $notused) {
        $processorparam = array('predictionsprocessorclass' => $processorclass);

        foreach ($cases as $casename => $casedata) {
            $expanded[$casename . '-' . $processorclass] = $casedata + $processorparam;
        }
    }

    return $expanded;
}
374}