2 // This file is part of Moodle - http://moodle.org/
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
/**
 * Unit tests for evaluation, training and prediction.
 *
 * @package   core_analytics
 * @copyright 2017 David Monllaó {@link http://www.davidmonllao.com}
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
25 defined('MOODLE_INTERNAL') || die();
27 require_once(__DIR__ . '/fixtures/test_indicator_max.php');
28 require_once(__DIR__ . '/fixtures/test_indicator_min.php');
29 require_once(__DIR__ . '/fixtures/test_indicator_fullname.php');
30 require_once(__DIR__ . '/fixtures/test_indicator_random.php');
31 require_once(__DIR__ . '/fixtures/test_target_shortname.php');
/**
 * Unit tests for evaluation, training and prediction.
 *
 * @package   core_analytics
 * @copyright 2017 David Monllaó {@link http://www.davidmonllao.com}
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
40 class core_analytics_prediction_testcase extends advanced_testcase {
/**
 * Trains a model on generated courses and checks the predictions it returns for new courses.
 *
 * @dataProvider provider_ml_training_and_prediction
 * @param string $timesplittingid Time splitting method the model is enabled with.
 * @param int $npredictedranges Number of records expected in analytics_predict_ranges.
 * @param string $predictionsprocessorclass Class name of the prediction processor to use.
 * @return void
 */
public function test_ml_training_and_prediction($timesplittingid, $npredictedranges, $predictionsprocessorclass) {
    global $DB;

    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    // Number of courses generated for each shortname prefix ('a...' and 'b...').
    // NOTE(review): value reconstructed; confirm against the upstream test.
    $ncourses = 10;

    $this->resetAfterTest(true);

    // Generate training data.
    $params = [
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    ];
    foreach (['a', 'b'] as $prefix) {
        for ($i = 0; $i < $ncourses; $i++) {
            $name = $prefix . random_string(10);
            $courseparams = ['shortname' => $name, 'fullname' => $name] + $params;
            $this->getDataGenerator()->create_course($courseparams);
        }
    }

    // We repeat the test for all prediction processors.
    $predictionsprocessor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);
    if ($predictionsprocessor->is_ready() !== true) {
        $this->markTestSkipped('Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready.');
    }

    set_config('predictionsprocessor', $predictionsprocessorclass, 'analytics');

    $model = $this->add_perfect_model();
    $model->enable($timesplittingid);

    // Record filters shared by the assertions below.
    $bymodel = ['modelid' => $model->get_id()];
    $trainedfiles = $bymodel + ['action' => 'trained'];
    $predictedfiles = $bymodel + ['action' => 'predicted'];

    // No samples trained yet.
    $this->assertEquals(0, $DB->count_records('analytics_train_samples', $bymodel));

    $model->train();
    $this->assertEquals(1, $model->get_model_obj()->enabled);
    $this->assertEquals(1, $model->get_model_obj()->trained);

    // 1 training file was created.
    $trainedsamples = $DB->get_records('analytics_train_samples', $bymodel);
    $this->assertCount(1, $trainedsamples);
    $samples = json_decode(reset($trainedsamples)->sampleids, true);
    $this->assertCount($ncourses * 2, $samples);
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $trainedfiles));

    // Now we create 2 hidden courses (they should not be used for training by the target).
    $courseparams = $params + ['shortname' => 'aaaaaa', 'fullname' => 'aaaaaa', 'visible' => 0];
    $course1 = $this->getDataGenerator()->create_course($courseparams);
    $courseparams = $params + ['shortname' => 'bbbbbb', 'fullname' => 'bbbbbb', 'visible' => 0];
    $course2 = $this->getDataGenerator()->create_course($courseparams);

    // No more files should be created as the 2 new courses should be skipped by the target (not ready for training).
    $model->train();
    $trainedsamples = $DB->get_records('analytics_train_samples', $bymodel);
    $this->assertCount(1, $trainedsamples);
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $trainedfiles));

    // They will not be skipped for prediction though.
    $result = $model->predict();

    // $course1 predictions should be 1 == 'a', $course2 predictions should be 0 == 'b'.
    $correct = [$course1->id => 1, $course2->id => 0];
    foreach ($result->predictions as $uniquesampleid => $predictiondata) {
        // The range index is not important here, both ranges prediction will be the same.
        list($sampleid) = $model->get_time_splitting()->infer_sample_info($uniquesampleid);
        $this->assertEquals($correct[$sampleid], $predictiondata->prediction);
    }

    // $npredictedranges ranges will be predicted.
    $predictedranges = $DB->get_records('analytics_predict_ranges', $bymodel);
    $this->assertCount($npredictedranges, $predictedranges);
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $predictedfiles));
    // 2 predictions for each range.
    $this->assertEquals(2 * $npredictedranges, $DB->count_records('analytics_predictions', $bymodel));

    // No new generated files nor records as there are no new courses available.
    $model->predict();
    $predictedranges = $DB->get_records('analytics_predict_ranges', $bymodel);
    $this->assertCount($npredictedranges, $predictedranges);
    $this->assertEquals(1, $DB->count_records('analytics_used_files', $predictedfiles));
    $this->assertEquals(2 * $npredictedranges, $DB->count_records('analytics_predictions', $bymodel));
}
/**
 * Data provider for test_ml_training_and_prediction.
 *
 * Each case holds a time splitting class name and the number of ranges expected to be
 * predicted with it; add_prediction_processors() then expands the cases per processor.
 *
 * @return array
 */
public function provider_ml_training_and_prediction() {
    $cases = [
        'no_splitting' => ['\core_analytics\local\time_splitting\no_splitting', 1],
        'quarters' => ['\core_analytics\local\time_splitting\quarters', 4],
    ];

    // We need to test all system prediction processors.
    return $this->add_prediction_processors($cases);
}
/**
 * Basic test to check that prediction processors work as expected.
 *
 * @dataProvider provider_ml_test_evaluation
 * @param string $modelquality Either 'perfect' or 'random'; selects the model to evaluate.
 * @param int $ncourses Number of courses generated for each shortname prefix ('a...' and 'b...').
 * @param array $expected Expected status code, keyed by time splitting class name.
 * @param string $predictionsprocessorclass Class name of the prediction processor to use.
 * @return void
 * @throws \coding_exception When $modelquality is neither 'perfect' nor 'random'.
 */
public function test_ml_evaluation($modelquality, $ncourses, $expected, $predictionsprocessorclass) {
    $this->resetAfterTest(true);

    set_config('enabled_stores', 'logstore_standard', 'tool_log');

    $sometimesplittings = '\core_analytics\local\time_splitting\weekly,' .
        '\core_analytics\local\time_splitting\single_range,' .
        '\core_analytics\local\time_splitting\quarters';
    set_config('timesplittings', $sometimesplittings, 'analytics');

    if ($modelquality === 'perfect') {
        $model = $this->add_perfect_model();
    } else if ($modelquality === 'random') {
        $model = $this->add_random_model();
    } else {
        throw new \coding_exception('Only perfect and random accepted as $modelquality values');
    }

    // Generate training data.
    $params = [
        'startdate' => mktime(0, 0, 0, 10, 24, 2015),
        'enddate' => mktime(0, 0, 0, 2, 24, 2016),
    ];
    foreach (['a', 'b'] as $prefix) {
        for ($i = 0; $i < $ncourses; $i++) {
            $name = $prefix . random_string(10);
            // Keep the shared $params untouched; only the per-course copy carries the names.
            $courseparams = ['shortname' => $name, 'fullname' => $name] + $params;
            $this->getDataGenerator()->create_course($courseparams);
        }
    }

    // We repeat the test for all prediction processors.
    $predictionsprocessor = \core_analytics\manager::get_predictions_processor($predictionsprocessorclass, false);
    if ($predictionsprocessor->is_ready() !== true) {
        $this->markTestSkipped('Skipping ' . $predictionsprocessorclass . ' as the predictor is not ready.');
    }

    set_config('predictionsprocessor', $predictionsprocessorclass, 'analytics');

    $results = $model->evaluate();

    // We check that the returned status includes at least the $expectedcode bits.
    foreach ($results as $timesplitting => $result) {
        $expectedcode = $expected[$timesplitting];
        $message = 'The returned status code ' . $result->status . ' should include ' . $expectedcode;
        $this->assertEquals($expectedcode, $result->status & $expectedcode, $message);
    }
}
/**
 * Data provider for test_ml_evaluation.
 *
 * Each case supplies, in this order, the model quality, the number of courses generated per
 * shortname prefix and the expected status code for every time splitting method;
 * add_prediction_processors() then expands the cases per processor.
 *
 * @return array
 */
public function provider_ml_test_evaluation() {
    $cases = [
        'bad-and-no-enough-data' => [
            'modelquality' => 'random',
            // NOTE(review): value reconstructed; confirm against the upstream test.
            'ncourses' => 10,
            'expectedresults' => [
                // The course duration is too much to be processed by in weekly basis.
                '\core_analytics\local\time_splitting\weekly' => \core_analytics\model::NO_DATASET,
                // Too few samples to process anything.
                '\core_analytics\local\time_splitting\single_range' =>
                    \core_analytics\model::EVALUATE_NOT_ENOUGH_DATA + \core_analytics\model::EVALUATE_LOW_SCORE,
                '\core_analytics\local\time_splitting\quarters' =>
                    \core_analytics\model::EVALUATE_NOT_ENOUGH_DATA + \core_analytics\model::EVALUATE_LOW_SCORE,
            ],
        ],
        // NOTE(review): case name and ncourses reconstructed; confirm against the upstream test.
        'bad' => [
            'modelquality' => 'random',
            'ncourses' => 50,
            'expectedresults' => [
                // The course duration is too much to be processed by in weekly basis.
                '\core_analytics\local\time_splitting\weekly' => \core_analytics\model::NO_DATASET,
                '\core_analytics\local\time_splitting\single_range' => \core_analytics\model::EVALUATE_LOW_SCORE,
                '\core_analytics\local\time_splitting\quarters' => \core_analytics\model::EVALUATE_LOW_SCORE,
            ],
        ],
        // NOTE(review): case name and ncourses reconstructed; confirm against the upstream test.
        'good' => [
            'modelquality' => 'perfect',
            'ncourses' => 50,
            'expectedresults' => [
                // The course duration is too much to be processed by in weekly basis.
                '\core_analytics\local\time_splitting\weekly' => \core_analytics\model::NO_DATASET,
                '\core_analytics\local\time_splitting\single_range' => \core_analytics\model::OK,
                '\core_analytics\local\time_splitting\quarters' => \core_analytics\model::OK,
            ],
        ],
    ];

    return $this->add_prediction_processors($cases);
}
/**
 * Creates a model for test_target_shortname whose indicators include test_indicator_random.
 *
 * @return \core_analytics\model A fresh instance built from the created model's id.
 */
protected function add_random_model() {
    $target = \core_analytics\manager::get_target('test_target_shortname');

    // Turn each indicator class name into its indicator instance.
    $indicators = array_map(
        function($classname) {
            return \core_analytics\manager::get_indicator($classname);
        },
        ['test_indicator_max', 'test_indicator_min', 'test_indicator_random']
    );

    $created = \core_analytics\model::create($target, $indicators);

    // Re-instantiate from the id so db defaults are loaded as well.
    return new \core_analytics\model($created->get_id());
}
/**
 * Creates a model for test_target_shortname whose indicators include test_indicator_fullname.
 *
 * @return \core_analytics\model A fresh instance built from the created model's id.
 */
protected function add_perfect_model() {
    $target = \core_analytics\manager::get_target('test_target_shortname');

    // Turn each indicator class name into its indicator instance.
    $indicators = array_map(
        function($classname) {
            return \core_analytics\manager::get_indicator($classname);
        },
        ['test_indicator_max', 'test_indicator_min', 'test_indicator_fullname']
    );

    $created = \core_analytics\model::create($target, $indicators);

    // Re-instantiate from the id so db defaults are loaded as well.
    return new \core_analytics\model($created->get_id());
}
275 protected function add_prediction_processors($cases) {
279 // We need to test all system prediction processors.
280 $predictionprocessors = \core_analytics\manager::get_all_prediction_processors();
281 foreach ($predictionprocessors as $classfullname => $unused) {
282 foreach ($cases as $key => $case) {
283 $newkey = $key . '-' . $classfullname;
284 $return[$newkey] = $case + array('predictionsprocessorclass' => $classfullname);