// All ranges are used when we are calculating data for training.
$ranges = $timesplitting->get_all_ranges();
} else {
- // Only some ranges can be used for prediction (it depends on the time range where we are right now).
- $ranges = $this->get_prediction_ranges($timesplitting);
+ // The latest range that has not yet been used for prediction (it depends on the time range where we are right now).
+ $ranges = $this->get_most_recent_prediction_range($timesplitting);
}
// There is no need to keep track of the evaluated samples and ranges as we always evaluate the whole dataset.
if (empty($ranges)) {
$result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
- $result->message = get_string('nonewdata', 'analytics');
+ $result->message = get_string('noranges', 'analytics');
return $result;
}
- // We skip all samples that are already part of a training dataset, even if they have noe been used for training yet.
- $sampleids = $this->filter_out_train_samples($sampleids, $timesplitting);
+ // We skip all samples that are already part of a training dataset, even if they have not been used for prediction.
+ $this->filter_out_train_samples($sampleids, $timesplitting);
if (count($sampleids) === 0) {
$result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
// Only when processing data for predictions.
if ($target === false) {
- // We also filter out ranges that have already been used for predictions.
- $ranges = $this->filter_out_prediction_ranges($ranges, $timesplitting);
+ // We also filter out samples and ranges that have already been used for predictions.
+ $this->filter_out_prediction_samples_and_ranges($sampleids, $ranges, $timesplitting);
+ }
+
+ if (count($sampleids) === 0) {
+ $result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
+ $result->message = get_string('nonewdata', 'analytics');
+ return $result;
}
if (count($ranges) === 0) {
$result->status = \core_analytics\model::ANALYSABLE_REJECTED_TIME_SPLITTING_METHOD;
- $result->message = get_string('nonewtimeranges', 'analytics');
+ $result->message = get_string('nonewranges', 'analytics');
return $result;
}
}
if ($target) {
$this->save_train_samples($sampleids, $timesplitting, $file);
} else {
- $this->save_prediction_ranges($ranges, $timesplitting);
+ $this->save_prediction_samples($sampleids, $ranges, $timesplitting);
}
}
}
/**
- * Returns the ranges of a time splitting that can be used to predict.
+ * Returns the most recent range that can be used to predict.
+ *
+ * Ranges are walked from the highest range index to the lowest and the first one
+ * accepted by the time splitting method's ready_to_predict() is returned, keeping
+ * its original index as the array key.
*
* @param \core_analytics\local\time_splitting\base $timesplitting
- * @return array
+ * @return array Single element array (range index => range) or an empty array if no range is ready.
*/
- protected function get_prediction_ranges($timesplitting) {
+ protected function get_most_recent_prediction_range($timesplitting) {
- $now = time();
+ $ranges = $timesplitting->get_all_ranges();
+
+ // Opposite order as we are interested in the last range that can be used for prediction.
+ // Sort by range index (key); sorting by value would depend on how PHP compares the range arrays.
+ krsort($ranges);
// We already provided the analysable to the time splitting method, there is no need to feed it back.
- $predictionranges = array();
- foreach ($timesplitting->get_all_ranges() as $rangeindex => $range) {
+ foreach ($ranges as $rangeindex => $range) {
if ($timesplitting->ready_to_predict($range)) {
// We need to maintain the same indexes.
- $predictionranges[$rangeindex] = $range;
+ return array($rangeindex => $range);
}
}
- return $predictionranges;
+ return array();
}
/**
*
* @param int[] $sampleids
* @param \core_analytics\local\time_splitting\base $timesplitting
- * @return int[]
*/
- protected function filter_out_train_samples($sampleids, $timesplitting) {
+ protected function filter_out_train_samples(&$sampleids, $timesplitting) {
global $DB;
$params = array('modelid' => $this->modelid, 'analysableid' => $timesplitting->get_analysable()->get_id(),
$sampleids = array_diff_key($sampleids, $usedsamples);
}
}
-
- return $sampleids;
}
/**
* Filters out samples that have already been used for prediction.
+ *
+ * Both $sampleids and $ranges are received by reference and updated in place:
+ * - No stored record for the range: both are left untouched.
+ * - Every sample already stored for the range: the range is removed from $ranges.
+ * - Otherwise: $sampleids is replaced by the samples not yet stored for the range.
*
+ * @param int[] $sampleids
* @param array $ranges
* @param \core_analytics\local\time_splitting\base $timesplitting
- * @return int[]
+ * @return void
+ * @throws \coding_exception If $ranges contains more than one range.
*/
- protected function filter_out_prediction_ranges($ranges, $timesplitting) {
+ protected function filter_out_prediction_samples_and_ranges(&$sampleids, &$ranges, $timesplitting) {
global $DB;
+ if (count($ranges) > 1) {
+ throw new \coding_exception('$ranges argument should only contain one range');
+ }
+
+ $rangeindex = key($ranges);
+
$params = array('modelid' => $this->modelid, 'analysableid' => $timesplitting->get_analysable()->get_id(),
- 'timesplitting' => $timesplitting->get_id());
+ 'timesplitting' => $timesplitting->get_id(), 'rangeindex' => $rangeindex);
+ $predictedrange = $DB->get_record('analytics_predict_samples', $params);
- $predictedranges = $DB->get_records('analytics_predict_ranges', $params);
- foreach ($predictedranges as $predictedrange) {
- if (!empty($ranges[$predictedrange->rangeindex])) {
- unset($ranges[$predictedrange->rangeindex]);
- }
+ if (!$predictedrange) {
+ // Nothing to filter out.
+ return;
}
- return $ranges;
+ // The sampleids field holds a JSON encoded array; decode it as an associative array so it can be compared by key.
+ $predictedrange->sampleids = json_decode($predictedrange->sampleids, true);
+ $missingsamples = array_diff_key($sampleids, $predictedrange->sampleids);
+ if (count($missingsamples) === 0) {
+ // All samples already calculated.
+ unset($ranges[$rangeindex]);
+ return;
+ }
+ // Replace the list of samples by the one excluding samples that already got predictions at this range.
+ $sampleids = $missingsamples;
}
/**
* @param int[] $sampleids
* @param \core_analytics\local\time_splitting\base $timesplitting
* @param \stored_file $file
- * @return bool
+ * @return void
*/
protected function save_train_samples($sampleids, $timesplitting, $file) {
global $DB;
$trainingsamples->sampleids = json_encode($sampleids);
$trainingsamples->timecreated = time();
- return $DB->insert_record('analytics_train_samples', $trainingsamples);
+ $DB->insert_record('analytics_train_samples', $trainingsamples);
}
/**
* Saves samples that have just been used for prediction.
+ *
+ * One analytics_predict_samples record is kept per model, analysable, time splitting method
+ * and range index: an existing record gets the new samples merged in, otherwise a new record is inserted.
*
+ * @param int[] $sampleids
* @param array $ranges
* @param \core_analytics\local\time_splitting\base $timesplitting
* @return void
+ * @throws \coding_exception If $ranges contains more than one range.
*/
- protected function save_prediction_ranges($ranges, $timesplitting) {
+ protected function save_prediction_samples($sampleids, $ranges, $timesplitting) {
global $DB;
- $predictionrange = new \stdClass();
- $predictionrange->modelid = $this->modelid;
- $predictionrange->analysableid = $timesplitting->get_analysable()->get_id();
- $predictionrange->timesplitting = $timesplitting->get_id();
- $predictionrange->timecreated = time();
+ if (count($ranges) > 1) {
+ throw new \coding_exception('$ranges argument should only contain one range');
+ }
+
+ $rangeindex = key($ranges);
- foreach ($ranges as $rangeindex => $unused) {
- $predictionrange->rangeindex = $rangeindex;
- $DB->insert_record('analytics_predict_ranges', $predictionrange);
+ $params = array('modelid' => $this->modelid, 'analysableid' => $timesplitting->get_analysable()->get_id(),
+ 'timesplitting' => $timesplitting->get_id(), 'rangeindex' => $rangeindex);
+ if ($predictionrange = $DB->get_record('analytics_predict_samples', $params)) {
+ // Append the new samples used for prediction.
+ // Array union by key: keys already stored keep their previous value, only new keys are added.
+ $prevsamples = json_decode($predictionrange->sampleids, true);
+ $predictionrange->sampleids = json_encode($prevsamples + $sampleids);
+ $predictionrange->timemodified = time();
+ $DB->update_record('analytics_predict_samples', $predictionrange);
+ } else {
+ // First prediction for this range: the lookup params double as the new record's fields.
+ $predictionrange = (object)$params;
+ $predictionrange->sampleids = json_encode($sampleids);
+ $predictionrange->timecreated = time();
+ $predictionrange->timemodified = $predictionrange->timecreated;
+ $DB->insert_record('analytics_predict_samples', $predictionrange);
}
}
}
*/
public function any_prediction_obtained() {
global $DB;
+ // True as soon as one record exists for this model id and its current time splitting method.
- return $DB->record_exists('analytics_predict_ranges',
+ return $DB->record_exists('analytics_predict_samples',
array('modelid' => $this->model->id, 'timesplitting' => $this->model->timesplitting));
}
private function clear_model() {
global $DB;
- $DB->delete_records('analytics_predict_ranges', array('modelid' => $this->model->id));
$DB->delete_records('analytics_predictions', array('modelid' => $this->model->id));
+ $DB->delete_records('analytics_predict_samples', array('modelid' => $this->model->id));
$DB->delete_records('analytics_train_samples', array('modelid' => $this->model->id));
$DB->delete_records('analytics_used_files', array('modelid' => $this->model->id));
require_once(__DIR__ . '/fixtures/test_target_shortname.php');
require_once(__DIR__ . '/fixtures/test_static_target_shortname.php');
+require_once(__DIR__ . '/../../course/lib.php');
+
/**
* Unit tests for evaluation, training and prediction.
*
}
// 1 range for each analysable.
- $predictedranges = $DB->get_records('analytics_predict_ranges', array('modelid' => $model->get_id()));
+ $predictedranges = $DB->get_records('analytics_predict_samples', array('modelid' => $model->get_id()));
$this->assertCount(2, $predictedranges);
$this->assertEquals(1, $DB->count_records('analytics_used_files',
array('modelid' => $model->get_id(), 'action' => 'predicted')));
// No new generated files nor records as there are no new courses available.
$model->predict();
- $predictedranges = $DB->get_records('analytics_predict_ranges', array('modelid' => $model->get_id()));
+ $predictedranges = $DB->get_records('analytics_predict_samples', array('modelid' => $model->get_id()));
$this->assertCount(2, $predictedranges);
$this->assertEquals(1, $DB->count_records('analytics_used_files',
array('modelid' => $model->get_id(), 'action' => 'predicted')));
*
* @dataProvider provider_ml_training_and_prediction
* @param string $timesplittingid
- * @param int $npredictedranges
+ * @param int $predictedrangeindex
* @param string $predictionsprocessorclass
* @return void
*/
- public function test_ml_training_and_prediction($timesplittingid, $npredictedranges, $predictionsprocessorclass) {
+ public function test_ml_training_and_prediction($timesplittingid, $predictedrangeindex, $predictionsprocessorclass) {
global $DB;
$this->resetAfterTest(true);
$this->assertEquals($correct[$sampleid], $predictiondata->prediction);
}
- // 2 ranges will be predicted.
- $predictedranges = $DB->get_records('analytics_predict_ranges', array('modelid' => $model->get_id()));
- $this->assertCount($npredictedranges, $predictedranges);
+ // 1 range will be predicted.
+ $predictedranges = $DB->get_records('analytics_predict_samples', array('modelid' => $model->get_id()));
+ $this->assertCount(1, $predictedranges);
+ foreach ($predictedranges as $predictedrange) {
+ $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
+ $sampleids = json_decode($predictedrange->sampleids, true);
+ $this->assertCount(2, $sampleids);
+ $this->assertContains($course1->id, $sampleids);
+ $this->assertContains($course2->id, $sampleids);
+ }
$this->assertEquals(1, $DB->count_records('analytics_used_files',
array('modelid' => $model->get_id(), 'action' => 'predicted')));
- // 2 predictions for each range.
- $this->assertEquals(2 * $npredictedranges, $DB->count_records('analytics_predictions',
+ // 2 predictions.
+ $this->assertEquals(2, $DB->count_records('analytics_predictions',
array('modelid' => $model->get_id())));
// No new generated files nor records as there are no new courses available.
$model->predict();
- $predictedranges = $DB->get_records('analytics_predict_ranges', array('modelid' => $model->get_id()));
- $this->assertCount($npredictedranges, $predictedranges);
+ $predictedranges = $DB->get_records('analytics_predict_samples', array('modelid' => $model->get_id()));
+ $this->assertCount(1, $predictedranges);
+ foreach ($predictedranges as $predictedrange) {
+ $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
+ }
$this->assertEquals(1, $DB->count_records('analytics_used_files',
array('modelid' => $model->get_id(), 'action' => 'predicted')));
- $this->assertEquals(2 * $npredictedranges, $DB->count_records('analytics_predictions',
+ $this->assertEquals(2, $DB->count_records('analytics_predictions',
+ array('modelid' => $model->get_id())));
+
+ // New samples that can be used for prediction.
+ $courseparams = $params + array('shortname' => 'cccccc', 'fullname' => 'cccccc', 'visible' => 0);
+ $course3 = $this->getDataGenerator()->create_course($courseparams);
+ $courseparams = $params + array('shortname' => 'dddddd', 'fullname' => 'dddddd', 'visible' => 0);
+ $course4 = $this->getDataGenerator()->create_course($courseparams);
+
+ $result = $model->predict();
+
+ $predictedranges = $DB->get_records('analytics_predict_samples', array('modelid' => $model->get_id()));
+ $this->assertCount(1, $predictedranges);
+ foreach ($predictedranges as $predictedrange) {
+ $this->assertEquals($predictedrangeindex, $predictedrange->rangeindex);
+ $sampleids = json_decode($predictedrange->sampleids, true);
+ $this->assertCount(4, $sampleids);
+ $this->assertContains($course1->id, $sampleids);
+ $this->assertContains($course2->id, $sampleids);
+ $this->assertContains($course3->id, $sampleids);
+ $this->assertContains($course4->id, $sampleids);
+ }
+ $this->assertEquals(2, $DB->count_records('analytics_used_files',
+ array('modelid' => $model->get_id(), 'action' => 'predicted')));
+ $this->assertEquals(4, $DB->count_records('analytics_predictions',
+ array('modelid' => $model->get_id())));
+
+ // New visible course (for training).
+ $course5 = $this->getDataGenerator()->create_course(array('shortname' => 'aaa', 'fullname' => 'aa'));
+ $course6 = $this->getDataGenerator()->create_course();
+ $result = $model->train();
+ $this->assertEquals(2, $DB->count_records('analytics_used_files',
+ array('modelid' => $model->get_id(), 'action' => 'trained')));
+
+ // Update one of the courses to not visible, it should be used again for prediction.
+ $course5->visible = 0;
+ update_course($course5);
+
+ $model->predict();
+ $this->assertEquals(1, $DB->count_records('analytics_predict_samples',
+ array('modelid' => $model->get_id())));
+ $this->assertEquals(2, $DB->count_records('analytics_used_files',
+ array('modelid' => $model->get_id(), 'action' => 'predicted')));
+ $this->assertEquals(4, $DB->count_records('analytics_predictions',
array('modelid' => $model->get_id())));
set_config('enabled_stores', '', 'tool_log');
*/
public function provider_ml_training_and_prediction() {
$cases = array(
- 'no_splitting' => array('\core\analytics\time_splitting\no_splitting', 1),
- 'quarters' => array('\core\analytics\time_splitting\quarters', 4)
+ 'no_splitting' => array('\core\analytics\time_splitting\no_splitting', 0),
+ 'quarters' => array('\core\analytics\time_splitting\quarters', 3)
);
// We need to test all system prediction processors.
$string['invalidtimesplitting'] = 'Model with id {$a} needs a time splitting method before it can be used to train';
$string['invalidanalysablefortimesplitting'] = 'It can not be analysed using {$a} time splitting method';
$string['nocourses'] = 'No courses to analyse';
-$string['nodata'] = 'No data available';
$string['modeloutputdir'] = 'Models output directory';
$string['modeloutputdirinfo'] = 'Directory where prediction processors store all evaluation info. Useful for debugging and research.';
$string['noevaluationbasedassumptions'] = 'Models based on assumptions can not be evaluated';
+$string['nodata'] = 'No data to analyse';
$string['noinsightsmodel'] = 'This model does not generate insights';
$string['noinsights'] = 'No insights reported';
$string['nonewdata'] = 'No new data available';
+$string['nonewranges'] = 'No new predictions yet';
$string['nonewtimeranges'] = 'No new time ranges, nothing to predict';
$string['nopredictionsyet'] = 'No predictions available yet';
+$string['noranges'] = 'No predictions yet';
$string['notrainingbasedassumptions'] = 'Models based on assumptions do not need training';
$string['novaliddata'] = 'No valid data available';
$string['novalidsamples'] = 'No valid samples available';
<?xml version="1.0" encoding="UTF-8" ?>
-<XMLDB PATH="lib/db" VERSION="20170502" COMMENT="XMLDB file for core Moodle tables"
+<XMLDB PATH="lib/db" VERSION="20170721" COMMENT="XMLDB file for core Moodle tables"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:noNamespaceSchemaLocation="../../lib/xmldb/xmldb.xsd"
>
<FIELD NAME="target" TYPE="char" LENGTH="255" NOTNULL="true" SEQUENCE="false"/>
<FIELD NAME="indicators" TYPE="text" NOTNULL="true" SEQUENCE="false"/>
<FIELD NAME="timesplitting" TYPE="char" LENGTH="255" NOTNULL="false" SEQUENCE="false"/>
- <FIELD NAME="score" TYPE="number" LENGTH="10" DECIMALS="5" NOTNULL="true" DEFAULT="0" SEQUENCE="false"/>
+ <FIELD NAME="score" TYPE="number" LENGTH="10" NOTNULL="true" DEFAULT="0" SEQUENCE="false" DECIMALS="5"/>
<FIELD NAME="info" TYPE="text" NOTNULL="false" SEQUENCE="false"/>
<FIELD NAME="dir" TYPE="text" NOTNULL="true" SEQUENCE="false"/>
<FIELD NAME="timecreated" TYPE="int" LENGTH="10" NOTNULL="true" SEQUENCE="false"/>
<INDEX NAME="modelid" UNIQUE="false" FIELDS="modelid" COMMENT="Index on modelid"/>
</INDEXES>
</TABLE>
-
<TABLE NAME="analytics_predictions" COMMENT="Predictions">
<FIELDS>
<FIELD NAME="id" TYPE="int" LENGTH="10" NOTNULL="true" SEQUENCE="true"/>
<FIELD NAME="sampleid" TYPE="int" LENGTH="10" NOTNULL="true" SEQUENCE="false"/>
<FIELD NAME="rangeindex" TYPE="int" LENGTH="5" NOTNULL="true" SEQUENCE="false"/>
<FIELD NAME="prediction" TYPE="int" LENGTH="2" NOTNULL="true" SEQUENCE="false"/>
- <FIELD NAME="predictionscore" TYPE="number" LENGTH="10" DECIMALS="5" NOTNULL="true" SEQUENCE="false"/>
+ <FIELD NAME="predictionscore" TYPE="number" LENGTH="10" NOTNULL="true" SEQUENCE="false" DECIMALS="5"/>
<FIELD NAME="calculations" TYPE="text" NOTNULL="true" SEQUENCE="false"/>
<FIELD NAME="timecreated" TYPE="int" LENGTH="10" NOTNULL="true" DEFAULT="0" SEQUENCE="false"/>
</FIELDS>
<INDEX NAME="modelidandanalysableidandtimesplitting" UNIQUE="false" FIELDS="modelid, analysableid, timesplitting" COMMENT="Index on modelid and analysableid and timesplitting"/>
</INDEXES>
</TABLE>
- <TABLE NAME="analytics_predict_ranges" COMMENT="Time ranges already used for predictions.">
+ <TABLE NAME="analytics_predict_samples" COMMENT="Samples already used for predictions.">
<FIELDS>
<FIELD NAME="id" TYPE="int" LENGTH="10" NOTNULL="true" SEQUENCE="true"/>
<FIELD NAME="modelid" TYPE="int" LENGTH="10" NOTNULL="true" SEQUENCE="false"/>
<FIELD NAME="analysableid" TYPE="int" LENGTH="10" NOTNULL="true" SEQUENCE="false"/>
<FIELD NAME="timesplitting" TYPE="char" LENGTH="255" NOTNULL="true" SEQUENCE="false"/>
<FIELD NAME="rangeindex" TYPE="int" LENGTH="10" NOTNULL="true" SEQUENCE="false"/>
+ <FIELD NAME="sampleids" TYPE="text" NOTNULL="true" SEQUENCE="false"/>
<FIELD NAME="timecreated" TYPE="int" LENGTH="10" NOTNULL="true" DEFAULT="0" SEQUENCE="false"/>
+ <FIELD NAME="timemodified" TYPE="int" LENGTH="10" NOTNULL="true" DEFAULT="0" SEQUENCE="false"/>
</FIELDS>
<KEYS>
<KEY NAME="primary" TYPE="primary" FIELDS="id"/>
</KEYS>
<INDEXES>
- <INDEX NAME="modelidandanalysableidandtimesplitting" UNIQUE="false" FIELDS="modelid, analysableid, timesplitting" COMMENT="Index on modelid and analysableid and timesplitting"/>
+ <INDEX NAME="modelidandanalysableidandtimesplittingandrangeindex" UNIQUE="false" FIELDS="modelid, analysableid, timesplitting, rangeindex" COMMENT="Index on modelid and analysableid and timesplitting and rangeindex"/>
</INDEXES>
</TABLE>
<TABLE NAME="analytics_used_files" COMMENT="Files that have already been used for training and prediction.">
</INDEXES>
</TABLE>
</TABLES>
-</XMLDB>
+</XMLDB>
\ No newline at end of file
upgrade_main_savepoint(true, 2017072700.02);
}
+ if ($oldversion < 2017080400.01) {
+
+ // Get the table by its previous name.
+ $table = new xmldb_table('analytics_predict_ranges');
+ if ($dbman->table_exists($table)) {
+
+ // We can only accept this because we are in master.
+ $DB->delete_records('analytics_predictions');
+ $DB->delete_records('analytics_used_files', array('action' => 'predicted'));
+ $DB->delete_records('analytics_predict_ranges');
+
+ // Define field sampleids to be added to analytics_predict_ranges (renamed below to analytics_predict_samples).
+ $field = new xmldb_field('sampleids', XMLDB_TYPE_TEXT, null, null, XMLDB_NOTNULL, null, null, 'rangeindex');
+
+ // Conditionally launch add field sampleids.
+ if (!$dbman->field_exists($table, $field)) {
+ $dbman->add_field($table, $field);
+ }
+
+ // Define field timemodified to be added to analytics_predict_ranges (renamed below to analytics_predict_samples).
+ $field = new xmldb_field('timemodified', XMLDB_TYPE_INTEGER, '10', null, XMLDB_NOTNULL, null, '0', 'timecreated');
+
+ // Conditionally launch add field timemodified.
+ if (!$dbman->field_exists($table, $field)) {
+ $dbman->add_field($table, $field);
+ }
+
+ // Rename the table to its new name.
+ $dbman->rename_table($table, 'analytics_predict_samples');
+ }
+
+ // Keep upgraded sites in sync with install.xml, where the index on this table also covers rangeindex.
+ $table = new xmldb_table('analytics_predict_samples');
+ if ($dbman->table_exists($table)) {
+
+ // Define index modelidandanalysableidandtimesplitting (not unique) to be dropped from analytics_predict_samples.
+ $index = new xmldb_index('modelidandanalysableidandtimesplitting', XMLDB_INDEX_NOTUNIQUE,
+ array('modelid', 'analysableid', 'timesplitting'));
+
+ // Conditionally launch drop index modelidandanalysableidandtimesplitting.
+ if ($dbman->index_exists($table, $index)) {
+ $dbman->drop_index($table, $index);
+ }
+
+ // Define index modelidandanalysableidandtimesplittingandrangeindex (not unique) to be added to analytics_predict_samples.
+ $index = new xmldb_index('modelidandanalysableidandtimesplittingandrangeindex', XMLDB_INDEX_NOTUNIQUE,
+ array('modelid', 'analysableid', 'timesplitting', 'rangeindex'));
+
+ // Conditionally launch add index modelidandanalysableidandtimesplittingandrangeindex.
+ if (!$dbman->index_exists($table, $index)) {
+ $dbman->add_index($table, $index);
+ }
+ }
+
+ // Main savepoint reached.
+ upgrade_main_savepoint(true, 2017080400.01);
+ }
+
return true;
}
defined('MOODLE_INTERNAL') || die();
-$version = 2017080400.00; // YYYYMMDD = weekly release date of this DEV branch.
+$version = 2017080400.01; // YYYYMMDD = weekly release date of this DEV branch.
// RR = release increments - 00 in DEV branches.
// .XX = incremental changes.