2 // This file is part of Moodle - http://moodle.org/
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 // GNU General Public License for more details.
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
18 * Question statistics calculator class. Used in the quiz statistics report but also available for use elsewhere.
21 * @subpackage questionbank
22 * @copyright 2013 Open University
23 * @author Jamie Pratt <me@jamiep.org>
24 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
27 namespace core_question\statistics\questions;
28 defined('MOODLE_INTERNAL') || die();
31 * This class has methods to compute the question statistics from the raw data.
33 * @copyright 2013 Open University
34 * @author Jamie Pratt <me@jamiep.org>
35 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
/**
 * @var calculated[] statistics for each question position, indexed by slot.
 */
public $questionstats = array();

/**
 * @var calculated_for_subquestion[] statistics for the questions actually picked by random questions,
 *      indexed by question id.
 */
public $subquestionstats = array();

/**
 * @var number sum of the mark variance of every slot, accumulated by {@link calculate()}.
 */
protected $sumofmarkvariance = 0;

/**
 * @var array[] for each random question pool (keyed by 'category/questiontext' of the random question)
 *      the ids of the questions that were selected from it, as id => id.
 */
protected $randomselectors = array();

/**
 * @var \core\progress\base the element progress messages are sent to. Set in the constructor.
 */
protected $progress;
/**
 * Constructor.
 *
 * Creates one empty statistics object per slot so that later passes can accumulate into it.
 *
 * @param object[] $questions questions to analyze, keyed by slot, also analyses sub questions for random questions.
 *                  We expect some extra fields - slot, maxmark and number on the full question data objects.
 * @param \core\progress\base|null $progress the element to send progress messages to,
 *                  default is {@link \core\progress\null}.
 */
public function __construct($questions, $progress = null) {
    if ($progress === null) {
        // NOTE(review): 'null' is a reserved class name from PHP 7 on; confirm this class is still
        // available under this name in the Moodle version this file ships with.
        $progress = new \core\progress\null();
    }
    $this->progress = $progress;

    foreach ($questions as $slot => $question) {
        $this->questionstats[$slot] = $this->new_slot_stats($question, $slot);
    }
}
/**
 * Set up a calculated instance ready to store a question's stats.
 *
 * @param object $question full question data, with the extra fields maxmark and number.
 * @param int $slot the slot the question occupies.
 * @return calculated the initialised, still empty, statistics object.
 */
protected function new_slot_stats($question, $slot) {
    $toreturn = new calculated();
    $toreturn->questionid = $question->id;
    $toreturn->maxmark = $question->maxmark;
    $toreturn->question = $question;
    $toreturn->slot = $slot;
    $toreturn->positions = $question->number;
    $toreturn->randomguessscore = $this->get_random_guess_score($question);
    // Callers store the result directly, so the instance must be handed back.
    return $toreturn;
}
/**
 * Set up a calculated instance ready to store a randomly selected question's stats.
 *
 * Only the question id and max mark are known from the step; the question data itself
 * is attached later, once the sub questions have been loaded.
 *
 * @param object $step a latest-step record, providing questionid and maxmark.
 * @return calculated_for_subquestion
 */
protected function new_subq_stats($step) {
    $toreturn = new calculated_for_subquestion();
    $toreturn->questionid = $step->questionid;
    $toreturn->maxmark = $step->maxmark;
    return $toreturn;
}
/**
 * Compute the statistics for every slot, and for every question selected by a random question,
 * from the latest steps of the given usages, then cache the results.
 *
 * The work is done in several passes: accumulate totals per step, derive averages per question,
 * accumulate squared differences per step, derive variances per question and finally the
 * effective weights.
 *
 * @param $qubaids \qubaid_condition
 * @return array containing two arrays calculated[] and calculated_for_subquestion[].
 */
public function calculate($qubaids) {
    global $OUTPUT;

    $this->progress->start_progress('', 6);

    list($lateststeps, $summarks) = $this->get_latest_steps($qubaids);

    if (!$lateststeps) {
        // There is no step data to analyse. The passes below divide by the number of steps seen,
        // so stop here and return the empty statistics set up by the constructor.
        $this->progress->end_progress();
        return array($this->questionstats, $this->subquestionstats);
    }

    $this->progress->start_progress('', count($lateststeps), 1);
    // Compute the statistics of position, and for random questions, work
    // out which questions appear in which positions.
    foreach ($lateststeps as $step) {
        $this->progress->increment_progress();

        $slotstat = $this->questionstats[$step->slot];

        // The step belongs to a question other than the one in the slot, so the slot holds a random question.
        $israndomquestion = ($step->questionid != $slotstat->questionid);

        // If this is a variant we have not seen before create a place to store stats calculations for this variant.
        if (!$israndomquestion && !isset($slotstat->variantstats[$step->variant])) {
            $slotstat->variantstats[$step->variant] = $this->new_slot_stats($slotstat->question, $step->slot);
            $slotstat->variantstats[$step->variant]->variant = $step->variant;
        }

        // Step data walker for main question.
        $this->initial_steps_walker($step, $slotstat, $summarks, true, !$israndomquestion);

        // If this is a random question do the calculations for sub question stats.
        if ($israndomquestion) {
            if (!isset($this->subquestionstats[$step->questionid])) {
                $this->subquestionstats[$step->questionid] = $this->new_subq_stats($step);
            } else if ($this->subquestionstats[$step->questionid]->maxmark != $step->maxmark) {
                $this->subquestionstats[$step->questionid]->differentweights = true;
            }
            $subqstat = $this->subquestionstats[$step->questionid];

            // If this is a variant of this subq we have not seen before create a place to store stats calculations for it.
            if (!isset($subqstat->variantstats[$step->variant])) {
                $subqstat->variantstats[$step->variant] = $this->new_subq_stats($step);
                $subqstat->variantstats[$step->variant]->variant = $step->variant;
            }

            $this->initial_steps_walker($step, $subqstat, $summarks, false);

            // Extra stuff we need to do in this loop for subqs to keep track of where they need to be displayed later.
            $number = $slotstat->question->number;
            $subqstat->usedin[$number] = $number;

            // Keep track of which random questions are actually selected from each pool of questions that random
            // questions are pulled from.
            $randomselectorstring = $slotstat->question->category. '/' .$slotstat->question->questiontext;
            if (!isset($this->randomselectors[$randomselectorstring])) {
                $this->randomselectors[$randomselectorstring] = array();
            }
            $this->randomselectors[$randomselectorstring][$step->questionid] = $step->questionid;
        }
    }
    $this->progress->end_progress();

    foreach ($this->randomselectors as $key => $notused) {
        ksort($this->randomselectors[$key]);
    }

    $subquestions = question_load_questions(array_keys($this->subquestionstats));
    // Compute the statistics for sub questions, if there are any.
    $this->progress->start_progress('', count($subquestions), 1);
    foreach ($subquestions as $qid => $subquestion) {
        $this->progress->increment_progress();

        $subqstat = $this->subquestionstats[$qid];
        $subquestion->maxmark = $subqstat->maxmark;
        $subqstat->question = $subquestion;
        $subqstat->randomguessscore = $this->get_random_guess_score($subquestion);

        foreach ($subqstat->variantstats as $variantstat) {
            $variantstat->question = $subquestion;
            $variantstat->randomguessscore = $this->get_random_guess_score($subquestion);
        }

        $this->initial_question_walker($subqstat);

        if ($subqstat->differentweights) {
            // TODO output here really sucks, but throwing is too severe.
            $name = $subqstat->question->name;
            echo $OUTPUT->notification( get_string('erroritemappearsmorethanoncewithdifferentweight',
                                                    'quiz_statistics', $name));
        }

        if ($subqstat->usedin) {
            sort($subqstat->usedin, SORT_NUMERIC);
            $subqstat->positions = implode(',', $subqstat->usedin);
        } else {
            $subqstat->positions = '';
        }
    }
    $this->progress->end_progress();

    // Finish computing the averages, and put the subquestion data into the
    // corresponding questions.

    // Each iteration needs both the current and the following slot, so walk a re-indexed copy
    // of the list. The stats are objects, so updates made here reach $this->questionstats.
    // (each() can no longer be used for this: it was removed in PHP 8.)
    $slotstats = array_values($this->questionstats);
    $slotcount = count($slotstats);
    $this->progress->start_progress('', $slotcount, 1);
    foreach ($slotstats as $index => $questionstat) {
        $this->progress->increment_progress();
        $nextquestionstats = ($index + 1 < $slotcount) ? $slotstats[$index + 1] : false;

        $this->initial_question_walker($questionstat);

        // The rest of this loop is again to work out where randomly selected question stats should be displayed.
        if ($questionstat->question->qtype == 'random') {
            $randomselectorstring = $questionstat->question->category .'/'. $questionstat->question->questiontext;
            if ($nextquestionstats && $nextquestionstats->question->qtype == 'random') {
                $nextrandomselectorstring =
                    $nextquestionstats->question->category .'/'. $nextquestionstats->question->questiontext;
                if ($randomselectorstring == $nextrandomselectorstring) {
                    // The next slot draws from the same pool: list the sub questions only after the last of the run.
                    continue;
                }
            }
            if (isset($this->randomselectors[$randomselectorstring])) {
                $questionstat->subquestions = implode(',', $this->randomselectors[$randomselectorstring]);
            }
        }
    }
    $this->progress->end_progress();

    // Go through the records one more time.
    $this->progress->start_progress('', count($lateststeps), 1);
    foreach ($lateststeps as $step) {
        $this->progress->increment_progress();
        $israndomquestion = ($this->questionstats[$step->slot]->question->qtype == 'random');
        $this->secondary_steps_walker($step, $this->questionstats[$step->slot], $summarks, !$israndomquestion);

        if ($this->questionstats[$step->slot]->subquestions) {
            $this->secondary_steps_walker($step, $this->subquestionstats[$step->questionid], $summarks);
        }
    }
    $this->progress->end_progress();

    $this->progress->start_progress('', count($this->questionstats), 1);
    $sumofcovariancewithoverallmark = 0;
    foreach ($this->questionstats as $questionstat) {
        $this->progress->increment_progress();
        $this->secondary_question_walker($questionstat);

        $this->sumofmarkvariance += $questionstat->markvariance;

        // Negative covariances are left out of the total; those questions get no effective weight below.
        if ($questionstat->covariancewithoverallmark >= 0) {
            $sumofcovariancewithoverallmark += sqrt($questionstat->covariancewithoverallmark);
        }
    }
    $this->progress->end_progress();

    $this->progress->start_progress('', count($this->subquestionstats), 1);
    foreach ($this->subquestionstats as $subquestionstat) {
        $this->progress->increment_progress();
        $this->secondary_question_walker($subquestionstat);
    }
    $this->progress->end_progress();

    foreach ($this->questionstats as $questionstat) {
        if ($sumofcovariancewithoverallmark) {
            if ($questionstat->negcovar) {
                $questionstat->effectiveweight = null;
            } else {
                $questionstat->effectiveweight = 100 * sqrt($questionstat->covariancewithoverallmark) /
                    $sumofcovariancewithoverallmark;
            }
        } else {
            $questionstat->effectiveweight = null;
        }
    }
    $this->cache_stats($qubaids);

    // All finished.
    $this->progress->end_progress();

    return array($this->questionstats, $this->subquestionstats);
}
/**
 * Load cached statistics from the database.
 *
 * Only records newer than {@link TIME_TO_CACHE} seconds are used. Records with a slot belong to
 * a question position; records without a slot belong to a question selected by a random question.
 *
 * @param $qubaids \qubaid_condition
 * @return array containing two arrays calculated[] and calculated_for_subquestion[].
 */
public function get_cached($qubaids) {
    global $DB;

    $timemodified = time() - self::TIME_TO_CACHE;
    $questionstatrecs = $DB->get_records_select('question_statistics', 'hashcode = ? AND timemodified > ?',
                                                array($qubaids->get_hash_code(), $timemodified));

    // Sub questions are not known to the constructor, so their question data has to be loaded here.
    $questionids = array();
    foreach ($questionstatrecs as $fromdb) {
        if (is_null($fromdb->variant) && !$fromdb->slot) {
            $questionids[] = $fromdb->questionid;
        }
    }
    $subquestions = question_load_questions($questionids);

    // First the whole-question records, so the objects exist before variant stats are attached to them.
    foreach ($questionstatrecs as $fromdb) {
        if (is_null($fromdb->variant)) {
            if ($fromdb->slot) {
                // Array created in constructor and populated from question.
                $this->questionstats[$fromdb->slot]->populate_from_record($fromdb);
            } else {
                $this->subquestionstats[$fromdb->questionid] = new calculated_for_subquestion();
                $this->subquestionstats[$fromdb->questionid]->populate_from_record($fromdb);
                $this->subquestionstats[$fromdb->questionid]->question = $subquestions[$fromdb->questionid];
            }
        }
    }

    // Add cached variant stats to data structure.
    foreach ($questionstatrecs as $fromdb) {
        if (!is_null($fromdb->variant)) {
            if ($fromdb->slot) {
                $newcalcinstance = new calculated();
                $this->questionstats[$fromdb->slot]->variantstats[$fromdb->variant] = $newcalcinstance;
                $newcalcinstance->question = $this->questionstats[$fromdb->slot]->question;
            } else {
                $newcalcinstance = new calculated_for_subquestion();
                $this->subquestionstats[$fromdb->questionid]->variantstats[$fromdb->variant] = $newcalcinstance;
                $newcalcinstance->question = $subquestions[$fromdb->questionid];
            }
            $newcalcinstance->populate_from_record($fromdb);
        }
    }

    return array($this->questionstats, $this->subquestionstats);
}
/**
 * Find time of non-expired statistics in the database.
 *
 * @param $qubaids \qubaid_condition
 * @return integer|boolean Time of cached record that matches this qubaid_condition or false if none found.
 */
public function get_last_calculated_time($qubaids) {
    global $DB;

    $timemodified = time() - self::TIME_TO_CACHE;
    // Several records can match; IGNORE_MULTIPLE is passed so that case is accepted.
    return $DB->get_field_select('question_statistics', 'timemodified', 'hashcode = ? AND timemodified > ?',
                                 array($qubaids->get_hash_code(), $timemodified), IGNORE_MULTIPLE);
}
/** @var integer Time, in seconds, after which statistics are automatically recomputed. */
const TIME_TO_CACHE = 900; // 15 minutes.
/**
 * Used when computing Coefficient of Internal Consistency by quiz statistics.
 *
 * @return number the sum of the mark variance of all slots, as accumulated by {@link calculate()}.
 */
public function get_sum_of_mark_variance() {
    return $this->sumofmarkvariance;
}
/**
 * Load the latest step of every question attempt in the given usages and total the marks per usage.
 *
 * @param $qubaids \qubaid_condition
 * @return array with two items
 *              - $lateststeps array of latest step data for the question usages
 *              - $summarks    array of total marks for each usage, indexed by usage id
 */
protected function get_latest_steps($qubaids) {
    $dm = new \question_engine_data_mapper();

    // NOTE(review): only the last line of this field list is visible; the other columns are the ones
    // the rest of this class reads from each step (questionusageid, questionid, variant, slot, maxmark),
    // led by the step id - confirm against the original.
    $fields = "    qas.id,
qa.questionusageid,
qa.questionid,
qa.variant,
qa.slot,
qa.maxmark,
qas.fraction * qa.maxmark as mark";

    $lateststeps = $dm->load_questions_usages_latest_steps($qubaids, array_keys($this->questionstats), $fields);

    // Always return an array, even when there are no steps at all.
    $summarks = array();
    if ($lateststeps) {
        foreach ($lateststeps as $step) {
            if (!isset($summarks[$step->questionusageid])) {
                $summarks[$step->questionusageid] = 0;
            }
            $summarks[$step->questionusageid] += $step->mark;
        }
    }

    return array($lateststeps, $summarks);
}
/**
 * Update $stats->s, $stats->totalmarks, $stats->markarray, $stats->totalothermarks
 * and $stats->othermarksarray to include another state.
 *
 * @param object     $step         the state to add to the statistics.
 * @param calculated $stats        the question statistics we are accumulating.
 * @param array      $summarks     of the sum of marks for each question usage, indexed by question usage id
 * @param bool       $positionstat whether this is a statistic of position of question.
 * @param bool       $dovariantalso do we also want to do the same calculations for this variant?
 */
protected function initial_steps_walker($step, $stats, $summarks, $positionstat = true, $dovariantalso = true) {
    // Count the sample: the averages and variances computed later divide by this.
    $stats->s++;
    $stats->totalmarks += $step->mark;
    $stats->markarray[] = $step->mark;

    if ($positionstat) {
        // For a position, 'other marks' means the usage total without this question's own mark.
        $othermark = $summarks[$step->questionusageid] - $step->mark;
    } else {
        // For a sub question the whole usage total is used.
        $othermark = $summarks[$step->questionusageid];
    }
    $stats->totalothermarks += $othermark;
    $stats->othermarksarray[] = $othermark;

    if ($dovariantalso) {
        // Repeat for the variant's own stats; false stops any further recursion.
        $this->initial_steps_walker($step, $stats->variantstats[$step->variant], $summarks, $positionstat, false);
    }
}
/**
 * Perform some computations on the per-question statistics calculations after
 * we have been through all the step data.
 *
 * Computes the averages and the facility, and sorts the mark arrays ready for
 * {@link secondary_steps_walker()}, which consumes them in order.
 *
 * @param calculated $stats question stats to update.
 * @param bool       $dovariantsalso do we also want to do the same calculations for the variants?
 */
protected function initial_question_walker($stats, $dovariantsalso = true) {
    $stats->markaverage = $stats->totalmarks / $stats->s;

    // Facility is undefined for a question that carries no marks.
    if ($stats->maxmark != 0) {
        $stats->facility = $stats->markaverage / $stats->maxmark;
    } else {
        $stats->facility = null;
    }

    $stats->othermarkaverage = $stats->totalothermarks / $stats->s;

    $stats->summarksaverage = $stats->totalsummarks / $stats->s;

    sort($stats->markarray, SORT_NUMERIC);
    sort($stats->othermarksarray, SORT_NUMERIC);

    if ($dovariantsalso) {
        foreach ($stats->variantstats as $variantstat) {
            // Variants have no variants of their own, hence false.
            $this->initial_question_walker($variantstat, false);
        }
    }
}
/**
 * Now we know the averages, accumulate the data needed to compute the higher
 * moments of the question scores.
 *
 * @param object     $step        the state to add to the statistics.
 * @param calculated $stats       the question statistics we are accumulating.
 * @param array      $summarks    of the sum of marks for each question usage, indexed by question usage id
 * @param bool       $dovariantalso do we also want to do the same calculations for the variant?
 */
protected function secondary_steps_walker($step, $stats, $summarks, $dovariantalso = true) {
    $markdifference = $step->mark - $stats->markaverage;
    if ($stats->subquestion) {
        // Sub question 'other marks' were accumulated from the whole usage total.
        $othermarkdifference = $summarks[$step->questionusageid] - $stats->othermarkaverage;
    } else {
        // Position 'other marks' exclude this question's own mark.
        $othermarkdifference = $summarks[$step->questionusageid] - $step->mark - $stats->othermarkaverage;
    }
    $overallmarkdifference = $summarks[$step->questionusageid] - $stats->summarksaverage;

    // Take the next values off the two mark arrays; their pairing feeds covariancemaxsum.
    $sortedmarkdifference = array_shift($stats->markarray) - $stats->markaverage;
    $sortedothermarkdifference = array_shift($stats->othermarksarray) - $stats->othermarkaverage;

    $stats->markvariancesum += pow($markdifference, 2);
    $stats->othermarkvariancesum += pow($othermarkdifference, 2);
    $stats->covariancesum += $markdifference * $othermarkdifference;
    $stats->covariancemaxsum += $sortedmarkdifference * $sortedothermarkdifference;
    $stats->covariancewithoverallmarksum += $markdifference * $overallmarkdifference;

    if ($dovariantalso) {
        $this->secondary_steps_walker($step, $stats->variantstats[$step->variant], $summarks, false);
    }
}
/**
 * Perform more per-question statistics calculations.
 *
 * Turns the sums accumulated by {@link secondary_steps_walker()} into variances, covariances,
 * the standard deviation, the discrimination index and the discriminative efficiency.
 *
 * @param calculated $stats question stats to update.
 * @param bool       $dovariantsalso do we also want to do the same calculations for the variants?
 */
protected function secondary_question_walker($stats, $dovariantsalso = true) {
    if ($stats->s > 1) {
        // Every sum is divided by s - 1, so at least two samples are needed.
        $divisor = $stats->s - 1;
        $stats->markvariance = $stats->markvariancesum / $divisor;
        $stats->othermarkvariance = $stats->othermarkvariancesum / $divisor;
        $stats->covariance = $stats->covariancesum / $divisor;
        $stats->covariancemax = $stats->covariancemaxsum / $divisor;
        $stats->covariancewithoverallmark = $stats->covariancewithoverallmarksum / $divisor;
        $stats->sd = sqrt($stats->markvariancesum / $divisor);

        if ($stats->covariancewithoverallmark >= 0) {
            $stats->negcovar = 0;
        } else {
            $stats->negcovar = 1;
        }
    } else {
        // Too few samples for any of these to be defined.
        $stats->markvariance = null;
        $stats->othermarkvariance = null;
        $stats->covariance = null;
        $stats->covariancemax = null;
        $stats->covariancewithoverallmark = null;
        $stats->sd = null;
        $stats->negcovar = 0;
    }

    // Avoid dividing by zero when either variance is zero (or null).
    if ($stats->markvariance * $stats->othermarkvariance) {
        $stats->discriminationindex = 100 * $stats->covariance /
            sqrt($stats->markvariance * $stats->othermarkvariance);
    } else {
        $stats->discriminationindex = null;
    }

    if ($stats->covariancemax) {
        $stats->discriminativeefficiency = 100 * $stats->covariance /
            $stats->covariancemax;
    } else {
        $stats->discriminativeefficiency = null;
    }

    if ($dovariantsalso) {
        foreach ($stats->variantstats as $variantstat) {
            $this->secondary_question_walker($variantstat, false);
        }
    }
}
/**
 * Ask the question type for the score expected from guessing at random.
 *
 * @param object $questiondata full question data; its qtype selects the question type to ask.
 * @return number the random guess score for this question.
 */
protected function get_random_guess_score($questiondata) {
    $qtype = \question_bank::get_qtype($questiondata->qtype, false);
    return $qtype->get_random_guess_score($questiondata);
}
/**
 * Ask every slot and sub question statistics object to cache itself for these usages.
 *
 * @param $qubaids \qubaid_condition
 */
protected function cache_stats($qubaids) {
    foreach ($this->questionstats as $questionstat) {
        $questionstat->cache($qubaids);
    }

    foreach ($this->subquestionstats as $subquestionstat) {
        $subquestionstat->cache($qubaids);
    }
}