MDL-70374 qtype_multichoice: improve alignment of choices
[moodle.git] / question / classes / statistics / questions / calculator.php
CommitLineData
e68e4ccf
JP
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
515b3ae6 18 * Question statistics calculator class. Used in the quiz statistics report but also available for use elsewhere.
e68e4ccf
JP
19 *
20 * @package core
21 * @subpackage questionbank
22 * @copyright 2013 Open University
23 * @author Jamie Pratt <me@jamiep.org>
24 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25 */
26
515b3ae6 27namespace core_question\statistics\questions;
e68e4ccf
JP
28defined('MOODLE_INTERNAL') || die();
29
e68e4ccf
JP
30/**
31 * This class has methods to compute the question statistics from the raw data.
32 *
33 * @copyright 2013 Open University
34 * @author Jamie Pratt <me@jamiep.org>
35 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
36 */
515b3ae6 37class calculator {
e68e4ccf
JP
38
/**
 * @var all_calculated_for_qubaid_condition all the stats calculated for slots and sub-questions and variants of those
 *                   questions.
 */
protected $stats;

/**
 * @var float running total of the per-slot mark variances, accumulated by {@link calculate()} and exposed through
 *                   {@link get_sum_of_mark_variance()}.
 */
protected $sumofmarkvariance = 0;

/**
 * @var array keyed by a string representing the pool of questions that this random question draws from.
 *                   string as returned from {@link \core_question\statistics\questions\calculated::random_selector_string}
 *                   While {@link calculate()} is reading the step data each entry is an array of question ids
 *                   (questionid => questionid); once all steps have been read each entry is replaced by a comma
 *                   separated string of those ids in ascending order.
 */
protected $randomselectors = array();

/**
 * @var \core\progress\base the object progress is reported to. Set in the constructor, which falls back to
 *                   {@link \core\progress\none} when no progress object is supplied.
 */
protected $progress;

/**
 * @var string The class name of the class to instantiate to store statistics calculated.
 */
protected $statscollectionclassname = '\core_question\statistics\questions\all_calculated_for_qubaid_condition';
65
515b3ae6
JP
/**
 * Constructor.
 *
 * Creates the stats collection (an instance of the class named in $statscollectionclassname) and prepares an
 * entry in it for every slot passed in, recording the random guess score of each question.
 *
 * @param object[] $questions questions to analyze, keyed by slot, also analyses sub questions for random questions.
 *                             we expect some extra fields - slot, maxmark and number on the full question data objects.
 * @param \core\progress\base|null $progress the element to send progress messages to, default is {@link \core\progress\none}.
 */
public function __construct($questions, $progress = null) {
    // Fall back to the do-nothing progress reporter when the caller did not supply one.
    $this->progress = ($progress === null) ? new \core\progress\none() : $progress;

    $collectionclass = $this->statscollectionclassname;
    $this->stats = new $collectionclass();

    foreach ($questions as $slot => $question) {
        $this->stats->initialise_for_slot($slot, $question);
        $slotstats = $this->stats->for_slot($slot);
        $slotstats->randomguessscore = $this->get_random_guess_score($question);
    }
}
85
/**
 * Calculate the stats.
 *
 * The work is reported to the progress object as six nested sections:
 *  1. first pass over the latest steps (per slot, and per sub-question for random slots);
 *  2. first pass over the sub-questions;
 *  3. first pass over the slots, also attaching the list of sub-questions to random slots;
 *  4. second pass over the latest steps;
 *  5. second pass over the slots, accumulating the sum of mark variances;
 *  6. second pass over the sub-questions.
 * Finally the effective weight of each slot is worked out and the results are cached.
 *
 * When there are no latest steps nothing is calculated or cached and the initial (empty) stats are returned.
 *
 * NOTE(review): this method adds to $this->sumofmarkvariance and replaces the entries of $this->randomselectors
 * with strings, so it looks like it is meant to be called at most once per calculator instance - confirm.
 *
 * @param \qubaid_condition $qubaids Which question usages to calculate the stats for?
 * @return all_calculated_for_qubaid_condition The calculated stats.
 */
public function calculate($qubaids) {

    $this->progress->start_progress('', 6);

    list($lateststeps, $summarks) = $this->get_latest_steps($qubaids);

    if ($lateststeps) {
        $this->progress->start_progress('', count($lateststeps), 1);
        // Compute the statistics of position, and for random questions, work
        // out which questions appear in which positions.
        foreach ($lateststeps as $step) {

            $this->progress->increment_progress();

            $israndomquestion = ($step->questionid != $this->stats->for_slot($step->slot)->questionid);
            $breakdownvariants = !$israndomquestion && $this->stats->for_slot($step->slot)->break_down_by_variant();
            // If this is a variant we have not seen before create a place to store stats calculations for this variant.
            if ($breakdownvariants && !$this->stats->has_slot($step->slot, $step->variant)) {
                $question = $this->stats->for_slot($step->slot)->question;
                $this->stats->initialise_for_slot($step->slot, $question, $step->variant);
                $this->stats->for_slot($step->slot, $step->variant)->randomguessscore =
                        $this->get_random_guess_score($question);
            }

            // Step data walker for main question.
            $this->initial_steps_walker($step, $this->stats->for_slot($step->slot), $summarks, true, $breakdownvariants);

            // If this is a random question do the calculations for sub question stats.
            if ($israndomquestion) {
                if (!$this->stats->has_subq($step->questionid)) {
                    $this->stats->initialise_for_subq($step);
                } else if ($this->stats->for_subq($step->questionid)->maxmark != $step->maxmark) {
                    $this->stats->for_subq($step->questionid)->differentweights = true;
                }

                // If this is a variant of this subq we have not seen before create a place to store stats calculations for it.
                if (!$this->stats->has_subq($step->questionid, $step->variant)) {
                    $this->stats->initialise_for_subq($step, $step->variant);
                }

                $this->initial_steps_walker($step, $this->stats->for_subq($step->questionid), $summarks, false);

                // Extra stuff we need to do in this loop for subqs to keep track of where they need to be displayed later.

                $number = $this->stats->for_slot($step->slot)->question->number;
                $this->stats->for_subq($step->questionid)->usedin[$number] = $number;

                // Keep track of which random questions are actually selected from each pool of questions that random
                // questions are pulled from.
                $randomselectorstring = $this->stats->for_slot($step->slot)->random_selector_string();
                if (!isset($this->randomselectors[$randomselectorstring])) {
                    $this->randomselectors[$randomselectorstring] = array();
                }
                $this->randomselectors[$randomselectorstring][$step->questionid] = $step->questionid;
            }
        }
        $this->progress->end_progress();

        // Turn each pool's set of selected question ids into a sorted, comma separated string.
        foreach ($this->randomselectors as $key => $notused) {
            ksort($this->randomselectors[$key]);
            $this->randomselectors[$key] = implode(',', $this->randomselectors[$key]);
        }

        $this->stats->subquestions = question_load_questions($this->stats->get_all_subq_ids());
        // Compute the statistics for sub questions, if there are any.
        $this->progress->start_progress('', count($this->stats->subquestions), 1);
        foreach ($this->stats->subquestions as $qid => $subquestion) {
            $this->progress->increment_progress();
            $subquestion->maxmark = $this->stats->for_subq($qid)->maxmark;
            $this->stats->for_subq($qid)->question = $subquestion;
            $this->stats->for_subq($qid)->randomguessscore = $this->get_random_guess_score($subquestion);

            if ($variants = $this->stats->for_subq($qid)->get_variants()) {
                foreach ($variants as $variant) {
                    $this->stats->for_subq($qid, $variant)->question = $subquestion;
                    $this->stats->for_subq($qid, $variant)->randomguessscore = $this->get_random_guess_score($subquestion);
                }
                $this->stats->for_subq($qid)->sort_variants();
            }
            $this->initial_question_walker($this->stats->for_subq($qid));

            if ($this->stats->for_subq($qid)->usedin) {
                sort($this->stats->for_subq($qid)->usedin, SORT_NUMERIC);
                $this->stats->for_subq($qid)->positions = implode(',', $this->stats->for_subq($qid)->usedin);
            } else {
                $this->stats->for_subq($qid)->positions = '';
            }
        }
        $this->progress->end_progress();

        // Finish computing the averages, and put the sub-question data into the
        // corresponding questions.
        $slots = $this->stats->get_all_slots();
        $totalnumberofslots = count($slots);
        $maxindex = $totalnumberofslots - 1;
        $this->progress->start_progress('', $totalnumberofslots, 1);
        foreach ($slots as $index => $slot) {
            $this->stats->for_slot($slot)->sort_variants();
            $this->progress->increment_progress();
            $nextslotindex = $index + 1;
            $nextslot = ($nextslotindex > $maxindex) ? false : $slots[$nextslotindex];

            $this->initial_question_walker($this->stats->for_slot($slot));

            // The rest of this loop is to finish working out where randomly selected question stats should be displayed.
            if ($this->stats->for_slot($slot)->question->qtype == 'random') {
                $randomselectorstring = $this->stats->for_slot($slot)->random_selector_string();
                // Only the last of a run of random slots drawing from the same pool gets the sub-question list.
                if ($nextslot && ($randomselectorstring == $this->stats->for_slot($nextslot)->random_selector_string())) {
                    continue; // Next loop iteration.
                }
                if (isset($this->randomselectors[$randomselectorstring])) {
                    $this->stats->for_slot($slot)->subquestions = $this->randomselectors[$randomselectorstring];
                }
            }
        }
        $this->progress->end_progress();

        // Go through the records one more time.
        $this->progress->start_progress('', count($lateststeps), 1);
        foreach ($lateststeps as $step) {
            $this->progress->increment_progress();
            $israndomquestion = ($this->stats->for_slot($step->slot)->question->qtype == 'random');
            $this->secondary_steps_walker($step, $this->stats->for_slot($step->slot), $summarks);

            if ($israndomquestion) {
                $this->secondary_steps_walker($step, $this->stats->for_subq($step->questionid), $summarks);
            }
        }
        $this->progress->end_progress();

        $slots = $this->stats->get_all_slots();
        $this->progress->start_progress('', count($slots), 1);
        $sumofcovariancewithoverallmark = 0;
        foreach ($slots as $slot) {
            $this->progress->increment_progress();
            $this->secondary_question_walker($this->stats->for_slot($slot));

            $this->sumofmarkvariance += $this->stats->for_slot($slot)->markvariance;

            // The covariance is null when there are fewer than two attempts. null passes the >= 0 test, so cast it
            // to float (0.0) rather than handing null to sqrt(), which is deprecated from PHP 8.1.
            if ($this->stats->for_slot($slot)->covariancewithoverallmark >= 0) {
                $sumofcovariancewithoverallmark += sqrt((float) $this->stats->for_slot($slot)->covariancewithoverallmark);
            }
        }
        $this->progress->end_progress();

        $subqids = $this->stats->get_all_subq_ids();
        $this->progress->start_progress('', count($subqids), 1);
        foreach ($subqids as $subqid) {
            $this->progress->increment_progress();
            $this->secondary_question_walker($this->stats->for_subq($subqid));
        }
        $this->progress->end_progress();

        // Effective weight: each slot's share (as a percentage) of the summed square roots of the covariances with
        // the overall mark. Not defined when that sum is zero or the slot's covariance is negative.
        foreach ($slots as $slot) {
            if ($sumofcovariancewithoverallmark) {
                if ($this->stats->for_slot($slot)->negcovar) {
                    $this->stats->for_slot($slot)->effectiveweight = null;
                } else {
                    $this->stats->for_slot($slot)->effectiveweight =
                            100 * sqrt((float) $this->stats->for_slot($slot)->covariancewithoverallmark) /
                            $sumofcovariancewithoverallmark;
                }
            } else {
                $this->stats->for_slot($slot)->effectiveweight = null;
            }
        }
        $this->stats->cache($qubaids);
    }

    // All finished. This closes the section opened at the top of this method, so it must run whether or not
    // there were any steps to process, otherwise the progress object is left inside an unfinished section.
    $this->progress->end_progress();

    return $this->stats;
}
265
515b3ae6
JP
/**
 * Get the total of the mark variances of all the slots, as accumulated by {@link calculate()}.
 *
 * Used when computing Coefficient of Internal Consistency by quiz statistics.
 *
 * @return float the sum of the per-slot mark variances (0 if nothing has been calculated yet).
 */
public function get_sum_of_mark_variance() {
    return $this->sumofmarkvariance;
}
274
/**
 * Get the latest step data from the db, from which we will calculate stats.
 *
 * Each step row carries the usage id, question id, variant, slot, max mark and the mark awarded
 * (fraction * maxmark). The marks are also added up per question usage.
 *
 * @param \qubaid_condition $qubaids Which question usages to get the latest steps for?
 * @return array with two items
 *              - $lateststeps array of latest step data for the question usages
 *              - $summarks    array of total marks for each usage, indexed by usage id
 */
protected function get_latest_steps($qubaids) {
    $fields = "    qas.id,
    qa.questionusageid,
    qa.questionid,
    qa.variant,
    qa.slot,
    qa.maxmark,
    qas.fraction * qa.maxmark as mark";

    $datamapper = new \question_engine_data_mapper();
    $lateststeps = $datamapper->load_questions_usages_latest_steps($qubaids, $this->stats->get_all_slots(), $fields);

    $summarks = array();
    if (!$lateststeps) {
        // Nothing was loaded, so there is nothing to total.
        return array($lateststeps, $summarks);
    }

    foreach ($lateststeps as $step) {
        $usageid = $step->questionusageid;
        $totalsofar = isset($summarks[$usageid]) ? $summarks[$usageid] : 0;
        $summarks[$usageid] = $totalsofar + $step->mark;
    }

    return array($lateststeps, $summarks);
}
307
/**
 * Calculating the stats is a four step process.
 *
 * This is the first step: it is called once for every 'last step' record.
 *
 * Counts the step in $stats->s and adds its mark to $stats->totalmarks and $stats->markarray. The 'other marks'
 * value is added to $stats->totalothermarks and $stats->othermarksarray: for a statistic of position that is the
 * usage's total mark minus the mark for this step, otherwise it is the usage's total mark.
 *
 * @param object $step the state to add to the statistics.
 * @param calculated $stats the question statistics we are accumulating.
 * @param array $summarks of the sum of marks for each question usage, indexed by question usage id
 * @param bool $positionstat whether this is a statistic of position of question.
 * @param bool $dovariantalso do we also want to do the same calculations for this variant?
 */
protected function initial_steps_walker($step, $stats, $summarks, $positionstat = true, $dovariantalso = true) {
    $mark = $step->mark;
    $usagetotal = $summarks[$step->questionusageid];
    $othermarks = $positionstat ? $usagetotal - $mark : $usagetotal;

    $stats->s++;
    $stats->totalmarks += $mark;
    $stats->markarray[] = $mark;
    $stats->totalothermarks += $othermarks;
    $stats->othermarksarray[] = $othermarks;

    if (!$dovariantalso) {
        return;
    }

    // Repeat the same accumulation on the stats for this step's variant (one level only, hence the final false).
    $this->initial_steps_walker($step, $stats->variantstats[$step->variant], $summarks, $positionstat, false);
}
339
/**
 * Then loop through all questions for the first time.
 *
 * Perform some computations on the per-question statistics calculations after
 * we have been through all the step data: the averages, the facility index, and the sorting of the mark arrays
 * that the second pass over the steps relies on. The same is then done for each variant that is kept.
 *
 * Stats with no attempts at all ($stats->s == 0) get averages of 0 instead of triggering a division by zero.
 *
 * NOTE(review): nothing in this class adds to $stats->totalsummarks; presumably it is populated elsewhere or
 * intentionally left at its initial value - confirm.
 *
 * @param calculated $stats question stats to update.
 */
protected function initial_question_walker($stats) {
    if ($stats->s != 0) {
        $stats->markaverage = $stats->totalmarks / $stats->s;
        $stats->othermarkaverage = $stats->totalothermarks / $stats->s;
        $stats->summarksaverage = $stats->totalsummarks / $stats->s;
    } else {
        // No steps were counted for this slot / sub-question / variant. Dividing by zero throws
        // DivisionByZeroError on PHP 8 (and raises a warning on earlier versions), so use zero averages.
        $stats->markaverage = 0;
        $stats->othermarkaverage = 0;
        $stats->summarksaverage = 0;
    }

    if ($stats->maxmark != 0) {
        $stats->facility = $stats->markaverage / $stats->maxmark;
    } else {
        $stats->facility = null;
    }

    sort($stats->markarray, SORT_NUMERIC);
    sort($stats->othermarksarray, SORT_NUMERIC);

    // Here we have collected enough data to make the decision about which questions have variants whose stats we also want to
    // calculate. We delete the initialised structures where they are not needed.
    if (!$stats->get_variants() || !$stats->break_down_by_variant()) {
        $stats->clear_variants();
    }

    foreach ($stats->get_variants() as $variant) {
        $this->initial_question_walker($stats->variantstats[$variant]);
    }
}
374
/**
 * Loop through all last step data again.
 *
 * Now we know the averages, accumulate the data needed to compute the higher
 * moments of the question scores.
 *
 * Each call also removes the first remaining element from $stats->markarray and $stats->othermarksarray, so
 * those arrays are used up as the steps are walked.
 *
 * @param object $step the state to add to the statistics.
 * @param calculated $stats the question statistics we are accumulating.
 * @param float[] $summarks of the sum of marks for each question usage, indexed by question usage id
 */
protected function secondary_steps_walker($step, $stats, $summarks) {
    $usagetotal = $summarks[$step->questionusageid];

    // For a sub-question the 'other mark' is the whole usage total, otherwise this step's mark is excluded.
    $othermarks = $stats->subquestion ? $usagetotal : $usagetotal - $step->mark;

    $markdifference = $step->mark - $stats->markaverage;
    $othermarkdifference = $othermarks - $stats->othermarkaverage;
    $overallmarkdifference = $usagetotal - $stats->summarksaverage;

    // Pair up the next values from the two sorted arrays (they were sorted by initial_question_walker), which
    // is what the 'max' covariance is built from.
    $sortedmarkdifference = array_shift($stats->markarray) - $stats->markaverage;
    $sortedothermarkdifference = array_shift($stats->othermarksarray) - $stats->othermarkaverage;

    $stats->markvariancesum += pow($markdifference, 2);
    $stats->othermarkvariancesum += pow($othermarkdifference, 2);
    $stats->covariancesum += $markdifference * $othermarkdifference;
    $stats->covariancemaxsum += $sortedmarkdifference * $sortedothermarkdifference;
    $stats->covariancewithoverallmarksum += $markdifference * $overallmarkdifference;

    if (!isset($stats->variantstats[$step->variant])) {
        return;
    }

    // Stats are also being kept for this variant, so accumulate into those as well.
    $this->secondary_steps_walker($step, $stats->variantstats[$step->variant], $summarks);
}
407
/**
 * And finally loop through all the questions again.
 *
 * Perform more per-question statistics calculations: turn the sums accumulated by the second pass over the steps
 * into variances, covariances and the standard deviation (all null when there are fewer than two attempts), then
 * derive the discrimination index and the discriminative efficiency. The same is done for every variant.
 *
 * @param calculated $stats question stats to update.
 */
protected function secondary_question_walker($stats) {
    if ($stats->s <= 1) {
        // Too few attempts for these figures to be defined.
        $stats->markvariance = null;
        $stats->othermarkvariance = null;
        $stats->covariance = null;
        $stats->covariancemax = null;
        $stats->covariancewithoverallmark = null;
        $stats->sd = null;
        $stats->negcovar = 0;
    } else {
        $degreesoffreedom = $stats->s - 1;

        $stats->markvariance = $stats->markvariancesum / $degreesoffreedom;
        $stats->othermarkvariance = $stats->othermarkvariancesum / $degreesoffreedom;
        $stats->covariance = $stats->covariancesum / $degreesoffreedom;
        $stats->covariancemax = $stats->covariancemaxsum / $degreesoffreedom;
        $stats->covariancewithoverallmark = $stats->covariancewithoverallmarksum / $degreesoffreedom;
        $stats->sd = sqrt($stats->markvariancesum / $degreesoffreedom);

        // Flag a negative covariance with the overall mark.
        $stats->negcovar = ($stats->covariancewithoverallmark >= 0) ? 0 : 1;
    }

    // Both ratios below are only defined when their divisor is non-zero (and not null).
    $varianceproduct = $stats->markvariance * $stats->othermarkvariance;
    $stats->discriminationindex = $varianceproduct
            ? 100 * $stats->covariance / sqrt($varianceproduct)
            : null;

    $stats->discriminativeefficiency = $stats->covariancemax
            ? 100 * $stats->covariance / $stats->covariancemax
            : null;

    foreach ($stats->variantstats as $variantstat) {
        $this->secondary_question_walker($variantstat);
    }
}
458
/**
 * Given the question data find the average grade that random guesses would get.
 *
 * The work is delegated to the question type object for the question's qtype.
 *
 * @param object $questiondata the full question object.
 * @return float the random guess score for this question.
 */
protected function get_random_guess_score($questiondata) {
    $qtype = \question_bank::get_qtype($questiondata->qtype, false);
    return $qtype->get_random_guess_score($questiondata);
}
469
/**
 * Find time of non-expired statistics in the database.
 *
 * This simply asks the stats collection for the time.
 *
 * @param \qubaid_condition $qubaids Which question usages to look for?
 * @return int|bool Time of cached record that matches this qubaid_condition or false if none found.
 */
public function get_last_calculated_time($qubaids) {
    $lastcalculated = $this->stats->get_last_calculated_time($qubaids);
    return $lastcalculated;
}
479
c3e2e754
JP
/**
 * Load cached statistics from the database.
 *
 * The stats collection held by this calculator loads the cached values into itself and is then returned.
 *
 * @param \qubaid_condition $qubaids Which question usages to load the cached stats for?
 * @return all_calculated_for_qubaid_condition The cached stats.
 */
public function get_cached($qubaids) {
    $stats = $this->stats;
    $stats->get_cached($qubaids);
    return $stats;
}
e68e4ccf 490}