MDL-43479 quiz response analysis : suppress break down by variants
[moodle.git] / question / classes / statistics / questions / calculator.php
CommitLineData
e68e4ccf
JP
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
515b3ae6 18 * Question statistics calculator class. Used in the quiz statistics report but also available for use elsewhere.
e68e4ccf
JP
19 *
20 * @package core
21 * @subpackage questionbank
22 * @copyright 2013 Open University
23 * @author Jamie Pratt <me@jamiep.org>
24 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25 */
26
515b3ae6 27namespace core_question\statistics\questions;
e68e4ccf
JP
28defined('MOODLE_INTERNAL') || die();
29
e68e4ccf
JP
30/**
31 * This class has methods to compute the question statistics from the raw data.
32 *
33 * @copyright 2013 Open University
34 * @author Jamie Pratt <me@jamiep.org>
35 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
36 */
515b3ae6 37class calculator {
e68e4ccf
JP
38
39 /**
c3e2e754
JP
40 * @var all_calculated_for_qubaid_condition all the stats calculated for slots and sub-questions and variants of those
41 * questions.
e68e4ccf 42 */
c3e2e754 43 protected $stats;
e68e4ccf
JP
44
45 /**
515b3ae6 46 * @var float
e68e4ccf 47 */
515b3ae6 48 protected $sumofmarkvariance = 0;
e68e4ccf 49
c3e2e754
JP
50 /**
51 * @var array[] keyed by a string representing the pool of questions that this random question draws from.
52 * string as returned from {@link \core_question\statistics\questions\calculated::random_selector_string}
53 */
515b3ae6 54 protected $randomselectors = array();
e68e4ccf 55
8da6fc9d
JP
56 /**
57 * @var \progress_trace
58 */
59 protected $progress;
60
c3e2e754
JP
61 protected $statscollectionclassname = '\core_question\statistics\questions\all_calculated_for_qubaid_condition';
62
515b3ae6
JP
/**
 * Constructor.
 *
 * Creates the stats collection object and registers every supplied question against its slot,
 * storing the random guess score for each slot as it goes.
 *
 * @param object[] $questions questions to analyse, keyed by slot; sub questions of random questions are analysed too.
 *      We expect some extra fields - slot, maxmark and number - on the full question data objects.
 * @param \core\progress\base|null $progress the element to send progress messages to,
 *      default is {@link \core\progress\null}.
 */
public function __construct($questions, $progress = null) {
    // NOTE(review): 'null' is a reserved class name from PHP 7.0 onwards - confirm that \core\progress\null
    // still exists for the PHP version this code targets.
    $this->progress = is_null($progress) ? new \core\progress\null() : $progress;

    // The collection class is configurable through a property so that it can be swapped out.
    $collectionclass = $this->statscollectionclassname;
    $this->stats = new $collectionclass();

    foreach ($questions as $slot => $questiondata) {
        $this->stats->initialise_for_slot($slot, $questiondata);
        $slotstats = $this->stats->for_slot($slot);
        $slotstats->randomguessscore = $this->get_random_guess_score($questiondata);
    }
}
82
/**
 * Calculate the statistics for every slot, sub question and variant, then cache them.
 *
 * The calculation makes two passes over the latest step data and two passes over the questions:
 * first totals are accumulated and averages computed, then the sums needed for variances and
 * covariances are accumulated and the final statistics derived from them.
 *
 * Nothing is calculated or cached when there is no step data; the (empty) stats object is still returned.
 *
 * @param \qubaid_condition $qubaids which question usages to calculate the statistics for.
 * @return all_calculated_for_qubaid_condition the stats collection held by this calculator.
 */
public function calculate($qubaids) {

    $this->progress->start_progress('', 6);

    list($lateststeps, $summarks) = $this->get_latest_steps($qubaids);

    if ($lateststeps) {
        $this->progress->start_progress('', count($lateststeps), 1);
        // Compute the statistics of position, and for random questions, work
        // out which questions appear in which positions.
        foreach ($lateststeps as $step) {

            $this->progress->increment_progress();

            $israndomquestion = ($step->questionid != $this->stats->for_slot($step->slot)->questionid);
            $breakdownvariants = !$israndomquestion && $this->stats->for_slot($step->slot)->break_down_by_variant();
            // If this is a variant we have not seen before create a place to store stats calculations for this variant.
            if ($breakdownvariants && is_null($this->stats->for_slot($step->slot, $step->variant))) {
                $question = $this->stats->for_slot($step->slot)->question;
                $this->stats->initialise_for_slot($step->slot, $question, $step->variant);
                $this->stats->for_slot($step->slot, $step->variant)->randomguessscore =
                        $this->get_random_guess_score($question);
            }

            // Step data walker for main question.
            $this->initial_steps_walker($step, $this->stats->for_slot($step->slot), $summarks, true, $breakdownvariants);

            // If this is a random question do the calculations for sub question stats.
            if ($israndomquestion) {
                if (is_null($this->stats->for_subq($step->questionid))) {
                    $this->stats->initialise_for_subq($step);
                } else if ($this->stats->for_subq($step->questionid)->maxmark != $step->maxmark) {
                    $this->stats->for_subq($step->questionid)->differentweights = true;
                }

                // If this is a variant of this subq we have not seen before create a place to store stats calculations for it.
                if (is_null($this->stats->for_subq($step->questionid, $step->variant))) {
                    $this->stats->initialise_for_subq($step, $step->variant);
                }

                $this->initial_steps_walker($step, $this->stats->for_subq($step->questionid), $summarks, false);

                // Extra stuff we need to do in this loop for subqs to keep track of where they need to be displayed later.

                $number = $this->stats->for_slot($step->slot)->question->number;
                $this->stats->for_subq($step->questionid)->usedin[$number] = $number;

                // Keep track of which random questions are actually selected from each pool of questions that random
                // questions are pulled from.
                $randomselectorstring = $this->stats->for_slot($step->slot)->random_selector_string();
                if (!isset($this->randomselectors[$randomselectorstring])) {
                    $this->randomselectors[$randomselectorstring] = array();
                }
                $this->randomselectors[$randomselectorstring][$step->questionid] = $step->questionid;
            }
        }
        $this->progress->end_progress();

        // Turn each pool's set of selected question ids into a sorted, comma separated list.
        foreach ($this->randomselectors as $key => $notused) {
            ksort($this->randomselectors[$key]);
            $this->randomselectors[$key] = implode(',', $this->randomselectors[$key]);
        }

        $this->stats->subquestions = question_load_questions($this->stats->get_all_subq_ids());
        // Compute the statistics for sub questions, if there are any.
        $this->progress->start_progress('', count($this->stats->subquestions), 1);
        foreach ($this->stats->subquestions as $qid => $subquestion) {
            $this->progress->increment_progress();
            $subquestion->maxmark = $this->stats->for_subq($qid)->maxmark;
            $this->stats->for_subq($qid)->question = $subquestion;
            $this->stats->for_subq($qid)->randomguessscore = $this->get_random_guess_score($subquestion);

            if ($variants = $this->stats->get_variants_for_subq($qid)) {
                foreach ($variants as $variant) {
                    $this->stats->for_subq($qid, $variant)->question = $subquestion;
                    $this->stats->for_subq($qid, $variant)->randomguessscore = $this->get_random_guess_score($subquestion);
                }
                $this->stats->for_subq($qid)->sort_variants();
            }
            $this->initial_question_walker($this->stats->for_subq($qid));

            if ($this->stats->for_subq($qid)->usedin) {
                sort($this->stats->for_subq($qid)->usedin, SORT_NUMERIC);
                $this->stats->for_subq($qid)->positions = implode(',', $this->stats->for_subq($qid)->usedin);
            } else {
                $this->stats->for_subq($qid)->positions = '';
            }
        }
        $this->progress->end_progress();

        // Finish computing the averages, and put the sub-question data into the
        // corresponding questions.

        // We need both the current slot and the one that follows it, so the slots are re-indexed
        // and walked by position. (each() is not used as it was removed in PHP 8.0.)
        $slots = array_values($this->stats->get_all_slots());
        $slotcount = count($slots);
        $this->progress->start_progress('', $slotcount, 1);
        foreach ($slots as $index => $slot) {
            $this->stats->for_slot($slot)->sort_variants();
            $this->progress->increment_progress();
            // False when this is the last slot.
            $nextslot = ($index + 1 < $slotcount) ? $slots[$index + 1] : false;

            $this->initial_question_walker($this->stats->for_slot($slot));

            // The rest of this loop is to finish working out where randomly selected question stats should be displayed.
            if ($this->stats->for_slot($slot)->question->qtype == 'random') {
                $randomselectorstring = $this->stats->for_slot($slot)->random_selector_string();
                if ($nextslot && ($randomselectorstring == $this->stats->for_slot($nextslot)->random_selector_string())) {
                    // The next slot draws from the same pool, so the sub questions get attached there instead.
                    continue; // Next loop iteration.
                }
                if (isset($this->randomselectors[$randomselectorstring])) {
                    $this->stats->for_slot($slot)->subquestions = $this->randomselectors[$randomselectorstring];
                }
            }
        }
        $this->progress->end_progress();

        // Go through the records one more time.
        $this->progress->start_progress('', count($lateststeps), 1);
        foreach ($lateststeps as $step) {
            $this->progress->increment_progress();
            $israndomquestion = ($this->stats->for_slot($step->slot)->question->qtype == 'random');
            $this->secondary_steps_walker($step, $this->stats->for_slot($step->slot), $summarks);

            if ($israndomquestion) {
                $this->secondary_steps_walker($step, $this->stats->for_subq($step->questionid), $summarks);
            }
        }
        $this->progress->end_progress();

        $slots = $this->stats->get_all_slots();
        $this->progress->start_progress('', count($slots), 1);
        $sumofcovariancewithoverallmark = 0;
        foreach ($this->stats->get_all_slots() as $slot) {
            $this->progress->increment_progress();
            $this->secondary_question_walker($this->stats->for_slot($slot));

            $this->sumofmarkvariance += $this->stats->for_slot($slot)->markvariance;

            // Only non-negative covariances contribute to the total used for the effective weights.
            if ($this->stats->for_slot($slot)->covariancewithoverallmark >= 0) {
                $sumofcovariancewithoverallmark += sqrt($this->stats->for_slot($slot)->covariancewithoverallmark);
            }
        }
        $this->progress->end_progress();

        $subqids = $this->stats->get_all_subq_ids();
        $this->progress->start_progress('', count($subqids), 1);
        foreach ($subqids as $subqid) {
            $this->progress->increment_progress();
            $this->secondary_question_walker($this->stats->for_subq($subqid));
        }
        $this->progress->end_progress();

        foreach ($this->stats->get_all_slots() as $slot) {
            if ($sumofcovariancewithoverallmark) {
                if ($this->stats->for_slot($slot)->negcovar) {
                    $this->stats->for_slot($slot)->effectiveweight = null;
                } else {
                    $this->stats->for_slot($slot)->effectiveweight =
                            100 * sqrt($this->stats->for_slot($slot)->covariancewithoverallmark) /
                            $sumofcovariancewithoverallmark;
                }
            } else {
                $this->stats->for_slot($slot)->effectiveweight = null;
            }
        }
        $this->stats->cache($qubaids);
    }

    // All finished. This closes the section opened at the top of this method, so it has to happen
    // whether or not there was any step data to process.
    $this->progress->end_progress();

    return $this->stats;
}
261
515b3ae6
JP
/**
 * Get the sum of the mark variances of all slots, as accumulated by {@link calculate()}.
 *
 * Used when computing Coefficient of Internal Consistency by quiz statistics.
 *
 * @return float
 */
public function get_sum_of_mark_variance() {
    $variancetotal = $this->sumofmarkvariance;
    return $variancetotal;
}
270
/**
 * Load the latest step of every question attempt in the given usages, and total up the marks per usage.
 *
 * @param \qubaid_condition $qubaids which question usages to load the data for.
 * @return array with two items
 *              - $lateststeps array of latest step data for the question usages
 *              - $summarks    array of total marks for each usage, indexed by usage id
 */
protected function get_latest_steps($qubaids) {
    $fields = "    qas.id,
    qa.questionusageid,
    qa.questionid,
    qa.variant,
    qa.slot,
    qa.maxmark,
    qas.fraction * qa.maxmark as mark";

    $datamapper = new \question_engine_data_mapper();
    $lateststeps = $datamapper->load_questions_usages_latest_steps($qubaids, $this->stats->get_all_slots(), $fields);

    $summarks = array();
    if (!$lateststeps) {
        // No step data, so there are no totals to work out.
        return array($lateststeps, $summarks);
    }

    foreach ($lateststeps as $step) {
        $usageid = $step->questionusageid;
        if (!isset($summarks[$usageid])) {
            $summarks[$usageid] = 0;
        }
        $summarks[$usageid] += $step->mark;
    }

    return array($lateststeps, $summarks);
}
301
/**
 * Calculating the stats is a four step process.
 *
 * This is the first step, run once for each item of 'last step' data.
 *
 * Increments $stats->s and updates $stats->totalmarks, $stats->markarray, $stats->totalothermarks
 * and $stats->othermarksarray to include another state.
 *
 * @param object     $step         the state to add to the statistics.
 * @param calculated $stats        the question statistics we are accumulating.
 * @param array      $summarks     of the sum of marks for each question usage, indexed by question usage id
 * @param bool       $positionstat whether this is a statistic of position of question. If so the mark for this
 *                                 step is taken off the usage total to get the 'other' mark, otherwise the
 *                                 whole usage total is used.
 * @param bool       $dovariantalso do we also want to do the same calculations for this variant?
 */
protected function initial_steps_walker($step, $stats, $summarks, $positionstat = true, $dovariantalso = true) {
    $stats->s++;
    $stats->totalmarks += $step->mark;
    $stats->markarray[] = $step->mark;

    $othermark = $summarks[$step->questionusageid];
    if ($positionstat) {
        $othermark -= $step->mark;
    }
    $stats->totalothermarks += $othermark;
    $stats->othermarksarray[] = $othermark;

    if (!$dovariantalso) {
        return;
    }

    // Repeat for the stats of this variant; false for the last argument stops any further recursion.
    $this->initial_steps_walker($step, $stats->variantstats[$step->variant], $summarks, $positionstat, false);
}
333
/**
 * Then loop through all questions for the first time.
 *
 * Perform some computations on the per-question statistics calculations after
 * we have been through all the step data: the averages, the facility, and sorting of the
 * mark arrays. Variant stats that are not wanted are cleared, and the ones that remain are
 * processed in the same way.
 *
 * @param calculated $stats question stats to update.
 */
protected function initial_question_walker($stats) {
    if ($stats->s != 0) {
        $stats->markaverage = $stats->totalmarks / $stats->s;
        $stats->othermarkaverage = $stats->totalothermarks / $stats->s;
        // NOTE(review): nothing in this class adds to totalsummarks - confirm whether it is meant to be accumulated.
        $stats->summarksaverage = $stats->totalsummarks / $stats->s;
    } else {
        // No steps were recorded for this question, so avoid dividing by zero.
        $stats->markaverage = 0;
        $stats->othermarkaverage = 0;
        $stats->summarksaverage = 0;
    }

    if ($stats->maxmark != 0) {
        $stats->facility = $stats->markaverage / $stats->maxmark;
    } else {
        $stats->facility = null;
    }

    sort($stats->markarray, SORT_NUMERIC);
    sort($stats->othermarksarray, SORT_NUMERIC);

    // Here we have collected enough data to make the decision about which questions have variants whose stats we also want to
    // calculate. We delete the initialised structures where they are not needed.
    if (!$stats->get_variants() || !$stats->break_down_by_variant()) {
        $stats->clear_variants();
    }

    foreach ($stats->get_variants() as $variant) {
        $this->initial_question_walker($stats->variantstats[$variant]);
    }
}
368
/**
 * Loop through all last step data again.
 *
 * Now we know the averages, accumulate the data needed to compute the higher
 * moments of the question scores.
 *
 * Each call also removes the first element from $stats->markarray and $stats->othermarksarray.
 *
 * @param object     $step     the state to add to the statistics.
 * @param calculated $stats    the question statistics we are accumulating.
 * @param array      $summarks of the sum of marks for each question usage, indexed by question usage id
 */
protected function secondary_steps_walker($step, $stats, $summarks) {
    $usagetotal = $summarks[$step->questionusageid];

    $markdifference = $step->mark - $stats->markaverage;
    // For sub questions the 'other' mark is the whole usage total, otherwise this step's mark is excluded.
    $othermark = $stats->subquestion ? $usagetotal : $usagetotal - $step->mark;
    $othermarkdifference = $othermark - $stats->othermarkaverage;
    $overallmarkdifference = $usagetotal - $stats->summarksaverage;

    // Pair off the next values from the two mark arrays; these are consumed as we go.
    $sortedmarkdifference = array_shift($stats->markarray) - $stats->markaverage;
    $sortedothermarkdifference = array_shift($stats->othermarksarray) - $stats->othermarkaverage;

    $stats->markvariancesum += pow($markdifference, 2);
    $stats->othermarkvariancesum += pow($othermarkdifference, 2);
    $stats->covariancesum += $markdifference * $othermarkdifference;
    $stats->covariancemaxsum += $sortedmarkdifference * $sortedothermarkdifference;
    $stats->covariancewithoverallmarksum += $markdifference * $overallmarkdifference;

    if (!isset($stats->variantstats[$step->variant])) {
        return;
    }

    // Stats are also being kept for this variant, so accumulate there too.
    $this->secondary_steps_walker($step, $stats->variantstats[$step->variant], $summarks);
}
401
/**
 * And finally loop through all the questions again.
 *
 * Perform more per-question statistics calculations: turn the accumulated sums into variances,
 * covariances and the standard deviation, then derive the discrimination index and the
 * discriminative efficiency. The same is then done for each of the variant stats.
 *
 * @param calculated $stats question stats to update.
 */
protected function secondary_question_walker($stats) {
    if ($stats->s > 1) {
        $divisor = $stats->s - 1;

        $stats->markvariance = $stats->markvariancesum / $divisor;
        $stats->othermarkvariance = $stats->othermarkvariancesum / $divisor;
        $stats->covariance = $stats->covariancesum / $divisor;
        $stats->covariancemax = $stats->covariancemaxsum / $divisor;
        $stats->covariancewithoverallmark = $stats->covariancewithoverallmarksum / $divisor;
        $stats->sd = sqrt($stats->markvariancesum / $divisor);

        $stats->negcovar = ($stats->covariancewithoverallmark >= 0) ? 0 : 1;
    } else {
        // These cannot be calculated from fewer than two values.
        $unavailable = array('markvariance', 'othermarkvariance', 'covariance', 'covariancemax',
                'covariancewithoverallmark', 'sd');
        foreach ($unavailable as $fieldname) {
            $stats->$fieldname = null;
        }
        $stats->negcovar = 0;
    }

    $varianceproduct = $stats->markvariance * $stats->othermarkvariance;
    if ($varianceproduct) {
        $stats->discriminationindex = 100 * $stats->covariance / sqrt($varianceproduct);
    } else {
        $stats->discriminationindex = null;
    }

    $stats->discriminativeefficiency = $stats->covariancemax ?
            100 * $stats->covariance / $stats->covariancemax : null;

    foreach ($stats->variantstats as $variantstat) {
        $this->secondary_question_walker($variantstat);
    }
}
452
/**
 * Ask the question type of the given question for its random guess score.
 *
 * @param object $questiondata the question data, the qtype field says which question type to ask.
 * @return number the random guess score for this question.
 */
protected function get_random_guess_score($questiondata) {
    $qtype = \question_bank::get_qtype($questiondata->qtype, false);
    return $qtype->get_random_guess_score($questiondata);
}
461
/**
 * Find time of non-expired statistics in the database.
 *
 * The look-up itself is done by the stats collection object.
 *
 * @param \qubaid_condition $qubaids
 * @return integer|boolean Time of cached record that matches this qubaid_condition or false if none found.
 */
public function get_last_calculated_time($qubaids) {
    $lastcalculated = $this->stats->get_last_calculated_time($qubaids);
    return $lastcalculated;
}
471
c3e2e754
JP
/**
 * Load cached statistics from the database.
 *
 * The stats collection object held by this calculator does the loading, and is then returned.
 *
 * @param \qubaid_condition $qubaids
 * @return all_calculated_for_qubaid_condition
 */
public function get_cached($qubaids) {
    $collection = $this->stats;
    $collection->get_cached($qubaids);
    return $collection;
}
e68e4ccf 482}