MDL-43479 quiz response analysis : suppress break down by variants
[moodle.git] / question / classes / statistics / questions / calculator.php
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
17 /**
18  * Question statistics calculator class. Used in the quiz statistics report but also available for use elsewhere.
19  *
20  * @package    core
21  * @subpackage questionbank
22  * @copyright  2013 Open University
23  * @author     Jamie Pratt <me@jamiep.org>
24  * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25  */
27 namespace core_question\statistics\questions;
28 defined('MOODLE_INTERNAL') || die();
30 /**
31  * This class has methods to compute the question statistics from the raw data.
32  *
33  * @copyright 2013 Open University
34  * @author    Jamie Pratt <me@jamiep.org>
35  * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
36  */
37 class calculator {
39     /**
40      * @var all_calculated_for_qubaid_condition all the stats calculated for slots and sub-questions and variants of those
41      *                                                  questions.
42      */
43     protected $stats;
45     /**
46      * @var float
47      */
48     protected $sumofmarkvariance = 0;
50     /**
51      * @var array[] keyed by a string representing the pool of questions that this random question draws from.
52      *              string as returned from {@link \core_question\statistics\questions\calculated::random_selector_string}
53      */
54     protected $randomselectors = array();
56     /**
57      * @var \progress_trace
58      */
59     protected $progress;
61     protected $statscollectionclassname = '\core_question\statistics\questions\all_calculated_for_qubaid_condition';
63     /**
64      * Constructor.
65      *
66      * @param object[] questions to analyze, keyed by slot, also analyses sub questions for random questions.
67      *                              we expect some extra fields - slot, maxmark and number on the full question data objects.
68      * @param \core\progress\base|null $progress the element to send progress messages to, default is {@link \core\progress\null}.
69      */
70     public function __construct($questions, $progress = null) {
72         if ($progress === null) {
73             $progress = new \core\progress\null();
74         }
75         $this->progress = $progress;
76         $this->stats = new $this->statscollectionclassname();
77         foreach ($questions as $slot => $question) {
78             $this->stats->initialise_for_slot($slot, $question);
79             $this->stats->for_slot($slot)->randomguessscore = $this->get_random_guess_score($question);
80         }
81     }
83     /**
84      * @param $qubaids \qubaid_condition
85      * @return all_calculated_for_qubaid_condition
86      */
87     public function calculate($qubaids) {
89         $this->progress->start_progress('', 6);
91         list($lateststeps, $summarks) = $this->get_latest_steps($qubaids);
93         if ($lateststeps) {
94             $this->progress->start_progress('', count($lateststeps), 1);
95             // Compute the statistics of position, and for random questions, work
96             // out which questions appear in which positions.
97             foreach ($lateststeps as $step) {
99                 $this->progress->increment_progress();
101                 $israndomquestion = ($step->questionid != $this->stats->for_slot($step->slot)->questionid);
102                 $breakdownvariants = !$israndomquestion && $this->stats->for_slot($step->slot)->break_down_by_variant();
103                 // If this is a variant we have not seen before create a place to store stats calculations for this variant.
104                 if ($breakdownvariants && is_null($this->stats->for_slot($step->slot , $step->variant))) {
105                     $question = $this->stats->for_slot($step->slot)->question;
106                     $this->stats->initialise_for_slot($step->slot, $question, $step->variant);
107                     $this->stats->for_slot($step->slot, $step->variant)->randomguessscore =
108                                                                                     $this->get_random_guess_score($question);
109                 }
111                 // Step data walker for main question.
112                 $this->initial_steps_walker($step, $this->stats->for_slot($step->slot), $summarks, true, $breakdownvariants);
114                 // If this is a random question do the calculations for sub question stats.
115                 if ($israndomquestion) {
116                     if (is_null($this->stats->for_subq($step->questionid))) {
117                         $this->stats->initialise_for_subq($step);
118                     } else if ($this->stats->for_subq($step->questionid)->maxmark != $step->maxmark) {
119                         $this->stats->for_subq($step->questionid)->differentweights = true;
120                     }
122                     // If this is a variant of this subq we have not seen before create a place to store stats calculations for it.
123                     if (is_null($this->stats->for_subq($step->questionid, $step->variant))) {
124                         $this->stats->initialise_for_subq($step, $step->variant);
125                     }
127                     $this->initial_steps_walker($step, $this->stats->for_subq($step->questionid), $summarks, false);
129                     // Extra stuff we need to do in this loop for subqs to keep track of where they need to be displayed later.
131                     $number = $this->stats->for_slot($step->slot)->question->number;
132                     $this->stats->for_subq($step->questionid)->usedin[$number] = $number;
134                     // Keep track of which random questions are actually selected from each pool of questions that random
135                     // questions are pulled from.
136                     $randomselectorstring = $this->stats->for_slot($step->slot)->random_selector_string();
137                     if (!isset($this->randomselectors[$randomselectorstring])) {
138                         $this->randomselectors[$randomselectorstring] = array();
139                     }
140                     $this->randomselectors[$randomselectorstring][$step->questionid] = $step->questionid;
141                 }
142             }
143             $this->progress->end_progress();
145             foreach ($this->randomselectors as $key => $notused) {
146                 ksort($this->randomselectors[$key]);
147                 $this->randomselectors[$key] = implode(',', $this->randomselectors[$key]);
148             }
150             $this->stats->subquestions = question_load_questions($this->stats->get_all_subq_ids());
151             // Compute the statistics for sub questions, if there are any.
152             $this->progress->start_progress('', count($this->stats->subquestions), 1);
153             foreach ($this->stats->subquestions as $qid => $subquestion) {
154                 $this->progress->increment_progress();
155                 $subquestion->maxmark = $this->stats->for_subq($qid)->maxmark;
156                 $this->stats->for_subq($qid)->question = $subquestion;
157                 $this->stats->for_subq($qid)->randomguessscore = $this->get_random_guess_score($subquestion);
159                 if ($variants = $this->stats->get_variants_for_subq($qid)) {
160                     foreach ($variants as $variant) {
161                         $this->stats->for_subq($qid, $variant)->question = $subquestion;
162                         $this->stats->for_subq($qid, $variant)->randomguessscore = $this->get_random_guess_score($subquestion);
163                     }
164                     $this->stats->for_subq($qid)->sort_variants();
165                 }
166                 $this->initial_question_walker($this->stats->for_subq($qid));
168                 if ($this->stats->for_subq($qid)->usedin) {
169                     sort($this->stats->for_subq($qid)->usedin, SORT_NUMERIC);
170                     $this->stats->for_subq($qid)->positions = implode(',', $this->stats->for_subq($qid)->usedin);
171                 } else {
172                     $this->stats->for_subq($qid)->positions = '';
173                 }
174             }
175             $this->progress->end_progress();
177             // Finish computing the averages, and put the sub-question data into the
178             // corresponding questions.
180             // This cannot be a foreach loop because we need to have both
181             // $question and $nextquestion available, but apart from that it is
182             // foreach ($this->questions as $qid => $question).
183             $slots = $this->stats->get_all_slots();
184             $this->progress->start_progress('', count($slots), 1);
185             while (list(, $slot) = each($slots)) {
186                 $this->stats->for_slot($slot)->sort_variants();
187                 $this->progress->increment_progress();
188                 $nextslot = current($slots);
190                 $this->initial_question_walker($this->stats->for_slot($slot));
192                 // The rest of this loop is to finish working out where randomly selected question stats should be displayed.
193                 if ($this->stats->for_slot($slot)->question->qtype == 'random') {
194                     $randomselectorstring = $this->stats->for_slot($slot)->random_selector_string();
195                     if ($nextslot &&  ($randomselectorstring == $this->stats->for_slot($nextslot)->random_selector_string())) {
196                         continue; // Next loop iteration.
197                     }
198                     if (isset($this->randomselectors[$randomselectorstring])) {
199                         $this->stats->for_slot($slot)->subquestions = $this->randomselectors[$randomselectorstring];
200                     }
201                 }
202             }
203             $this->progress->end_progress();
205             // Go through the records one more time.
206             $this->progress->start_progress('', count($lateststeps), 1);
207             foreach ($lateststeps as $step) {
208                 $this->progress->increment_progress();
209                 $israndomquestion = ($this->stats->for_slot($step->slot)->question->qtype == 'random');
210                 $this->secondary_steps_walker($step, $this->stats->for_slot($step->slot), $summarks);
212                 if ($israndomquestion) {
213                     $this->secondary_steps_walker($step, $this->stats->for_subq($step->questionid), $summarks);
214                 }
215             }
216             $this->progress->end_progress();
218             $slots = $this->stats->get_all_slots();
219             $this->progress->start_progress('', count($slots), 1);
220             $sumofcovariancewithoverallmark = 0;
221             foreach ($this->stats->get_all_slots() as $slot) {
222                 $this->progress->increment_progress();
223                 $this->secondary_question_walker($this->stats->for_slot($slot));
225                 $this->sumofmarkvariance += $this->stats->for_slot($slot)->markvariance;
227                 if ($this->stats->for_slot($slot)->covariancewithoverallmark >= 0) {
228                     $sumofcovariancewithoverallmark += sqrt($this->stats->for_slot($slot)->covariancewithoverallmark);
229                 }
230             }
231             $this->progress->end_progress();
233             $subqids = $this->stats->get_all_subq_ids();
234             $this->progress->start_progress('', count($subqids), 1);
235             foreach ($subqids as $subqid) {
236                 $this->progress->increment_progress();
237                 $this->secondary_question_walker($this->stats->for_subq($subqid));
238             }
239             $this->progress->end_progress();
241             foreach ($this->stats->get_all_slots() as $slot) {
242                 if ($sumofcovariancewithoverallmark) {
243                     if ($this->stats->for_slot($slot)->negcovar) {
244                         $this->stats->for_slot($slot)->effectiveweight = null;
245                     } else {
246                         $this->stats->for_slot($slot)->effectiveweight =
247                                                         100 * sqrt($this->stats->for_slot($slot)->covariancewithoverallmark) /
248                                                         $sumofcovariancewithoverallmark;
249                     }
250                 } else {
251                     $this->stats->for_slot($slot)->effectiveweight = null;
252                 }
253             }
254             $this->stats->cache($qubaids);
256             // All finished.
257             $this->progress->end_progress();
258         }
259         return $this->stats;
260     }
262     /**
263      * Used when computing Coefficient of Internal Consistency by quiz statistics.
264      *
265      * @return float
266      */
267     public function get_sum_of_mark_variance() {
268         return $this->sumofmarkvariance;
269     }
271     /**
272      * @param $qubaids \qubaid_condition
273      * @return array with two items
274      *              - $lateststeps array of latest step data for the question usages
275      *              - $summarks    array of total marks for each usage, indexed by usage id
276      */
277     protected function get_latest_steps($qubaids) {
278         $dm = new \question_engine_data_mapper();
280         $fields = "    qas.id,
281     qa.questionusageid,
282     qa.questionid,
283     qa.variant,
284     qa.slot,
285     qa.maxmark,
286     qas.fraction * qa.maxmark as mark";
288         $lateststeps = $dm->load_questions_usages_latest_steps($qubaids, $this->stats->get_all_slots(), $fields);
289         $summarks = array();
290         if ($lateststeps) {
291             foreach ($lateststeps as $step) {
292                 if (!isset($summarks[$step->questionusageid])) {
293                     $summarks[$step->questionusageid] = 0;
294                 }
295                 $summarks[$step->questionusageid] += $step->mark;
296             }
297         }
299         return array($lateststeps, $summarks);
300     }
302     /**
303      * Calculating the stats is a four step process.
304      *
305      * We loop through all 'last step' data first.
306      *
307      * Update $stats->totalmarks, $stats->markarray, $stats->totalothermarks
308      * and $stats->othermarksarray to include another state.
309      *
310      * @param object     $step         the state to add to the statistics.
311      * @param calculated $stats        the question statistics we are accumulating.
312      * @param array      $summarks     of the sum of marks for each question usage, indexed by question usage id
313      * @param bool       $positionstat whether this is a statistic of position of question.
314      * @param bool       $dovariantalso do we also want to do the same calculations for this variant?
315      */
316     protected function initial_steps_walker($step, $stats, $summarks, $positionstat = true, $dovariantalso = true) {
317         $stats->s++;
318         $stats->totalmarks += $step->mark;
319         $stats->markarray[] = $step->mark;
321         if ($positionstat) {
322             $stats->totalothermarks += $summarks[$step->questionusageid] - $step->mark;
323             $stats->othermarksarray[] = $summarks[$step->questionusageid] - $step->mark;
325         } else {
326             $stats->totalothermarks += $summarks[$step->questionusageid];
327             $stats->othermarksarray[] = $summarks[$step->questionusageid];
328         }
329         if ($dovariantalso) {
330             $this->initial_steps_walker($step, $stats->variantstats[$step->variant], $summarks, $positionstat, false);
331         }
332     }
334     /**
335      * Then loop through all questions for the first time.
336      *
337      * Perform some computations on the per-question statistics calculations after
338      * we have been through all the step data.
339      *
340      * @param calculated $stats question stats to update.
341      */
342     protected function initial_question_walker($stats) {
343         $stats->markaverage = $stats->totalmarks / $stats->s;
345         if ($stats->maxmark != 0) {
346             $stats->facility = $stats->markaverage / $stats->maxmark;
347         } else {
348             $stats->facility = null;
349         }
351         $stats->othermarkaverage = $stats->totalothermarks / $stats->s;
353         $stats->summarksaverage = $stats->totalsummarks / $stats->s;
355         sort($stats->markarray, SORT_NUMERIC);
356         sort($stats->othermarksarray, SORT_NUMERIC);
358         // Here we have collected enough data to make the decision about which questions have variants whose stats we also want to
359         // calculate. We delete the initialised structures where they are not needed.
360         if (!$stats->get_variants() || !$stats->break_down_by_variant()) {
361             $stats->clear_variants();
362         }
364         foreach ($stats->get_variants() as $variant) {
365             $this->initial_question_walker($stats->variantstats[$variant]);
366         }
367     }
369     /**
370      * Loop through all last step data again.
371      *
372      * Now we know the averages, accumulate the date needed to compute the higher
373      * moments of the question scores.
374      *
375      * @param object $step        the state to add to the statistics.
376      * @param calculated $stats       the question statistics we are accumulating.
377      * @param array  $summarks    of the sum of marks for each question usage, indexed by question usage id
378      */
379     protected function secondary_steps_walker($step, $stats, $summarks) {
380         $markdifference = $step->mark - $stats->markaverage;
381         if ($stats->subquestion) {
382             $othermarkdifference = $summarks[$step->questionusageid] - $stats->othermarkaverage;
383         } else {
384             $othermarkdifference = $summarks[$step->questionusageid] - $step->mark - $stats->othermarkaverage;
385         }
386         $overallmarkdifference = $summarks[$step->questionusageid] - $stats->summarksaverage;
388         $sortedmarkdifference = array_shift($stats->markarray) - $stats->markaverage;
389         $sortedothermarkdifference = array_shift($stats->othermarksarray) - $stats->othermarkaverage;
391         $stats->markvariancesum += pow($markdifference, 2);
392         $stats->othermarkvariancesum += pow($othermarkdifference, 2);
393         $stats->covariancesum += $markdifference * $othermarkdifference;
394         $stats->covariancemaxsum += $sortedmarkdifference * $sortedothermarkdifference;
395         $stats->covariancewithoverallmarksum += $markdifference * $overallmarkdifference;
397         if (isset($stats->variantstats[$step->variant])) {
398             $this->secondary_steps_walker($step, $stats->variantstats[$step->variant], $summarks);
399         }
400     }
402     /**
403      * And finally loop through all the questions again.
404      *
405      * Perform more per-question statistics calculations.
406      *
407      * @param calculated $stats question stats to update.
408      */
409     protected function secondary_question_walker($stats) {
410         if ($stats->s > 1) {
411             $stats->markvariance = $stats->markvariancesum / ($stats->s - 1);
412             $stats->othermarkvariance = $stats->othermarkvariancesum / ($stats->s - 1);
413             $stats->covariance = $stats->covariancesum / ($stats->s - 1);
414             $stats->covariancemax = $stats->covariancemaxsum / ($stats->s - 1);
415             $stats->covariancewithoverallmark = $stats->covariancewithoverallmarksum /
416                 ($stats->s - 1);
417             $stats->sd = sqrt($stats->markvariancesum / ($stats->s - 1));
419             if ($stats->covariancewithoverallmark >= 0) {
420                 $stats->negcovar = 0;
421             } else {
422                 $stats->negcovar = 1;
423             }
424         } else {
425             $stats->markvariance = null;
426             $stats->othermarkvariance = null;
427             $stats->covariance = null;
428             $stats->covariancemax = null;
429             $stats->covariancewithoverallmark = null;
430             $stats->sd = null;
431             $stats->negcovar = 0;
432         }
434         if ($stats->markvariance * $stats->othermarkvariance) {
435             $stats->discriminationindex = 100 * $stats->covariance /
436                 sqrt($stats->markvariance * $stats->othermarkvariance);
437         } else {
438             $stats->discriminationindex = null;
439         }
441         if ($stats->covariancemax) {
442             $stats->discriminativeefficiency = 100 * $stats->covariance /
443                 $stats->covariancemax;
444         } else {
445             $stats->discriminativeefficiency = null;
446         }
448         foreach ($stats->variantstats as $variantstat) {
449             $this->secondary_question_walker($variantstat);
450         }
451     }
453     /**
454      * @param object $questiondata
455      * @return number the random guess score for this question.
456      */
457     protected function get_random_guess_score($questiondata) {
458         return \question_bank::get_qtype(
459             $questiondata->qtype, false)->get_random_guess_score($questiondata);
460     }
462     /**
463      * Find time of non-expired statistics in the database.
464      *
465      * @param $qubaids \qubaid_condition
466      * @return integer|boolean Time of cached record that matches this qubaid_condition or false is non found.
467      */
468     public function get_last_calculated_time($qubaids) {
469         return $this->stats->get_last_calculated_time($qubaids);
470     }
472     /**
473      * Load cached statistics from the database.
474      *
475      * @param $qubaids \qubaid_condition
476      * @return all_calculated_for_qubaid_condition
477      */
478     public function get_cached($qubaids) {
479         $this->stats->get_cached($qubaids);
480         return $this->stats;
481     }