Merge branch 'MDL-61950-master' of git://github.com/ryanwyllie/moodle
[moodle.git] / question / classes / statistics / questions / calculator.php
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
17 /**
18  * Question statistics calculator class. Used in the quiz statistics report but also available for use elsewhere.
19  *
20  * @package    core
21  * @subpackage questionbank
22  * @copyright  2013 Open University
23  * @author     Jamie Pratt <me@jamiep.org>
24  * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25  */
27 namespace core_question\statistics\questions;
28 defined('MOODLE_INTERNAL') || die();
30 /**
31  * This class has methods to compute the question statistics from the raw data.
32  *
33  * @copyright 2013 Open University
34  * @author    Jamie Pratt <me@jamiep.org>
35  * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
36  */
37 class calculator {
39     /**
40      * @var all_calculated_for_qubaid_condition all the stats calculated for slots and sub-questions and variants of those
41      *                                                  questions.
42      */
43     protected $stats;
    /**
     * @var float running total of the mark variance of every slot, added to by {@link calculate()}.
     */
48     protected $sumofmarkvariance = 0;
50     /**
51      * @var array[] keyed by a string representing the pool of questions that this random question draws from.
52      *              string as returned from {@link \core_question\statistics\questions\calculated::random_selector_string}
53      */
54     protected $randomselectors = array();
    /**
     * @var \core\progress\base the element progress messages are sent to.
     */
59     protected $progress;
61     /**
62      * @var string The class name of the class to instantiate to store statistics calculated.
63      */
64     protected $statscollectionclassname = '\core_question\statistics\questions\all_calculated_for_qubaid_condition';
66     /**
67      * Constructor.
68      *
69      * @param object[] questions to analyze, keyed by slot, also analyses sub questions for random questions.
70      *                              we expect some extra fields - slot, maxmark and number on the full question data objects.
71      * @param \core\progress\base|null $progress the element to send progress messages to, default is {@link \core\progress\none}.
72      */
73     public function __construct($questions, $progress = null) {
75         if ($progress === null) {
76             $progress = new \core\progress\none();
77         }
78         $this->progress = $progress;
79         $this->stats = new $this->statscollectionclassname();
80         foreach ($questions as $slot => $question) {
81             $this->stats->initialise_for_slot($slot, $question);
82             $this->stats->for_slot($slot)->randomguessscore = $this->get_random_guess_score($question);
83         }
84     }
86     /**
87      * Calculate the stats.
88      *
89      * @param \qubaid_condition $qubaids Which question usages to calculate the stats for?
90      * @return all_calculated_for_qubaid_condition The calculated stats.
91      */
92     public function calculate($qubaids) {
94         $this->progress->start_progress('', 6);
96         list($lateststeps, $summarks) = $this->get_latest_steps($qubaids);
98         if ($lateststeps) {
99             $this->progress->start_progress('', count($lateststeps), 1);
100             // Compute the statistics of position, and for random questions, work
101             // out which questions appear in which positions.
102             foreach ($lateststeps as $step) {
104                 $this->progress->increment_progress();
106                 $israndomquestion = ($step->questionid != $this->stats->for_slot($step->slot)->questionid);
107                 $breakdownvariants = !$israndomquestion && $this->stats->for_slot($step->slot)->break_down_by_variant();
108                 // If this is a variant we have not seen before create a place to store stats calculations for this variant.
109                 if ($breakdownvariants && !$this->stats->has_slot($step->slot, $step->variant)) {
110                     $question = $this->stats->for_slot($step->slot)->question;
111                     $this->stats->initialise_for_slot($step->slot, $question, $step->variant);
112                     $this->stats->for_slot($step->slot, $step->variant)->randomguessscore =
113                                                                                     $this->get_random_guess_score($question);
114                 }
116                 // Step data walker for main question.
117                 $this->initial_steps_walker($step, $this->stats->for_slot($step->slot), $summarks, true, $breakdownvariants);
119                 // If this is a random question do the calculations for sub question stats.
120                 if ($israndomquestion) {
121                     if (!$this->stats->has_subq($step->questionid)) {
122                         $this->stats->initialise_for_subq($step);
123                     } else if ($this->stats->for_subq($step->questionid)->maxmark != $step->maxmark) {
124                         $this->stats->for_subq($step->questionid)->differentweights = true;
125                     }
127                     // If this is a variant of this subq we have not seen before create a place to store stats calculations for it.
128                     if (!$this->stats->has_subq($step->questionid, $step->variant)) {
129                         $this->stats->initialise_for_subq($step, $step->variant);
130                     }
132                     $this->initial_steps_walker($step, $this->stats->for_subq($step->questionid), $summarks, false);
134                     // Extra stuff we need to do in this loop for subqs to keep track of where they need to be displayed later.
136                     $number = $this->stats->for_slot($step->slot)->question->number;
137                     $this->stats->for_subq($step->questionid)->usedin[$number] = $number;
139                     // Keep track of which random questions are actually selected from each pool of questions that random
140                     // questions are pulled from.
141                     $randomselectorstring = $this->stats->for_slot($step->slot)->random_selector_string();
142                     if (!isset($this->randomselectors[$randomselectorstring])) {
143                         $this->randomselectors[$randomselectorstring] = array();
144                     }
145                     $this->randomselectors[$randomselectorstring][$step->questionid] = $step->questionid;
146                 }
147             }
148             $this->progress->end_progress();
150             foreach ($this->randomselectors as $key => $notused) {
151                 ksort($this->randomselectors[$key]);
152                 $this->randomselectors[$key] = implode(',', $this->randomselectors[$key]);
153             }
155             $this->stats->subquestions = question_load_questions($this->stats->get_all_subq_ids());
156             // Compute the statistics for sub questions, if there are any.
157             $this->progress->start_progress('', count($this->stats->subquestions), 1);
158             foreach ($this->stats->subquestions as $qid => $subquestion) {
159                 $this->progress->increment_progress();
160                 $subquestion->maxmark = $this->stats->for_subq($qid)->maxmark;
161                 $this->stats->for_subq($qid)->question = $subquestion;
162                 $this->stats->for_subq($qid)->randomguessscore = $this->get_random_guess_score($subquestion);
164                 if ($variants = $this->stats->for_subq($qid)->get_variants()) {
165                     foreach ($variants as $variant) {
166                         $this->stats->for_subq($qid, $variant)->question = $subquestion;
167                         $this->stats->for_subq($qid, $variant)->randomguessscore = $this->get_random_guess_score($subquestion);
168                     }
169                     $this->stats->for_subq($qid)->sort_variants();
170                 }
171                 $this->initial_question_walker($this->stats->for_subq($qid));
173                 if ($this->stats->for_subq($qid)->usedin) {
174                     sort($this->stats->for_subq($qid)->usedin, SORT_NUMERIC);
175                     $this->stats->for_subq($qid)->positions = implode(',', $this->stats->for_subq($qid)->usedin);
176                 } else {
177                     $this->stats->for_subq($qid)->positions = '';
178                 }
179             }
180             $this->progress->end_progress();
182             // Finish computing the averages, and put the sub-question data into the
183             // corresponding questions.
184             $slots = $this->stats->get_all_slots();
185             $totalnumberofslots = count($slots);
186             $maxindex = $totalnumberofslots - 1;
187             $this->progress->start_progress('', $totalnumberofslots, 1);
188             foreach ($slots as $index => $slot) {
189                 $this->stats->for_slot($slot)->sort_variants();
190                 $this->progress->increment_progress();
191                 $nextslotindex = $index + 1;
192                 $nextslot = ($nextslotindex > $maxindex) ? false : $slots[$nextslotindex];
194                 $this->initial_question_walker($this->stats->for_slot($slot));
196                 // The rest of this loop is to finish working out where randomly selected question stats should be displayed.
197                 if ($this->stats->for_slot($slot)->question->qtype == 'random') {
198                     $randomselectorstring = $this->stats->for_slot($slot)->random_selector_string();
199                     if ($nextslot &&  ($randomselectorstring == $this->stats->for_slot($nextslot)->random_selector_string())) {
200                         continue; // Next loop iteration.
201                     }
202                     if (isset($this->randomselectors[$randomselectorstring])) {
203                         $this->stats->for_slot($slot)->subquestions = $this->randomselectors[$randomselectorstring];
204                     }
205                 }
206             }
207             $this->progress->end_progress();
209             // Go through the records one more time.
210             $this->progress->start_progress('', count($lateststeps), 1);
211             foreach ($lateststeps as $step) {
212                 $this->progress->increment_progress();
213                 $israndomquestion = ($this->stats->for_slot($step->slot)->question->qtype == 'random');
214                 $this->secondary_steps_walker($step, $this->stats->for_slot($step->slot), $summarks);
216                 if ($israndomquestion) {
217                     $this->secondary_steps_walker($step, $this->stats->for_subq($step->questionid), $summarks);
218                 }
219             }
220             $this->progress->end_progress();
222             $slots = $this->stats->get_all_slots();
223             $this->progress->start_progress('', count($slots), 1);
224             $sumofcovariancewithoverallmark = 0;
225             foreach ($this->stats->get_all_slots() as $slot) {
226                 $this->progress->increment_progress();
227                 $this->secondary_question_walker($this->stats->for_slot($slot));
229                 $this->sumofmarkvariance += $this->stats->for_slot($slot)->markvariance;
231                 if ($this->stats->for_slot($slot)->covariancewithoverallmark >= 0) {
232                     $sumofcovariancewithoverallmark += sqrt($this->stats->for_slot($slot)->covariancewithoverallmark);
233                 }
234             }
235             $this->progress->end_progress();
237             $subqids = $this->stats->get_all_subq_ids();
238             $this->progress->start_progress('', count($subqids), 1);
239             foreach ($subqids as $subqid) {
240                 $this->progress->increment_progress();
241                 $this->secondary_question_walker($this->stats->for_subq($subqid));
242             }
243             $this->progress->end_progress();
245             foreach ($this->stats->get_all_slots() as $slot) {
246                 if ($sumofcovariancewithoverallmark) {
247                     if ($this->stats->for_slot($slot)->negcovar) {
248                         $this->stats->for_slot($slot)->effectiveweight = null;
249                     } else {
250                         $this->stats->for_slot($slot)->effectiveweight =
251                                                         100 * sqrt($this->stats->for_slot($slot)->covariancewithoverallmark) /
252                                                         $sumofcovariancewithoverallmark;
253                     }
254                 } else {
255                     $this->stats->for_slot($slot)->effectiveweight = null;
256                 }
257             }
258             $this->stats->cache($qubaids);
260             // All finished.
261             $this->progress->end_progress();
262         }
263         return $this->stats;
264     }
266     /**
267      * Used when computing Coefficient of Internal Consistency by quiz statistics.
268      *
269      * @return float
270      */
271     public function get_sum_of_mark_variance() {
272         return $this->sumofmarkvariance;
273     }
275     /**
276      * Get the latest step data from the db, from which we will calculate stats.
277      *
278      * @param \qubaid_condition $qubaids Which question usages to get the latest steps for?
279      * @return array with two items
280      *              - $lateststeps array of latest step data for the question usages
281      *              - $summarks    array of total marks for each usage, indexed by usage id
282      */
283     protected function get_latest_steps($qubaids) {
284         $dm = new \question_engine_data_mapper();
286         $fields = "    qas.id,
287     qa.questionusageid,
288     qa.questionid,
289     qa.variant,
290     qa.slot,
291     qa.maxmark,
292     qas.fraction * qa.maxmark as mark";
294         $lateststeps = $dm->load_questions_usages_latest_steps($qubaids, $this->stats->get_all_slots(), $fields);
295         $summarks = array();
296         if ($lateststeps) {
297             foreach ($lateststeps as $step) {
298                 if (!isset($summarks[$step->questionusageid])) {
299                     $summarks[$step->questionusageid] = 0;
300                 }
301                 $summarks[$step->questionusageid] += $step->mark;
302             }
303         }
305         return array($lateststeps, $summarks);
306     }
308     /**
309      * Calculating the stats is a four step process.
310      *
311      * We loop through all 'last step' data first.
312      *
313      * Update $stats->totalmarks, $stats->markarray, $stats->totalothermarks
314      * and $stats->othermarksarray to include another state.
315      *
316      * @param object     $step         the state to add to the statistics.
317      * @param calculated $stats        the question statistics we are accumulating.
318      * @param array      $summarks     of the sum of marks for each question usage, indexed by question usage id
319      * @param bool       $positionstat whether this is a statistic of position of question.
320      * @param bool       $dovariantalso do we also want to do the same calculations for this variant?
321      */
322     protected function initial_steps_walker($step, $stats, $summarks, $positionstat = true, $dovariantalso = true) {
323         $stats->s++;
324         $stats->totalmarks += $step->mark;
325         $stats->markarray[] = $step->mark;
327         if ($positionstat) {
328             $stats->totalothermarks += $summarks[$step->questionusageid] - $step->mark;
329             $stats->othermarksarray[] = $summarks[$step->questionusageid] - $step->mark;
331         } else {
332             $stats->totalothermarks += $summarks[$step->questionusageid];
333             $stats->othermarksarray[] = $summarks[$step->questionusageid];
334         }
335         if ($dovariantalso) {
336             $this->initial_steps_walker($step, $stats->variantstats[$step->variant], $summarks, $positionstat, false);
337         }
338     }
340     /**
341      * Then loop through all questions for the first time.
342      *
343      * Perform some computations on the per-question statistics calculations after
344      * we have been through all the step data.
345      *
346      * @param calculated $stats question stats to update.
347      */
348     protected function initial_question_walker($stats) {
349         $stats->markaverage = $stats->totalmarks / $stats->s;
351         if ($stats->maxmark != 0) {
352             $stats->facility = $stats->markaverage / $stats->maxmark;
353         } else {
354             $stats->facility = null;
355         }
357         $stats->othermarkaverage = $stats->totalothermarks / $stats->s;
359         $stats->summarksaverage = $stats->totalsummarks / $stats->s;
361         sort($stats->markarray, SORT_NUMERIC);
362         sort($stats->othermarksarray, SORT_NUMERIC);
364         // Here we have collected enough data to make the decision about which questions have variants whose stats we also want to
365         // calculate. We delete the initialised structures where they are not needed.
366         if (!$stats->get_variants() || !$stats->break_down_by_variant()) {
367             $stats->clear_variants();
368         }
370         foreach ($stats->get_variants() as $variant) {
371             $this->initial_question_walker($stats->variantstats[$variant]);
372         }
373     }
375     /**
376      * Loop through all last step data again.
377      *
378      * Now we know the averages, accumulate the date needed to compute the higher
379      * moments of the question scores.
380      *
381      * @param object $step        the state to add to the statistics.
382      * @param calculated $stats       the question statistics we are accumulating.
383      * @param float[]  $summarks    of the sum of marks for each question usage, indexed by question usage id
384      */
385     protected function secondary_steps_walker($step, $stats, $summarks) {
386         $markdifference = $step->mark - $stats->markaverage;
387         if ($stats->subquestion) {
388             $othermarkdifference = $summarks[$step->questionusageid] - $stats->othermarkaverage;
389         } else {
390             $othermarkdifference = $summarks[$step->questionusageid] - $step->mark - $stats->othermarkaverage;
391         }
392         $overallmarkdifference = $summarks[$step->questionusageid] - $stats->summarksaverage;
394         $sortedmarkdifference = array_shift($stats->markarray) - $stats->markaverage;
395         $sortedothermarkdifference = array_shift($stats->othermarksarray) - $stats->othermarkaverage;
397         $stats->markvariancesum += pow($markdifference, 2);
398         $stats->othermarkvariancesum += pow($othermarkdifference, 2);
399         $stats->covariancesum += $markdifference * $othermarkdifference;
400         $stats->covariancemaxsum += $sortedmarkdifference * $sortedothermarkdifference;
401         $stats->covariancewithoverallmarksum += $markdifference * $overallmarkdifference;
403         if (isset($stats->variantstats[$step->variant])) {
404             $this->secondary_steps_walker($step, $stats->variantstats[$step->variant], $summarks);
405         }
406     }
408     /**
409      * And finally loop through all the questions again.
410      *
411      * Perform more per-question statistics calculations.
412      *
413      * @param calculated $stats question stats to update.
414      */
415     protected function secondary_question_walker($stats) {
416         if ($stats->s > 1) {
417             $stats->markvariance = $stats->markvariancesum / ($stats->s - 1);
418             $stats->othermarkvariance = $stats->othermarkvariancesum / ($stats->s - 1);
419             $stats->covariance = $stats->covariancesum / ($stats->s - 1);
420             $stats->covariancemax = $stats->covariancemaxsum / ($stats->s - 1);
421             $stats->covariancewithoverallmark = $stats->covariancewithoverallmarksum /
422                 ($stats->s - 1);
423             $stats->sd = sqrt($stats->markvariancesum / ($stats->s - 1));
425             if ($stats->covariancewithoverallmark >= 0) {
426                 $stats->negcovar = 0;
427             } else {
428                 $stats->negcovar = 1;
429             }
430         } else {
431             $stats->markvariance = null;
432             $stats->othermarkvariance = null;
433             $stats->covariance = null;
434             $stats->covariancemax = null;
435             $stats->covariancewithoverallmark = null;
436             $stats->sd = null;
437             $stats->negcovar = 0;
438         }
440         if ($stats->markvariance * $stats->othermarkvariance) {
441             $stats->discriminationindex = 100 * $stats->covariance /
442                 sqrt($stats->markvariance * $stats->othermarkvariance);
443         } else {
444             $stats->discriminationindex = null;
445         }
447         if ($stats->covariancemax) {
448             $stats->discriminativeefficiency = 100 * $stats->covariance /
449                 $stats->covariancemax;
450         } else {
451             $stats->discriminativeefficiency = null;
452         }
454         foreach ($stats->variantstats as $variantstat) {
455             $this->secondary_question_walker($variantstat);
456         }
457     }
459     /**
460      * Given the question data find the average grade that random guesses would get.
461      *
462      * @param object $questiondata the full question object.
463      * @return float the random guess score for this question.
464      */
465     protected function get_random_guess_score($questiondata) {
466         return \question_bank::get_qtype(
467             $questiondata->qtype, false)->get_random_guess_score($questiondata);
468     }
470     /**
471      * Find time of non-expired statistics in the database.
472      *
473      * @param \qubaid_condition $qubaids Which question usages to look for?
474      * @return int|bool Time of cached record that matches this qubaid_condition or false is non found.
475      */
476     public function get_last_calculated_time($qubaids) {
477         return $this->stats->get_last_calculated_time($qubaids);
478     }
480     /**
481      * Load cached statistics from the database.
482      *
483      * @param \qubaid_condition $qubaids Which question usages to load the cached stats for?
484      * @return all_calculated_for_qubaid_condition The cached stats.
485      */
486     public function get_cached($qubaids) {
487         $this->stats->get_cached($qubaids);
488         return $this->stats;
489     }