Merge branch 'MDL-41756-master' of https://github.com/jamiepratt/moodle
[moodle.git] / question / classes / statistics / questions / calculator.php
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
17 /**
18  * Question statistics calculator class. Used in the quiz statistics report but also available for use elsewhere.
19  *
20  * @package    core
21  * @subpackage questionbank
22  * @copyright  2013 Open University
23  * @author     Jamie Pratt <me@jamiep.org>
24  * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25  */
27 namespace core_question\statistics\questions;
28 defined('MOODLE_INTERNAL') || die();
30 /**
31  * This class has methods to compute the question statistics from the raw data.
32  *
33  * @copyright 2013 Open University
34  * @author    Jamie Pratt <me@jamiep.org>
35  * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
36  */
37 class calculator {
39     /**
40      * @var calculated[]
41      */
42     public $questionstats = array();
44     /**
45      * @var calculated_for_subquestion[]
46      */
47     public $subquestionstats = array();
49     /**
50      * @var float
51      */
52     protected $sumofmarkvariance = 0;
/**
 * @var array[] selected question ids grouped by random-question pool. The outer key is the string
 *              "{category}/{questiontext}" built from the random question in a slot; each inner array maps
 *              a selected question id to itself and is sorted by key part way through calculate().
 */
54     protected $randomselectors = array();
56     /**
57      * @var \progress_trace
58      */
59     protected $progress;
61     /**
62      * Constructor.
63      *
64      * @param object[] $questions questions to analyse, keyed by slot; sub questions of random questions are also analysed.
65      *                            We expect some extra fields - slot, maxmark and number - on the full question data objects.
66      * @param \core\progress\base|null $progress the element to send progress messages to, default is {@link \core\progress\null}.
67      */
public function __construct($questions, $progress = null) {
    // When the caller supplies no progress object, use the \core\progress\null implementation instead.
    $this->progress = ($progress === null) ? new \core\progress\null() : $progress;

    // Prepare one stats holder per slot, keeping the slot keys of the incoming array.
    foreach ($questions as $slot => $question) {
        $slotstats = $this->new_slot_stats($question, $slot);
        $this->questionstats[$slot] = $slotstats;
    }
}
80     /**
81      * Set up a calculated instance ready to store a question's stats.
82      *
83      * @param $question
84      * @param $slot
85      * @return calculated
86      */
protected function new_slot_stats($question, $slot) {
    $slotstats = new calculated();

    // Identity of the question and where it sits in the usage.
    $slotstats->questionid = $question->id;
    $slotstats->maxmark = $question->maxmark;
    $slotstats->question = $question;
    $slotstats->slot = $slot;
    $slotstats->positions = $question->number;

    // The random guess score is delegated to the question type via get_random_guess_score().
    $slotstats->randomguessscore = $this->get_random_guess_score($question);

    return $slotstats;
}
98     /**
99      * Set up a calculated instance ready to store a randomly selected question's stats.
100      *
101      * @param $step
102      * @return calculated_for_subquestion
103      */
protected function new_subq_stats($step) {
    // Only the id and max mark are known from step data; calculate() fills in the question object later.
    $subqstats = new calculated_for_subquestion();
    $subqstats->questionid = $step->questionid;
    $subqstats->maxmark = $step->maxmark;

    return $subqstats;
}
111     /**
112      * @param $qubaids \qubaid_condition
113      * @return array containing two arrays calculated[] and calculated_for_subquestion[].
114      */
public function calculate($qubaids) {

    // Six nested progress sections follow, each started with a parent weight of 1.
    $this->progress->start_progress('', 6);

    list($lateststeps, $summarks) = $this->get_latest_steps($qubaids);

    if ($lateststeps) {
        $this->progress->start_progress('', count($lateststeps), 1);
        // Compute the statistics of position, and for random questions, work
        // out which questions appear in which positions.
        foreach ($lateststeps as $step) {

            $this->progress->increment_progress();

            // A step whose question id differs from the question in its slot was picked by a random question.
            $israndomquestion = ($step->questionid != $this->questionstats[$step->slot]->questionid);
            // If this is a variant we have not seen before create a place to store stats calculations for this variant.
            if (!$israndomquestion && !isset($this->questionstats[$step->slot]->variantstats[$step->variant])) {
                $this->questionstats[$step->slot]->variantstats[$step->variant] =
                    $this->new_slot_stats($this->questionstats[$step->slot]->question, $step->slot);
                $this->questionstats[$step->slot]->variantstats[$step->variant]->variant = $step->variant;
            }

            // Step data walker for main question.
            $this->initial_steps_walker($step, $this->questionstats[$step->slot], $summarks, true, !$israndomquestion);

            // If this is a random question do the calculations for sub question stats.
            if ($israndomquestion) {
                if (!isset($this->subquestionstats[$step->questionid])) {
                    $this->subquestionstats[$step->questionid] = $this->new_subq_stats($step);
                } else if ($this->subquestionstats[$step->questionid]->maxmark != $step->maxmark) {
                    $this->subquestionstats[$step->questionid]->differentweights = true;
                }

                // If this is a variant of this subq we have not seen before create a place to store stats calculations for it.
                if (!isset($this->subquestionstats[$step->questionid]->variantstats[$step->variant])) {
                    $this->subquestionstats[$step->questionid]->variantstats[$step->variant] = $this->new_subq_stats($step);
                    $this->subquestionstats[$step->questionid]->variantstats[$step->variant]->variant = $step->variant;
                }

                $this->initial_steps_walker($step, $this->subquestionstats[$step->questionid], $summarks, false);

                // Extra stuff we need to do in this loop for subqs to keep track of where they need to be displayed later.

                $number = $this->questionstats[$step->slot]->question->number;
                $this->subquestionstats[$step->questionid]->usedin[$number] = $number;

                // Keep track of which random questions are actually selected from each pool of questions that random
                // questions are pulled from.
                $randomselectorstring = $this->questionstats[$step->slot]->question->category. '/'
                                                                .$this->questionstats[$step->slot]->question->questiontext;
                if (!isset($this->randomselectors[$randomselectorstring])) {
                    $this->randomselectors[$randomselectorstring] = array();
                }
                $this->randomselectors[$randomselectorstring][$step->questionid] = $step->questionid;
            }
        }
        $this->progress->end_progress();

        foreach ($this->randomselectors as $key => $notused) {
            ksort($this->randomselectors[$key]);
        }

        $subquestions = question_load_questions(array_keys($this->subquestionstats));
        // Compute the statistics for sub questions, if there are any.
        $this->progress->start_progress('', count($subquestions), 1);
        foreach ($subquestions as $qid => $subquestion) {
            $this->progress->increment_progress();
            $subquestion->maxmark = $this->subquestionstats[$qid]->maxmark;
            $this->subquestionstats[$qid]->question = $subquestion;
            $this->subquestionstats[$qid]->randomguessscore = $this->get_random_guess_score($subquestion);

            foreach ($this->subquestionstats[$qid]->variantstats as $variantstat) {
                $variantstat->question = $subquestion;
                $variantstat->randomguessscore = $this->get_random_guess_score($subquestion);
            }

            $this->initial_question_walker($this->subquestionstats[$qid]);

            if ($this->subquestionstats[$qid]->differentweights) {
                // TODO output here really sucks, but throwing is too severe.
                global $OUTPUT;
                $name = $this->subquestionstats[$qid]->question->name;
                echo $OUTPUT->notification( get_string('erroritemappearsmorethanoncewithdifferentweight',
                                                        'quiz_statistics', $name));
            }

            if ($this->subquestionstats[$qid]->usedin) {
                sort($this->subquestionstats[$qid]->usedin, SORT_NUMERIC);
                $this->subquestionstats[$qid]->positions = implode(',', $this->subquestionstats[$qid]->usedin);
            } else {
                $this->subquestionstats[$qid]->positions = '';
            }
        }
        $this->progress->end_progress();

        // Finish computing the averages, and put the subquestion data into the
        // corresponding questions.

        // We need both the current and the following slot's stats in each iteration, so walk a re-indexed
        // list by position. (The each() function previously used here was removed in PHP 8.)
        $slotstatslist = array_values($this->questionstats);
        $slotcount = count($slotstatslist);
        $this->progress->start_progress('', $slotcount, 1);
        for ($index = 0; $index < $slotcount; $index++) {
            $this->progress->increment_progress();
            $questionstat = $slotstatslist[$index];
            // False when there is no following slot, as current() returned past the end of the array.
            $nextquestionstats = ($index + 1 < $slotcount) ? $slotstatslist[$index + 1] : false;

            $this->initial_question_walker($questionstat);

            // The rest of this loop is again to work out where randomly selected question stats should be displayed.
            if ($questionstat->question->qtype == 'random') {
                $randomselectorstring = $questionstat->question->category .'/'. $questionstat->question->questiontext;
                if ($nextquestionstats && $nextquestionstats->question->qtype == 'random') {
                    $nextrandomselectorstring  =
                        $nextquestionstats->question->category .'/'. $nextquestionstats->question->questiontext;
                    if ($randomselectorstring == $nextrandomselectorstring) {
                        // Only the last of consecutive slots drawing from the same pool lists the sub questions.
                        continue; // Next loop iteration.
                    }
                }
                if (isset($this->randomselectors[$randomselectorstring])) {
                    $questionstat->subquestions = implode(',', $this->randomselectors[$randomselectorstring]);
                }
            }
        }
        $this->progress->end_progress();

        // Go through the records one more time.
        $this->progress->start_progress('', count($lateststeps), 1);
        foreach ($lateststeps as $step) {
            $this->progress->increment_progress();
            $israndomquestion = ($this->questionstats[$step->slot]->question->qtype == 'random');
            $this->secondary_steps_walker($step, $this->questionstats[$step->slot], $summarks, !$israndomquestion);

            // NOTE(review): subquestions is only set on the last of consecutive same-pool random slots (see above),
            // so steps from the earlier slots skip this sub question pass - confirm that is intended.
            if ($this->questionstats[$step->slot]->subquestions) {
                $this->secondary_steps_walker($step, $this->subquestionstats[$step->questionid], $summarks);
            }
        }
        $this->progress->end_progress();

        $this->progress->start_progress('', count($this->questionstats), 1);
        $sumofcovariancewithoverallmark = 0;
        foreach ($this->questionstats as $questionstat) {
            $this->progress->increment_progress();
            $this->secondary_question_walker($questionstat);

            $this->sumofmarkvariance += $questionstat->markvariance;

            if ($questionstat->covariancewithoverallmark >= 0) {
                $sumofcovariancewithoverallmark += sqrt($questionstat->covariancewithoverallmark);
            }
        }
        $this->progress->end_progress();

        $this->progress->start_progress('', count($this->subquestionstats), 1);
        foreach ($this->subquestionstats as $subquestionstat) {
            $this->progress->increment_progress();
            $this->secondary_question_walker($subquestionstat);
        }
        $this->progress->end_progress();

        // Effective weight is each slot's share of the summed square-rooted covariances, as a percentage.
        foreach ($this->questionstats as $questionstat) {
            if ($sumofcovariancewithoverallmark) {
                if ($questionstat->negcovar) {
                    $questionstat->effectiveweight = null;
                } else {
                    $questionstat->effectiveweight = 100 * sqrt($questionstat->covariancewithoverallmark) /
                        $sumofcovariancewithoverallmark;
                }
            } else {
                $questionstat->effectiveweight = null;
            }
        }
        $this->cache_stats($qubaids);
    }

    // All finished. Close the outer progress section even when there was no step data to process, so that
    // every start_progress() call is matched by an end_progress() call.
    $this->progress->end_progress();

    return array($this->questionstats, $this->subquestionstats);
}
296     /**
297      * Load cached statistics from the database.
298      *
299      * @param $qubaids \qubaid_condition
300      * @return array containing two arrays calculated[] and calculated_for_subquestion[].
301      */
public function get_cached($qubaids) {
    global $DB;

    // Only rows newer than the cache lifetime are considered.
    $oldestallowed = time() - self::TIME_TO_CACHE;
    $records = $DB->get_records_select('question_statistics', 'hashcode = ? AND timemodified > ?',
                                     array($qubaids->get_hash_code(), $oldestallowed));

    // Rows with neither a variant nor a slot are the sub question totals; load those question definitions.
    $subquestionids = array();
    foreach ($records as $record) {
        if ($record->variant === null && !$record->slot) {
            $subquestionids[] = $record->questionid;
        }
    }
    $subquestions = question_load_questions($subquestionids);

    // First pass: the per-question (non-variant) rows.
    foreach ($records as $record) {
        if ($record->variant !== null) {
            continue;
        }
        if ($record->slot) {
            // The slot entry was created in the constructor and already holds its question.
            $this->questionstats[$record->slot]->populate_from_record($record);
        } else {
            $subqstats = new calculated_for_subquestion();
            $this->subquestionstats[$record->questionid] = $subqstats;
            $subqstats->populate_from_record($record);
            $subqstats->question = $subquestions[$record->questionid];
        }
    }

    // Second pass: attach the cached variant rows to the entries filled in above.
    foreach ($records as $record) {
        if ($record->variant === null) {
            continue;
        }
        if ($record->slot) {
            $variantstats = new calculated();
            $this->questionstats[$record->slot]->variantstats[$record->variant] = $variantstats;
            $variantstats->question = $this->questionstats[$record->slot]->question;
        } else {
            $variantstats = new calculated_for_subquestion();
            $this->subquestionstats[$record->questionid]->variantstats[$record->variant] = $variantstats;
            $variantstats->question = $subquestions[$record->questionid];
        }
        $variantstats->populate_from_record($record);
    }

    return array($this->questionstats, $this->subquestionstats);
}
345     /**
346      * Find time of non-expired statistics in the database.
347      *
348      * @param $qubaids \qubaid_condition
349      * @return integer|boolean Time of cached record that matches this qubaid_condition or false if none found.
350      */
public function get_last_calculated_time($qubaids) {
    global $DB;

    // Anything modified at or before this moment is older than TIME_TO_CACHE and is ignored.
    $oldestallowed = time() - self::TIME_TO_CACHE;
    $select = 'hashcode = ? AND timemodified > ?';
    $params = array($qubaids->get_hash_code(), $oldestallowed);

    // Several rows may match; IGNORE_MULTIPLE is passed so that is not treated as an error.
    return $DB->get_field_select('question_statistics', 'timemodified', $select, $params, IGNORE_MULTIPLE);
}
359     /** @var integer Time after which statistics are automatically recomputed. */
360     const TIME_TO_CACHE = 900; // 15 minutes.
362     /**
363      * Used when computing Coefficient of Internal Consistency by quiz statistics.
364      *
365      * @return float
366      */
public function get_sum_of_mark_variance() {
    // Running total that calculate() builds from the mark variance of each slot.
    $total = $this->sumofmarkvariance;
    return $total;
}
371     /**
372      * @param $qubaids \qubaid_condition
373      * @return array with two items
374      *              - $lateststeps array of latest step data for the question usages
375      *              - $summarks    array of total marks for each usage, indexed by usage id
376      */
protected function get_latest_steps($qubaids) {
    $datamapper = new \question_engine_data_mapper();

    // Columns requested for each latest step; mark is the step fraction scaled by the attempt's max mark.
    $fields = "    qas.id,
    qa.questionusageid,
    qa.questionid,
    qa.variant,
    qa.slot,
    qa.maxmark,
    qas.fraction * qa.maxmark as mark";

    $slots = array_keys($this->questionstats);
    $lateststeps = $datamapper->load_questions_usages_latest_steps($qubaids, $slots, $fields);

    // Total the marks of every step belonging to the same question usage.
    $summarks = array();
    if ($lateststeps) {
        foreach ($lateststeps as $step) {
            $usageid = $step->questionusageid;
            if (isset($summarks[$usageid])) {
                $summarks[$usageid] += $step->mark;
            } else {
                $summarks[$usageid] = 0 + $step->mark;
            }
        }
    }

    return array($lateststeps, $summarks);
}
402     /**
403      * Update $stats->totalmarks, $stats->markarray, $stats->totalothermarks
404      * and $stats->othermarksarray to include another state.
405      *
406      * @param object     $step         the state to add to the statistics.
407      * @param calculated $stats        the question statistics we are accumulating.
408      * @param array      $summarks     of the sum of marks for each question usage, indexed by question usage id
409      * @param bool       $positionstat whether this is a statistic of position of question.
410      * @param bool       $dovariantalso do we also want to do the same calculations for this variant?
411      */
protected function initial_steps_walker($step, $stats, $summarks, $positionstat = true, $dovariantalso = true) {
    // Count this step and record its mark.
    $stats->s++;
    $stats->totalmarks += $step->mark;
    $stats->markarray[] = $step->mark;

    // For a position statistic the "other" mark is the usage total minus this step's own mark;
    // otherwise the whole usage total is used.
    $othermark = $summarks[$step->questionusageid];
    if ($positionstat) {
        $othermark = $othermark - $step->mark;
    }
    $stats->totalothermarks += $othermark;
    $stats->othermarksarray[] = $othermark;

    if (!$dovariantalso) {
        return;
    }
    // Repeat the accumulation once for the matching variant, without recursing any further.
    $this->initial_steps_walker($step, $stats->variantstats[$step->variant], $summarks, $positionstat, false);
}
431     /**
432      * Perform some computations on the per-question statistics calculations after
433      * we have been through all the step data.
434      *
435      * @param calculated $stats question stats to update.
436      * @param bool       $dovariantsalso do we also want to do the same calculations for the variants?
437      */
protected function initial_question_walker($stats, $dovariantsalso = true) {
    // Turn the accumulated totals into averages over the $stats->s steps seen.
    $stats->markaverage = $stats->totalmarks / $stats->s;

    // Facility is the average mark as a fraction of the maximum; undefined when the maximum is zero.
    $stats->facility = ($stats->maxmark != 0) ? $stats->markaverage / $stats->maxmark : null;

    $stats->othermarkaverage = $stats->totalothermarks / $stats->s;
    $stats->summarksaverage = $stats->totalsummarks / $stats->s;

    // Both arrays are consumed in ascending order by secondary_steps_walker().
    sort($stats->markarray, SORT_NUMERIC);
    sort($stats->othermarksarray, SORT_NUMERIC);

    if (!$dovariantsalso) {
        return;
    }
    foreach ($stats->variantstats as $variantstat) {
        // Variants hold no nested variants, so stop the recursion here.
        $this->initial_question_walker($variantstat, false);
    }
}
461     /**
462      * Now we know the averages, accumulate the data needed to compute the higher
463      * moments of the question scores.
464      *
465      * @param object $step        the state to add to the statistics.
466      * @param calculated $stats       the question statistics we are accumulating.
467      * @param array  $summarks    of the sum of marks for each question usage, indexed by question usage id
468      * @param bool   $dovariantalso do we also want to do the same calculations for the variant?
469      */
protected function secondary_steps_walker($step, $stats, $summarks, $dovariantalso = true) {
    $usagetotal = $summarks[$step->questionusageid];

    // Deviation of this step's mark from the average mark.
    $markdifference = $step->mark - $stats->markaverage;

    // Sub question stats compare against the whole usage total; otherwise this step's own mark is removed first.
    $othermark = $stats->subquestion ? $usagetotal : $usagetotal - $step->mark;
    $othermarkdifference = $othermark - $stats->othermarkaverage;

    $overallmarkdifference = $usagetotal - $stats->summarksaverage;

    // Take the next smallest value from each pre-sorted array; these pairs feed the covariancemaxsum term.
    $sortedmarkdifference = array_shift($stats->markarray) - $stats->markaverage;
    $sortedothermarkdifference = array_shift($stats->othermarksarray) - $stats->othermarkaverage;

    $stats->markvariancesum += pow($markdifference, 2);
    $stats->othermarkvariancesum += pow($othermarkdifference, 2);
    $stats->covariancesum += $markdifference * $othermarkdifference;
    $stats->covariancemaxsum += $sortedmarkdifference * $sortedothermarkdifference;
    $stats->covariancewithoverallmarksum += $markdifference * $overallmarkdifference;

    if (!$dovariantalso) {
        return;
    }
    // Accumulate the same sums for the matching variant, without recursing any further.
    $this->secondary_steps_walker($step, $stats->variantstats[$step->variant], $summarks, false);
}
493     /**
494      * Perform more per-question statistics calculations.
495      *
496      * @param calculated $stats question stats to update.
497      * @param bool       $dovariantsalso do we also want to do the same calculations for the variants?
498      */
protected function secondary_question_walker($stats, $dovariantsalso = true) {

    if ($stats->s > 1) {
        // Every sum is divided by the number of steps minus one.
        $divisor = $stats->s - 1;
        $stats->markvariance = $stats->markvariancesum / $divisor;
        $stats->othermarkvariance = $stats->othermarkvariancesum / $divisor;
        $stats->covariance = $stats->covariancesum / $divisor;
        $stats->covariancemax = $stats->covariancemaxsum / $divisor;
        $stats->covariancewithoverallmark = $stats->covariancewithoverallmarksum / $divisor;
        $stats->sd = sqrt($stats->markvariancesum / $divisor);

        // Flag a negative covariance with the overall mark.
        $stats->negcovar = ($stats->covariancewithoverallmark >= 0) ? 0 : 1;
    } else {
        // With fewer than two steps none of these can be computed.
        $undefined = array('markvariance', 'othermarkvariance', 'covariance', 'covariancemax',
                'covariancewithoverallmark', 'sd');
        foreach ($undefined as $field) {
            $stats->$field = null;
        }
        $stats->negcovar = 0;
    }

    // Discrimination index: covariance scaled by the root of the variance product, as a percentage.
    $varianceproduct = $stats->markvariance * $stats->othermarkvariance;
    if ($varianceproduct) {
        $stats->discriminationindex = 100 * $stats->covariance / sqrt($varianceproduct);
    } else {
        $stats->discriminationindex = null;
    }

    // Discriminative efficiency: covariance as a percentage of covariancemax.
    if ($stats->covariancemax) {
        $stats->discriminativeefficiency = 100 * $stats->covariance / $stats->covariancemax;
    } else {
        $stats->discriminativeefficiency = null;
    }

    if (!$dovariantsalso) {
        return;
    }
    foreach ($stats->variantstats as $variantstat) {
        // Variants hold no nested variants, so stop the recursion here.
        $this->secondary_question_walker($variantstat, false);
    }
}
547     /**
548      * @param object $questiondata
549      * @return number the random guess score for this question.
550      */
protected function get_random_guess_score($questiondata) {
    // Look up the question type object by name and let it work out the score.
    $qtype = \question_bank::get_qtype($questiondata->qtype, false);
    return $qtype->get_random_guess_score($questiondata);
}
556     /**
557      * @param $qubaids \qubaid_condition
558      */
protected function cache_stats($qubaids) {
    // Slot stats are cached first, then sub question stats.
    foreach (array($this->questionstats, $this->subquestionstats) as $statsgroup) {
        foreach ($statsgroup as $stat) {
            $stat->cache($qubaids);
        }
    }
}