Merge branch 'MDL-43369-master' of git://github.com/jamiepratt/moodle
[moodle.git] / question / classes / statistics / questions / calculator.php
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
17 /**
18  * Question statistics calculator class. Used in the quiz statistics report but also available for use elsewhere.
19  *
20  * @package    core
21  * @subpackage questionbank
22  * @copyright  2013 Open University
23  * @author     Jamie Pratt <me@jamiep.org>
24  * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25  */
27 namespace core_question\statistics\questions;
28 defined('MOODLE_INTERNAL') || die();
30 /**
31  * This class has methods to compute the question statistics from the raw data.
32  *
33  * @copyright 2013 Open University
34  * @author    Jamie Pratt <me@jamiep.org>
35  * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
36  */
37 class calculator {
39     /**
40      * @var calculated[]
41      */
42     public $questionstats = array();
44     /**
45      * @var calculated_for_subquestion[]
46      */
47     public $subquestionstats = array();
49     /**
50      * @var float
51      */
52     protected $sumofmarkvariance = 0;
54     protected $randomselectors = array();
56     /**
57      * Constructor.
58      *
59      * @param object[] questions to analyze, keyed by slot, also analyses sub questions for random questions.
60      *                              we expect some extra fields - slot, maxmark and number on the full question data objects.
61      */
62     public function __construct($questions) {
63         foreach ($questions as $slot => $question) {
64             $this->questionstats[$slot] = new calculated();
65             $this->questionstats[$slot]->questionid = $question->id;
66             $this->questionstats[$slot]->question = $question;
67             $this->questionstats[$slot]->slot = $slot;
68             $this->questionstats[$slot]->positions = $question->number;
69             $this->questionstats[$slot]->maxmark = $question->maxmark;
70             $this->questionstats[$slot]->randomguessscore = $this->get_random_guess_score($question);
71         }
72     }
74     /**
75      * @param $qubaids \qubaid_condition
76      * @return array containing two arrays calculated[] and calculated_for_subquestion[].
77      */
78     public function calculate($qubaids) {
79         \core_php_time_limit::raise();
81         list($lateststeps, $summarks) = $this->get_latest_steps($qubaids);
83         if ($lateststeps) {
85             // Compute the statistics of position, and for random questions, work
86             // out which questions appear in which positions.
87             foreach ($lateststeps as $step) {
88                 $this->initial_steps_walker($step, $this->questionstats[$step->slot], $summarks);
90                 // If this is a random question what is the real item being used?
91                 if ($step->questionid != $this->questionstats[$step->slot]->questionid) {
92                     if (!isset($this->subquestionstats[$step->questionid])) {
93                         $this->subquestionstats[$step->questionid] = new calculated_for_subquestion();
94                         $this->subquestionstats[$step->questionid]->questionid = $step->questionid;
95                         $this->subquestionstats[$step->questionid]->maxmark = $step->maxmark;
96                     } else if ($this->subquestionstats[$step->questionid]->maxmark != $step->maxmark) {
97                         $this->subquestionstats[$step->questionid]->differentweights = true;
98                     }
100                     $this->initial_steps_walker($step, $this->subquestionstats[$step->questionid], $summarks, false);
102                     $number = $this->questionstats[$step->slot]->question->number;
103                     $this->subquestionstats[$step->questionid]->usedin[$number] = $number;
105                     $randomselectorstring = $this->questionstats[$step->slot]->question->category. '/'
106                                                                     .$this->questionstats[$step->slot]->question->questiontext;
107                     if (!isset($this->randomselectors[$randomselectorstring])) {
108                         $this->randomselectors[$randomselectorstring] = array();
109                     }
110                     $this->randomselectors[$randomselectorstring][$step->questionid] = $step->questionid;
111                 }
112             }
114             foreach ($this->randomselectors as $key => $notused) {
115                 ksort($this->randomselectors[$key]);
116             }
118             // Compute the statistics of question id, if we need any.
119             $subquestions = question_load_questions(array_keys($this->subquestionstats));
120             foreach ($subquestions as $qid => $subquestion) {
121                 $this->subquestionstats[$qid]->question = $subquestion;
122                 $this->subquestionstats[$qid]->question->maxmark = $this->subquestionstats[$qid]->maxmark;
123                 $this->subquestionstats[$qid]->randomguessscore = $this->get_random_guess_score($subquestion);
125                 $this->initial_question_walker($this->subquestionstats[$qid]);
127                 if ($this->subquestionstats[$qid]->differentweights) {
128                     // TODO output here really sucks, but throwing is too severe.
129                     global $OUTPUT;
130                     $name = $this->subquestionstats[$qid]->question->name;
131                     echo $OUTPUT->notification( get_string('erroritemappearsmorethanoncewithdifferentweight',
132                                                             'quiz_statistics', $name));
133                 }
135                 if ($this->subquestionstats[$qid]->usedin) {
136                     sort($this->subquestionstats[$qid]->usedin, SORT_NUMERIC);
137                     $this->subquestionstats[$qid]->positions = implode(',', $this->subquestionstats[$qid]->usedin);
138                 } else {
139                     $this->subquestionstats[$qid]->positions = '';
140                 }
141             }
143             // Finish computing the averages, and put the subquestion data into the
144             // corresponding questions.
146             // This cannot be a foreach loop because we need to have both
147             // $question and $nextquestion available, but apart from that it is
148             // foreach ($this->questions as $qid => $question).
149             reset($this->questionstats);
150             while (list($slot, $questionstat) = each($this->questionstats)) {
151                 $nextquestionstats = current($this->questionstats);
153                 $this->initial_question_walker($questionstat);
155                 if ($questionstat->question->qtype == 'random') {
156                     $randomselectorstring = $questionstat->question->category .'/'. $questionstat->question->questiontext;
157                     if ($nextquestionstats && $nextquestionstats->question->qtype == 'random') {
158                         $nextrandomselectorstring  =
159                             $nextquestionstats->question->category .'/'. $nextquestionstats->question->questiontext;
160                         if ($randomselectorstring == $nextrandomselectorstring) {
161                             continue; // Next loop iteration.
162                         }
163                     }
164                     if (isset($this->randomselectors[$randomselectorstring])) {
165                         $questionstat->subquestions = implode(',', $this->randomselectors[$randomselectorstring]);
166                     }
167                 }
168             }
170             // Go through the records one more time.
171             foreach ($lateststeps as $step) {
172                 $this->secondary_steps_walker($step, $this->questionstats[$step->slot], $summarks);
174                 if ($this->questionstats[$step->slot]->subquestions) {
175                     $this->secondary_steps_walker($step, $this->subquestionstats[$step->questionid], $summarks);
176                 }
177             }
179             $sumofcovariancewithoverallmark = 0;
180             foreach ($this->questionstats as $questionstat) {
181                 $this->secondary_question_walker($questionstat);
183                 $this->sumofmarkvariance += $questionstat->markvariance;
185                 if ($questionstat->covariancewithoverallmark >= 0) {
186                     $sumofcovariancewithoverallmark += sqrt($questionstat->covariancewithoverallmark);
187                 }
188             }
190             foreach ($this->subquestionstats as $subquestionstat) {
191                 $this->secondary_question_walker($subquestionstat);
192             }
194             foreach ($this->questionstats as $questionstat) {
195                 if ($sumofcovariancewithoverallmark) {
196                     if ($questionstat->negcovar) {
197                         $questionstat->effectiveweight = null;
198                     } else {
199                         $questionstat->effectiveweight = 100 * sqrt($questionstat->covariancewithoverallmark) /
200                             $sumofcovariancewithoverallmark;
201                     }
202                 } else {
203                     $questionstat->effectiveweight = null;
204                 }
205             }
206             $this->cache_stats($qubaids);
207         }
208         return array($this->questionstats, $this->subquestionstats);
209     }
211     /**
212      * Load cached statistics from the database.
213      *
214      * @param $qubaids \qubaid_condition
215      * @return array containing two arrays calculated[] and calculated_for_subquestion[].
216      */
217     public function get_cached($qubaids) {
218         global $DB;
219         $timemodified = time() - self::TIME_TO_CACHE;
220         $questionstatrecs = $DB->get_records_select('question_statistics', 'hashcode = ? AND timemodified > ?',
221                                          array($qubaids->get_hash_code(), $timemodified));
223         $questionids = array();
224         foreach ($questionstatrecs as $fromdb) {
225             if (!$fromdb->slot) {
226                 $questionids[] = $fromdb->questionid;
227             }
228         }
229         $subquestions = question_load_questions($questionids);
230         foreach ($questionstatrecs as $fromdb) {
231             if ($fromdb->slot) {
232                 $this->questionstats[$fromdb->slot]->populate_from_record($fromdb);
233                 // Array created in constructor and populated from question.
234             } else {
235                 $this->subquestionstats[$fromdb->questionid] = new calculated_for_subquestion();
236                 $this->subquestionstats[$fromdb->questionid]->populate_from_record($fromdb);
237                 $this->subquestionstats[$fromdb->questionid]->question = $subquestions[$fromdb->questionid];
238             }
239         }
240         return array($this->questionstats, $this->subquestionstats);
241     }
243     /**
244      * Find time of non-expired statistics in the database.
245      *
246      * @param $qubaids \qubaid_condition
247      * @return integer|boolean Time of cached record that matches this qubaid_condition or false is non found.
248      */
249     public function get_last_calculated_time($qubaids) {
250         global $DB;
252         $timemodified = time() - self::TIME_TO_CACHE;
253         return $DB->get_field_select('question_statistics', 'timemodified', 'hashcode = ? AND timemodified > ?',
254                                      array($qubaids->get_hash_code(), $timemodified), IGNORE_MULTIPLE);
255     }
257     /** @var integer Time after which statistics are automatically recomputed. */
258     const TIME_TO_CACHE = 900; // 15 minutes.
260     /**
261      * Used when computing Coefficient of Internal Consistency by quiz statistics.
262      *
263      * @return float
264      */
265     public function get_sum_of_mark_variance() {
266         return $this->sumofmarkvariance;
267     }
269     /**
270      * @param $qubaids \qubaid_condition
271      * @return array with two items
272      *              - $lateststeps array of latest step data for the question usages
273      *              - $summarks    array of total marks for each usage, indexed by usage id
274      */
275     protected function get_latest_steps($qubaids) {
276         $dm = new \question_engine_data_mapper();
278         $fields = "    qas.id,
279     qa.questionusageid,
280     qa.questionid,
281     qa.slot,
282     qa.maxmark,
283     qas.fraction * qa.maxmark as mark";
285         $lateststeps = $dm->load_questions_usages_latest_steps($qubaids, array_keys($this->questionstats), $fields);
286         $summarks = array();
287         if ($lateststeps) {
288             foreach ($lateststeps as $step) {
289                 if (!isset($summarks[$step->questionusageid])) {
290                     $summarks[$step->questionusageid] = 0;
291                 }
292                 $summarks[$step->questionusageid] += $step->mark;
293             }
294         }
296         return array($lateststeps, $summarks);
297     }
299     /**
300      * Update $stats->totalmarks, $stats->markarray, $stats->totalothermarks
301      * and $stats->othermarksarray to include another state.
302      *
303      * @param object $step         the state to add to the statistics.
304      * @param calculated $stats        the question statistics we are accumulating.
305      * @param array  $summarks     of the sum of marks for each question usage, indexed by question usage id
306      * @param bool   $positionstat whether this is a statistic of position of question.
307      */
308     protected function initial_steps_walker($step, $stats, $summarks, $positionstat = true) {
309         $stats->s++;
310         $stats->totalmarks += $step->mark;
311         $stats->markarray[] = $step->mark;
313         if ($positionstat) {
314             $stats->totalothermarks += $summarks[$step->questionusageid] - $step->mark;
315             $stats->othermarksarray[] = $summarks[$step->questionusageid] - $step->mark;
317         } else {
318             $stats->totalothermarks += $summarks[$step->questionusageid];
319             $stats->othermarksarray[] = $summarks[$step->questionusageid];
320         }
321     }
323     /**
324      * Perform some computations on the per-question statistics calculations after
325      * we have been through all the states.
326      *
327      * @param calculated $stats question stats to update.
328      */
329     protected function initial_question_walker($stats) {
330         $stats->markaverage = $stats->totalmarks / $stats->s;
332         if ($stats->maxmark != 0) {
333             $stats->facility = $stats->markaverage / $stats->maxmark;
334         } else {
335             $stats->facility = null;
336         }
338         $stats->othermarkaverage = $stats->totalothermarks / $stats->s;
340         $stats->summarksaverage = $stats->totalsummarks / $stats->s;
342         sort($stats->markarray, SORT_NUMERIC);
343         sort($stats->othermarksarray, SORT_NUMERIC);
344     }
346     /**
347      * Now we know the averages, accumulate the date needed to compute the higher
348      * moments of the question scores.
349      *
350      * @param object $step        the state to add to the statistics.
351      * @param calculated $stats       the question statistics we are accumulating.
352      * @param array  $summarks    of the sum of marks for each question usage, indexed by question usage id
353      */
354     protected function secondary_steps_walker($step, $stats, $summarks) {
355         $markdifference = $step->mark - $stats->markaverage;
356         if ($stats->subquestion) {
357             $othermarkdifference = $summarks[$step->questionusageid] - $stats->othermarkaverage;
358         } else {
359             $othermarkdifference = $summarks[$step->questionusageid] - $step->mark - $stats->othermarkaverage;
360         }
361         $overallmarkdifference = $summarks[$step->questionusageid] - $stats->summarksaverage;
363         $sortedmarkdifference = array_shift($stats->markarray) - $stats->markaverage;
364         $sortedothermarkdifference = array_shift($stats->othermarksarray) - $stats->othermarkaverage;
366         $stats->markvariancesum += pow($markdifference, 2);
367         $stats->othermarkvariancesum += pow($othermarkdifference, 2);
368         $stats->covariancesum += $markdifference * $othermarkdifference;
369         $stats->covariancemaxsum += $sortedmarkdifference * $sortedothermarkdifference;
370         $stats->covariancewithoverallmarksum += $markdifference * $overallmarkdifference;
371     }
373     /**
374      * Perform more per-question statistics calculations.
375      *
376      * @param calculated $stats question stats to update.
377      */
378     protected function secondary_question_walker($stats) {
380         if ($stats->s > 1) {
381             $stats->markvariance = $stats->markvariancesum / ($stats->s - 1);
382             $stats->othermarkvariance = $stats->othermarkvariancesum / ($stats->s - 1);
383             $stats->covariance = $stats->covariancesum / ($stats->s - 1);
384             $stats->covariancemax = $stats->covariancemaxsum / ($stats->s - 1);
385             $stats->covariancewithoverallmark = $stats->covariancewithoverallmarksum /
386                 ($stats->s - 1);
387             $stats->sd = sqrt($stats->markvariancesum / ($stats->s - 1));
389             if ($stats->covariancewithoverallmark >= 0) {
390                 $stats->negcovar = 0;
391             } else {
392                 $stats->negcovar = 1;
393             }
394         } else {
395             $stats->markvariance = null;
396             $stats->othermarkvariance = null;
397             $stats->covariance = null;
398             $stats->covariancemax = null;
399             $stats->covariancewithoverallmark = null;
400             $stats->sd = null;
401             $stats->negcovar = 0;
402         }
404         if ($stats->markvariance * $stats->othermarkvariance) {
405             $stats->discriminationindex = 100 * $stats->covariance /
406                 sqrt($stats->markvariance * $stats->othermarkvariance);
407         } else {
408             $stats->discriminationindex = null;
409         }
411         if ($stats->covariancemax) {
412             $stats->discriminativeefficiency = 100 * $stats->covariance /
413                 $stats->covariancemax;
414         } else {
415             $stats->discriminativeefficiency = null;
416         }
417     }
419     /**
420      * @param object $questiondata
421      * @return number the random guess score for this question.
422      */
423     protected function get_random_guess_score($questiondata) {
424         return \question_bank::get_qtype(
425             $questiondata->qtype, false)->get_random_guess_score($questiondata);
426     }
428     /**
429      * @param $qubaids \qubaid_condition
430      */
431     protected function cache_stats($qubaids) {
432         foreach ($this->questionstats as $questionstat) {
433             $questionstat->cache($qubaids);
434         }
436         foreach ($this->subquestionstats as $subquestionstat) {
437             $subquestionstat->cache($qubaids);
438         }
439     }