8fa439276d63a1766bb31f389c2069772a70fff9
[moodle.git] / question / classes / statistics / questions / calculator.php
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
17 /**
18  * Question statistics calculator class. Used in the quiz statistics report but also available for use elsewhere.
19  *
20  * @package    core
21  * @subpackage questionbank
22  * @copyright  2013 Open University
23  * @author     Jamie Pratt <me@jamiep.org>
24  * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25  */
27 namespace core_question\statistics\questions;
28 defined('MOODLE_INTERNAL') || die();
30 /**
31  * This class has methods to compute the question statistics from the raw data.
32  *
33  * @copyright 2013 Open University
34  * @author    Jamie Pratt <me@jamiep.org>
35  * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
36  */
37 class calculator {
39     /**
40      * @var calculated[]
41      */
42     public $questionstats = array();
44     /**
45      * @var calculated_for_subquestion[]
46      */
47     public $subquestionstats = array();
49     /**
50      * @var float
51      */
52     protected $sumofmarkvariance = 0;
54     protected $randomselectors = array();
56     /**
57      * @var \progress_trace
58      */
59     protected $progress;
61     /**
62      * Constructor.
63      *
64      * @param object[] questions to analyze, keyed by slot, also analyses sub questions for random questions.
65      *                              we expect some extra fields - slot, maxmark and number on the full question data objects.
66      * @param \core\progress\base|null $progress the element to send progress messages to, default is {@link \core\progress\null}.
67      */
68     public function __construct($questions, $progress = null) {
70         if ($progress === null) {
71             $progress = new \core\progress\null();
72         }
73         $this->progress = $progress;
75         foreach ($questions as $slot => $question) {
76             $this->questionstats[$slot] = new calculated();
77             $this->questionstats[$slot]->questionid = $question->id;
78             $this->questionstats[$slot]->question = $question;
79             $this->questionstats[$slot]->slot = $slot;
80             $this->questionstats[$slot]->positions = $question->number;
81             $this->questionstats[$slot]->maxmark = $question->maxmark;
82             $this->questionstats[$slot]->randomguessscore = $this->get_random_guess_score($question);
83         }
84     }
86     /**
87      * @param $qubaids \qubaid_condition
88      * @return array containing two arrays calculated[] and calculated_for_subquestion[].
89      */
90     public function calculate($qubaids) {
92         $this->progress->start_progress('', 6);
94         list($lateststeps, $summarks) = $this->get_latest_steps($qubaids);
96         if ($lateststeps) {
97             $this->progress->start_progress('', count($lateststeps), 1);
98             // Compute the statistics of position, and for random questions, work
99             // out which questions appear in which positions.
100             $countdone = 1;
101             foreach ($lateststeps as $step) {
102                 $this->progress->progress($countdone);
103                 $countdone++;
104                 $this->initial_steps_walker($step, $this->questionstats[$step->slot], $summarks);
106                 // If this is a random question what is the real item being used?
107                 if ($step->questionid != $this->questionstats[$step->slot]->questionid) {
108                     if (!isset($this->subquestionstats[$step->questionid])) {
109                         $this->subquestionstats[$step->questionid] = new calculated_for_subquestion();
110                         $this->subquestionstats[$step->questionid]->questionid = $step->questionid;
111                         $this->subquestionstats[$step->questionid]->maxmark = $step->maxmark;
112                     } else if ($this->subquestionstats[$step->questionid]->maxmark != $step->maxmark) {
113                         $this->subquestionstats[$step->questionid]->differentweights = true;
114                     }
116                     $this->initial_steps_walker($step, $this->subquestionstats[$step->questionid], $summarks, false);
118                     $number = $this->questionstats[$step->slot]->question->number;
119                     $this->subquestionstats[$step->questionid]->usedin[$number] = $number;
121                     $randomselectorstring = $this->questionstats[$step->slot]->question->category. '/'
122                                                                     .$this->questionstats[$step->slot]->question->questiontext;
123                     if (!isset($this->randomselectors[$randomselectorstring])) {
124                         $this->randomselectors[$randomselectorstring] = array();
125                     }
126                     $this->randomselectors[$randomselectorstring][$step->questionid] = $step->questionid;
127                 }
128             }
129             $this->progress->end_progress();
131             foreach ($this->randomselectors as $key => $notused) {
132                 ksort($this->randomselectors[$key]);
133             }
135             // Compute the statistics of question id, if we need any.
136             $subquestions = question_load_questions(array_keys($this->subquestionstats));
137             $this->progress->start_progress('', count($subquestions), 1);
138             $countdone = 1;
139             foreach ($subquestions as $qid => $subquestion) {
140                 $this->progress->progress($countdone);
141                 $countdone++;
142                 $this->subquestionstats[$qid]->question = $subquestion;
143                 $this->subquestionstats[$qid]->question->maxmark = $this->subquestionstats[$qid]->maxmark;
144                 $this->subquestionstats[$qid]->randomguessscore = $this->get_random_guess_score($subquestion);
146                 $this->initial_question_walker($this->subquestionstats[$qid]);
148                 if ($this->subquestionstats[$qid]->differentweights) {
149                     // TODO output here really sucks, but throwing is too severe.
150                     global $OUTPUT;
151                     $name = $this->subquestionstats[$qid]->question->name;
152                     echo $OUTPUT->notification( get_string('erroritemappearsmorethanoncewithdifferentweight',
153                                                             'quiz_statistics', $name));
154                 }
156                 if ($this->subquestionstats[$qid]->usedin) {
157                     sort($this->subquestionstats[$qid]->usedin, SORT_NUMERIC);
158                     $this->subquestionstats[$qid]->positions = implode(',', $this->subquestionstats[$qid]->usedin);
159                 } else {
160                     $this->subquestionstats[$qid]->positions = '';
161                 }
162             }
163             $this->progress->end_progress();
165             // Finish computing the averages, and put the subquestion data into the
166             // corresponding questions.
168             // This cannot be a foreach loop because we need to have both
169             // $question and $nextquestion available, but apart from that it is
170             // foreach ($this->questions as $qid => $question).
171             reset($this->questionstats);
172             $this->progress->start_progress('', count($this->questionstats), 1);
173             $countdone = 1;
174             while (list($slot, $questionstat) = each($this->questionstats)) {
175                 $this->progress->progress($countdone);
176                 $countdone++;
177                 $nextquestionstats = current($this->questionstats);
179                 $this->initial_question_walker($questionstat);
181                 if ($questionstat->question->qtype == 'random') {
182                     $randomselectorstring = $questionstat->question->category .'/'. $questionstat->question->questiontext;
183                     if ($nextquestionstats && $nextquestionstats->question->qtype == 'random') {
184                         $nextrandomselectorstring  =
185                             $nextquestionstats->question->category .'/'. $nextquestionstats->question->questiontext;
186                         if ($randomselectorstring == $nextrandomselectorstring) {
187                             continue; // Next loop iteration.
188                         }
189                     }
190                     if (isset($this->randomselectors[$randomselectorstring])) {
191                         $questionstat->subquestions = implode(',', $this->randomselectors[$randomselectorstring]);
192                     }
193                 }
194             }
195             $this->progress->end_progress();
197             // Go through the records one more time.
198             $this->progress->start_progress('', count($lateststeps), 1);
199             $countdone = 1;
200             foreach ($lateststeps as $step) {
201                 $this->progress->progress($countdone);
202                 $countdone++;
203                 $this->secondary_steps_walker($step, $this->questionstats[$step->slot], $summarks);
205                 if ($this->questionstats[$step->slot]->subquestions) {
206                     $this->secondary_steps_walker($step, $this->subquestionstats[$step->questionid], $summarks);
207                 }
208             }
209             $this->progress->end_progress();
211             $this->progress->start_progress('', count($this->questionstats), 1);
212             $sumofcovariancewithoverallmark = 0;
213             $countdone = 1;
214             foreach ($this->questionstats as $questionstat) {
215                 $this->progress->progress($countdone);
216                 $countdone++;
217                 $this->secondary_question_walker($questionstat);
219                 $this->sumofmarkvariance += $questionstat->markvariance;
221                 if ($questionstat->covariancewithoverallmark >= 0) {
222                     $sumofcovariancewithoverallmark += sqrt($questionstat->covariancewithoverallmark);
223                 }
224             }
225             $this->progress->end_progress();
227             $this->progress->start_progress('', count($this->subquestionstats), 1);
228             $countdone = 1;
229             foreach ($this->subquestionstats as $subquestionstat) {
230                 $this->progress->progress($countdone);
231                 $countdone++;
232                 $this->secondary_question_walker($subquestionstat);
233             }
234             $this->progress->end_progress();
236             foreach ($this->questionstats as $questionstat) {
237                 if ($sumofcovariancewithoverallmark) {
238                     if ($questionstat->negcovar) {
239                         $questionstat->effectiveweight = null;
240                     } else {
241                         $questionstat->effectiveweight = 100 * sqrt($questionstat->covariancewithoverallmark) /
242                             $sumofcovariancewithoverallmark;
243                     }
244                 } else {
245                     $questionstat->effectiveweight = null;
246                 }
247             }
248             $this->cache_stats($qubaids);
250             // All finished.
251             $this->progress->end_progress();
252         }
253         return array($this->questionstats, $this->subquestionstats);
254     }
256     /**
257      * Load cached statistics from the database.
258      *
259      * @param $qubaids \qubaid_condition
260      * @return array containing two arrays calculated[] and calculated_for_subquestion[].
261      */
262     public function get_cached($qubaids) {
263         global $DB;
264         $timemodified = time() - self::TIME_TO_CACHE;
265         $questionstatrecs = $DB->get_records_select('question_statistics', 'hashcode = ? AND timemodified > ?',
266                                          array($qubaids->get_hash_code(), $timemodified));
268         $questionids = array();
269         foreach ($questionstatrecs as $fromdb) {
270             if (!$fromdb->slot) {
271                 $questionids[] = $fromdb->questionid;
272             }
273         }
274         $subquestions = question_load_questions($questionids);
275         foreach ($questionstatrecs as $fromdb) {
276             if ($fromdb->slot) {
277                 $this->questionstats[$fromdb->slot]->populate_from_record($fromdb);
278                 // Array created in constructor and populated from question.
279             } else {
280                 $this->subquestionstats[$fromdb->questionid] = new calculated_for_subquestion();
281                 $this->subquestionstats[$fromdb->questionid]->populate_from_record($fromdb);
282                 $this->subquestionstats[$fromdb->questionid]->question = $subquestions[$fromdb->questionid];
283             }
284         }
285         return array($this->questionstats, $this->subquestionstats);
286     }
288     /**
289      * Find time of non-expired statistics in the database.
290      *
291      * @param $qubaids \qubaid_condition
292      * @return integer|boolean Time of cached record that matches this qubaid_condition or false is non found.
293      */
294     public function get_last_calculated_time($qubaids) {
295         global $DB;
297         $timemodified = time() - self::TIME_TO_CACHE;
298         return $DB->get_field_select('question_statistics', 'timemodified', 'hashcode = ? AND timemodified > ?',
299                                      array($qubaids->get_hash_code(), $timemodified), IGNORE_MULTIPLE);
300     }
302     /** @var integer Time after which statistics are automatically recomputed. */
303     const TIME_TO_CACHE = 900; // 15 minutes.
305     /**
306      * Used when computing Coefficient of Internal Consistency by quiz statistics.
307      *
308      * @return float
309      */
310     public function get_sum_of_mark_variance() {
311         return $this->sumofmarkvariance;
312     }
314     /**
315      * @param $qubaids \qubaid_condition
316      * @return array with two items
317      *              - $lateststeps array of latest step data for the question usages
318      *              - $summarks    array of total marks for each usage, indexed by usage id
319      */
320     protected function get_latest_steps($qubaids) {
321         $dm = new \question_engine_data_mapper();
323         $fields = "    qas.id,
324     qa.questionusageid,
325     qa.questionid,
326     qa.slot,
327     qa.maxmark,
328     qas.fraction * qa.maxmark as mark";
330         $lateststeps = $dm->load_questions_usages_latest_steps($qubaids, array_keys($this->questionstats), $fields);
331         $summarks = array();
332         if ($lateststeps) {
333             foreach ($lateststeps as $step) {
334                 if (!isset($summarks[$step->questionusageid])) {
335                     $summarks[$step->questionusageid] = 0;
336                 }
337                 $summarks[$step->questionusageid] += $step->mark;
338             }
339         }
341         return array($lateststeps, $summarks);
342     }
344     /**
345      * Update $stats->totalmarks, $stats->markarray, $stats->totalothermarks
346      * and $stats->othermarksarray to include another state.
347      *
348      * @param object $step         the state to add to the statistics.
349      * @param calculated $stats        the question statistics we are accumulating.
350      * @param array  $summarks     of the sum of marks for each question usage, indexed by question usage id
351      * @param bool   $positionstat whether this is a statistic of position of question.
352      */
353     protected function initial_steps_walker($step, $stats, $summarks, $positionstat = true) {
354         $stats->s++;
355         $stats->totalmarks += $step->mark;
356         $stats->markarray[] = $step->mark;
358         if ($positionstat) {
359             $stats->totalothermarks += $summarks[$step->questionusageid] - $step->mark;
360             $stats->othermarksarray[] = $summarks[$step->questionusageid] - $step->mark;
362         } else {
363             $stats->totalothermarks += $summarks[$step->questionusageid];
364             $stats->othermarksarray[] = $summarks[$step->questionusageid];
365         }
366     }
368     /**
369      * Perform some computations on the per-question statistics calculations after
370      * we have been through all the states.
371      *
372      * @param calculated $stats question stats to update.
373      */
374     protected function initial_question_walker($stats) {
375         $stats->markaverage = $stats->totalmarks / $stats->s;
377         if ($stats->maxmark != 0) {
378             $stats->facility = $stats->markaverage / $stats->maxmark;
379         } else {
380             $stats->facility = null;
381         }
383         $stats->othermarkaverage = $stats->totalothermarks / $stats->s;
385         $stats->summarksaverage = $stats->totalsummarks / $stats->s;
387         sort($stats->markarray, SORT_NUMERIC);
388         sort($stats->othermarksarray, SORT_NUMERIC);
389     }
391     /**
392      * Now we know the averages, accumulate the date needed to compute the higher
393      * moments of the question scores.
394      *
395      * @param object $step        the state to add to the statistics.
396      * @param calculated $stats       the question statistics we are accumulating.
397      * @param array  $summarks    of the sum of marks for each question usage, indexed by question usage id
398      */
399     protected function secondary_steps_walker($step, $stats, $summarks) {
400         $markdifference = $step->mark - $stats->markaverage;
401         if ($stats->subquestion) {
402             $othermarkdifference = $summarks[$step->questionusageid] - $stats->othermarkaverage;
403         } else {
404             $othermarkdifference = $summarks[$step->questionusageid] - $step->mark - $stats->othermarkaverage;
405         }
406         $overallmarkdifference = $summarks[$step->questionusageid] - $stats->summarksaverage;
408         $sortedmarkdifference = array_shift($stats->markarray) - $stats->markaverage;
409         $sortedothermarkdifference = array_shift($stats->othermarksarray) - $stats->othermarkaverage;
411         $stats->markvariancesum += pow($markdifference, 2);
412         $stats->othermarkvariancesum += pow($othermarkdifference, 2);
413         $stats->covariancesum += $markdifference * $othermarkdifference;
414         $stats->covariancemaxsum += $sortedmarkdifference * $sortedothermarkdifference;
415         $stats->covariancewithoverallmarksum += $markdifference * $overallmarkdifference;
416     }
418     /**
419      * Perform more per-question statistics calculations.
420      *
421      * @param calculated $stats question stats to update.
422      */
423     protected function secondary_question_walker($stats) {
425         if ($stats->s > 1) {
426             $stats->markvariance = $stats->markvariancesum / ($stats->s - 1);
427             $stats->othermarkvariance = $stats->othermarkvariancesum / ($stats->s - 1);
428             $stats->covariance = $stats->covariancesum / ($stats->s - 1);
429             $stats->covariancemax = $stats->covariancemaxsum / ($stats->s - 1);
430             $stats->covariancewithoverallmark = $stats->covariancewithoverallmarksum /
431                 ($stats->s - 1);
432             $stats->sd = sqrt($stats->markvariancesum / ($stats->s - 1));
434             if ($stats->covariancewithoverallmark >= 0) {
435                 $stats->negcovar = 0;
436             } else {
437                 $stats->negcovar = 1;
438             }
439         } else {
440             $stats->markvariance = null;
441             $stats->othermarkvariance = null;
442             $stats->covariance = null;
443             $stats->covariancemax = null;
444             $stats->covariancewithoverallmark = null;
445             $stats->sd = null;
446             $stats->negcovar = 0;
447         }
449         if ($stats->markvariance * $stats->othermarkvariance) {
450             $stats->discriminationindex = 100 * $stats->covariance /
451                 sqrt($stats->markvariance * $stats->othermarkvariance);
452         } else {
453             $stats->discriminationindex = null;
454         }
456         if ($stats->covariancemax) {
457             $stats->discriminativeefficiency = 100 * $stats->covariance /
458                 $stats->covariancemax;
459         } else {
460             $stats->discriminativeefficiency = null;
461         }
462     }
464     /**
465      * @param object $questiondata
466      * @return number the random guess score for this question.
467      */
468     protected function get_random_guess_score($questiondata) {
469         return \question_bank::get_qtype(
470             $questiondata->qtype, false)->get_random_guess_score($questiondata);
471     }
473     /**
474      * @param $qubaids \qubaid_condition
475      */
476     protected function cache_stats($qubaids) {
477         foreach ($this->questionstats as $questionstat) {
478             $questionstat->cache($qubaids);
479         }
481         foreach ($this->subquestionstats as $subquestionstat) {
482             $subquestionstat->cache($qubaids);
483         }
484     }