MDL-42957 quiz statistics : Hardcoded sql LIMIT clause
[moodle.git] / question / classes / statistics / questions / calculator.php
CommitLineData
e68e4ccf
JP
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
515b3ae6 18 * Question statistics calculator class. Used in the quiz statistics report but also available for use elsewhere.
e68e4ccf
JP
19 *
20 * @package core
21 * @subpackage questionbank
22 * @copyright 2013 Open University
23 * @author Jamie Pratt <me@jamiep.org>
24 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
25 */
26
515b3ae6 27namespace core_question\statistics\questions;
e68e4ccf
JP
28defined('MOODLE_INTERNAL') || die();
29
e68e4ccf
JP
/**
 * This class has methods to compute the question statistics from the raw data.
 *
 * Construct it with the questions to analyse (keyed by slot), then either call
 * {@link calculate()} to compute and cache fresh statistics, or {@link get_cached()}
 * to load previously cached ones.
 *
 * @copyright 2013 Open University
 * @author    Jamie Pratt <me@jamiep.org>
 * @license   http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
 */
class calculator {

    /** @var integer Time after which statistics are automatically recomputed. */
    const TIME_TO_CACHE = 900; // 15 minutes.

    /**
     * @var calculated[] statistics for each position in the usage, keyed by slot.
     */
    public $questionstats = array();

    /**
     * @var calculated_for_subquestion[] statistics for the questions actually shown
     *      in place of another (e.g. random) question, keyed by question id.
     */
    public $subquestionstats = array();

    /**
     * @var float running total of the mark variance of every position.
     */
    protected $sumofmarkvariance = 0;

    /**
     * @var array[] for each random selector string (see {@link random_selector_string()}),
     *      the ids of the questions that were picked for it, keyed and valued by question id.
     */
    protected $randomselectors = array();

    /**
     * Constructor.
     *
     * @param object[] $questions questions to analyze, keyed by slot, also analyses sub questions for random questions.
     *             We expect some extra fields - slot, maxmark and number on the full question data objects.
     */
    public function __construct($questions) {
        foreach ($questions as $slot => $question) {
            $stat = new calculated();
            $stat->questionid = $question->id;
            $stat->question = $question;
            $stat->slot = $slot;
            $stat->positions = $question->number;
            $stat->maxmark = $question->maxmark;
            $stat->randomguessscore = $this->get_random_guess_score($question);
            $this->questionstats[$slot] = $stat;
        }
    }

    /**
     * Compute the statistics from the latest steps of the given usages and cache them.
     *
     * Nothing is computed or cached when there are no steps to analyse.
     *
     * @param $qubaids \qubaid_condition
     * @return array containing two arrays calculated[] and calculated_for_subquestion[].
     */
    public function calculate($qubaids) {
        set_time_limit(0);

        list($lateststeps, $summarks, $summarksavg) = $this->get_latest_steps($qubaids);

        if ($lateststeps) {

            // Compute the statistics of position, and for random questions, work
            // out which questions appear in which positions.
            foreach ($lateststeps as $step) {
                $positionstat = $this->questionstats[$step->slot];
                $this->initial_steps_walker($step, $positionstat, $summarks);

                // If this is a random question what is the real item being used?
                if ($step->questionid != $positionstat->questionid) {
                    if (!isset($this->subquestionstats[$step->questionid])) {
                        $this->subquestionstats[$step->questionid] = new calculated_for_subquestion();
                        $this->subquestionstats[$step->questionid]->questionid = $step->questionid;
                        $this->subquestionstats[$step->questionid]->maxmark = $step->maxmark;
                    } else if ($this->subquestionstats[$step->questionid]->maxmark != $step->maxmark) {
                        $this->subquestionstats[$step->questionid]->differentweights = true;
                    }
                    $substat = $this->subquestionstats[$step->questionid];

                    $this->initial_steps_walker($step, $substat, $summarks, false);

                    $number = $positionstat->question->number;
                    $substat->usedin[$number] = $number;

                    $randomselectorstring = $this->random_selector_string($positionstat->question);
                    if (!isset($this->randomselectors[$randomselectorstring])) {
                        $this->randomselectors[$randomselectorstring] = array();
                    }
                    $this->randomselectors[$randomselectorstring][$step->questionid] = $step->questionid;
                }
            }

            foreach ($this->randomselectors as $key => $notused) {
                ksort($this->randomselectors[$key]);
            }

            // Compute the statistics of question id, if we need any.
            $subquestions = question_load_questions(array_keys($this->subquestionstats));
            foreach ($subquestions as $qid => $subquestion) {
                $substat = $this->subquestionstats[$qid];
                $substat->question = $subquestion;
                $substat->question->maxmark = $substat->maxmark;
                $substat->randomguessscore = $this->get_random_guess_score($subquestion);

                $this->initial_question_walker($substat);

                if ($substat->differentweights) {
                    // TODO output here really sucks, but throwing is too severe.
                    global $OUTPUT;
                    $name = $substat->question->name;
                    echo $OUTPUT->notification( get_string('erroritemappearsmorethanoncewithdifferentweight',
                                                            'quiz_statistics', $name));
                }

                if ($substat->usedin) {
                    sort($substat->usedin, SORT_NUMERIC);
                    $substat->positions = implode(',', $substat->usedin);
                } else {
                    $substat->positions = '';
                }
            }

            // Finish computing the averages, and put the subquestion data into the
            // corresponding questions.

            // We need both the current and the following position at each step, so
            // iterate over the slot keys and look one element ahead. (each() cannot be
            // used for this: it no longer exists in PHP 8.)
            $slots = array_keys($this->questionstats);
            foreach ($slots as $index => $slot) {
                $questionstat = $this->questionstats[$slot];
                $nextquestionstats = isset($slots[$index + 1]) ? $this->questionstats[$slots[$index + 1]] : false;

                $this->initial_question_walker($questionstat);

                if ($questionstat->question->qtype == 'random') {
                    $randomselectorstring = $this->random_selector_string($questionstat->question);
                    if ($nextquestionstats && $nextquestionstats->question->qtype == 'random') {
                        $nextrandomselectorstring = $this->random_selector_string($nextquestionstats->question);
                        if ($randomselectorstring == $nextrandomselectorstring) {
                            // Only the last of a run of random questions drawing from the same
                            // selector gets the list of sub-questions.
                            continue; // Next loop iteration.
                        }
                    }
                    if (isset($this->randomselectors[$randomselectorstring])) {
                        $questionstat->subquestions = implode(',', $this->randomselectors[$randomselectorstring]);
                    }
                }
            }

            // Go through the records one more time.
            foreach ($lateststeps as $step) {
                $positionstat = $this->questionstats[$step->slot];
                $this->secondary_steps_walker($step, $positionstat, $summarks, $summarksavg);

                // Use the same test as in the first pass, so that every step that was counted
                // for a sub-question also contributes to that sub-question's variance sums.
                // (Testing ->subquestions here would miss all but the last of a run of random
                // questions sharing a selector, because of the 'continue' above.)
                if ($step->questionid != $positionstat->questionid) {
                    $this->secondary_steps_walker($step, $this->subquestionstats[$step->questionid],
                                                  $summarks, $summarksavg);
                }
            }

            $sumofcovariancewithoverallmark = 0;
            foreach ($this->questionstats as $questionstat) {
                $this->secondary_question_walker($questionstat);

                $this->sumofmarkvariance += $questionstat->markvariance;

                // A null covariance (fewer than two attempts) contributes nothing; skip it
                // explicitly rather than passing null to sqrt().
                $covariance = $questionstat->covariancewithoverallmark;
                if ($covariance !== null && $covariance >= 0) {
                    $sumofcovariancewithoverallmark += sqrt($covariance);
                }
            }

            foreach ($this->subquestionstats as $subquestionstat) {
                $this->secondary_question_walker($subquestionstat);
            }

            foreach ($this->questionstats as $questionstat) {
                if ($sumofcovariancewithoverallmark && !$questionstat->negcovar) {
                    // The cast keeps the previous result (0) for a null covariance.
                    $questionstat->effectiveweight = 100 * sqrt((float) $questionstat->covariancewithoverallmark) /
                                                     $sumofcovariancewithoverallmark;
                } else {
                    $questionstat->effectiveweight = null;
                }
            }
            $this->cache_stats($qubaids);
        }
        return array($this->questionstats, $this->subquestionstats);
    }

    /**
     * Load cached statistics from the database.
     *
     * Records with a slot populate the position statistics created in the constructor;
     * records without a slot become sub-question statistics.
     *
     * @param $qubaids \qubaid_condition
     * @return array containing two arrays calculated[] and calculated_for_subquestion[].
     */
    public function get_cached($qubaids) {
        global $DB;
        $timemodified = time() - self::TIME_TO_CACHE;
        $questionstatrecs = $DB->get_records_select('question_statistics', 'hashcode = ? AND timemodified > ?',
                                                    array($qubaids->get_hash_code(), $timemodified));

        $questionids = array();
        foreach ($questionstatrecs as $fromdb) {
            if (!$fromdb->slot) {
                $questionids[] = $fromdb->questionid;
            }
        }
        $subquestions = question_load_questions($questionids);
        foreach ($questionstatrecs as $fromdb) {
            if ($fromdb->slot) {
                // Array created in constructor and populated from question.
                $this->questionstats[$fromdb->slot]->populate_from_record($fromdb);
            } else {
                $substat = new calculated_for_subquestion();
                $substat->populate_from_record($fromdb);
                $substat->question = $subquestions[$fromdb->questionid];
                $this->subquestionstats[$fromdb->questionid] = $substat;
            }
        }
        return array($this->questionstats, $this->subquestionstats);
    }

    /**
     * Find time of non-expired statistics in the database.
     *
     * @param $qubaids \qubaid_condition
     * @return integer|boolean Time of cached record that matches this qubaid_condition or false if none found.
     */
    public function get_last_calculated_time($qubaids) {
        global $DB;

        $timemodified = time() - self::TIME_TO_CACHE;
        // Several rows normally match (one per cached statistic), and any of them will do,
        // so tell the DML layer that multiple matches are expected.
        return $DB->get_field_select('question_statistics', 'timemodified', 'hashcode = ? AND timemodified > ?',
                                     array($qubaids->get_hash_code(), $timemodified), IGNORE_MULTIPLE);
    }

    /**
     * Used when computing Coefficient of Internal Consistency by quiz statistics.
     *
     * @return float
     */
    public function get_sum_of_mark_variance() {
        return $this->sumofmarkvariance;
    }

    /**
     * Load the latest step of every analysed slot and total up the marks per usage.
     *
     * @param $qubaids \qubaid_condition
     * @return array with three items
     *              - $lateststeps array of latest step data for the question usages
     *              - $summarks    array of total marks for each usage, indexed by usage id
     *              - $summarksavg the average of the total marks over all the usages
     *                             (null when there are no steps)
     */
    protected function get_latest_steps($qubaids) {
        $dm = new \question_engine_data_mapper();

        $fields = "    qas.id,
    qa.questionusageid,
    qa.questionid,
    qa.slot,
    qa.maxmark,
    qas.fraction * qa.maxmark as mark";

        $lateststeps = $dm->load_questions_usages_latest_steps($qubaids, array_keys($this->questionstats), $fields);
        $summarks = array();
        $summarksavg = null;
        if ($lateststeps) {
            foreach ($lateststeps as $step) {
                if (!isset($summarks[$step->questionusageid])) {
                    $summarks[$step->questionusageid] = 0;
                }
                $summarks[$step->questionusageid] += $step->mark;
            }
            $summarksavg = array_sum($summarks) / count($summarks);
        }

        return array($lateststeps, $summarks, $summarksavg);
    }

    /**
     * Update $stats->totalmarks, $stats->markarray, $stats->totalothermarks
     * and $stats->othermarksarray to include another state.
     *
     * @param object     $step         the state to add to the statistics.
     * @param calculated $stats        the question statistics we are accumulating.
     * @param array      $summarks     of the sum of marks for each question usage, indexed by question usage id
     * @param bool       $positionstat whether this is a statistic of position of question. If so, this step's own
     *                                 mark is left out of the 'other marks'; otherwise the whole usage total is used.
     */
    protected function initial_steps_walker($step, $stats, $summarks, $positionstat = true) {
        $stats->s++;
        $stats->totalmarks += $step->mark;
        $stats->markarray[] = $step->mark;

        $othermark = $summarks[$step->questionusageid];
        if ($positionstat) {
            $othermark -= $step->mark;
        }
        $stats->totalothermarks += $othermark;
        $stats->othermarksarray[] = $othermark;
    }

    /**
     * Perform some computations on the per-question statistics calculations after
     * we have been through all the states.
     *
     * @param calculated $stats question stats to update.
     */
    protected function initial_question_walker($stats) {
        if ($stats->s != 0) {
            $stats->markaverage = $stats->totalmarks / $stats->s;
            $stats->othermarkaverage = $stats->totalothermarks / $stats->s;
        } else {
            // Nothing was recorded for this question: avoid dividing by zero.
            $stats->markaverage = 0;
            $stats->othermarkaverage = 0;
        }

        if ($stats->maxmark != 0) {
            $stats->facility = $stats->markaverage / $stats->maxmark;
        } else {
            $stats->facility = null;
        }

        // Sorted copies are consumed one by one in secondary_steps_walker().
        sort($stats->markarray, SORT_NUMERIC);
        sort($stats->othermarksarray, SORT_NUMERIC);
    }

    /**
     * Now we know the averages, accumulate the data needed to compute the higher
     * moments of the question scores.
     *
     * Note that each call removes the first element of $stats->markarray and
     * $stats->othermarksarray.
     *
     * @param object     $step        the state to add to the statistics.
     * @param calculated $stats       the question statistics we are accumulating.
     * @param array      $summarks    of the sum of marks for each question usage, indexed by question usage id
     * @param float      $summarksavg the average sum of marks for all question usages
     */
    protected function secondary_steps_walker($step, $stats, $summarks, $summarksavg) {
        $usagetotal = $summarks[$step->questionusageid];

        $markdifference = $step->mark - $stats->markaverage;
        if ($stats->subquestion) {
            $othermarkdifference = $usagetotal - $stats->othermarkaverage;
        } else {
            $othermarkdifference = $usagetotal - $step->mark - $stats->othermarkaverage;
        }
        $overallmarkdifference = $usagetotal - $summarksavg;

        $sortedmarkdifference = array_shift($stats->markarray) - $stats->markaverage;
        $sortedothermarkdifference = array_shift($stats->othermarksarray) - $stats->othermarkaverage;

        $stats->markvariancesum += pow($markdifference, 2);
        $stats->othermarkvariancesum += pow($othermarkdifference, 2);
        $stats->covariancesum += $markdifference * $othermarkdifference;
        $stats->covariancemaxsum += $sortedmarkdifference * $sortedothermarkdifference;
        $stats->covariancewithoverallmarksum += $markdifference * $overallmarkdifference;
    }

    /**
     * Perform more per-question statistics calculations.
     *
     * With fewer than two attempts the variances, covariances and standard deviation
     * are set to null.
     *
     * @param calculated $stats question stats to update.
     */
    protected function secondary_question_walker($stats) {

        if ($stats->s > 1) {
            $degreesoffreedom = $stats->s - 1;
            $stats->markvariance = $stats->markvariancesum / $degreesoffreedom;
            $stats->othermarkvariance = $stats->othermarkvariancesum / $degreesoffreedom;
            $stats->covariance = $stats->covariancesum / $degreesoffreedom;
            $stats->covariancemax = $stats->covariancemaxsum / $degreesoffreedom;
            $stats->covariancewithoverallmark = $stats->covariancewithoverallmarksum / $degreesoffreedom;
            $stats->sd = sqrt($stats->markvariancesum / $degreesoffreedom);

            if ($stats->covariancewithoverallmark >= 0) {
                $stats->negcovar = 0;
            } else {
                $stats->negcovar = 1;
            }
        } else {
            $stats->markvariance = null;
            $stats->othermarkvariance = null;
            $stats->covariance = null;
            $stats->covariancemax = null;
            $stats->covariancewithoverallmark = null;
            $stats->sd = null;
            $stats->negcovar = 0;
        }

        if ($stats->markvariance * $stats->othermarkvariance) {
            $stats->discriminationindex = 100 * $stats->covariance /
                                          sqrt($stats->markvariance * $stats->othermarkvariance);
        } else {
            $stats->discriminationindex = null;
        }

        if ($stats->covariancemax) {
            $stats->discriminativeefficiency = 100 * $stats->covariance /
                                               $stats->covariancemax;
        } else {
            $stats->discriminativeefficiency = null;
        }
    }

    /**
     * @param object $questiondata
     * @return number the random guess score for this question.
     */
    protected function get_random_guess_score($questiondata) {
        return \question_bank::get_qtype(
            $questiondata->qtype, false)->get_random_guess_score($questiondata);
    }

    /**
     * Build the key used to group random questions: category and question text joined by '/'.
     *
     * @param object $question question data with category and questiontext fields.
     * @return string
     */
    protected function random_selector_string($question) {
        return $question->category . '/' . $question->questiontext;
    }

    /**
     * Ask every position and sub-question statistic to cache itself.
     *
     * @param $qubaids \qubaid_condition
     */
    protected function cache_stats($qubaids) {
        foreach ($this->questionstats as $questionstat) {
            $questionstat->cache($qubaids);
        }

        foreach ($this->subquestionstats as $subquestionstat) {
            $subquestionstat->cache($qubaids);
        }
    }

}