MDL-60357 core_search: Future modified times cause serious problems
authorsam marshall <s.marshall@open.ac.uk>
Thu, 5 Oct 2017 15:04:44 +0000 (16:04 +0100)
committersam marshall <s.marshall@open.ac.uk>
Fri, 6 Oct 2017 12:38:32 +0000 (13:38 +0100)
search/classes/manager.php
search/classes/skip_future_documents_iterator.php [new file with mode: 0644]
search/tests/manager_test.php

index 3e8daac..698c447 100644 (file)
@@ -698,8 +698,10 @@ class manager {
             if ($timelimit) {
                 $options['stopat'] = $stopat;
             }
-            $iterator = new \core\dml\recordset_walk($recordset, array($searcharea, 'get_document'), $options);
+            $iterator = new skip_future_documents_iterator(new \core\dml\recordset_walk(
+                    $recordset, array($searcharea, 'get_document'), $options));
             $result = $this->engine->add_documents($iterator, $searcharea, $options);
+            $recordset->close();
             if (count($result) === 5) {
                 list($numrecords, $numdocs, $numdocsignored, $lastindexeddoc, $partial) = $result;
             } else {
diff --git a/search/classes/skip_future_documents_iterator.php b/search/classes/skip_future_documents_iterator.php
new file mode 100644 (file)
index 0000000..3204c83
--- /dev/null
@@ -0,0 +1,94 @@
+<?php
+// This file is part of Moodle - http://moodle.org/
+//
+// Moodle is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// Moodle is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
+
+/**
+ * Iterator for skipping search recordset documents that are in the future.
+ *
+ * @package core_search
+ * @copyright 2017 The Open University
+ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ */
+
+namespace core_search;
+
+defined('MOODLE_INTERNAL') || die();
+
+/**
+ * Iterator for skipping search recordset documents that are in the future.
+ *
+ * This iterator stops iterating if it receives a document that was modified later than the
+ * specified cut-off time (usually current time).
+ *
+ * This iterator assumes that its parent iterator returns documents in modified order (which is
+ * required to be the case for search indexing). This means we will stop retrieving data from the
+ * recordset
+ *
+ * @copyright 2017 The Open University
+ * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
+ */
+class skip_future_documents_iterator implements \Iterator {
+    /** @var \Iterator Parent iterator */
+    protected $parent;
+
+    /** @var int Cutoff time; anything later than this will cause the iterator to stop */
+    protected $cutoff;
+
+    /**
+     * Constructor.
+     *
+     * @param \Iterator $parent Parent iterator, must return search documents in modified order
+     * @param int $cutoff Cut-off time, default is current time
+     */
+    public function __construct(\Iterator $parent, $cutoff = 0) {
+        $this->parent = $parent;
+        if ($cutoff) {
+            $this->cutoff = $cutoff;
+        } else {
+            $this->cutoff = time();
+        }
+    }
+
+    public function current() {
+        return $this->parent->current();
+    }
+
+    public function next() {
+        $this->parent->next();
+    }
+
+    public function key() {
+        return $this->parent->key();
+    }
+
+    public function valid() {
+        // Check parent.
+        if (!$this->parent->valid()) {
+            return false;
+        }
+
+        // See if document is after the cut-off time.
+        $doc = $this->parent->current();
+        if ($doc->get('modified') > $this->cutoff) {
+            return false;
+        }
+
+        return true;
+    }
+
+    public function rewind() {
+        $this->parent->rewind();
+    }
+}
index 9ae2a4d..c5f8886 100644 (file)
@@ -261,6 +261,58 @@ class search_manager_testcase extends advanced_testcase {
         $this->assertFalse(get_config($componentname, $varname . '_partial'));
     }
 
+    /**
+     * Tests that documents with modified time in the future are NOT indexed (as this would cause
+     * a problem by preventing it from indexing other documents modified between now and the future
+     * date).
+     */
+    public function test_future_documents() {
+        $this->resetAfterTest();
+
+        // Create a course and a forum.
+        $generator = $this->getDataGenerator();
+        $course = $generator->create_course();
+        $forum = $generator->create_module('forum', ['course' => $course->id]);
+
+        // Index everything up to current. Ensure the course is older than current second so it
+        // definitely doesn't get indexed again next time.
+        $this->waitForSecond();
+        $search = testable_core_search::instance();
+        $search->index(false, 0);
+        $search->get_engine()->get_and_clear_added_documents();
+
+        // Add 2 discussions to the forum, one of which happend just now, but the other is
+        // incorrectly set to the future.
+        $now = time();
+        $userid = get_admin()->id;
+        $generator->get_plugin_generator('mod_forum')->create_discussion(['course' => $course->id,
+                'forum' => $forum->id, 'userid' => $userid, 'timemodified' => $now,
+                'name' => 'Frog']);
+        $generator->get_plugin_generator('mod_forum')->create_discussion(['course' => $course->id,
+                'forum' => $forum->id, 'userid' => $userid, 'timemodified' => $now + 100,
+                'name' => 'Toad']);
+
+        // Wait for a second so we're not actually on the same second as the forum post (there's a
+        // 1 second overlap between indexing; it would get indexed in both checks below otherwise).
+        $this->waitForSecond();
+
+        // Index.
+        $search->index(false);
+        $added = $search->get_engine()->get_and_clear_added_documents();
+        $this->assertCount(1, $added);
+        $this->assertEquals('Frog', $added[0]->get('title'));
+
+        // Check latest time - it should be the same as $now, not the + 100.
+        $searcharea = $search->get_search_area($this->forumpostareaid);
+        list($componentname, $varname) = $searcharea->get_config_var_name();
+        $this->assertEquals($now, get_config($componentname, $varname . '_lastindexrun'));
+
+        // Index again - there should be nothing to index this time.
+        $search->index(false);
+        $added = $search->get_engine()->get_and_clear_added_documents();
+        $this->assertCount(0, $added);
+    }
+
     /**
      * Adding this test here as get_areas_user_accesses process is the same, results just depend on the context level.
      *