From 2d2fcc1c9ef9ba2045387de2a823950cef821f06 Mon Sep 17 00:00:00 2001 From: sam marshall Date: Thu, 5 Oct 2017 16:04:44 +0100 Subject: [PATCH] MDL-60357 core_search: Future modified times cause serious problems --- search/classes/manager.php | 4 +- .../skip_future_documents_iterator.php | 94 +++++++++++++++++++ search/tests/manager_test.php | 52 ++++++++++ 3 files changed, 149 insertions(+), 1 deletion(-) create mode 100644 search/classes/skip_future_documents_iterator.php diff --git a/search/classes/manager.php b/search/classes/manager.php index 3e8daac9d8e..698c447d506 100644 --- a/search/classes/manager.php +++ b/search/classes/manager.php @@ -698,8 +698,10 @@ class manager { if ($timelimit) { $options['stopat'] = $stopat; } - $iterator = new \core\dml\recordset_walk($recordset, array($searcharea, 'get_document'), $options); + $iterator = new skip_future_documents_iterator(new \core\dml\recordset_walk( + $recordset, array($searcharea, 'get_document'), $options)); $result = $this->engine->add_documents($iterator, $searcharea, $options); + $recordset->close(); if (count($result) === 5) { list($numrecords, $numdocs, $numdocsignored, $lastindexeddoc, $partial) = $result; } else { diff --git a/search/classes/skip_future_documents_iterator.php b/search/classes/skip_future_documents_iterator.php new file mode 100644 index 00000000000..3204c833f37 --- /dev/null +++ b/search/classes/skip_future_documents_iterator.php @@ -0,0 +1,94 @@ +. + +/** + * Iterator for skipping search recordset documents that are in the future. + * + * @package core_search + * @copyright 2017 The Open University + * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later + */ + +namespace core_search; + +defined('MOODLE_INTERNAL') || die(); + +/** + * Iterator for skipping search recordset documents that are in the future. + * + * This iterator stops iterating if it receives a document that was modified later than the + * specified cut-off time (usually current time). + * + * This iterator assumes that its parent iterator returns documents in modified order (which is + * required to be the case for search indexing). This means we will stop retrieving data from the + * recordset + * + * @copyright 2017 The Open University + * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later + */ +class skip_future_documents_iterator implements \Iterator { + /** @var \Iterator Parent iterator */ + protected $parent; + + /** @var int Cutoff time; anything later than this will cause the iterator to stop */ + protected $cutoff; + + /** + * Constructor. + * + * @param \Iterator $parent Parent iterator, must return search documents in modified order + * @param int $cutoff Cut-off time, default is current time + */ + public function __construct(\Iterator $parent, $cutoff = 0) { + $this->parent = $parent; + if ($cutoff) { + $this->cutoff = $cutoff; + } else { + $this->cutoff = time(); + } + } + + public function current() { + return $this->parent->current(); + } + + public function next() { + $this->parent->next(); + } + + public function key() { + return $this->parent->key(); + } + + public function valid() { + // Check parent. + if (!$this->parent->valid()) { + return false; + } + + // See if document is after the cut-off time. + $doc = $this->parent->current(); + if ($doc->get('modified') > $this->cutoff) { + return false; + } + + return true; + } + + public function rewind() { + $this->parent->rewind(); + } +} diff --git a/search/tests/manager_test.php b/search/tests/manager_test.php index 9ae2a4dab37..c5f8886d355 100644 --- a/search/tests/manager_test.php +++ b/search/tests/manager_test.php @@ -261,6 +261,58 @@ class search_manager_testcase extends advanced_testcase { $this->assertFalse(get_config($componentname, $varname . '_partial')); } + /** + * Tests that documents with modified time in the future are NOT indexed (as this would cause + * a problem by preventing it from indexing other documents modified between now and the future + * date). + */ + public function test_future_documents() { + $this->resetAfterTest(); + + // Create a course and a forum. + $generator = $this->getDataGenerator(); + $course = $generator->create_course(); + $forum = $generator->create_module('forum', ['course' => $course->id]); + + // Index everything up to current. Ensure the course is older than current second so it + // definitely doesn't get indexed again next time. + $this->waitForSecond(); + $search = testable_core_search::instance(); + $search->index(false, 0); + $search->get_engine()->get_and_clear_added_documents(); + + // Add 2 discussions to the forum, one of which happend just now, but the other is + // incorrectly set to the future. + $now = time(); + $userid = get_admin()->id; + $generator->get_plugin_generator('mod_forum')->create_discussion(['course' => $course->id, + 'forum' => $forum->id, 'userid' => $userid, 'timemodified' => $now, + 'name' => 'Frog']); + $generator->get_plugin_generator('mod_forum')->create_discussion(['course' => $course->id, + 'forum' => $forum->id, 'userid' => $userid, 'timemodified' => $now + 100, + 'name' => 'Toad']); + + // Wait for a second so we're not actually on the same second as the forum post (there's a + // 1 second overlap between indexing; it would get indexed in both checks below otherwise). + $this->waitForSecond(); + + // Index. + $search->index(false); + $added = $search->get_engine()->get_and_clear_added_documents(); + $this->assertCount(1, $added); + $this->assertEquals('Frog', $added[0]->get('title')); + + // Check latest time - it should be the same as $now, not the + 100. + $searcharea = $search->get_search_area($this->forumpostareaid); + list($componentname, $varname) = $searcharea->get_config_var_name(); + $this->assertEquals($now, get_config($componentname, $varname . '_lastindexrun')); + + // Index again - there should be nothing to index this time. + $search->index(false); + $added = $search->get_engine()->get_and_clear_added_documents(); + $this->assertCount(0, $added); + } + /** * Adding this test here as get_areas_user_accesses process is the same, results just depend on the context level. * -- 2.43.0