MDL-55356 core_search: New manager API index_context
author sam marshall <s.marshall@open.ac.uk>
Mon, 11 Sep 2017 16:30:10 +0000 (17:30 +0100)
committer sam marshall <s.marshall@open.ac.uk>
Wed, 11 Oct 2017 16:17:10 +0000 (17:17 +0100)
New function which uses the get_document_recordset function in
search areas to support full and partial indexing of a given
context.

search/classes/base.php
search/classes/engine.php
search/classes/manager.php
search/tests/manager_test.php

index 3ba3297..2c10388 100644 (file)
@@ -310,6 +310,9 @@ abstract class base {
      *     indexfiles => File indexing is enabled if true.
      *     lastindexedtime => The last time this area was indexed. 0 if never indexed.
      *
+     * The lastindexedtime value is not set if indexing a specific context rather than the whole
+     * system.
+     *
      * @param \stdClass $record A record containing, at least, the indexed document id and a modified timestamp
      * @param array     $options Options for document creation
      * @return \core_search\document
index 6957139..de5fff2 100644 (file)
@@ -229,7 +229,7 @@ abstract class engine {
                 continue;
             }
 
-            if ($options['lastindexedtime'] == 0) {
+            if (isset($options['lastindexedtime']) && $options['lastindexedtime'] == 0) {
                 // If we have never indexed this area before, it must be new.
                 $document->set_is_new(true);
             }
index 47b4b23..e8a43a8 100644 (file)
@@ -632,7 +632,7 @@ class manager {
      *
      * @param bool $fullindex Whether we should reindex everything or not.
      * @param float $timelimit Time limit in seconds (0 = no time limit)
-     * @param \progress_trace $progress Optional class for tracking progress
+     * @param \progress_trace|null $progress Optional class for tracking progress
      * @throws \moodle_exception
      * @return bool Whether there was any updated document or not.
      */
@@ -775,6 +775,150 @@ class manager {
         return (bool)$sumdocs;
     }
 
+    /**
+     * Indexes or reindexes a specific context of the system, e.g. one course.
+     *
+     * The function returns an object with field 'complete' (true or false).
+     *
+     * This function supports partial indexing via the time limit parameter. If the time limit
+     * expires, the result also has fields 'startfromarea' and 'startfromtime', which can be
+     * passed as $startfromarea and $startfromtime next time to continue indexing.
+     *
+     * @param \context $context Context to restrict index.
+     * @param string $singleareaid If specified, indexes only the given area.
+     * @param float $timelimit Time limit in seconds (0 = no time limit)
+     * @param \progress_trace|null $progress Optional class for tracking progress
+     * @param string $startfromarea Area to start from
+     * @param int $startfromtime Timestamp to start from
+     * @return \stdClass Object indicating success
+     */
+    public function index_context($context, $singleareaid = '', $timelimit = 0,
+            \progress_trace $progress = null, $startfromarea = '', $startfromtime = 0) {
+        if (!$progress) {
+            $progress = new \null_progress_trace();
+        }
+
+        // Work out time to stop, if limited.
+        if ($timelimit) {
+            $stopat = microtime(true) + $timelimit;
+        }
+
+        // No PHP time limit.
+        \core_php_time_limit::raise();
+
+        // Notify the engine that an index is starting.
+        $this->engine->index_starting(false);
+
+        $sumdocs = 0;
+        // Default to 'not partial' so the result is well-defined even if no area gets indexed.
+        $partial = false;
+
+        // Get all search areas, in consistent order.
+        $searchareas = $this->get_search_areas_list(true);
+        ksort($searchareas);
+
+        // Are we skipping past some that were handled previously?
+        $skipping = $startfromarea ? true : false;
+
+        foreach ($searchareas as $areaid => $searcharea) {
+            // If we're only processing one area id, skip all the others.
+            if ($singleareaid && $singleareaid !== $areaid) {
+                continue;
+            }
+
+            // If we're skipping to a later area, continue through the loop.
+            $referencestarttime = 0;
+            if ($skipping) {
+                if ($areaid !== $startfromarea) {
+                    continue;
+                }
+                // Stop skipping and note the reference start time.
+                $skipping = false;
+                $referencestarttime = $startfromtime;
+            }
+
+            $progress->output('Processing area: ' . $searcharea->get_visible_name());
+
+            $elapsed = microtime(true);
+
+            // Get the recordset of all documents from the area for this context.
+            $recordset = $searcharea->get_document_recordset($referencestarttime, $context);
+            if (!$recordset) {
+                if ($recordset === null) {
+                    $progress->output('Skipping (not relevant to context).', 1);
+                } else {
+                    $progress->output('Skipping (does not support context indexing).', 1);
+                }
+                continue;
+            }
+
+            // Notify the engine that an area is starting.
+            $this->engine->area_index_starting($searcharea, false);
+
+            // Work out search options.
+            $options = [];
+            $options['indexfiles'] = $this->engine->file_indexing_enabled() && $searcharea->uses_file_indexing();
+            if ($timelimit) {
+                $options['stopat'] = $stopat;
+            }
+
+            // Construct iterator which will use get_document on the recordset results.
+            $iterator = new \core\dml\recordset_walk($recordset, array($searcharea, 'get_document'), $options);
+
+            // Use this iterator to add documents.
+            $result = $this->engine->add_documents($iterator, $searcharea, $options);
+            if (count($result) === 5) {
+                list($numrecords, $numdocs, $numdocsignored, $lastindexeddoc, $partial) = $result;
+            } else {
+                // Backward compatibility for engines that don't support partial adding.
+                list($numrecords, $numdocs, $numdocsignored, $lastindexeddoc) = $result;
+                debugging('engine::add_documents() should return $partial (4-value return is deprecated)',
+                        DEBUG_DEVELOPER);
+                $partial = false;
+            }
+
+            if ($numdocs > 0) {
+                // Keep a running total so the indexed event and index_complete see the real count.
+                $sumdocs += $numdocs;
+                $elapsed = round((microtime(true) - $elapsed), 3);
+                $progress->output('Processed ' . $numrecords . ' records containing ' . $numdocs .
+                        ' documents, in ' . $elapsed . ' seconds' .
+                        ($partial ? ' (not complete)' : '') . '.', 1);
+            } else {
+                $progress->output('No documents to index.', 1);
+            }
+
+            // Notify the engine this area is complete, but don't store any times as this is not
+            // part of the 'normal' search index.
+            if (!$this->engine->area_index_complete($searcharea, $numdocs, false)) {
+                $progress->output('Engine reported error.', 1);
+            }
+
+            if ($partial && $timelimit && (microtime(true) >= $stopat)) {
+                $progress->output('Stopping indexing due to time limit.');
+                break;
+            }
+        }
+
+        if ($sumdocs > 0) {
+            $event = \core\event\search_indexed::create(array('context' => $context));
+            $event->trigger();
+        }
+
+        $this->engine->index_complete($sumdocs, false);
+
+        // Indicate in result whether we completed indexing, or only part of it.
+        $result = new \stdClass();
+        if ($partial) {
+            $result->complete = false;
+            $result->startfromarea = $areaid;
+            $result->startfromtime = $lastindexeddoc;
+        } else {
+            $result->complete = true;
+        }
+        return $result;
+    }
+
     /**
      * Resets areas config.
      *
index 0c1984c..33fa8c5 100644 (file)
@@ -261,6 +261,106 @@ class search_manager_testcase extends advanced_testcase {
         $this->assertFalse(get_config($componentname, $varname . '_partial'));
     }
 
+    /**
+     * Tests that indexing a specified context works correctly.
+     */
+    public function test_context_indexing() {
+        global $USER;
+
+        $this->resetAfterTest();
+        $this->setAdminUser();
+
+        // Create a course and two forums and a page.
+        $generator = $this->getDataGenerator();
+        $course = $generator->create_course();
+        $now = time();
+        $forum1 = $generator->create_module('forum', ['course' => $course->id]);
+        $generator->get_plugin_generator('mod_forum')->create_discussion(['course' => $course->id,
+                'forum' => $forum1->id, 'userid' => $USER->id, 'timemodified' => $now,
+                'name' => 'Frog']);
+        $this->waitForSecond();
+        $generator->get_plugin_generator('mod_forum')->create_discussion(['course' => $course->id,
+                'forum' => $forum1->id, 'userid' => $USER->id, 'timemodified' => $now + 2,
+                'name' => 'Zombie']);
+        $forum2 = $generator->create_module('forum', ['course' => $course->id]);
+        $this->waitForSecond();
+        $generator->get_plugin_generator('mod_forum')->create_discussion(['course' => $course->id,
+                'forum' => $forum2->id, 'userid' => $USER->id, 'timemodified' => $now + 1,
+                'name' => 'Toad']);
+        $generator->create_module('page', ['course' => $course->id]);
+        $generator->create_module('forum', ['course' => $course->id]);
+
+        // Index forum 1 only.
+        $search = testable_core_search::instance();
+        $buffer = new progress_trace_buffer(new text_progress_trace(), false);
+        $result = $search->index_context(\context_module::instance($forum1->cmid), '', 0, $buffer);
+        $this->assertTrue($result->complete);
+        $log = $buffer->get_buffer();
+        $buffer->reset_buffer();
+
+        // Confirm that output only processed 1 forum activity and 2 posts.
+        $this->assertNotFalse(strpos($log, "area: Forum - activity information\n  Processed 1 "));
+        $this->assertNotFalse(strpos($log, "area: Forum - posts\n  Processed 2 "));
+
+        // Confirm that some areas for different types of context were skipped.
+        $this->assertNotFalse(strpos($log, "area: Users\n  Skipping"));
+        $this->assertNotFalse(strpos($log, "area: My courses\n  Skipping"));
+
+        // Confirm that another module area had no results.
+        $this->assertNotFalse(strpos($log, "area: Page\n  No documents"));
+
+        // Index whole course.
+        $result = $search->index_context(\context_course::instance($course->id), '', 0, $buffer);
+        $this->assertTrue($result->complete);
+        $log = $buffer->get_buffer();
+        $buffer->reset_buffer();
+
+        // Confirm that output processed 3 forum activities and 3 posts.
+        $this->assertNotFalse(strpos($log, "area: Forum - activity information\n  Processed 3 "));
+        $this->assertNotFalse(strpos($log, "area: Forum - posts\n  Processed 3 "));
+
+        // The course area was also included this time.
+        $this->assertNotFalse(strpos($log, "area: My courses\n  Processed 1 "));
+
+        // Confirm that another module area had results too.
+        $this->assertNotFalse(strpos($log, "area: Page\n  Processed 1 "));
+
+        // Index whole course, but only forum posts.
+        $result = $search->index_context(\context_course::instance($course->id), 'mod_forum-post',
+                0, $buffer);
+        $this->assertTrue($result->complete);
+        $log = $buffer->get_buffer();
+        $buffer->reset_buffer();
+
+        // Confirm that output processed 3 posts but not forum activities.
+        $this->assertFalse(strpos($log, "area: Forum - activity information"));
+        $this->assertNotFalse(strpos($log, "area: Forum - posts\n  Processed 3 "));
+
+        // Set time limit and retry index of whole course, taking 3 tries to complete it.
+        // The add delay (assumed to apply per document added) is what makes the 1-second limit expire.
+        $search->get_engine()->set_add_delay(0.4);
+        $result = $search->index_context(\context_course::instance($course->id), '', 1, $buffer);
+        $log = $buffer->get_buffer();
+        $buffer->reset_buffer();
+        $this->assertFalse($result->complete);
+        $this->assertNotFalse(strpos($log, "area: Forum - activity information\n  Processed 2 "));
+
+        $result = $search->index_context(\context_course::instance($course->id), '', 1, $buffer,
+                $result->startfromarea, $result->startfromtime);
+        $log = $buffer->get_buffer();
+        $buffer->reset_buffer();
+        $this->assertNotFalse(strpos($log, "area: Forum - activity information\n  Processed 2 "));
+        $this->assertNotFalse(strpos($log, "area: Forum - posts\n  Processed 2 "));
+        $this->assertFalse($result->complete);
+
+        $result = $search->index_context(\context_course::instance($course->id), '', 1, $buffer,
+                $result->startfromarea, $result->startfromtime);
+        $log = $buffer->get_buffer();
+        $buffer->reset_buffer();
+        $this->assertNotFalse(strpos($log, "area: Forum - posts\n  Processed 2 "));
+        $this->assertTrue($result->complete);
+    }
+
     /**
      * Adding this test here as get_areas_user_accesses process is the same, results just depend on the context level.
      *