Merge branch 'MDL-53516-master' of git://github.com/merrill-oakland/moodle
authorDan Poltawski <dan@moodle.com>
Tue, 29 Mar 2016 08:35:44 +0000 (16:35 +0800)
committerDan Poltawski <dan@moodle.com>
Tue, 29 Mar 2016 08:35:44 +0000 (16:35 +0800)
1  2 
search/engine/solr/classes/document.php
search/engine/solr/classes/engine.php
search/engine/solr/tests/engine_test.php

@@@ -72,41 -115,45 +115,80 @@@ class document extends \core_search\doc
       * @return int
       */
      protected function get_text_format() {
 -        return FORMAT_MARKDOWN;
 +        return FORMAT_HTML;
 +    }
 +
 +    /**
 +     * Formats a text string coming from the search engine.
 +     *
 +     * @param  string $text Text to format
 +     * @return string HTML text to be renderer
 +     */
 +    protected function format_text($text) {
 +        // Since we allow output for highlighting, we need to encode html entities.
 +        // This ensures plaintext html chars don't become valid html.
 +        $out = s($text);
 +
 +        $startcount = 0;
 +        $endcount = 0;
 +
 +        // Remove end/start pairs that span a few common seperation characters. Allows us to highlight phrases instead of words.
 +        $regex = '|'.engine::HIGHLIGHT_END.'([ .,-]{0,3})'.engine::HIGHLIGHT_START.'|';
 +        $out = preg_replace($regex, '$1', $out);
 +
 +        // Now replace our start and end highlight markers.
 +        $out = str_replace(engine::HIGHLIGHT_START, '<span class="highlight">', $out, $startcount);
 +        $out = str_replace(engine::HIGHLIGHT_END, '</span>', $out, $endcount);
 +
 +        // This makes sure any highlight tags are balanced, incase truncation or the highlight text contained our markers.
 +        while ($startcount > $endcount) {
 +            $out .= '</span>';
 +            $endcount++;
 +        }
 +        while ($startcount < $endcount) {
 +            $out = '<span class="highlight">' . $out;
 +            $endcount++;
 +        }
 +
 +        return parent::format_text($out);
      }
+     /**
+      * Fill in defaults for fields that are still unset. Called after document building, but before export.
+      *
+      * Sub-classes overriding this should make sure to call parent::apply_defaults().
+      */
+     protected function apply_defaults() {
+         parent::apply_defaults();
+         // Nothing to do when a file grouping id has already been provided.
+         if (isset($this->data['solr_filegroupingid'])) {
+             return;
+         }
+         // Otherwise the file grouping id defaults to the document's own id.
+         $this->data['solr_filegroupingid'] = $this->data['id'];
+     }
+     /**
+      * Build the engine export for one file attached to this document.
+      *
+      * Starts from this document's own export, drops the text fields and fills in the file specific fields.
+      *
+      * @param \stored_file $file The stored file we are talking about.
+      * @return array
+      */
+     public function export_file_for_engine($file) {
+         $filedata = $this->export_for_engine();
+         // Content is indexed in the main document, so the text fields are not repeated here.
+         unset($filedata['content'], $filedata['description1'], $filedata['description2']);
+         $fileid = $file->get_id();
+         // Append the file id to the document id so that the file entry gets a unique id.
+         $filedata['id'] .= '-solrfile'.$fileid;
+         $filedata['type'] = \core_search\manager::TYPE_FILE;
+         $filedata['solr_fileid'] = $fileid;
+         $filedata['solr_filecontenthash'] = $file->get_contenthash();
+         $filedata['solr_fileindexedcontent'] = self::INDEXED_FILE_TRUE;
+         $filedata['title'] = $file->get_filename();
+         return $filedata;
+     }
  }
Simple merge
@@@ -276,26 -285,221 +285,244 @@@ class search_solr_engine_testcase exten
          $this->assertEquals($originalid, $results[0]->get('id'));
      }
  
 +    public function test_highlight() {
 +        global $PAGE;
 +
 +        $this->search->index();
 +
 +        $querydata = new stdClass();
 +        $querydata->q = 'message';
 +
 +        $results = $this->search->search($querydata);
 +        $this->assertCount(2, $results);
 +
 +        $result = reset($results);
 +
 +        $regex = '|'.\search_solr\engine::HIGHLIGHT_START.'message'.\search_solr\engine::HIGHLIGHT_END.'|';
 +        $this->assertRegExp($regex, $result->get('content'));
 +
 +        $searchrenderer = $PAGE->get_renderer('core_search');
 +        $exported = $result->export_for_template($searchrenderer);
 +
 +        $regex = '|<span class="highlight">message</span>|';
 +        $this->assertRegExp($regex, $exported['content']);
 +    }
++
+     /**
+      * Runs a full index and expects two results when searching for the phrase "File contents".
+      */
+     public function test_index_file() {
+         $fileindexingoff = defined('TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING') && (TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING == 1);
+         if ($fileindexingoff) {
+             $this->markTestSkipped('Solr file indexing not enabled.');
+             return;
+         }
+         // Very simple test: index, then search for the phrase.
+         $this->search->index();
+         $query = new stdClass();
+         $query->q = '"File contents"';
+         $found = $this->search->search($query);
+         $this->assertCount(2, $found);
+     }
+     public function test_reindexing_files() {
+         if (defined('TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING') && (TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING == 1)) {
+             $this->markTestSkipped('Solr file indexing not enabled.');
+             return;
+         }
+         // Get engine and area to work with.
+         $engine = $this->search->get_engine();
+         $areaid = \core_search\manager::generate_areaid('core_mocksearch', 'role_capabilities');
+         $area = \core_search\manager::get_search_area($areaid);
+         // Get a single record to make a doc from.
+         $recordset = $area->get_recordset_by_timestamp(0);
+         $record = $recordset->current();
+         $recordset->close();
+         $doc = $area->get_document($record);
+         // Now we are going to make some files.
+         $fs = get_file_storage();
+         $syscontext = \context_system::instance();
+         $files = array();
+         $filerecord = array(
+             'contextid' => $syscontext->id,
+             'component' => 'core',
+             'filearea'  => 'unittest',
+             'itemid'    => 0,
+             'filepath'  => '/',
+         );
+         // We make enough files to pass the 500 file threshold. That is the boundary between fetch groups when getting files.
+         $boundary = 500;
+         $top = (int)($boundary * 1.1);
+         for ($i = 0; $i < $top; $i++) {
+             $filerecord['filename']  = 'searchfile'.$i;
+             $file = $fs->create_file_from_string($filerecord, 'Some FileContents'.$i);
+             $doc->add_stored_file($file);
+             $files[] = $file;
+         }
+         // Add the doc with lots of files, then commit.
+         $engine->add_document($doc, true);
+         $engine->area_index_complete($area->get_area_id());
+         // Indexes we are going to check. 0 means we will delete, 1 means we will keep.
+         $checkfiles = array(
+             0 => 0,                        // Check the beginning of the set.
+             1 => 1,
+             2 => 0,
+             ($top - 3) => 0,               // Check the end of the set.
+             ($top - 2) => 1,
+             ($top - 1) => 0,
+             ($boundary - 2) => 0,          // Check at the boundary between fetch groups.
+             ($boundary - 1) => 0,
+             $boundary => 0,
+             ($boundary + 1) => 0,
+             ((int)($boundary * 0.5)) => 1, // Make sure we keep some middle ones.
+             ((int)($boundary * 1.05)) => 1
+         );
+         $querydata = new stdClass();
+         // First, check that all the files are currently there, both by content and by filename.
+         foreach ($checkfiles as $key => $unused) {
+             $querydata->q = 'FileContents'.$key;
+             $this->assertCount(1, $this->search->search($querydata));
+             $querydata->q = 'searchfile'.$key;
+             $this->assertCount(1, $this->search->search($querydata));
+         }
+         // Remove the files we want removed from the files array.
+         foreach ($checkfiles as $key => $keep) {
+             if (!$keep) {
+                 unset($files[$key]);
+             }
+         }
+         // And make us a new file to add.
+         $filerecord['filename']  = 'searchfileNew';
+         $files[] = $fs->create_file_from_string($filerecord, 'Some FileContentsNew');
+         $checkfiles['New'] = 1;
+         $doc = $area->get_document($record);
+         foreach($files as $file) {
+             $doc->add_stored_file($file);
+         }
+         // Reindex the document with the changed files.
+         $engine->add_document($doc, true);
+         $engine->area_index_complete($area->get_area_id());
+         cache_helper::purge_by_definition('core', 'search_results');
+         // Go through our check array, and see if the file is there or not: kept files give one result, removed files none.
+         foreach ($checkfiles as $key => $keep) {
+             $querydata->q = 'FileContents'.$key;
+             $this->assertCount($keep, $this->search->search($querydata));
+             $querydata->q = 'searchfile'.$key;
+             $this->assertCount($keep, $this->search->search($querydata));
+         }
+         // Now check that we get one result when we search for something that is in all of them.
+         $querydata->q = 'Some';
+         $this->assertCount(1, $this->search->search($querydata));
+     }
+     /**
+      * Indexes a document with one file above 1kB and one small file, then checks which of them can be found.
+      *
+      * The large file is expected to be findable by filename only, the small file by both contents and filename.
+      */
+     public function test_index_filtered_file() {
+         if (defined('TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING') && (TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING == 1)) {
+             $this->markTestSkipped('Solr file indexing not enabled.');
+             return;
+         }
+         // Get engine and area to work with.
+         $engine = $this->search->get_engine();
+         $areaid = \core_search\manager::generate_areaid('core_mocksearch', 'role_capabilities');
+         $area = \core_search\manager::get_search_area($areaid);
+         // Get a single record to make a doc from.
+         $recordset = $area->get_recordset_by_timestamp(0);
+         $record = $recordset->current();
+         $recordset->close();
+         $doc = $area->get_document($record);
+         // Now we are going to make some files.
+         $fs = get_file_storage();
+         $syscontext = \context_system::instance();
+         $filerecord = array(
+             'contextid' => $syscontext->id,
+             'component' => 'core',
+             'filearea'  => 'unittest',
+             'itemid'    => 0,
+             'filepath'  => '/',
+             'filename'  => 'largefile'
+         );
+         // We need to make a file greater than 1kB in size, which is the lowest filter size.
+         $contents = 'Some LargeFindContent to find.';
+         for ($i = 0; $i < 200; $i++) {
+             $contents .= ' The quick brown fox jumps over the lazy dog.';
+         }
+         $this->assertGreaterThan(1024, strlen($contents));
+         $file = $fs->create_file_from_string($filerecord, $contents);
+         $doc->add_stored_file($file);
+         $filerecord['filename'] = 'smallfile';
+         $file = $fs->create_file_from_string($filerecord, 'Some SmallFindContent to find.');
+         $doc->add_stored_file($file);
+         // Add the doc with both files, then commit.
+         $engine->add_document($doc, true);
+         $engine->area_index_complete($area->get_area_id());
+         $querydata = new stdClass();
+         // We shouldn't be able to find the large file contents.
+         $querydata->q = 'LargeFindContent';
+         $this->assertCount(0, $this->search->search($querydata));
+         // But we should be able to find the filename.
+         $querydata->q = 'largefile';
+         $this->assertCount(1, $this->search->search($querydata));
+         // We should be able to find the small file contents.
+         $querydata->q = 'SmallFindContent';
+         $this->assertCount(1, $this->search->search($querydata));
+         // And we should be able to find the filename.
+         $querydata->q = 'smallfile';
+         $this->assertCount(1, $this->search->search($querydata));
+     }
+     /**
+      * Deletes one indexed document by id and checks that a repeated search no longer returns it.
+      */
+     public function test_delete_by_id() {
+         $fileindexingoff = defined('TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING') && (TEST_SEARCH_SOLR_DISABLE_FILE_INDEXING == 1);
+         if ($fileindexingoff) {
+             $this->markTestSkipped('Solr file indexing not enabled.');
+             return;
+         }
+         // Start by getting the files into the index.
+         $this->search->index();
+         $solr = $this->search->get_engine();
+         $query = new stdClass();
+         // Both documents should be found before anything is deleted.
+         $query->q = '"File contents"';
+         $before = $this->search->search($query);
+         $this->assertCount(2, $before);
+         $target = reset($before);
+         $deletedid = $target->get('id');
+         $solr->delete_by_id($deletedid);
+         cache_helper::purge_by_definition('core', 'search_results');
+         // The same search should now return only the other document.
+         $after = $this->search->search($query);
+         $this->assertCount(1, $after);
+         $remaining = reset($after);
+         $this->assertNotEquals($deletedid, $remaining->get('id'));
+     }
  }