Merge branch 'MDL-53393-master' of git://github.com/cameron1729/moodle
author Eloy Lafuente (stronk7) <stronk7@moodle.org>
Tue, 22 Mar 2016 02:16:51 +0000 (03:16 +0100)
committer Eloy Lafuente (stronk7) <stronk7@moodle.org>
Tue, 22 Mar 2016 02:16:51 +0000 (03:16 +0100)
lib/html2text/Html2Text.php
lib/tests/fixtures/messageinbound/evolution.test
lib/tests/fixtures/messageinbound/outlook.test
lib/tests/html2text_test.php
lib/tests/weblib_test.php
lib/thirdpartylibs.xml
mod/quiz/tests/locallib_test.php
question/type/essay/tests/upgradelibnewqe_test.php

index 0654e2c..0ed5e40 100644 (file)
@@ -23,6 +23,8 @@ class Html2Text
 {
     const ENCODING = 'UTF-8';
 
+    protected $htmlFuncFlags;
+
     /**
      * Contains the HTML content to convert.
      *
@@ -47,27 +49,25 @@ class Html2Text
     protected $search = array(
         "/\r/",                                           // Non-legal carriage return
         "/[\n\t]+/",                                      // Newlines and tabs
-        '/<head[^>]*>.*?<\/head>/i',                      // <head>
-        '/<script[^>]*>.*?<\/script>/i',                  // <script>s -- which strip_tags supposedly has problems with
-        '/<style[^>]*>.*?<\/style>/i',                    // <style>s -- which strip_tags supposedly has problems with
-        '/<p[^>]*>/i',                                    // <P>
-        '/<br[^>]*>/i',                                   // <br>
-        '/<i[^>]*>(.*?)<\/i>/i',                          // <i>
-        '/<em[^>]*>(.*?)<\/em>/i',                        // <em>
-        '/(<ul[^>]*>|<\/ul>)/i',                          // <ul> and </ul>
-        '/(<ol[^>]*>|<\/ol>)/i',                          // <ol> and </ol>
-        '/(<dl[^>]*>|<\/dl>)/i',                          // <dl> and </dl>
-        '/<li[^>]*>(.*?)<\/li>/i',                        // <li> and </li>
-        '/<dd[^>]*>(.*?)<\/dd>/i',                        // <dd> and </dd>
-        '/<dt[^>]*>(.*?)<\/dt>/i',                        // <dt> and </dt>
-        '/<li[^>]*>/i',                                   // <li>
-        '/<hr[^>]*>/i',                                   // <hr>
-        '/<div[^>]*>/i',                                  // <div>
-        '/(<table[^>]*>|<\/table>)/i',                    // <table> and </table>
-        '/(<tr[^>]*>|<\/tr>)/i',                          // <tr> and </tr>
-        '/<td[^>]*>(.*?)<\/td>/i',                        // <td> and </td>
+        '/<head\b[^>]*>.*?<\/head>/i',                    // <head>
+        '/<script\b[^>]*>.*?<\/script>/i',                // <script>s -- which strip_tags supposedly has problems with
+        '/<style\b[^>]*>.*?<\/style>/i',                  // <style>s -- which strip_tags supposedly has problems with
+        '/<i\b[^>]*>(.*?)<\/i>/i',                        // <i>
+        '/<em\b[^>]*>(.*?)<\/em>/i',                      // <em>
+        '/(<ul\b[^>]*>|<\/ul>)/i',                        // <ul> and </ul>
+        '/(<ol\b[^>]*>|<\/ol>)/i',                        // <ol> and </ol>
+        '/(<dl\b[^>]*>|<\/dl>)/i',                        // <dl> and </dl>
+        '/<li\b[^>]*>(.*?)<\/li>/i',                      // <li> and </li>
+        '/<dd\b[^>]*>(.*?)<\/dd>/i',                      // <dd> and </dd>
+        '/<dt\b[^>]*>(.*?)<\/dt>/i',                      // <dt> and </dt>
+        '/<li\b[^>]*>/i',                                 // <li>
+        '/<hr\b[^>]*>/i',                                 // <hr>
+        '/<div\b[^>]*>/i',                                // <div>
+        '/(<table\b[^>]*>|<\/table>)/i',                  // <table> and </table>
+        '/(<tr\b[^>]*>|<\/tr>)/i',                        // <tr> and </tr>
+        '/<td\b[^>]*>(.*?)<\/td>/i',                      // <td> and </td>
         '/<span class="_html2text_ignore">.+?<\/span>/i', // <span class="_html2text_ignore">...</span>
-        '/<(img)[^>]*alt=\"([^>"]+)\"[^>]*>/i',           // <img> with alt tag
+        '/<(img)\b[^>]*alt=\"([^>"]+)\"[^>]*>/i',         // <img> with alt tag
     );
 
     /**
@@ -82,8 +82,6 @@ class Html2Text
         '',                              // <head>
         '',                              // <script>s -- which strip_tags supposedly has problems with
         '',                              // <style>s -- which strip_tags supposedly has problems with
-        "\n\n",                          // <P>
-        "\n",                            // <br>
         '_\\1_',                         // <i>
         '_\\1_',                         // <em>
         "\n\n",                          // <ul> and </ul>
@@ -137,6 +135,8 @@ class Html2Text
      */
     protected $callbackSearch = array(
         '/<(h)[123456]( [^>]*)?>(.*?)<\/h[123456]>/i',           // h1 - h6
+        '/[ ]*<(p)( [^>]*)?>(.*?)<\/p>[ ]*/si',                  // <p> with surrounding whitespace.
+        '/<(br)[^>]*>[ ]*/i',                                    // <br> with leading whitespace after the newline.
         '/<(b)( [^>]*)?>(.*?)<\/b>/i',                           // <b>
         '/<(strong)( [^>]*)?>(.*?)<\/strong>/i',                 // <strong>
         '/<(th)( [^>]*)?>(.*?)<\/th>/i',                         // <th> and </th>
@@ -212,6 +212,7 @@ class Html2Text
                                 // 'inline' (show links inline)
                                 // 'nextline' (show links on the next line)
                                 // 'table' (if a table of link URLs should be listed after the text.
+                                // 'bbcode' (show links as bbcode)
 
         'width' => 70,          //  Maximum width of the formatted text, in columns.
                                 //  Set this value to 0 (or less) to ignore word wrapping
@@ -237,6 +238,9 @@ class Html2Text
 
         $this->html = $html;
         $this->options = array_merge($this->options, $options);
+        $this->htmlFuncFlags = (PHP_VERSION_ID < 50400)
+            ? ENT_COMPAT
+            : ENT_COMPAT | ENT_HTML5;
     }
 
     /**
@@ -319,6 +323,16 @@ class Html2Text
     }
 
     protected function convert()
+    {
+       $origEncoding = mb_internal_encoding();
+       mb_internal_encoding(self::ENCODING);
+
+       $this->doConvert();
+
+       mb_internal_encoding($origEncoding);
+    }
+
+    protected function doConvert()
     {
         $this->linkList = array();
 
@@ -346,7 +360,7 @@ class Html2Text
         $text = preg_replace_callback($this->callbackSearch, array($this, 'pregCallback'), $text);
         $text = strip_tags($text);
         $text = preg_replace($this->entSearch, $this->entReplace, $text);
-        $text = html_entity_decode($text, ENT_QUOTES, self::ENCODING);
+        $text = html_entity_decode($text, $this->htmlFuncFlags, self::ENCODING);
 
         // Remove unknown/unhandled entities (this cannot be done in search-and-replace block)
         $text = preg_replace('/&([a-zA-Z0-9]{2,6}|#[0-9]{2,4});/', '', $text);
@@ -396,7 +410,7 @@ class Html2Text
             $url = $link;
         } else {
             $url = $this->baseurl;
-            if (substr($link, 0, 1) != '/') {
+            if (mb_substr($link, 0, 1) != '/') {
                 $url .= '/';
             }
             $url .= $link;
@@ -411,6 +425,8 @@ class Html2Text
             return $display . ' [' . ($index + 1) . ']';
         } elseif ($linkMethod == 'nextline') {
             return $display . "\n[" . $url . ']';
+        } elseif ($linkMethod == 'bbcode') {
+            return sprintf('[url=%s]%s[/url]', $url, $display);
         } else { // link_method defaults to inline
             return $display . ' [' . $url . ']';
         }
@@ -420,7 +436,8 @@ class Html2Text
     {
         // get the content of PRE element
         while (preg_match('/<pre[^>]*>(.*)<\/pre>/ismU', $text, $matches)) {
-            $this->preContent = $matches[1];
+            // Replace br tags with newlines to prevent the search-and-replace callback from killing whitespace
+            $this->preContent = preg_replace('/(<br\b[^>]*>)/i', "\n", $matches[1]);
 
             // Run our defined tags search-and-replace with callback
             $this->preContent = preg_replace_callback(
@@ -456,11 +473,13 @@ class Html2Text
     protected function convertBlockquotes(&$text)
     {
         if (preg_match_all('/<\/*blockquote[^>]*>/i', $text, $matches, PREG_OFFSET_CAPTURE)) {
+            $originalText = $text;
             $start = 0;
             $taglen = 0;
             $level = 0;
             $diff = 0;
             foreach ($matches[0] as $m) {
+                $m[1] = mb_strlen(substr($originalText, 0, $m[1]));
                 if ($m[0][0] == '<' && $m[0][1] == '/') {
                     $level--;
                     if ($level < 0) {
@@ -471,7 +490,7 @@ class Html2Text
                         $end = $m[1];
                         $len = $end - $taglen - $start;
                         // Get blockquote content
-                        $body = substr($text, $start + $taglen - $diff, $len);
+                        $body = mb_substr($text, $start + $taglen - $diff, $len);
 
                         // Set text width
                         $pWidth = $this->options['width'];
@@ -481,20 +500,21 @@ class Html2Text
                         $this->converter($body);
                         // Add citation markers and create PRE block
                         $body = preg_replace('/((^|\n)>*)/', '\\1> ', trim($body));
-                        $body = '<pre>' . htmlspecialchars($body) . '</pre>';
+                        $body = '<pre>' . htmlspecialchars($body, $this->htmlFuncFlags, self::ENCODING) . '</pre>';
                         // Re-set text width
                         $this->options['width'] = $pWidth;
                         // Replace content
-                        $text = substr($text, 0, $start - $diff)
-                            . $body . substr($text, $end + strlen($m[0]) - $diff);
+                        $text = mb_substr($text, 0, $start - $diff)
+                            . $body
+                            . mb_substr($text, $end + mb_strlen($m[0]) - $diff);
 
-                        $diff = $len + $taglen + strlen($m[0]) - strlen($body);
+                        $diff += $len + $taglen + mb_strlen($m[0]) - mb_strlen($body);
                         unset($body);
                     }
                 } else {
                     if ($level == 0) {
                         $start = $m[1];
-                        $taglen = strlen($m[0]);
+                        $taglen = mb_strlen($m[0]);
                     }
                     $level++;
                 }
@@ -510,7 +530,18 @@ class Html2Text
      */
     protected function pregCallback($matches)
     {
-        switch (strtolower($matches[1])) {
+        switch (mb_strtolower($matches[1])) {
+            case 'p':
+                // Replace newlines with spaces.
+                $para = str_replace("\n", " ", $matches[3]);
+
+                // Trim trailing and leading whitespace within the tag.
+                $para = trim($para);
+
+                // Add trailing newlines for this para.
+                return "\n" . $para . "\n";
+            case 'br':
+                return "\n";
             case 'b':
             case 'strong':
                 return $this->toupper($matches[3]);
@@ -553,7 +584,7 @@ class Html2Text
     protected function toupper($str)
     {
         // string can contain HTML tags
-        $chunks = preg_split('/(<[^>]*>)/', $str, null, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
+        $chunks = preg_split('/(<[^>]*>)/', $str, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);
 
         // convert toupper only the text between HTML tags
         foreach ($chunks as $i => $chunk) {
@@ -573,15 +604,9 @@ class Html2Text
      */
     protected function strtoupper($str)
     {
-        $str = html_entity_decode($str, ENT_COMPAT, self::ENCODING);
-
-        if (function_exists('mb_strtoupper')) {
-            $str = mb_strtoupper($str, self::ENCODING);
-        } else {
-            $str = strtoupper($str);
-        }
-
-        $str = htmlspecialchars($str, ENT_COMPAT, self::ENCODING);
+        $str = html_entity_decode($str, $this->htmlFuncFlags, self::ENCODING);
+        $str = mb_strtoupper($str);
+        $str = htmlspecialchars($str, $this->htmlFuncFlags, self::ENCODING);
 
         return $str;
     }
index 7b8e5ca..e321ffa 100644 (file)
@@ -11,7 +11,7 @@ positive."
 
 ----EXPECTEDHTML----
 An ion meets his atom friend on the street and says he's lost an
- electron. "Are you sure?" asks the atom. The ion replies, "I'm positive."
+electron. "Are you sure?" asks the atom. The ion replies, "I'm positive."
 
 ----FULLSOURCE----
 Message-ID: <1430198383.10608.0.camel@jean>
index aa24402..1fb726d 100644 (file)
@@ -18,7 +18,7 @@ Sending mail via clent and it seems to go all good...
 Havent tried this before and it is awesome....
 
 Cheers
- Rajesh
+Rajesh
 
 ----FULLSOURCE----
 Delivered-To: moodlehqtest+aaaaaaaaaaiaaaaaaaaabqaaaaaaaaazd63zvl6kcy04ioh+@example.com
index 4214eca..44c18b8 100644 (file)
@@ -133,8 +133,8 @@ have been fixed <strong><a href="http://third.url/view.php">last week</a></stron
      * Basic text formatting.
      */
     public function test_simple() {
-        $this->assertSame("_Hello_ WORLD!", html_to_text('<p><i>Hello</i> <b>world</b>!</p>'));
-        $this->assertSame("All the WORLD’S a stage.\n\n-- William Shakespeare", html_to_text('<p>All the <strong>world’s</strong> a stage.</p><p>-- William Shakespeare</p>'));
+        $this->assertSame("_Hello_ WORLD!\n", html_to_text('<p><i>Hello</i> <b>world</b>!</p>'));
+        $this->assertSame("All the WORLD’S a stage.\n\n-- William Shakespeare\n", html_to_text('<p>All the <strong>world’s</strong> a stage.</p><p>-- William Shakespeare</p>'));
         $this->assertSame("HELLO WORLD!\n\n", html_to_text('<h1>Hello world!</h1>'));
         $this->assertSame("Hello\nworld!", html_to_text('Hello<br />world!'));
     }
index 899b088..ba2d2be 100644 (file)
@@ -88,9 +88,9 @@ class core_weblib_testcase extends advanced_testcase {
     }
 
     public function test_format_text_email() {
-        $this->assertSame("This is a TEST",
+        $this->assertSame("This is a TEST\n",
             format_text_email('<p>This is a <strong>test</strong></p>', FORMAT_HTML));
-        $this->assertSame("This is a TEST",
+        $this->assertSame("This is a TEST\n",
             format_text_email('<p class="frogs">This is a <strong class=\'fishes\'>test</strong></p>', FORMAT_HTML));
         $this->assertSame('& so is this',
             format_text_email('&amp; so is this', FORMAT_HTML));
index 4377c9d..0886d4b 100644 (file)
     <location>html2text.php</location>
     <name>HTML2Text</name>
     <license>GPL</license>
-    <version>3.0.0</version>
+    <version>4.0.1</version>
     <licenseversion>2.0+</licenseversion>
   </library>
   <library>
index 6d97ab2..e169ee7 100644 (file)
@@ -152,7 +152,7 @@ class mod_quiz_locallib_testcase extends advanced_testcase {
 
         $summary = quiz_question_tostring($question);
         $this->assertEquals('<span class="questionname">The question name</span> ' .
-                '<span class="questiontext">What sort of INEQUALITY is x &lt; y[?]</span>', $summary);
+                '<span class="questiontext">What sort of INEQUALITY is x &lt; y[?]' . "\n" . '</span>', $summary);
     }
 
     /**
index b76e7e9..90610ea 100644 (file)
@@ -218,7 +218,7 @@ class qtype_essay_attempt_upgrader_test extends question_attempt_upgrader_test_b
             'minfraction' => 0,
             'maxfraction' => 1,
             'flagged' => 0,
-            'questionsummary' => "Give two examples of facilities within XML schemas that cannot be found in Document Type Definitions (DTDs).\n_(2 marks)_",
+            'questionsummary' => "Give two examples of facilities within XML schemas that cannot be found in Document Type Definitions (DTDs).\n_(2 marks)_",
             'rightanswer' => '',
             'responsesummary' => "Variable can be typed \n\nxml Schemas fully support Namespaces",
             'timemodified' => 1273068477,