MDL-40633 filter_urltolink breaking some image links.
author Tim Hunt <T.J.Hunt@open.ac.uk>
Fri, 12 Jul 2013 10:41:12 +0000 (11:41 +0100)
committer Tim Hunt <T.J.Hunt@open.ac.uk>
Fri, 12 Jul 2013 12:06:09 +0000 (13:06 +0100)
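This was a regression caused by MDL-22390. HTML like
<img src="http://www.example.com/logo.gif" />
was being broken, because the www. inside a URL that already starts with http:// was matched as the start of a separate link.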

filter/urltolink/filter.php
filter/urltolink/tests/filter_test.php

index 9d01e8d..290e7ed 100644 (file)
@@ -123,6 +123,7 @@ class filter_urltolink extends moodle_text_filter {
         }
 
         // TODO MDL-21296 - use of unicode modifiers may cause a timeout
+        $urlstart = '(?:http(s)?://|(?<!://)(www\.))';
         $domainsegment = '(?:[\pLl0-9][\pLl0-9-]*[\pLl0-9]|[\pLl0-9])';
         $numericip = '(?:(?:[0-9]{1,3}\.){3}[0-9]{1,3})';
         $port = '(?::\d*)';
@@ -131,7 +132,7 @@ class filter_urltolink extends moodle_text_filter {
         $querystring = '(?:\?(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';
         $fragment = '(?:\#(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';
 
-        $regex = "(?<!=[\"'])(?:http(s)?://|(www\.))((?:$domainsegment\.)+$domainsegment|$numericip)" .
+        $regex = "(?<!=[\"'])$urlstart((?:$domainsegment\.)+$domainsegment|$numericip)" .
                 "($port?$path$querystring?$fragment?)(?<![]),.;])";
         if ($unicoderegexp) {
             $regex = '#' . $regex . '#ui';
index 70fb3f1..e448d26 100644 (file)
@@ -137,11 +137,13 @@ class filter_urltolink_testcase extends basic_testcase {
             '<br />This is some text. www.moodle.com then some more text<br />' => '<br />This is some text. <a href="http://www.moodle.com" class="_blanktarget">www.moodle.com</a> then some more text<br />',
             //check we aren't modifying img tags
             'image<img src="http://moodle.org/logo/logo-240x60.gif" />' => 'image<img src="http://moodle.org/logo/logo-240x60.gif" />',
-            'image<img src="www.moodle.org/logo/logo-240x60.gif" />' => 'image<img src="www.moodle.org/logo/logo-240x60.gif" />',
+            'image<img src="www.moodle.org/logo/logo-240x60.gif" />'    => 'image<img src="www.moodle.org/logo/logo-240x60.gif" />',
+            'image<img src="http://www.example.com/logo.gif" />'        => 'image<img src="http://www.example.com/logo.gif" />',
             //and another url within one tag
             '<td background="http://moodle.org">&nbsp;</td>' => '<td background="http://moodle.org">&nbsp;</td>',
             '<td background="www.moodle.org">&nbsp;</td>' => '<td background="www.moodle.org">&nbsp;</td>',
             '<form name="input" action="http://moodle.org/submit.asp" method="get">'=>'<form name="input" action="http://moodle.org/submit.asp" method="get">',
+            '<td background="https://www.moodle.org">&nbsp;</td>' => '<td background="https://www.moodle.org">&nbsp;</td>',
             //partially escaped img tag
             'partially escaped img tag &lt;img src="http://moodle.org/logo/logo-240x60.gif" />' => 'partially escaped img tag &lt;img src="http://moodle.org/logo/logo-240x60.gif" />',
             //fully escaped img tag. Commented out as part of MDL-21183