MDL-40633 filter_urltolink breaking some image links.
[moodle.git] / filter / urltolink / tests / filter_test.php
CommitLineData
2a7ace11
PS
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
18 * Unit test for the filter_urltolink
19 *
20 * @package filter_urltolink
21 * @category phpunit
22 * @copyright 2010 David Mudrak <david@moodle.com>
23 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
24 */
25
26defined('MOODLE_INTERNAL') || die();
27
28global $CFG;
29require_once($CFG->dirroot . '/filter/urltolink/filter.php'); // Include the code to test
30
31
32class filter_urltolink_testcase extends basic_testcase {
33
34 /**
35 * Helper function that represents the legacy implementation
36 * of convert_urls_into_links()
37 */
38 protected function old_convert_urls_into_links(&$text) {
39 /// Make lone URLs into links. eg http://moodle.com/
40 $text = preg_replace("%([[:space:]]|^|\(|\[)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])%i",
41 '$1<a href="$2://$3$4" target="_blank">$2://$3$4</a>', $text);
42 /// eg www.moodle.com
43 $text = preg_replace("%([[:space:]]|^|\(|\[)www\.([^[:space:]]*)([[:alnum:]#?/&=])%i",
44 '$1<a href="http://www.$2$3" target="_blank">www.$2$3</a>', $text);
45 }
46
c2c6af94 47 function get_convert_urls_into_links_test_cases() {
2a7ace11
PS
48 $texts = array (
49 //just a url
50 'http://moodle.org - URL' => '<a href="http://moodle.org" class="_blanktarget">http://moodle.org</a> - URL',
51 'www.moodle.org - URL' => '<a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a> - URL',
52 //url with params
53 'URL: http://moodle.org/s/i=1&j=2' => 'URL: <a href="http://moodle.org/s/i=1&j=2" class="_blanktarget">http://moodle.org/s/i=1&j=2</a>',
54 //url with escaped params
55 'URL: www.moodle.org/s/i=1&amp;j=2' => 'URL: <a href="http://www.moodle.org/s/i=1&amp;j=2" class="_blanktarget">www.moodle.org/s/i=1&amp;j=2</a>',
56 //https url with params
57 'URL: https://moodle.org/s/i=1&j=2' => 'URL: <a href="https://moodle.org/s/i=1&j=2" class="_blanktarget">https://moodle.org/s/i=1&j=2</a>',
58 //url with port and params
59 'URL: http://moodle.org:8080/s/i=1' => 'URL: <a href="http://moodle.org:8080/s/i=1" class="_blanktarget">http://moodle.org:8080/s/i=1</a>',
c2c6af94
TH
60 // URL with complex fragment.
61 'Most voted issues: https://tracker.moodle.org/browse/MDL#selectedTab=com.atlassian.jira.plugin.system.project%3Apopularissues-panel' => 'Most voted issues: <a href="https://tracker.moodle.org/browse/MDL#selectedTab=com.atlassian.jira.plugin.system.project%3Apopularissues-panel" class="_blanktarget">https://tracker.moodle.org/browse/MDL#selectedTab=com.atlassian.jira.plugin.system.project%3Apopularissues-panel</a>',
62 // Domain with more parts
63 'URL: www.bbc.co.uk.' => 'URL: <a href="http://www.bbc.co.uk" class="_blanktarget">www.bbc.co.uk</a>.',
64 // URL in brackets.
2a7ace11
PS
65 '(http://moodle.org) - URL' => '(<a href="http://moodle.org" class="_blanktarget">http://moodle.org</a>) - URL',
66 '(www.moodle.org) - URL' => '(<a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a>) - URL',
c2c6af94
TH
67 // URL in brackets with a path.
68 '(http://example.com/index.html) - URL' => '(<a href="http://example.com/index.html" class="_blanktarget">http://example.com/index.html</a>) - URL',
69 '(www.example.com/index.html) - URL' => '(<a href="http://www.example.com/index.html" class="_blanktarget">www.example.com/index.html</a>) - URL',
70 // URL in brackets with anchor.
71 '(http://moodle.org/main#anchor) - URL' => '(<a href="http://moodle.org/main#anchor" class="_blanktarget">http://moodle.org/main#anchor</a>) - URL',
72 '(www.moodle.org/main#anchor) - URL' => '(<a href="http://www.moodle.org/main#anchor" class="_blanktarget">www.moodle.org/main#anchor</a>) - URL',
73 // URL in square brackets.
2a7ace11
PS
74 '[http://moodle.org] - URL' => '[<a href="http://moodle.org" class="_blanktarget">http://moodle.org</a>] - URL',
75 '[www.moodle.org] - URL' => '[<a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a>] - URL',
c2c6af94
TH
76 // URL in square brackets with a path.
77 '[http://example.com/index.html] - URL' => '[<a href="http://example.com/index.html" class="_blanktarget">http://example.com/index.html</a>] - URL',
78 '[www.example.com/index.html] - URL' => '[<a href="http://www.example.com/index.html" class="_blanktarget">www.example.com/index.html</a>] - URL',
79 // URL in square brackets with anchor.
2a7ace11
PS
80 '[http://moodle.org/main#anchor] - URL' => '[<a href="http://moodle.org/main#anchor" class="_blanktarget">http://moodle.org/main#anchor</a>] - URL',
81 '[www.moodle.org/main#anchor] - URL' => '[<a href="http://www.moodle.org/main#anchor" class="_blanktarget">www.moodle.org/main#anchor</a>] - URL',
82 //brackets within the url
83 'URL: http://cc.org/url_(withpar)_go/?i=2' => 'URL: <a href="http://cc.org/url_(withpar)_go/?i=2" class="_blanktarget">http://cc.org/url_(withpar)_go/?i=2</a>',
84 'URL: www.cc.org/url_(withpar)_go/?i=2' => 'URL: <a href="http://www.cc.org/url_(withpar)_go/?i=2" class="_blanktarget">www.cc.org/url_(withpar)_go/?i=2</a>',
85 'URL: http://cc.org/url_(with)_(par)_go/?i=2' => 'URL: <a href="http://cc.org/url_(with)_(par)_go/?i=2" class="_blanktarget">http://cc.org/url_(with)_(par)_go/?i=2</a>',
86 'URL: www.cc.org/url_(with)_(par)_go/?i=2' => 'URL: <a href="http://www.cc.org/url_(with)_(par)_go/?i=2" class="_blanktarget">www.cc.org/url_(with)_(par)_go/?i=2</a>',
c2c6af94
TH
87 // URL legitimately ending in a bracket. Commented out as part of MDL-22390. See next tests for work-arounds.
88 // 'http://en.wikipedia.org/wiki/Slash_(punctuation)'=>'<a href="http://en.wikipedia.org/wiki/Slash_(punctuation)" class="_blanktarget">http://en.wikipedia.org/wiki/Slash_(punctuation)</a>',
2a7ace11
PS
89 'http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29 - URL' => '<a href="http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29" class="_blanktarget">http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29</a> - URL',
90 'http://en.wikipedia.org/wiki/(#Parentheses_.28_.29 - URL' => '<a href="http://en.wikipedia.org/wiki/(#Parentheses_.28_.29" class="_blanktarget">http://en.wikipedia.org/wiki/(#Parentheses_.28_.29</a> - URL',
91 //escaped brackets in url
92 'http://en.wikipedia.org/wiki/Slash_%28punctuation%29'=>'<a href="http://en.wikipedia.org/wiki/Slash_%28punctuation%29" class="_blanktarget">http://en.wikipedia.org/wiki/Slash_%28punctuation%29</a>',
93 //anchor tag
94 'URL: <a href="http://moodle.org">http://moodle.org</a>' => 'URL: <a href="http://moodle.org">http://moodle.org</a>',
95 'URL: <a href="http://moodle.org">www.moodle.org</a>' => 'URL: <a href="http://moodle.org">www.moodle.org</a>',
96 'URL: <a href="http://moodle.org"> http://moodle.org</a>' => 'URL: <a href="http://moodle.org"> http://moodle.org</a>',
97 'URL: <a href="http://moodle.org"> www.moodle.org</a>' => 'URL: <a href="http://moodle.org"> www.moodle.org</a>',
98 //escaped anchor tag. Commented out as part of MDL-21183
99 //htmlspecialchars('escaped anchor tag <a href="http://moodle.org">www.moodle.org</a>') => 'escaped anchor tag &lt;a href="http://moodle.org"&gt; www.moodle.org&lt;/a&gt;',
100 //trailing fullstop
101 'URL: http://moodle.org/s/i=1&j=2.' => 'URL: <a href="http://moodle.org/s/i=1&j=2" class="_blanktarget">http://moodle.org/s/i=1&j=2</a>.',
102 'URL: www.moodle.org/s/i=1&amp;j=2.' => 'URL: <a href="http://www.moodle.org/s/i=1&amp;j=2" class="_blanktarget">www.moodle.org/s/i=1&amp;j=2</a>.',
103 //trailing unmatched bracket
104 'URL: http://moodle.org)<br />' => 'URL: <a href="http://moodle.org" class="_blanktarget">http://moodle.org</a>)<br />',
105 //partially escaped html
106 'URL: <p>text www.moodle.org&lt;/p> text' => 'URL: <p>text <a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a>&lt;/p> text',
107 //decimal url parameter
108 'URL: www.moodle.org?u=1.23' => 'URL: <a href="http://www.moodle.org?u=1.23" class="_blanktarget">www.moodle.org?u=1.23</a>',
109 //escaped space in url
110 'URL: www.moodle.org?u=test+param&' => 'URL: <a href="http://www.moodle.org?u=test+param&" class="_blanktarget">www.moodle.org?u=test+param&</a>',
2a7ace11
PS
111 //multiple urls
112 'URL: http://moodle.org www.moodle.org'
113 => 'URL: <a href="http://moodle.org" class="_blanktarget">http://moodle.org</a> <a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a>',
114 //containing anchor tags including a class parameter and a url to convert
115 'URL: <a href="http://moodle.org">http://moodle.org</a> www.moodle.org <a class="customclass" href="http://moodle.org">http://moodle.org</a>'
116 => 'URL: <a href="http://moodle.org">http://moodle.org</a> <a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a> <a class="customclass" href="http://moodle.org">http://moodle.org</a>',
117 //subdomain
118 'http://subdomain.moodle.org - URL' => '<a href="http://subdomain.moodle.org" class="_blanktarget">http://subdomain.moodle.org</a> - URL',
119 //multiple subdomains
120 'http://subdomain.subdomain.moodle.org - URL' => '<a href="http://subdomain.subdomain.moodle.org" class="_blanktarget">http://subdomain.subdomain.moodle.org</a> - URL',
121 //looks almost like a link but isnt
122 'This contains http, http:// and www but no actual links.'=>'This contains http, http:// and www but no actual links.',
123 //no link at all
124 'This is a story about moodle.coming to a cinema near you.'=>'This is a story about moodle.coming to a cinema near you.',
125 //URLs containing utf 8 characters
126 'http://Iñtërnâtiônàlizætiøn.com?ô=nëø'=>'<a href="http://Iñtërnâtiônàlizætiøn.com?ô=nëø" class="_blanktarget">http://Iñtërnâtiônàlizætiøn.com?ô=nëø</a>',
127 'www.Iñtërnâtiônàlizætiøn.com?ô=nëø'=>'<a href="http://www.Iñtërnâtiônàlizætiøn.com?ô=nëø" class="_blanktarget">www.Iñtërnâtiônàlizætiøn.com?ô=nëø</a>',
128 //text containing utf 8 characters outside of a url
129 'Iñtërnâtiônàlizætiøn is important to http://moodle.org'=>'Iñtërnâtiônàlizætiøn is important to <a href="http://moodle.org" class="_blanktarget">http://moodle.org</a>',
130 //too hard to identify without additional regexs
131 'moodle.org' => 'moodle.org',
132 //some text with no link between related html tags
133 '<b>no link here</b>' => '<b>no link here</b>',
134 //some text with a link between related html tags
135 '<b>a link here www.moodle.org</b>' => '<b>a link here <a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a></b>',
136 //some text containing a link within unrelated tags
137 '<br />This is some text. www.moodle.com then some more text<br />' => '<br />This is some text. <a href="http://www.moodle.com" class="_blanktarget">www.moodle.com</a> then some more text<br />',
138 //check we aren't modifying img tags
139 'image<img src="http://moodle.org/logo/logo-240x60.gif" />' => 'image<img src="http://moodle.org/logo/logo-240x60.gif" />',
e93cdb69
TH
140 'image<img src="www.moodle.org/logo/logo-240x60.gif" />' => 'image<img src="www.moodle.org/logo/logo-240x60.gif" />',
141 'image<img src="http://www.example.com/logo.gif" />' => 'image<img src="http://www.example.com/logo.gif" />',
2a7ace11
PS
142 //and another url within one tag
143 '<td background="http://moodle.org">&nbsp;</td>' => '<td background="http://moodle.org">&nbsp;</td>',
144 '<td background="www.moodle.org">&nbsp;</td>' => '<td background="www.moodle.org">&nbsp;</td>',
145 '<form name="input" action="http://moodle.org/submit.asp" method="get">'=>'<form name="input" action="http://moodle.org/submit.asp" method="get">',
e93cdb69 146 '<td background="https://www.moodle.org">&nbsp;</td>' => '<td background="https://www.moodle.org">&nbsp;</td>',
2a7ace11
PS
147 //partially escaped img tag
148 'partially escaped img tag &lt;img src="http://moodle.org/logo/logo-240x60.gif" />' => 'partially escaped img tag &lt;img src="http://moodle.org/logo/logo-240x60.gif" />',
149 //fully escaped img tag. Commented out as part of MDL-21183
150 //htmlspecialchars('fully escaped img tag <img src="http://moodle.org/logo/logo-240x60.gif" />') => 'fully escaped img tag &lt;img src="http://moodle.org/logo/logo-240x60.gif" /&gt;',
151 //Double http with www
152 'One more link like http://www.moodle.org to test' => 'One more link like <a href="http://www.moodle.org" class="_blanktarget">http://www.moodle.org</a> to test',
153 //Encoded URLs in the path
154 'URL: http://127.0.0.1/one%28parenthesis%29/path?param=value' => 'URL: <a href="http://127.0.0.1/one%28parenthesis%29/path?param=value" class="_blanktarget">http://127.0.0.1/one%28parenthesis%29/path?param=value</a>',
155 'URL: www.localhost.com/one%28parenthesis%29/path?param=value' => 'URL: <a href="http://www.localhost.com/one%28parenthesis%29/path?param=value" class="_blanktarget">www.localhost.com/one%28parenthesis%29/path?param=value</a>',
156 //Encoded URLs in the query
157 'URL: http://127.0.0.1/path/to?param=value_with%28parenthesis%29&param2=1' => 'URL: <a href="http://127.0.0.1/path/to?param=value_with%28parenthesis%29&param2=1" class="_blanktarget">http://127.0.0.1/path/to?param=value_with%28parenthesis%29&param2=1</a>',
158 'URL: www.localhost.com/path/to?param=value_with%28parenthesis%29&param2=1' => 'URL: <a href="http://www.localhost.com/path/to?param=value_with%28parenthesis%29&param2=1" class="_blanktarget">www.localhost.com/path/to?param=value_with%28parenthesis%29&param2=1</a>',
159 //URLs in Javascript. Commented out as part of MDL-21183
160 //'var url="http://moodle.org";'=>'var url="http://moodle.org";',
161 //'var url = "http://moodle.org";'=>'var url = "http://moodle.org";',
162 //'var url="www.moodle.org";'=>'var url="www.moodle.org";',
163 //'var url = "www.moodle.org";'=>'var url = "www.moodle.org";',
164 //doctype. do we care about this failing?
165 //'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN http://www.w3.org/TR/html4/strict.dtd">'=>'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN http://www.w3.org/TR/html4/strict.dtd">'
166 );
167
c2c6af94 168 $data = array();
2a7ace11 169 foreach ($texts as $text => $correctresult) {
c2c6af94
TH
170 $data[] = array($text, $correctresult);
171 }
172 return $data;
173 }
2a7ace11 174
c2c6af94
TH
175 /**
176 * @dataProvider get_convert_urls_into_links_test_cases
177 */
178 function test_convert_urls_into_links($text, $correctresult) {
179 $testablefilter = new testable_filter_urltolink();
180 $testablefilter->convert_urls_into_links($text);
181 $this->assertEquals($correctresult, $text);
182 }
2a7ace11 183
c2c6af94
TH
184 function test_convert_urls_into_links_performance() {
185 $testablefilter = new testable_filter_urltolink();
2a7ace11 186
2a7ace11
PS
187 $reps = 1000;
188 $text = file_get_contents(__DIR__ . '/fixtures/sample.txt');
189 $time_start = microtime(true);
190 for($i=0;$i<$reps;$i++) {
191 $testablefilter->convert_urls_into_links($text);
192 }
193 $time_end = microtime(true);
194 $new_time = $time_end - $time_start;
195
196 $time_start = microtime(true);
197 for($i=0;$i<$reps;$i++) {
198 $this->old_convert_urls_into_links($text);
199 }
200 $time_end = microtime(true);
201 $old_time = $time_end - $time_start;
202
203 $fast_enough = false;
204 if( $new_time < $old_time ) {
205 $fast_enough = true;
206 }
207
208 $this->assertEquals($fast_enough, true, 'Timing test: ' . $new_time . 'secs (new) < ' . $old_time . 'secs (old)');
209 }
210}
211
212
213/**
214 * Test subclass that makes all the protected methods we want to test public.
215 */
216class testable_filter_urltolink extends filter_urltolink {
217 public function __construct() {
218 }
219 public function convert_urls_into_links(&$text) {
220 parent::convert_urls_into_links($text);
221 }
222}