MDL-23979 fixed non standard entities result
[moodle.git] / lib / simpletest / testweblib.php
1 <?php
2 /**
3  * Unit tests for (some of) ../weblib.php.
4  *
5  * @copyright &copy; 2006 The Open University
6  * @author T.J.Hunt@open.ac.uk
7  * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
8  * @package moodlecore
9  */
/// Abort when MOODLE_INTERNAL is not defined: this script must be included from a Moodle page.
defined('MOODLE_INTERNAL') || die('Direct access to this script is forbidden.');
15 class web_test extends UnitTestCase {
    /// NOTE(review): presumably read by the unit test runner to decide which source files
    /// are included in code-coverage reporting for this test case - confirm against the runner.
    public static $includecoverage = array('lib/weblib.php');
19     function setUp() {
20     }
22     function tearDown() {
23     }
25     function test_format_string() {
26         // Ampersands
27         $this->assertEqual(format_string("& &&&&& &&"), "&amp; &amp;&amp;&amp;&amp;&amp; &amp;&amp;");
28         $this->assertEqual(format_string("ANother & &&&&& Category"), "ANother &amp; &amp;&amp;&amp;&amp;&amp; Category");
29         $this->assertEqual(format_string("ANother & &&&&& Category", true), "ANother &amp; &amp;&amp;&amp;&amp;&amp; Category");
30         $this->assertEqual(format_string("Nick's Test Site & Other things", true), "Nick's Test Site &amp; Other things");
32         // String entities
33         $this->assertEqual(format_string("&quot;"), "&quot;");
35         // Digital entities
36         $this->assertEqual(format_string("&11234;"), "&11234;");
38         // Unicode entities
39         $this->assertEqual(format_string("&#4475;"), "&#4475;");
40     }
42     function test_s() {
43           $this->assertEqual(s("This Breaks \" Strict"), "This Breaks &quot; Strict");
44           $this->assertEqual(s("This Breaks <a>\" Strict</a>"), "This Breaks &lt;a&gt;&quot; Strict&lt;/a&gt;");
45     }
47     function test_format_text_email() {
48         $this->assertEqual("\n\nThis is a TEST",
49             format_text_email('<p>This is a <strong>test</strong></p>',FORMAT_HTML));
50         $this->assertEqual("\n\nThis is a TEST",
51             format_text_email('<p class="frogs">This is a <strong class=\'fishes\'>test</strong></p>',FORMAT_HTML));
52         $this->assertEqual('& so is this',
53             format_text_email('&amp; so is this',FORMAT_HTML));
54         $tl = textlib_get_instance();
55         $this->assertEqual('Two bullets: '.$tl->code2utf8(8226).' *',
56             format_text_email('Two bullets: &#x2022; &#8226;',FORMAT_HTML));
57         $this->assertEqual($tl->code2utf8(0x7fd2).$tl->code2utf8(0x7fd2),
58             format_text_email('&#x7fd2;&#x7FD2;',FORMAT_HTML));
59     }
61     function test_highlight() {
62         $this->assertEqual(highlight('good', 'This is good'), 'This is <span class="highlight">good</span>');
63         $this->assertEqual(highlight('SpaN', 'span'), '<span class="highlight">span</span>');
64         $this->assertEqual(highlight('span', 'SpaN'), '<span class="highlight">SpaN</span>');
65         $this->assertEqual(highlight('span', '<span>span</span>'), '<span><span class="highlight">span</span></span>');
66         $this->assertEqual(highlight('good is', 'He is good'), 'He <span class="highlight">is</span> <span class="highlight">good</span>');
67         $this->assertEqual(highlight('+good', 'This is good'), 'This is <span class="highlight">good</span>');
68         $this->assertEqual(highlight('-good', 'This is good'), 'This is good');
69         $this->assertEqual(highlight('+good', 'This is goodness'), 'This is goodness');
70         $this->assertEqual(highlight('good', 'This is goodness'), 'This is <span class="highlight">good</span>ness');
71     }
73     function test_replace_ampersands() {
74         $this->assertEqual(replace_ampersands_not_followed_by_entity("This & that &nbsp;"), "This &amp; that &nbsp;");
75         $this->assertEqual(replace_ampersands_not_followed_by_entity("This &nbsp that &nbsp;"), "This &amp;nbsp that &nbsp;");
76     }
78     function test_strip_links() {
79         $this->assertEqual(strip_links('this is a <a href="http://someaddress.com/query">link</a>'), 'this is a link');
80     }
82     function test_wikify_links() {
83         $this->assertEqual(wikify_links('this is a <a href="http://someaddress.com/query">link</a>'), 'this is a link [ http://someaddress.com/query ]');
84     }
86     function test_fix_non_standard_entities() {
87         $this->assertEqual(fix_non_standard_entities('&#x00A3&#0228'), '&#xA3;&#228;');
88         $this->assertEqual(fix_non_standard_entities('&#x00A3;&#0228;'), '&#xA3;&#228;');
89     }
91     function test_compare_url() {
92         $url1 = new moodle_url('index.php', array('var1' => 1, 'var2' => 2));
93         $url2 = new moodle_url('index2.php', array('var1' => 1, 'var2' => 2, 'var3' => 3));
95         $this->assertFalse($url1->compare($url2, URL_MATCH_BASE));
96         $this->assertFalse($url1->compare($url2, URL_MATCH_PARAMS));
97         $this->assertFalse($url1->compare($url2, URL_MATCH_EXACT));
99         $url2 = new moodle_url('index.php', array('var1' => 1, 'var3' => 3));
101         $this->assertTrue($url1->compare($url2, URL_MATCH_BASE));
102         $this->assertFalse($url1->compare($url2, URL_MATCH_PARAMS));
103         $this->assertFalse($url1->compare($url2, URL_MATCH_EXACT));
105         $url2 = new moodle_url('index.php', array('var1' => 1, 'var2' => 2, 'var3' => 3));
107         $this->assertTrue($url1->compare($url2, URL_MATCH_BASE));
108         $this->assertTrue($url1->compare($url2, URL_MATCH_PARAMS));
109         $this->assertFalse($url1->compare($url2, URL_MATCH_EXACT));
111         $url2 = new moodle_url('index.php', array('var2' => 2, 'var1' => 1));
113         $this->assertTrue($url1->compare($url2, URL_MATCH_BASE));
114         $this->assertTrue($url1->compare($url2, URL_MATCH_PARAMS));
115         $this->assertTrue($url1->compare($url2, URL_MATCH_EXACT));
116     }
118     function old_convert_urls_into_links(&$text) {
119         /// Make lone URLs into links.   eg http://moodle.com/
120         $text = preg_replace("%([[:space:]]|^|\(|\[)([[:alnum:]]+)://([^[:space:]]*)([[:alnum:]#?/&=])%i",
121                           '$1<a href="$2://$3$4" target="_blank">$2://$3$4</a>', $text);
122         /// eg www.moodle.com
123         $text = preg_replace("%([[:space:]]|^|\(|\[)www\.([^[:space:]]*)([[:alnum:]#?/&=])%i",
124                           '$1<a href="http://www.$2$3" target="_blank">www.$2$3</a>', $text);
125     }
127     function get_test_text(){
128         return <<<END
129 http://www.lipsum.com
130 Lorem Ipsum is simply dummy text of the printing and typesetting industry. Lorem Ipsum has been the industry's standard dummy text ever since the 1500s, when an unknown printer took a galley of type and scrambled it to make a type specimen book. It has survived not only five centuries, but also the leap into electronic typesetting, remaining essentially unchanged. It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages, and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.
131 Why do we use it?<a href="dummylink.htm">dummy</a>
133 It is a long established fact that a reader will be distracted by the readable content of a page when looking at its layout. The point of using Lorem Ipsum is that it has a more-or-less normal distribution of letters, as opposed to using 'Content here, content here', making it look like readable English. Many desktop publishing packages and web page editors now use Lorem Ipsum as their default model text, and a search for 'lorem ipsum' will uncover many web sites still in their infancy. Various versions have evolved over the years, sometimes by accident, sometimes on purpose (injected humour and the like).
135 Where does it come from?
137 Contrary to popular belief, Lorem Ipsum is not simply random text. It has roots in a piece of classical Latin literature from 45 BC, making it over 2000 years old. Richard McClintock, a Latin professor at Hampden-Sydney College in Virginia, looked up one of the more obscure Latin words, consectetur, from a Lorem Ipsum passage, and going through the cites of the word in classical literature, discovered the undoubtable source. Lorem Ipsum comes from sections 1.10.32 and 1.10.33 of "de Finibus Bonorum et Malorum" (The Extremes of Good and Evil) by Cicero, written in 45 BC. This book is a treatise on the theory of ethics, very popular during the Renaissance. The first line of Lorem Ipsum, "Lorem ipsum dolor sit amet..", comes from a line in section 1.10.32.
139 The standard chunk of Lorem Ipsum used since the 1500s is reproduced below for those interested. Sections 1.10.32 and 1.10.33 from "de Finibus Bonorum et Malorum" by Cicero are also reproduced in their exact original form, accompanied by English versions from the 1914 translation by H. Rackham.
140 Where can I get some?
142 There are many variations of passages of Lorem Ipsum available, but the majority have suffered alteration in some form, by injected humour, or randomised words which don't look even slightly believable. If you are going to use a passage of Lorem Ipsum, you need to be sure there isn't anything embarrassing hidden in the middle of text. All the Lorem Ipsum generators on the Internet tend to repeat predefined chunks as necessary, making this the first true generator on the Internet. It uses a dictionary of over 200 Latin words, combined with a handful of model sentence structures, to generate Lorem Ipsum which looks reasonable. The generated Lorem Ipsum is therefore always free from repetition, injected humour, or non-characteristic words etc.
143 <a href="http://en.wikipedia.org/wiki/Lorem_ipsum">Wikipedia</a>
144 http://www.lorem-ipsum.info/
145 END;
146     }
148     function test_convert_urls_into_links() {
149         $texts = array (
150                      //just a url
151                      'http://moodle.org - URL' => '<a href="http://moodle.org" class="_blanktarget">http://moodle.org</a> - URL',
152                      'www.moodle.org - URL' => '<a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a> - URL',
153                      //url with params
154                      'URL: http://moodle.org/s/i=1&j=2' => 'URL: <a href="http://moodle.org/s/i=1&j=2" class="_blanktarget">http://moodle.org/s/i=1&j=2</a>',
155                      //url with escaped params
156                      'URL: www.moodle.org/s/i=1&amp;j=2' => 'URL: <a href="http://www.moodle.org/s/i=1&amp;j=2" class="_blanktarget">www.moodle.org/s/i=1&amp;j=2</a>',
157                      //https url with params
158                      'URL: https://moodle.org/s/i=1&j=2' => 'URL: <a href="https://moodle.org/s/i=1&j=2" class="_blanktarget">https://moodle.org/s/i=1&j=2</a>',
159                      //url with port and params
160                      'URL: http://moodle.org:8080/s/i=1' => 'URL: <a href="http://moodle.org:8080/s/i=1" class="_blanktarget">http://moodle.org:8080/s/i=1</a>',
161                      //url in brackets
162                      '(http://moodle.org) - URL' => '(<a href="http://moodle.org" class="_blanktarget">http://moodle.org</a>) - URL',
163                      '(www.moodle.org) - URL' => '(<a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a>) - URL',
164                      //url in square brackets
165                      '[http://moodle.org] - URL' => '[<a href="http://moodle.org" class="_blanktarget">http://moodle.org</a>] - URL',
166                      '[www.moodle.org] - URL' => '[<a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a>] - URL',
167                      //url in brackets with anchor
168                      '[http://moodle.org/main#anchor] - URL' => '[<a href="http://moodle.org/main#anchor" class="_blanktarget">http://moodle.org/main#anchor</a>] - URL',
169                      '[www.moodle.org/main#anchor] - URL' => '[<a href="http://www.moodle.org/main#anchor" class="_blanktarget">www.moodle.org/main#anchor</a>] - URL',
170                      //brackets within the url
171                      'URL: http://cc.org/url_(withpar)_go/?i=2' => 'URL: <a href="http://cc.org/url_(withpar)_go/?i=2" class="_blanktarget">http://cc.org/url_(withpar)_go/?i=2</a>',
172                      'URL: www.cc.org/url_(withpar)_go/?i=2' => 'URL: <a href="http://www.cc.org/url_(withpar)_go/?i=2" class="_blanktarget">www.cc.org/url_(withpar)_go/?i=2</a>',
173                      'URL: http://cc.org/url_(with)_(par)_go/?i=2' => 'URL: <a href="http://cc.org/url_(with)_(par)_go/?i=2" class="_blanktarget">http://cc.org/url_(with)_(par)_go/?i=2</a>',
174                      'URL: www.cc.org/url_(with)_(par)_go/?i=2' => 'URL: <a href="http://www.cc.org/url_(with)_(par)_go/?i=2" class="_blanktarget">www.cc.org/url_(with)_(par)_go/?i=2</a>',
175                      'http://en.wikipedia.org/wiki/Slash_(punctuation)'=>'<a href="http://en.wikipedia.org/wiki/Slash_(punctuation)" class="_blanktarget">http://en.wikipedia.org/wiki/Slash_(punctuation)</a>',
176                      'http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29 - URL' => '<a href="http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29" class="_blanktarget">http://en.wikipedia.org/wiki/%28#Parentheses_.28_.29</a> - URL',
177                      'http://en.wikipedia.org/wiki/(#Parentheses_.28_.29 - URL' => '<a href="http://en.wikipedia.org/wiki/(#Parentheses_.28_.29" class="_blanktarget">http://en.wikipedia.org/wiki/(#Parentheses_.28_.29</a> - URL',
178                      //escaped brackets in url
179                      'http://en.wikipedia.org/wiki/Slash_%28punctuation%29'=>'<a href="http://en.wikipedia.org/wiki/Slash_%28punctuation%29" class="_blanktarget">http://en.wikipedia.org/wiki/Slash_%28punctuation%29</a>',
180                      //anchor tag
181                      'URL: <a href="http://moodle.org">http://moodle.org</a>' => 'URL: <a href="http://moodle.org">http://moodle.org</a>',
182                      'URL: <a href="http://moodle.org">www.moodle.org</a>' => 'URL: <a href="http://moodle.org">www.moodle.org</a>',
183                      'URL: <a href="http://moodle.org"> http://moodle.org</a>' => 'URL: <a href="http://moodle.org"> http://moodle.org</a>',
184                      'URL: <a href="http://moodle.org"> www.moodle.org</a>' => 'URL: <a href="http://moodle.org"> www.moodle.org</a>',
185                      //escaped anchor tag. Commented out as part of MDL-21183
186                      //htmlspecialchars('escaped anchor tag <a href="http://moodle.org">www.moodle.org</a>') => 'escaped anchor tag &lt;a href="http://moodle.org"&gt; www.moodle.org&lt;/a&gt;',
187                      //trailing fullstop
188                      'URL: http://moodle.org/s/i=1&j=2.' => 'URL: <a href="http://moodle.org/s/i=1&j=2" class="_blanktarget">http://moodle.org/s/i=1&j=2</a>.',
189                      'URL: www.moodle.org/s/i=1&amp;j=2.' => 'URL: <a href="http://www.moodle.org/s/i=1&amp;j=2" class="_blanktarget">www.moodle.org/s/i=1&amp;j=2</a>.',
190                      //trailing unmatched bracket
191                      'URL: http://moodle.org)<br />' => 'URL: <a href="http://moodle.org" class="_blanktarget">http://moodle.org</a>)<br />',
192                      //partially escaped html
193                      'URL: <p>text www.moodle.org&lt;/p> text' => 'URL: <p>text <a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a>&lt;/p> text',
194                      //decimal url parameter
195                      'URL: www.moodle.org?u=1.23' => 'URL: <a href="http://www.moodle.org?u=1.23" class="_blanktarget">www.moodle.org?u=1.23</a>',
196                      //escaped space in url
197                      'URL: www.moodle.org?u=test+param&' => 'URL: <a href="http://www.moodle.org?u=test+param&" class="_blanktarget">www.moodle.org?u=test+param&</a>',
198                      //odd characters in url param
199                      'URL: www.moodle.org?param=:)' => 'URL: <a href="http://www.moodle.org?param=:)" class="_blanktarget">www.moodle.org?param=:)</a>',
200                      //multiple urls
201                      'URL: http://moodle.org www.moodle.org'
202                      => 'URL: <a href="http://moodle.org" class="_blanktarget">http://moodle.org</a> <a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a>',
203                      //containing anchor tags including a class parameter and a url to convert
204                      'URL: <a href="http://moodle.org">http://moodle.org</a> www.moodle.org <a class="customclass" href="http://moodle.org">http://moodle.org</a>'
205                      => 'URL: <a href="http://moodle.org">http://moodle.org</a> <a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a> <a class="customclass" href="http://moodle.org">http://moodle.org</a>',
206                      //subdomain
207                      'http://subdomain.moodle.org - URL' => '<a href="http://subdomain.moodle.org" class="_blanktarget">http://subdomain.moodle.org</a> - URL',
208                      //multiple subdomains
209                      'http://subdomain.subdomain.moodle.org - URL' => '<a href="http://subdomain.subdomain.moodle.org" class="_blanktarget">http://subdomain.subdomain.moodle.org</a> - URL',
210                      //looks almost like a link but isnt
211                      'This contains http, http:// and www but no actual links.'=>'This contains http, http:// and www but no actual links.',
212                      //no link at all
213                      'This is a story about moodle.coming to a cinema near you.'=>'This is a story about moodle.coming to a cinema near you.',
214                      //URLs containing utf 8 characters
215                      'http://Iñtërnâtiônàlizætiøn.com?ô=nëø'=>'<a href="http://Iñtërnâtiônàlizætiøn.com?ô=nëø" class="_blanktarget">http://Iñtërnâtiônàlizætiøn.com?ô=nëø</a>',
216                      'www.Iñtërnâtiônàlizætiøn.com?ô=nëø'=>'<a href="http://www.Iñtërnâtiônàlizætiøn.com?ô=nëø" class="_blanktarget">www.Iñtërnâtiônàlizætiøn.com?ô=nëø</a>',
217                      //text containing utf 8 characters outside of a url
218                      'Iñtërnâtiônàlizætiøn is important to http://moodle.org'=>'Iñtërnâtiônàlizætiøn is important to <a href="http://moodle.org" class="_blanktarget">http://moodle.org</a>',
219                      //too hard to identify without additional regexs
220                      'moodle.org' => 'moodle.org',
221                      //some text with no link between related html tags
222                      '<b>no link here</b>' => '<b>no link here</b>',
223                      //some text with a link between related html tags
224                      '<b>a link here www.moodle.org</b>' => '<b>a link here <a href="http://www.moodle.org" class="_blanktarget">www.moodle.org</a></b>',
225                      //some text containing a link within unrelated tags
226                      '<br />This is some text. www.moodle.com then some more text<br />' => '<br />This is some text. <a href="http://www.moodle.com" class="_blanktarget">www.moodle.com</a> then some more text<br />',
227                      //check we aren't modifying img tags
228                      'image<img src="http://moodle.org/logo/logo-240x60.gif" />' => 'image<img src="http://moodle.org/logo/logo-240x60.gif" />',
229                      'image<img src="www.moodle.org/logo/logo-240x60.gif" />' => 'image<img src="www.moodle.org/logo/logo-240x60.gif" />',
230                      //and another url within one tag
231                      '<td background="http://moodle.org">&nbsp;</td>' => '<td background="http://moodle.org">&nbsp;</td>',
232                      '<td background="www.moodle.org">&nbsp;</td>' => '<td background="www.moodle.org">&nbsp;</td>',
233                      '<form name="input" action="http://moodle.org/submit.asp" method="get">'=>'<form name="input" action="http://moodle.org/submit.asp" method="get">',
234                      //partially escaped img tag
235                      'partially escaped img tag &lt;img src="http://moodle.org/logo/logo-240x60.gif" />' => 'partially escaped img tag &lt;img src="http://moodle.org/logo/logo-240x60.gif" />',
236                      //fully escaped img tag. Commented out as part of MDL-21183
237                      //htmlspecialchars('fully escaped img tag <img src="http://moodle.org/logo/logo-240x60.gif" />') => 'fully escaped img tag &lt;img src="http://moodle.org/logo/logo-240x60.gif" /&gt;',
238                      //Double http with www
239                      'One more link like http://www.moodle.org to test' => 'One more link like <a href="http://www.moodle.org" class="_blanktarget">http://www.moodle.org</a> to test',
240                      //Encoded URLs in the path
241                      'URL: http://127.0.0.1/one%28parenthesis%29/path?param=value' => 'URL: <a href="http://127.0.0.1/one%28parenthesis%29/path?param=value" class="_blanktarget">http://127.0.0.1/one%28parenthesis%29/path?param=value</a>',
242                      'URL: www.localhost.com/one%28parenthesis%29/path?param=value' => 'URL: <a href="http://www.localhost.com/one%28parenthesis%29/path?param=value" class="_blanktarget">www.localhost.com/one%28parenthesis%29/path?param=value</a>',
243                      //Encoded URLs in the query
244                      'URL: http://127.0.0.1/path/to?param=value_with%28parenthesis%29&param2=1' => 'URL: <a href="http://127.0.0.1/path/to?param=value_with%28parenthesis%29&param2=1" class="_blanktarget">http://127.0.0.1/path/to?param=value_with%28parenthesis%29&param2=1</a>',
245                      'URL: www.localhost.com/path/to?param=value_with%28parenthesis%29&param2=1' => 'URL: <a href="http://www.localhost.com/path/to?param=value_with%28parenthesis%29&param2=1" class="_blanktarget">www.localhost.com/path/to?param=value_with%28parenthesis%29&param2=1</a>',
246                      //URLs in Javascript. Commented out as part of MDL-21183
247                      //'var url="http://moodle.org";'=>'var url="http://moodle.org";',
248                      //'var url = "http://moodle.org";'=>'var url = "http://moodle.org";',
249                      //'var url="www.moodle.org";'=>'var url="www.moodle.org";',
250                      //'var url = "www.moodle.org";'=>'var url = "www.moodle.org";',
251                      //doctype. do we care about this failing?
252                      //'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN http://www.w3.org/TR/html4/strict.dtd">'=>'<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN http://www.w3.org/TR/html4/strict.dtd">'
253                  );
254        foreach ($texts as $text => $correctresult) {
255             $msg = "Testing text: ". str_replace('%', '%%', $text) . ": %s"; // Escape original '%' so sprintf() wont get confused
257             convert_urls_into_links($text);
259             $this->assertEqual($text, $correctresult, $msg);
260         }
262         //performance testing
263         $reps = 1000;
265         $time_start = microtime(true);
266         for($i=0;$i<$reps;$i++) {
267             $text = $this->get_test_text();
268             convert_urls_into_links($text);
269         }
270         $time_end = microtime(true);
271         $new_time = $time_end - $time_start;
273         $time_start = microtime(true);
274         for($i=0;$i<$reps;$i++) {
275             $text = $this->get_test_text();
276             $this->old_convert_urls_into_links($text);
277         }
278         $time_end = microtime(true);
279         $old_time = $time_end - $time_start;
281         $fast_enough = false;
282         if( $new_time < $old_time ) {
283             $fast_enough = true;
284         }
286         $this->assertEqual($fast_enough, true, 'Timing test: ' . $new_time . 'secs (new) < ' . $old_time . 'secs (old)');
287     }
289     public function test_html_to_text_simple() {
290         $this->assertEqual("\n\n_Hello_ WORLD!", html_to_text('<p><i>Hello</i> <b>world</b>!</p>'));
291     }
293     public function test_html_to_text_image() {
294         $this->assertEqual('[edit]', html_to_text('<img src="edit.png" alt="edit" />'));
295     }
297     public function test_html_to_text_nowrap() {
298         $long = "Here is a long string, more than 75 characters long, since by default html_to_text wraps text at 75 chars.";
299         $this->assertEqual($long, html_to_text($long, 0));
300     }