MDL-60337 htmlpurifier: non-ascii domain names
[moodle.git] / lib / tests / htmlpurifier_test.php
CommitLineData
5bd40408
PS
1<?php
2// This file is part of Moodle - http://moodle.org/
3//
4// Moodle is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// Moodle is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with Moodle. If not, see <http://www.gnu.org/licenses/>.
16
17/**
18 * Unit tests for the HTMLPurifier integration
19 *
7aea08e1 20 * @package core
5bd40408
PS
21 * @category phpunit
22 * @copyright 2012 Petr Skoda {@link http://skodak.org}
23 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
24 */
25
26defined('MOODLE_INTERNAL') || die();
27
28
7aea08e1
SH
29/**
30 * HTMLPurifier test case
31 *
32 * @package core
33 * @category phpunit
34 * @copyright 2012 Petr Skoda {@link http://skodak.org}
35 * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
36 */
5bd40408
PS
37class core_htmlpurifier_testcase extends basic_testcase {
38
/**
 * Checks that target="_blank" survives cleaning while a target such as "some" is stripped.
 */
public function test_allow_blank_target() {
    // The rel="noreferrer noopener" attribute is expected on the cleaned _blank link; see MDL-52651 for why.
    // Links that already carry a rel attribute need no separate test because the HTML Purifier
    // is configured to remove the rel attribute.
    $blanklink = '<a href="http://moodle.org" target="_blank">Some link</a>';
    $this->assertSame(
        '<a href="http://moodle.org" target="_blank" rel="noreferrer noopener">Some link</a>',
        format_text($blanklink, FORMAT_HTML)
    );

    // A target value other than _blank does not make it through.
    $namedlink = '<a href="http://moodle.org" target="some">Some link</a>';
    $this->assertSame('<a href="http://moodle.org">Some link</a>', format_text($namedlink, FORMAT_HTML));
}
54
/**
 * Checks that the custom nolink tag passes through purify_html() untouched.
 */
public function test_nolink() {
    // purify_html() is called directly; format_text cannot be used because nolink changes its result.
    $samples = array(
        '<nolink><div>no filters</div></nolink>',
        '<nolink>xxx<em>xx</em><div>xxx</div></nolink>',
    );

    foreach ($samples as $markup) {
        $this->assertSame($markup, purify_html($markup, array()));
    }
}
68
/**
 * Checks that the custom tex tag passes through purify_html() untouched.
 */
public function test_tex() {
    $markup = '<tex>a+b=c</tex>';
    $this->assertSame($markup, purify_html($markup, array()));
}
77
/**
 * Checks that the custom algebra tag passes through purify_html() untouched.
 */
public function test_algebra() {
    $markup = '<algebra>a+b=c</algebra>';
    $this->assertSame($markup, purify_html($markup, array()));
}
86
/**
 * Checks the multilang markup handling: lang tags and multilang spans are kept,
 * while a span with a lang attribute but no multilang class is altered.
 */
public function test_multilang() {
    $preserved = array(
        '<lang lang="en">hmmm</lang><lang lang="anything">hm</lang>',
        '<span lang="en" class="multilang">hmmm</span><span lang="anything" class="multilang">hm</span>',
    );
    foreach ($preserved as $markup) {
        $this->assertSame($markup, purify_html($markup, array()));
    }

    // Without the multilang class the span does not come back unchanged.
    $plainspan = '<span lang="en">hmmm</span>';
    $this->assertNotSame($plainspan, purify_html($plainspan, array()));

    // Keep standard lang tags.
    $standard = array(
        '<span lang="de_DU" class="multilang">asas</span>',
        '<lang lang="de_DU">xxxxxx</lang>',
    );
    foreach ($standard as $markup) {
        $this->assertSame($markup, purify_html($markup, array()));
    }
}
113
/**
 * Exercises the 'allowid' option of format_text.
 */
public function test_format_text_allowid() {
    $html = '<div id="example">Frog</div>';

    // Without 'allowid' (the default) the id attribute is removed.
    $defaults = array('nocache' => true);
    $this->assertSame('<div>Frog</div>', format_text($html, FORMAT_HTML, $defaults));

    // With 'allowid' switched on the id attribute is retained.
    $withids = array('nocache' => true, 'allowid' => true);
    $this->assertSame('<div id="example">Frog</div>', format_text($html, FORMAT_HTML, $withids));
}
130
528a7b44
PS
/**
 * Checks that object/embed markup is removed unless $CFG->allowobjectembed is enabled.
 *
 * The setting is toggled on for the middle part of the test and is always restored to '0',
 * even when an assertion fails, so that the modified global does not leak into other tests.
 */
public function test_allowobjectembed() {
    global $CFG;

    // Precondition: the setting is off when the test starts.
    $this->assertSame('0', $CFG->allowobjectembed);

    $text = '<object width="425" height="350">
<param name="movie" value="http://www.youtube.com/v/AyPzM5WK8ys" />
<param name="wmode" value="transparent" />
<embed src="http://www.youtube.com/v/AyPzM5WK8ys" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350" />
</object>hmmm';

    // Setting off: only the trailing text remains.
    $result = purify_html($text, array());
    $this->assertSame('hmmm', trim($result));

    $expected = '<object width="425" height="350" data="http://www.youtube.com/v/AyPzM5WK8ys" type="application/x-shockwave-flash">
<param name="allowScriptAccess" value="never" />
<param name="allowNetworking" value="internal" />
<param name="movie" value="http://www.youtube.com/v/AyPzM5WK8ys" />
<param name="wmode" value="transparent" />
<embed src="http://www.youtube.com/v/AyPzM5WK8ys" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350" allowscriptaccess="never" allownetworking="internal" />
</object>hmmm';

    $CFG->allowobjectembed = '1';
    try {
        // Setting on: the markup is kept; newlines are ignored in the comparison.
        $result = purify_html($text, array());
        $this->assertSame(str_replace("\n", '', $expected), str_replace("\n", '', $result));
    } finally {
        // Restore the original value regardless of the assertion outcome.
        $CFG->allowobjectembed = '0';
    }

    // Setting off again: the markup is removed once more.
    $result = purify_html($text, array());
    $this->assertSame('hmmm', trim($result));
}
161
5bd40408
PS
/**
 * Checks that a mix of \n and \r line breaks comes back from purify_html() unchanged.
 */
public function test_line_breaking() {
    $mixedbreaks = "\n\raa\rsss\nsss\r";
    $cleaned = purify_html($mixedbreaks);
    $this->assertSame($mixedbreaks, $cleaned);
}
169
/**
 * Checks that strictness problems (unclosed, upper-case and non self-closed tags) get fixed.
 */
public function test_tidy() {
    // Input markup => markup expected after purification.
    $fixes = array(
        '<p>xx' => '<p>xx</p>',
        '<P>xx</P>' => '<p>xx</p>',
        'xx<br>' => 'xx<br />',
    );

    foreach ($fixes as $input => $expected) {
        $this->assertSame($expected, purify_html($input));
    }
}
183
/**
 * Checks that nested lists are left alone - nesting used to cause problems in earlier versions.
 */
public function test_nested_lists() {
    $nested = '<ul><li>One<ul><li>Two</li></ul></li><li>Three</li></ul>';
    $cleaned = purify_html($nested);
    $this->assertSame($nested, $cleaned);
}
191
/**
 * Smoke test of the XSS protection; the complete tests are in htmlpurifier itself.
 */
public function test_cleaning_nastiness() {
    // Hostile input => markup expected after purification.
    $attacks = array(
        "x<SCRIPT>alert('XSS')</SCRIPT>x" => 'xx',
        '<DIV STYLE="background-image:url(javascript:alert(\'XSS\'))">xx</DIV>' => '<div>xx</div>',
        '<DIV STYLE="width:expression(alert(\'XSS\'));">xx</DIV>' => '<div>xx</div>',
        'x<IFRAME SRC="javascript:alert(\'XSS\');"></IFRAME>x' => 'xx',
        'x<OBJECT TYPE="text/x-scriptlet" DATA="http://ha.ckers.org/scriptlet.html"></OBJECT>x' => 'xx',
        'x<EMBED SRC="http://ha.ckers.org/xss.swf" AllowScriptAccess="always"></EMBED>x' => 'xx',
        'x<form></form>x' => 'xx',
    );

    foreach ($attacks as $payload => $expected) {
        $this->assertSame($expected, purify_html($payload));
    }
}
3f0fe2b8
PS
217
/**
 * Exercises is_purify_html_necessary(), the internal function used for clean_text() speedup.
 */
public function test_is_purify_html_necessary() {
    // First our shortcuts: purification is reported as unnecessary and purify_html() changes nothing.
    $shortcuts = array(
        "",
        "666",
        "abc\ndef \" ' ",
        "abc\n<p>def</p>efg<p>hij</p>",
        "<br />abc\n<p>def<em>efg</em><strong>hi<br />j</strong></p>",
    );
    foreach ($shortcuts as $sample) {
        $this->assertFalse(is_purify_html_necessary($sample));
        $this->assertSame($sample, purify_html($sample));
    }

    // Now failures: each of these must be reported as needing purification.
    $needscleaning = array(
        "&nbsp;",
        "Gin & Tonic",
        "Gin > Tonic",
        "Gin < Tonic",
        "<div>abc</div>",
        "<span>abc</span>",
        "<br>abc",
        "<p class='xxx'>abc</p>",
        "<p>abc<em></p></em>",
        "<p>abc",
    );
    foreach ($needscleaning as $sample) {
        $this->assertTrue(is_purify_html_necessary($sample));
    }
}
d72bb486
PS
274
/**
 * Checks which URL schemes are kept in link hrefs and which cause the href to be removed.
 */
public function test_allowed_schemes() {
    $accepted = array(
        // First standard schemas.
        'http://www.example.com/course/view.php?id=5',
        'https://www.example.com/course/view.php?id=5',
        'ftp://user@ftp.example.com/some/file.txt',
        'nntp://example.com/group/123',
        'news:groupname',
        'mailto:user@example.com',
        // Extra schemes allowed in moodle.
        'irc://irc.example.com/3213?pass',
        'rtsp://www.example.com/movie.mov',
        'rtmp://www.example.com/video.f4v',
        'teamspeak://speak.example.com/?par=val?par2=val2',
        'gopher://gopher.example.com/resource',
        'mms://www.example.com/movie.mms',
    );
    foreach ($accepted as $href) {
        $link = '<a href="' . $href . '">link</a>';
        $this->assertSame($link, purify_html($link));
    }

    // Now some borked or dangerous schemes: the href is dropped and a bare anchor remains.
    $rejected = array(
        'javascript://www.example.com',
        'hmmm://www.example.com',
    );
    foreach ($rejected as $href) {
        $link = '<a href="' . $href . '">link</a>';
        $this->assertSame('<a>link</a>', purify_html($link));
    }
}
37c10287 321
a3cc6261
MG
/**
 * Checks that non-ascii domain names in hrefs are converted to their xn-- form
 * while the link text is left as typed.
 */
public function test_idn() {
    // Each entry is array(input markup, expected markup).
    $cases = array(
        // Example of domain that gives the same result in IDNA2003 and IDNA2008 .
        array(
            '<a href="http://правительство.рф">правительство.рф</a>',
            '<a href="http://xn--80aealotwbjpid2k.xn--p1ai">правительство.рф</a>',
        ),
        // Examples of deviations from http://www.unicode.org/reports/tr46/#Table_Deviation_Characters .
        array(
            '<a href="http://teßt.de">teßt.de</a>',
            '<a href="http://xn--tet-6ka.de">teßt.de</a>',
        ),
        array(
            '<a href="http://βόλος.com">http://βόλος.com</a>',
            '<a href="http://xn--nxasmm1c.com">http://βόλος.com</a>',
        ),
        array(
            '<a href="http://نامه‌ای.com">http://نامه‌ای.com</a>',
            '<a href="http://xn--mgba3gch31f060k.com">http://نامه‌ای.com</a>',
        ),
    );

    foreach ($cases as $case) {
        $this->assertSame($case[1], purify_html($case[0]));
    }
}
345
37c10287
CB
/**
 * Tests media tags.
 *
 * Each dataset is run through format_text() as FORMAT_MOODLE with filters disabled and the
 * output is compared strictly (type and value) with the expected markup.
 *
 * @dataProvider media_tags_provider
 * @param string $mediatag HTML media tag
 * @param string $expected expected result
 */
public function test_media_tags($mediatag, $expected) {
    $actual = format_text($mediatag, FORMAT_MOODLE, ['filter' => false]);
    // Use assertSame, as in the other tests of this class, to avoid loose comparison.
    $this->assertSame($expected, $actual);
}
357
/**
 * Test cases for the test_media_tags test.
 *
 * Most datasets are generated: for every attribute in a list, the attribute is substituted into
 * an input template and an expected-output template. Dataset names are "<prefix>/<attribute>".
 *
 * @return array dataset name => array(input markup, expected markup)
 */
public function media_tags_provider() {
    // Takes an array of attributes, then generates a test for each of them.
    $generatetestcases = function($prefix, array $attrs, array $templates) {
        $testcases = [];
        foreach ($attrs as $attr) {
            $testcases[$prefix . '/' . $attr] = [
                sprintf($templates[0], $attr),
                sprintf($templates[1], $attr),
            ];
        }
        return $testcases;
    };

    $audioattrs = [
        'preload="auto"', 'autoplay=""', 'loop=""', 'muted=""', 'controls=""',
        'crossorigin="anonymous"', 'crossorigin="use-credentials"'
    ];
    $videoattrs = [
        'crossorigin="anonymous"', 'crossorigin="use-credentials"',
        'poster="https://upload.wikimedia.org/wikipedia/en/1/14/Space_jam.jpg"',
        'preload="auto"', 'autoplay=""', 'playsinline=""', 'loop=""', 'muted=""',
        'controls=""', 'width="420"', 'height="69"'
    ];

    // The attribute lists are numerically indexed, so the extra src attribute must be appended with
    // array_merge(); the array union operator would discard it because key 0 already exists.
    $audioattrswithsrc = array_merge($audioattrs, ['src="http://example.com/jam.wav"']);
    $videoattrswithsrc = array_merge($videoattrs, ['src="http://example.com/prettygood.mp4"']);

    return $generatetestcases('Plain audio', $audioattrswithsrc, [
            '<audio %1$s>Looks like you can\'t slam the jams.</audio>',
            '<div class="text_to_html"><audio %1$s>Looks like you can\'t slam the jams.</audio></div>'
        ]) + $generatetestcases('Audio with one source', $audioattrs, [
            '<audio %1$s><source src="http://example.com/getup.wav">No tasty jams for you.</audio>',
            '<div class="text_to_html">' .
                '<audio %1$s>' .
                    '<source src="http://example.com/getup.wav" />' .
                    'No tasty jams for you.' .
                '</audio>' .
            '</div>'
        ]) + $generatetestcases('Audio with multiple sources', $audioattrs, [
            '<audio %1$s>' .
                '<source src="http://example.com/getup.wav" type="audio/wav">' .
                '<source src="http://example.com/getup.mp3" type="audio/mpeg">' .
                '<source src="http://example.com/getup.ogg" type="audio/ogg">' .
                'No tasty jams for you.' .
            '</audio>',
            '<div class="text_to_html">' .
                '<audio %1$s>' .
                    '<source src="http://example.com/getup.wav" type="audio/wav" />' .
                    '<source src="http://example.com/getup.mp3" type="audio/mpeg" />' .
                    '<source src="http://example.com/getup.ogg" type="audio/ogg" />' .
                    'No tasty jams for you.' .
                '</audio>' .
            '</div>'
        ]) + $generatetestcases('Audio with sources and tracks', $audioattrs, [
            '<audio %1$s>' .
                '<source src="http://example.com/getup.wav" type="audio/wav">' .
                '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en">' .
                '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es">' .
                'No tasty jams for you.' .
            '</audio>',
            '<div class="text_to_html">' .
                '<audio %1$s>' .
                    '<source src="http://example.com/getup.wav" type="audio/wav" />' .
                    '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en" />' .
                    '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es" />' .
                    'No tasty jams for you.' .
                '</audio>' .
            '</div>'
        ]) + $generatetestcases('Plain video', $videoattrswithsrc, [
            '<video %1$s>Oh, that\'s pretty bad 😦</video>',
            '<div class="text_to_html"><video %1$s>Oh, that\'s pretty bad 😦</video></div>'
        ]) + $generatetestcases('Video with illegal subtag', $videoattrswithsrc, [
            '<video %1$s><subtag></subtag>Oh, that\'s pretty bad 😦</video>',
            '<div class="text_to_html"><video %1$s>Oh, that\'s pretty bad 😦</video></div>'
        ]) + $generatetestcases('Video with legal subtag', $videoattrswithsrc, [
            '<video %1$s>Did not work <a href="http://example.com/prettygood.mp4">click here to download</a></video>',
            '<div class="text_to_html"><video %1$s>Did not work <a href="http://example.com/prettygood.mp4">' .
                'click here to download</a></video></div>'
        ]) + $generatetestcases('Source tag without video or audio', $videoattrs, [
            // These templates contain no placeholder, so every generated dataset has the same content.
            'some text <source src="http://example.com/getup.wav" type="audio/wav"> the end',
            '<div class="text_to_html">some text the end</div>'
        ]) + $generatetestcases('Video with one source', $videoattrs, [
            '<video %1$s><source src="http://example.com/prettygood.mp4">Oh, that\'s pretty bad 😦</video>',
            '<div class="text_to_html">' .
                '<video %1$s>' .
                    '<source src="http://example.com/prettygood.mp4" />' .
                    'Oh, that\'s pretty bad 😦' .
                '</video>' .
            '</div>'
        ]) + $generatetestcases('Video with multiple sources', $videoattrs, [
            '<video %1$s>' .
                '<source src="http://example.com/prettygood.mp4" type="video/mp4">' .
                '<source src="http://example.com/eljefe.mp4" type="video/mp4">' .
                '<source src="http://example.com/turnitup.mov" type="video/mov">' .
                'Oh, that\'s pretty bad 😦' .
            '</video>',
            '<div class="text_to_html">' .
                '<video %1$s>' .
                    '<source src="http://example.com/prettygood.mp4" type="video/mp4" />' .
                    '<source src="http://example.com/eljefe.mp4" type="video/mp4" />' .
                    '<source src="http://example.com/turnitup.mov" type="video/mov" />' .
                    'Oh, that\'s pretty bad 😦' .
                '</video>' .
            '</div>'
        // NOTE(review): this video case iterates over $audioattrs; possibly $videoattrs was intended - confirm.
        ]) + $generatetestcases('Video with sources and tracks', $audioattrs, [
            '<video %1$s>' .
                '<source src="http://example.com/getup.wav" type="audio/wav">' .
                '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en">' .
                '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es">' .
                'No tasty jams for you.' .
            '</video>',
            '<div class="text_to_html">' .
                '<video %1$s>' .
                    '<source src="http://example.com/getup.wav" type="audio/wav" />' .
                    '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en" />' .
                    '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es" />' .
                    'No tasty jams for you.' .
                '</video>' .
            '</div>'
        ]) + ['Video with invalid crossorigin' => [
            '<video src="http://example.com/turnitup.mov" crossorigin="can i pls hab?">' .
                'Oh, that\'s pretty bad 😦' .
            '</video>',
            '<div class="text_to_html">' .
                '<video src="http://example.com/turnitup.mov">' .
                    'Oh, that\'s pretty bad 😦' .
                '</video>' .
            '</div>'
        ]] + ['Audio with invalid crossorigin' => [
            '<audio src="http://example.com/getup.wav" crossorigin="give me. the jams.">' .
                'nyemnyemnyem' .
            '</audio>',
            '<div class="text_to_html">' .
                '<audio src="http://example.com/getup.wav">' .
                    'nyemnyemnyem' .
                '</audio>' .
            '</div>'
        ]] + ['Other attributes' => [
            '<video src="http://example.com/turnitdown.mov" class="nofilter" data-something="data attribute" someattribute="somevalue" onclick="boom">' .
                '<source src="http://example.com/getup.wav" type="audio/wav" class="shouldberemoved" data-sourcedata="source data" onmouseover="kill session" />' .
                '<track src="http://example.com/subtitles_en.vtt" class="shouldberemoved" data-trackdata="track data" onmouseover="removeme" />' .
                'Do not remove attribute class but remove other attributes' .
            '</video>',
            '<div class="text_to_html">' .
                '<video src="http://example.com/turnitdown.mov" class="nofilter">' .
                    '<source src="http://example.com/getup.wav" type="audio/wav" />' .
                    '<track src="http://example.com/subtitles_en.vtt" />' .
                    'Do not remove attribute class but remove other attributes' .
                '</video>' .
            '</div>'
        ]];
}
5bd40408 508}