f7d5679462ea9e762653fff6a4b0b5394b56a967
[moodle.git] / lib / tests / htmlpurifier_test.php
1 <?php
2 // This file is part of Moodle - http://moodle.org/
3 //
4 // Moodle is free software: you can redistribute it and/or modify
5 // it under the terms of the GNU General Public License as published by
6 // the Free Software Foundation, either version 3 of the License, or
7 // (at your option) any later version.
8 //
9 // Moodle is distributed in the hope that it will be useful,
10 // but WITHOUT ANY WARRANTY; without even the implied warranty of
11 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 // GNU General Public License for more details.
13 //
14 // You should have received a copy of the GNU General Public License
15 // along with Moodle.  If not, see <http://www.gnu.org/licenses/>.
17 /**
18  * Unit tests for the HTMLPurifier integration
19  *
20  * @package    core
21  * @category   phpunit
22  * @copyright  2012 Petr Skoda {@link http://skodak.org}
23  * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
24  */
26 defined('MOODLE_INTERNAL') || die();
29 /**
30  * HTMLPurifier test case
31  *
32  * @package    core
33  * @category   phpunit
34  * @copyright  2012 Petr Skoda {@link http://skodak.org}
35  * @license    http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
36  */
37 class core_htmlpurifier_testcase extends basic_testcase {
39     /**
40      * Verify _blank target is allowed.
41      */
42     public function test_allow_blank_target() {
43         // See MDL-52651 for an explanation as to why the rel="noreferrer" attribute is expected here.
44         // Also note we do not need to test links with an existing rel attribute as the HTML Purifier is configured to remove
45         // the rel attribute.
46         $text = '<a href="http://moodle.org" target="_blank">Some link</a>';
47         $expected = '<a href="http://moodle.org" target="_blank" rel="noreferrer noopener">Some link</a>';
48         $result = format_text($text, FORMAT_HTML);
49         $this->assertSame($expected, $result);
51         $result = format_text('<a href="http://moodle.org" target="some">Some link</a>', FORMAT_HTML);
52         $this->assertSame('<a href="http://moodle.org">Some link</a>', $result);
53     }
55     /**
56      * Verify our nolink tag accepted.
57      */
58     public function test_nolink() {
59         // We can not use format text because nolink changes result.
60         $text = '<nolink><div>no filters</div></nolink>';
61         $result = purify_html($text, array());
62         $this->assertSame($text, $result);
64         $text = '<nolink>xxx<em>xx</em><div>xxx</div></nolink>';
65         $result = purify_html($text, array());
66         $this->assertSame($text, $result);
67     }
69     /**
70      * Verify our tex tag accepted.
71      */
72     public function test_tex() {
73         $text = '<tex>a+b=c</tex>';
74         $result = purify_html($text, array());
75         $this->assertSame($text, $result);
76     }
78     /**
79      * Verify our algebra tag accepted.
80      */
81     public function test_algebra() {
82         $text = '<algebra>a+b=c</algebra>';
83         $result = purify_html($text, array());
84         $this->assertSame($text, $result);
85     }
87     /**
88      * Verify our hacky multilang works.
89      */
90     public function test_multilang() {
91         $text = '<lang lang="en">hmmm</lang><lang lang="anything">hm</lang>';
92         $result = purify_html($text, array());
93         $this->assertSame($text, $result);
95         $text = '<span lang="en" class="multilang">hmmm</span><span lang="anything" class="multilang">hm</span>';
96         $result = purify_html($text, array());
97         $this->assertSame($text, $result);
99         $text = '<span lang="en">hmmm</span>';
100         $result = purify_html($text, array());
101         $this->assertNotSame($text, $result);
103         // Keep standard lang tags.
105         $text = '<span lang="de_DU" class="multilang">asas</span>';
106         $result = purify_html($text, array());
107         $this->assertSame($text, $result);
109         $text = '<lang lang="de_DU">xxxxxx</lang>';
110         $result = purify_html($text, array());
111         $this->assertSame($text, $result);
112     }
114     /**
115      * Tests the 'allowid' option for format_text.
116      */
117     public function test_format_text_allowid() {
118         // Start off by not allowing ids (default).
119         $options = array(
120             'nocache' => true
121         );
122         $result = format_text('<div id="example">Frog</div>', FORMAT_HTML, $options);
123         $this->assertSame('<div>Frog</div>', $result);
125         // Now allow ids.
126         $options['allowid'] = true;
127         $result = format_text('<div id="example">Frog</div>', FORMAT_HTML, $options);
128         $this->assertSame('<div id="example">Frog</div>', $result);
129     }
131     public function test_allowobjectembed() {
132         global $CFG;
134         $this->assertSame('0', $CFG->allowobjectembed);
136         $text = '<object width="425" height="350">
137 <param name="movie" value="http://www.youtube.com/v/AyPzM5WK8ys" />
138 <param name="wmode" value="transparent" />
139 <embed src="http://www.youtube.com/v/AyPzM5WK8ys" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350" />
140 </object>hmmm';
141         $result = purify_html($text, array());
142         $this->assertSame('hmmm', trim($result));
144         $CFG->allowobjectembed = '1';
146         $expected = '<object width="425" height="350" data="http://www.youtube.com/v/AyPzM5WK8ys" type="application/x-shockwave-flash">
147 <param name="allowScriptAccess" value="never" />
148 <param name="allowNetworking" value="internal" />
149 <param name="movie" value="http://www.youtube.com/v/AyPzM5WK8ys" />
150 <param name="wmode" value="transparent" />
151 <embed src="http://www.youtube.com/v/AyPzM5WK8ys" type="application/x-shockwave-flash" wmode="transparent" width="425" height="350" allowscriptaccess="never" allownetworking="internal" />
152 </object>hmmm';
153         $result = purify_html($text, array());
154         $this->assertSame(str_replace("\n", '', $expected), str_replace("\n", '', $result));
156         $CFG->allowobjectembed = '0';
158         $result = purify_html($text, array());
159         $this->assertSame('hmmm', trim($result));
160     }
162     /**
163      * Test if linebreaks kept unchanged.
164      */
165     public function test_line_breaking() {
166         $text = "\n\raa\rsss\nsss\r";
167         $this->assertSame($text, purify_html($text));
168     }
170     /**
171      * Test fixing of strict problems.
172      */
173     public function test_tidy() {
174         $text = "<p>xx";
175         $this->assertSame('<p>xx</p>', purify_html($text));
177         $text = "<P>xx</P>";
178         $this->assertSame('<p>xx</p>', purify_html($text));
180         $text = "xx<br>";
181         $this->assertSame('xx<br />', purify_html($text));
182     }
184     /**
185      * Test nesting - this used to cause problems in earlier versions.
186      */
187     public function test_nested_lists() {
188         $text = "<ul><li>One<ul><li>Two</li></ul></li><li>Three</li></ul>";
189         $this->assertSame($text, purify_html($text));
190     }
192     /**
193      * Test that XSS protection works, complete smoke tests are in htmlpurifier itself.
194      */
195     public function test_cleaning_nastiness() {
196         $text = "x<SCRIPT>alert('XSS')</SCRIPT>x";
197         $this->assertSame('xx', purify_html($text));
199         $text = '<DIV STYLE="background-image:url(javascript:alert(\'XSS\'))">xx</DIV>';
200         $this->assertSame('<div>xx</div>', purify_html($text));
202         $text = '<DIV STYLE="width:expression(alert(\'XSS\'));">xx</DIV>';
203         $this->assertSame('<div>xx</div>', purify_html($text));
205         $text = 'x<IFRAME SRC="javascript:alert(\'XSS\');"></IFRAME>x';
206         $this->assertSame('xx', purify_html($text));
208         $text = 'x<OBJECT TYPE="text/x-scriptlet" DATA="http://ha.ckers.org/scriptlet.html"></OBJECT>x';
209         $this->assertSame('xx', purify_html($text));
211         $text = 'x<EMBED SRC="http://ha.ckers.org/xss.swf" AllowScriptAccess="always"></EMBED>x';
212         $this->assertSame('xx', purify_html($text));
214         $text = 'x<form></form>x';
215         $this->assertSame('xx', purify_html($text));
216     }
218     /**
219      * Test internal function used for clean_text() speedup.
220      */
221     public function test_is_purify_html_necessary() {
222         // First our shortcuts.
223         $text = "";
224         $this->assertFalse(is_purify_html_necessary($text));
225         $this->assertSame($text, purify_html($text));
227         $text = "666";
228         $this->assertFalse(is_purify_html_necessary($text));
229         $this->assertSame($text, purify_html($text));
231         $text = "abc\ndef \" ' ";
232         $this->assertFalse(is_purify_html_necessary($text));
233         $this->assertSame($text, purify_html($text));
235         $text = "abc\n<p>def</p>efg<p>hij</p>";
236         $this->assertFalse(is_purify_html_necessary($text));
237         $this->assertSame($text, purify_html($text));
239         $text = "<br />abc\n<p>def<em>efg</em><strong>hi<br />j</strong></p>";
240         $this->assertFalse(is_purify_html_necessary($text));
241         $this->assertSame($text, purify_html($text));
243         // Now failures.
244         $text = "&nbsp;";
245         $this->assertTrue(is_purify_html_necessary($text));
247         $text = "Gin & Tonic";
248         $this->assertTrue(is_purify_html_necessary($text));
250         $text = "Gin > Tonic";
251         $this->assertTrue(is_purify_html_necessary($text));
253         $text = "Gin < Tonic";
254         $this->assertTrue(is_purify_html_necessary($text));
256         $text = "<div>abc</div>";
257         $this->assertTrue(is_purify_html_necessary($text));
259         $text = "<span>abc</span>";
260         $this->assertTrue(is_purify_html_necessary($text));
262         $text = "<br>abc";
263         $this->assertTrue(is_purify_html_necessary($text));
265         $text = "<p class='xxx'>abc</p>";
266         $this->assertTrue(is_purify_html_necessary($text));
268         $text = "<p>abc<em></p></em>";
269         $this->assertTrue(is_purify_html_necessary($text));
271         $text = "<p>abc";
272         $this->assertTrue(is_purify_html_necessary($text));
273     }
275     public function test_allowed_schemes() {
276         // First standard schemas.
277         $text = '<a href="http://www.example.com/course/view.php?id=5">link</a>';
278         $this->assertSame($text, purify_html($text));
280         $text = '<a href="https://www.example.com/course/view.php?id=5">link</a>';
281         $this->assertSame($text, purify_html($text));
283         $text = '<a href="ftp://user@ftp.example.com/some/file.txt">link</a>';
284         $this->assertSame($text, purify_html($text));
286         $text = '<a href="nntp://example.com/group/123">link</a>';
287         $this->assertSame($text, purify_html($text));
289         $text = '<a href="news:groupname">link</a>';
290         $this->assertSame($text, purify_html($text));
292         $text = '<a href="mailto:user@example.com">link</a>';
293         $this->assertSame($text, purify_html($text));
295         // Extra schemes allowed in moodle.
296         $text = '<a href="irc://irc.example.com/3213?pass">link</a>';
297         $this->assertSame($text, purify_html($text));
299         $text = '<a href="rtsp://www.example.com/movie.mov">link</a>';
300         $this->assertSame($text, purify_html($text));
302         $text = '<a href="rtmp://www.example.com/video.f4v">link</a>';
303         $this->assertSame($text, purify_html($text));
305         $text = '<a href="teamspeak://speak.example.com/?par=val?par2=val2">link</a>';
306         $this->assertSame($text, purify_html($text));
308         $text = '<a href="gopher://gopher.example.com/resource">link</a>';
309         $this->assertSame($text, purify_html($text));
311         $text = '<a href="mms://www.example.com/movie.mms">link</a>';
312         $this->assertSame($text, purify_html($text));
314         // Now some borked or dangerous schemes.
315         $text = '<a href="javascript://www.example.com">link</a>';
316         $this->assertSame('<a>link</a>', purify_html($text));
318         $text = '<a href="hmmm://www.example.com">link</a>';
319         $this->assertSame('<a>link</a>', purify_html($text));
320     }
322     /**
323      * Tests media tags.
324      *
325      * @dataProvider media_tags_provider
326      * @param string $mediatag HTML media tag
327      * @param string $expected expected result
328      */
329     public function test_media_tags($mediatag, $expected) {
330         $actual = format_text($mediatag, FORMAT_MOODLE, ['filter' => false]);
331         $this->assertEquals($expected, $actual);
332     }
334     /**
335      * Test cases for the test_media_tags test.
336      */
337     public function media_tags_provider() {
338         // Takes an array of attributes, then generates a test for each of them.
339         $generatetestcases = function($prefix, array $attrs, array $templates) {
340             return array_reduce($attrs, function($carry, $attr) use ($prefix, $templates) {
341                 $testcase = [$prefix . '/' . $attr => [
342                     sprintf($templates[0], $attr),
343                     sprintf($templates[1], $attr)
344                 ]];
345                 return empty(array_values($carry)[0]) ? $testcase : $carry + $testcase;
346             }, [[]]);
347         };
349         $audioattrs = [
350             'preload="auto"', 'autoplay=""', 'loop=""', 'muted=""', 'controls=""',
351             'crossorigin="anonymous"', 'crossorigin="use-credentials"'
352         ];
353         $videoattrs = [
354             'crossorigin="anonymous"', 'crossorigin="use-credentials"',
355             'poster="https://upload.wikimedia.org/wikipedia/en/1/14/Space_jam.jpg"',
356             'preload="auto"', 'autoplay=""', 'playsinline=""', 'loop=""', 'muted=""',
357             'controls=""', 'width="420"', 'height="69"'
358         ];
359         return $generatetestcases('Plain audio', $audioattrs + ['src="http://example.com/jam.wav"'], [
360                 '<audio %1$s>Looks like you can\'t slam the jams.</audio>',
361                 '<div class="text_to_html"><audio %1$s>Looks like you can\'t slam the jams.</audio></div>'
362             ]) + $generatetestcases('Audio with one source', $audioattrs, [
363                 '<audio %1$s><source src="http://example.com/getup.wav">No tasty jams for you.</audio>',
364                 '<div class="text_to_html">' .
365                     '<audio %1$s>' .
366                         '<source src="http://example.com/getup.wav" />' .
367                         'No tasty jams for you.' .
368                     '</audio>' .
369                 '</div>'
370             ]) + $generatetestcases('Audio with multiple sources', $audioattrs, [
371                 '<audio %1$s>' .
372                     '<source src="http://example.com/getup.wav" type="audio/wav">' .
373                     '<source src="http://example.com/getup.mp3" type="audio/mpeg">' .
374                     '<source src="http://example.com/getup.ogg" type="audio/ogg">' .
375                     'No tasty jams for you.' .
376                 '</audio>',
377                 '<div class="text_to_html">' .
378                     '<audio %1$s>' .
379                         '<source src="http://example.com/getup.wav" type="audio/wav" />' .
380                         '<source src="http://example.com/getup.mp3" type="audio/mpeg" />' .
381                         '<source src="http://example.com/getup.ogg" type="audio/ogg" />' .
382                         'No tasty jams for you.' .
383                     '</audio>' .
384                 '</div>'
385             ]) + $generatetestcases('Audio with sources and tracks', $audioattrs, [
386                 '<audio %1$s>' .
387                     '<source src="http://example.com/getup.wav" type="audio/wav">' .
388                     '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en">' .
389                     '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es">' .
390                     'No tasty jams for you.' .
391                 '</audio>',
392                 '<div class="text_to_html">' .
393                     '<audio %1$s>' .
394                         '<source src="http://example.com/getup.wav" type="audio/wav" />' .
395                         '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en" />' .
396                         '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es" />' .
397                         'No tasty jams for you.' .
398                     '</audio>' .
399                 '</div>'
400             ]) + $generatetestcases('Plain video', $videoattrs + ['src="http://example.com/prettygood.mp4'], [
401                 '<video %1$s>Oh, that\'s pretty bad 😦</video>',
402                 '<div class="text_to_html"><video %1$s>Oh, that\'s pretty bad 😦</video></div>'
403             ]) + $generatetestcases('Video with illegal subtag', $videoattrs + ['src="http://example.com/prettygood.mp4'], [
404                 '<video %1$s><subtag></subtag>Oh, that\'s pretty bad 😦</video>',
405                 '<div class="text_to_html"><video %1$s>Oh, that\'s pretty bad 😦</video></div>'
406             ]) + $generatetestcases('Video with legal subtag', $videoattrs + ['src="http://example.com/prettygood.mp4'], [
407                 '<video %1$s>Did not work <a href="http://example.com/prettygood.mp4">click here to download</a></video>',
408                 '<div class="text_to_html"><video %1$s>Did not work <a href="http://example.com/prettygood.mp4">' .
409                 'click here to download</a></video></div>'
410             ]) + $generatetestcases('Source tag without video or audio', $videoattrs, [
411                 'some text <source src="http://example.com/getup.wav" type="audio/wav"> the end',
412                 '<div class="text_to_html">some text  the end</div>'
413             ]) + $generatetestcases('Video with one source', $videoattrs, [
414                 '<video %1$s><source src="http://example.com/prettygood.mp4">Oh, that\'s pretty bad 😦</video>',
415                 '<div class="text_to_html">' .
416                     '<video %1$s>' .
417                         '<source src="http://example.com/prettygood.mp4" />' .
418                         'Oh, that\'s pretty bad 😦' .
419                     '</video>' .
420                 '</div>'
421             ]) + $generatetestcases('Video with multiple sources', $videoattrs, [
422                 '<video %1$s>' .
423                     '<source src="http://example.com/prettygood.mp4" type="video/mp4">' .
424                     '<source src="http://example.com/eljefe.mp4" type="video/mp4">' .
425                     '<source src="http://example.com/turnitup.mov" type="video/mov">' .
426                     'Oh, that\'s pretty bad 😦' .
427                 '</video>',
428                 '<div class="text_to_html">' .
429                     '<video %1$s>' .
430                         '<source src="http://example.com/prettygood.mp4" type="video/mp4" />' .
431                         '<source src="http://example.com/eljefe.mp4" type="video/mp4" />' .
432                         '<source src="http://example.com/turnitup.mov" type="video/mov" />' .
433                         'Oh, that\'s pretty bad 😦' .
434                     '</video>' .
435                 '</div>'
436             ]) + $generatetestcases('Video with sources and tracks', $audioattrs, [
437                 '<video %1$s>' .
438                     '<source src="http://example.com/getup.wav" type="audio/wav">' .
439                     '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en">' .
440                     '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es">' .
441                     'No tasty jams for you.' .
442                 '</video>',
443                 '<div class="text_to_html">' .
444                     '<video %1$s>' .
445                         '<source src="http://example.com/getup.wav" type="audio/wav" />' .
446                         '<track kind="subtitles" src="http://example.com/subtitles_en.vtt" label="English" srclang="en" />' .
447                         '<track kind="subtitles" src="http://example.com/subtitles_es.vtt" label="Espanol" srclang="es" />' .
448                     'No tasty jams for you.' .
449                     '</video>' .
450                 '</div>'
451             ]) + ['Video with invalid crossorigin' => [
452                     '<video src="http://example.com/turnitup.mov" crossorigin="can i pls hab?">' .
453                         'Oh, that\'s pretty bad 😦' .
454                     '</video>',
455                     '<div class="text_to_html">' .
456                         '<video src="http://example.com/turnitup.mov">' .
457                            'Oh, that\'s pretty bad 😦' .
458                         '</video>' .
459                     '</div>'
460             ]] + ['Audio with invalid crossorigin' => [
461                     '<audio src="http://example.com/getup.wav" crossorigin="give me. the jams.">' .
462                         'nyemnyemnyem' .
463                     '</audio>',
464                     '<div class="text_to_html">' .
465                         '<audio src="http://example.com/getup.wav">' .
466                             'nyemnyemnyem' .
467                         '</audio>' .
468                     '</div>'
469             ]] + ['Other attributes' => [
470                 '<video src="http://example.com/turnitdown.mov" class="nofilter" data-something="data attribute" someattribute="somevalue" onclick="boom">' .
471                     '<source src="http://example.com/getup.wav" type="audio/wav" class="shouldberemoved" data-sourcedata="source data" onmouseover="kill session" />' .
472                     '<track src="http://example.com/subtitles_en.vtt" class="shouldberemoved" data-trackdata="track data" onmouseover="removeme" />' .
473                     'Do not remove attribute class but remove other attributes' .
474                 '</video>',
475                 '<div class="text_to_html">' .
476                     '<video src="http://example.com/turnitdown.mov" class="nofilter">' .
477                         '<source src="http://example.com/getup.wav" type="audio/wav" />' .
478                         '<track src="http://example.com/subtitles_en.vtt" />' .
479                         'Do not remove attribute class but remove other attributes' .
480                     '</video>' .
481                 '</div>'
482             ]];
483     }