From a3cc6261fcb02a7338db38e3839476023cd0de16 Mon Sep 17 00:00:00 2001 From: Marina Glancy Date: Tue, 3 Oct 2017 18:03:12 +0800 Subject: [PATCH] MDL-60337 htmlpurifier: non-ascii domain names --- .../HTMLPurifier/AttrDef/URI/Host.php | 2 +- lib/htmlpurifier/readme_moodle.txt | 4 ++++ lib/tests/htmlpurifier_test.php | 24 +++++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php index 3b4d186743e..e54a3344a74 100644 --- a/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php +++ b/lib/htmlpurifier/HTMLPurifier/AttrDef/URI/Host.php @@ -97,7 +97,7 @@ class HTMLPurifier_AttrDef_URI_Host extends HTMLPurifier_AttrDef // PHP 5.3 and later support this functionality natively if (function_exists('idn_to_ascii')) { - $string = idn_to_ascii($string); + $string = idn_to_ascii($string, IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46); // If we have Net_IDNA2 support, we can support IRIs by // punycoding them. (This is the most portable thing to do, diff --git a/lib/htmlpurifier/readme_moodle.txt b/lib/htmlpurifier/readme_moodle.txt index 43c714b177a..7b85b2f358d 100644 --- a/lib/htmlpurifier/readme_moodle.txt +++ b/lib/htmlpurifier/readme_moodle.txt @@ -13,3 +13,7 @@ Description of HTML Purifier v4.9.3 library import into Moodle HTMLPurifier.path.php * add locallib.php with Moodle specific extensions to /lib/htmlpurifier/ * add this readme_moodle.txt to /lib/htmlpurifier/ + +Modifications: +* MDL-60337 use correct IDN variant for converting domain names to ascii + Check status of https://github.com/ezyang/htmlpurifier/pull/148 diff --git a/lib/tests/htmlpurifier_test.php b/lib/tests/htmlpurifier_test.php index f7d5679462e..e14b3191fdf 100644 --- a/lib/tests/htmlpurifier_test.php +++ b/lib/tests/htmlpurifier_test.php @@ -319,6 +319,30 @@ class core_htmlpurifier_testcase extends basic_testcase { $this->assertSame('link', purify_html($text)); } + /** + * Test non-ascii domain names + */ + public function test_idn() { + + // Example of domain that gives the same result in IDNA2003 and IDNA2008 . + $text = 'правительство.рф'; + $expected = 'правительство.рф'; + $this->assertSame($expected, purify_html($text)); + + // Examples of deviations from http://www.unicode.org/reports/tr46/#Table_Deviation_Characters . + $text = 'teßt.de'; + $expected = 'teßt.de'; + $this->assertSame($expected, purify_html($text)); + + $text = 'http://βόλος.com'; + $expected = 'http://βόλος.com'; + $this->assertSame($expected, purify_html($text)); + + $text = 'http://نامه‌ای.com'; + $expected = 'http://نامه‌ای.com'; + $this->assertSame($expected, purify_html($text)); + } + /** * Tests media tags. * -- 2.43.0