MDL-58413 mod_url: support internationalized domain names.
author Luca Bösch <luca.boesch@bfh.ch>
Wed, 4 Mar 2020 09:23:25 +0000 (10:23 +0100)
committer Luca Bösch <luca.boesch@bfh.ch>
Fri, 20 Mar 2020 09:24:53 +0000 (10:24 +0100)
Internationalized domain names (IDN) now work in the URL resource.

mod/url/locallib.php
mod/url/tests/lib_test.php

index 128db02..eb467b9 100644 (file)
@@ -39,7 +39,7 @@ require_once("$CFG->dirroot/mod/url/lib.php");
 function url_appears_valid_url($url) {
     if (preg_match('/^(\/|https?:|ftp:)/i', $url)) {
         // note: this is not exact validation, we look for severely malformed URLs only
-        return (bool)preg_match('/^[a-z]+:\/\/([^:@\s]+:[^@\s]+@)?[a-z0-9_\.\-]+(:[0-9]+)?(\/[^#]*)?(#.*)?$/i', $url);
+        return (bool) preg_match('/^[a-z]+:\/\/([^:@\s]+:[^@\s]+@)?[^ @]+(:[0-9]+)?(\/[^#]*)?(#.*)?$/i', $url);
     } else {
         return (bool)preg_match('/^[a-z]+:\/\/...*$/i', $url);
     }
@@ -88,10 +88,23 @@ function url_get_full_url($url, $cm, $course, $config=null) {
     // make sure there are no encoded entities, it is ok to do this twice
     $fullurl = html_entity_decode($url->externalurl, ENT_QUOTES, 'UTF-8');
 
+    $letters = '\pL';
+    $latin = 'a-zA-Z';
+    $digits = '0-9';
+    $symbols = '\x{20E3}\x{00AE}\x{00A9}\x{203C}\x{2047}\x{2048}\x{2049}\x{3030}\x{303D}\x{2139}\x{2122}\x{3297}\x{3299}' .
+               '\x{2300}-\x{23FF}\x{2600}-\x{27BF}\x{2B00}-\x{2BF0}';
+    $arabic = '\x{FE00}-\x{FEFF}';
+    $math = '\x{2190}-\x{21FF}\x{2900}-\x{297F}';
+    $othernumbers = '\x{2460}-\x{24FF}';
+    $geometric = '\x{25A0}-\x{25FF}';
+    $emojis = '\x{1F000}-\x{1F6FF}';
+
     if (preg_match('/^(\/|https?:|ftp:)/i', $fullurl) or preg_match('|^/|', $fullurl)) {
         // encode extra chars in URLs - this does not make it always valid, but it helps with some UTF-8 problems
-        $allowed = "a-zA-Z0-9".preg_quote(';/?:@=&$_.+!*(),-#%', '/');
-        $fullurl = preg_replace_callback("/[^$allowed]/", 'url_filter_callback', $fullurl);
+        // Thanks to 💩.la emojis count as valid, too.
+        $allowed = "[" . $letters . $latin . $digits . $symbols . $arabic . $math . $othernumbers . $geometric .
+            $emojis . "]" . preg_quote(';/?:@=&$_.+!*(),-#%', '/');
+        $fullurl = preg_replace_callback("/[^$allowed]/u", 'url_filter_callback', $fullurl);
     } else {
         // encode special chars only
         $fullurl = str_replace('"', '%22', $fullurl);
index 76e6d65..50198f0 100644 (file)
@@ -53,6 +53,25 @@ class mod_url_lib_testcase extends advanced_testcase {
     public function test_url_appears_valid_url() {
         $this->assertTrue(url_appears_valid_url('http://example'));
         $this->assertTrue(url_appears_valid_url('http://www.example.com'));
+        $this->assertTrue(url_appears_valid_url('http://www.examplé.com'));
+        $this->assertTrue(url_appears_valid_url('http://💩.la'));
+        $this->assertTrue(url_appears_valid_url('http://香港大學.香港'));
+        $this->assertTrue(url_appears_valid_url('http://وزارة-الأتصالات.مصر'));
+        $this->assertTrue(url_appears_valid_url('http://www.теннис-алт.рф'));
+        $this->assertTrue(url_appears_valid_url('http://имена.бг'));
+        $this->assertTrue(url_appears_valid_url('http://straße.de'));
+        $this->assertTrue(url_appears_valid_url('http://キース.コム'));
+        $this->assertTrue(url_appears_valid_url('http://太亞.中国'));
+        $this->assertTrue(url_appears_valid_url('http://www.რეგისტრაცია.გე'));
+        $this->assertTrue(url_appears_valid_url('http://уміц.укр'));
+        $this->assertTrue(url_appears_valid_url('http://현대.한국'));
+        $this->assertTrue(url_appears_valid_url('http://мон.мон'));
+        $this->assertTrue(url_appears_valid_url('http://тест.қаз'));
+        $this->assertTrue(url_appears_valid_url('http://рнидс.срб'));
+        $this->assertTrue(url_appears_valid_url('http://اسماء.شبكة'));
+        $this->assertTrue(url_appears_valid_url('http://www.informationssäkerhet.se'));
+        $this->assertTrue(url_appears_valid_url('http://москва.рф/services'));
+        $this->assertTrue(url_appears_valid_url('http://detdumærker.dk'));
         $this->assertTrue(url_appears_valid_url('http://www.exa-mple2.com'));
         $this->assertTrue(url_appears_valid_url('http://www.example.com/~nobody/index.html'));
         $this->assertTrue(url_appears_valid_url('http://www.example.com#hmm'));
@@ -60,6 +79,7 @@ class mod_url_lib_testcase extends advanced_testcase {
         $this->assertTrue(url_appears_valid_url('http://www.example.com/žlutý koníček/lala.txt'));
         $this->assertTrue(url_appears_valid_url('http://www.example.com/žlutý koníček/lala.txt#hmmmm'));
         $this->assertTrue(url_appears_valid_url('http://www.example.com/index.php?xx=yy&zz=aa'));
+        $this->assertTrue(url_appears_valid_url('http://www.example.com:80/index.php?xx=yy&zz=aa'));
         $this->assertTrue(url_appears_valid_url('https://user:password@www.example.com/žlutý koníček/lala.txt'));
         $this->assertTrue(url_appears_valid_url('ftp://user:password@www.example.com/žlutý koníček/lala.txt'));
 
@@ -67,7 +87,6 @@ class mod_url_lib_testcase extends advanced_testcase {
         $this->assertFalse(url_appears_valid_url('http:/example.com'));
         $this->assertFalse(url_appears_valid_url('http://'));
         $this->assertFalse(url_appears_valid_url('http://www.exa mple.com'));
-        $this->assertFalse(url_appears_valid_url('http://www.examplé.com'));
         $this->assertFalse(url_appears_valid_url('http://@www.example.com'));
         $this->assertFalse(url_appears_valid_url('http://user:@www.example.com'));