From 9c23901c6a04ffb80b65490ddcb770bb29c61d69 Mon Sep 17 00:00:00 2001
From: Moul <moul@moul.re>
Date: Mon, 14 Jun 2021 18:10:10 +0200
Subject: [PATCH] [enh] #170: Change domain regex to support IDN

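Replace HOST_REGEX with a pattern that accepts internationalized domain
names (IDN) in their ASCII "xn--" (Punycode) form.
Add a GVA endpoint test with an "xn--" host.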
---
 duniterpy/constants.py      |  6 +++++-
 tests/api/test_endpoints.py | 13 +++++++++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/duniterpy/constants.py b/duniterpy/constants.py
index 4723983d..dfa6e2c4 100644
--- a/duniterpy/constants.py
+++ b/duniterpy/constants.py
@@ -40,7 +40,11 @@ IPV6_REGEX = (
     f"fe80:(?::{IPV6SEG}){{0,4}}%[0-9a-zA-Z]+|::(?:ffff(?::0{{1,4}})?:)?{IPV4_REGEX}|"
     f"(?:{IPV6SEG}:){{1,4}}:{IPV4_REGEX}"
 )
-HOST_REGEX = "[a-z0-9-_.]*(?:.[a-zA-Z])?"
+# Host name pattern allowing "xn--" (IDN) labels, see https://stackoverflow.com/a/26987741
+HOST_REGEX = (
+    "(((?!-))(xn--|_)?[a-z0-9-]{0,61}[a-z0-9]\\.)*"
+    "(xn--)?([a-z0-9][a-z0-9\\-]{0,60}|[a-z0-9-]{1,30}\\.[a-z]{2,})"
+)
 # https://stackoverflow.com/a/12968117
 PORT_REGEX = (
     "[1-9]\\d{0,3}|0|[1-5]\\d{4}|6[0-4]\\d{3}|65[0-4]\\d{2}|655[0-2]\\d|6553[0-5]"
diff --git a/tests/api/test_endpoints.py b/tests/api/test_endpoints.py
index ef37d364..75dc7a61 100644
--- a/tests/api/test_endpoints.py
+++ b/tests/api/test_endpoints.py
@@ -46,6 +46,19 @@ class TestEndpoint(unittest.TestCase):
 
         self.assertEqual(gva_endpoint.inline(), endpoint_str)
 
+        endpoint_str = "GVA S xn--duniter.org 10902"
+
+        gva_endpoint = endpoint.GVAEndpoint.from_inline(endpoint_str)
+
+        self.assertEqual(gva_endpoint.flags, "S")
+        self.assertEqual(gva_endpoint.server, "xn--duniter.org")
+        self.assertEqual(gva_endpoint.ipv4, None)
+        self.assertEqual(gva_endpoint.ipv6, None)
+        self.assertEqual(gva_endpoint.port, 10902)
+        self.assertEqual(gva_endpoint.path, "")
+
+        self.assertEqual(gva_endpoint.inline(), endpoint_str)
+
     def test_gva_subscription(self):
         endpoint_str = "GVASUB test.domain.com 127.0.0.1 2001:0db8:0000:85a3:0000:0000:ac1f:8001 10902 gva"
 
-- 
GitLab