From 5a1033fe5be764a135adcfff2fdc14edc3e5f327 Mon Sep 17 00:00:00 2001
From: Changqing Li <changqing.li@windriver.com>
Date: Thu, 10 Oct 2019 16:32:19 +0800
Subject: [PATCH] bpo-36742: Fixes handling of pre-normalization characters in
 urlsplit(); bpo-36742: Corrects fix to handle decomposition in usernames

Upstream-Status: Backport

Backported from the following upstream CPython commits:
https://github.com/python/cpython/commit/98a4dcefbbc3bce5ab07e7c0830a183157250259
https://github.com/python/cpython/commit/f61599b050c621386a3fc6bc480359e2d3bb93de#diff-b577545d73dd0cdb2c337a4c5f89e1d7

CVE: CVE-2019-10160

Signed-off-by: Changqing Li <changqing.li@windriver.com>
---
 Lib/test/test_urlparse.py | 19 +++++++++++++------
 Lib/urlparse.py           | 14 +++++++++-----
 2 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py
index 1830d0b..857ed96 100644
--- a/Lib/test/test_urlparse.py
+++ b/Lib/test/test_urlparse.py
@@ -641,13 +641,20 @@ class UrlParseTestCase(unittest.TestCase):
         self.assertIn(u'\u2100', denorm_chars)
         self.assertIn(u'\uFF03', denorm_chars)
 
+        # bpo-36742: Verify port separators are ignored when they
+        # existed prior to decomposition
+        urlparse.urlsplit(u'http://\u30d5\u309a:80')
+        with self.assertRaises(ValueError):
+            urlparse.urlsplit(u'http://\u30d5\u309a\ufe1380')
+
         for scheme in [u"http", u"https", u"ftp"]:
-            for c in denorm_chars:
-                url = u"{}://netloc{}false.netloc/path".format(scheme, c)
-                if test_support.verbose:
-                    print "Checking %r" % url
-                with self.assertRaises(ValueError):
-                    urlparse.urlsplit(url)
+            for netloc in [u"netloc{}false.netloc", u"n{}user@netloc"]:
+                for c in denorm_chars:
+                    url = u"{}://{}/path".format(scheme, netloc.format(c))
+                    if test_support.verbose:
+                        print "Checking %r" % url
+                    with self.assertRaises(ValueError):
+                        urlparse.urlsplit(url)
 
 def test_main():
     test_support.run_unittest(UrlParseTestCase)
diff --git a/Lib/urlparse.py b/Lib/urlparse.py
index 54eda08..e34b368 100644
--- a/Lib/urlparse.py
+++ b/Lib/urlparse.py
@@ -171,14 +171,18 @@ def _checknetloc(netloc):
     # looking for characters like \u2100 that expand to 'a/c'
     # IDNA uses NFKC equivalence, so normalize for this check
     import unicodedata
-    netloc2 = unicodedata.normalize('NFKC', netloc)
-    if netloc == netloc2:
+    n = netloc.replace(u'@', u'') # ignore characters already included
+    n = n.replace(u':', u'')      # but not the surrounding text
+    n = n.replace(u'#', u'')
+    n = n.replace(u'?', u'')
+
+    netloc2 = unicodedata.normalize('NFKC', n)
+    if n == netloc2:
         return
-    _, _, netloc = netloc.rpartition('@') # anything to the left of '@' is okay
     for c in '/?#@:':
         if c in netloc2:
-            raise ValueError("netloc '" + netloc2 + "' contains invalid " +
-                             "characters under NFKC normalization")
+            raise ValueError(u"netloc '" + netloc + u"' contains invalid " +
+                             u"characters under NFKC normalization")
 
 def urlsplit(url, scheme='', allow_fragments=True):
     """Parse a URL into 5 components:
-- 
2.7.4