Tree - rpms/python3 - src.fedoraproject.org

rpms / python3

Blame 00320-CVE-2019-9636.patch

Blob History Raw

		b98ac44	`From 23fc0416454c4ad5b9b23d520fbe6d89be3efc24 Mon Sep 17 00:00:00 2001`
		b98ac44	`From: Steve Dower <steve.dower@microsoft.com>`
		b98ac44	`Date: Mon, 11 Mar 2019 21:34:03 -0700`
		b98ac44	`Subject: [PATCH] [3.6] bpo-36216: Add check for characters in netloc that`
		b98ac44	`normalize to separators (GH-12201) (GH-12215)`
		b98ac44
		b98ac44	`---`
		b98ac44	`Doc/library/urllib.parse.rst \| 18 +++++++++++++++`
		b98ac44	`Lib/test/test_urlparse.py \| 23 +++++++++++++++++++`
		b98ac44	`Lib/urllib/parse.py \| 17 ++++++++++++++`
		b98ac44	`.../2019-03-06-09-38-40.bpo-36216.6q1m4a.rst \| 3 +++`
		b98ac44	`4 files changed, 61 insertions(+)`
		b98ac44	`create mode 100644 Misc/NEWS.d/next/Security/2019-03-06-09-38-40.bpo-36216.6q1m4a.rst`
		b98ac44
		b98ac44	`diff --git a/Doc/library/urllib.parse.rst b/Doc/library/urllib.parse.rst`
		b98ac44	`index d991254d5ca1..647af613a315 100644`
		b98ac44	`--- a/Doc/library/urllib.parse.rst`
		b98ac44	`+++ b/Doc/library/urllib.parse.rst`
		b98ac44	`@@ -121,6 +121,11 @@ or on combining URL components into a URL string.`
		b98ac44	Unmatched square brackets in the :attr:`netloc` attribute will raise a
		b98ac44	:exc:`ValueError`.
		b98ac44
		b98ac44	+ Characters in the :attr:`netloc` attribute that decompose under NFKC
		b98ac44	+ normalization (as used by the IDNA encoding) into any of ``/``, ``?``,
		b98ac44	+ ``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
		b98ac44	`+ decomposed before parsing, no error will be raised.`
		b98ac44	`+`
		b98ac44	`.. versionchanged:: 3.2`
		b98ac44	`Added IPv6 URL parsing capabilities.`
		b98ac44
		b98ac44	`@@ -133,6 +138,10 @@ or on combining URL components into a URL string.`
		b98ac44	Out-of-range port numbers now raise :exc:`ValueError`, instead of
		b98ac44	returning :const:`None`.
		b98ac44
		b98ac44	`+ .. versionchanged:: 3.6.9`
		b98ac44	`+ Characters that affect netloc parsing under NFKC normalization will`
		b98ac44	+ now raise :exc:`ValueError`.
		b98ac44	`+`
		b98ac44
		b98ac44	`.. function:: parse_qs(qs, keep_blank_values=False, strict_parsing=False, encoding='utf-8', errors='replace', max_num_fields=None)`
		b98ac44
		b98ac44	`@@ -256,10 +265,19 @@ or on combining URL components into a URL string.`
		b98ac44	Unmatched square brackets in the :attr:`netloc` attribute will raise a
		b98ac44	:exc:`ValueError`.
		b98ac44
		b98ac44	+ Characters in the :attr:`netloc` attribute that decompose under NFKC
		b98ac44	+ normalization (as used by the IDNA encoding) into any of ``/``, ``?``,
		b98ac44	+ ``#``, ``@``, or ``:`` will raise a :exc:`ValueError`. If the URL is
		b98ac44	`+ decomposed before parsing, no error will be raised.`
		b98ac44	`+`
		b98ac44	`.. versionchanged:: 3.6`
		b98ac44	Out-of-range port numbers now raise :exc:`ValueError`, instead of
		b98ac44	returning :const:`None`.
		b98ac44
		b98ac44	`+ .. versionchanged:: 3.6.9`
		b98ac44	`+ Characters that affect netloc parsing under NFKC normalization will`
		b98ac44	+ now raise :exc:`ValueError`.
		b98ac44	`+`
		b98ac44
		b98ac44	`.. function:: urlunsplit(parts)`
		b98ac44
		b98ac44	`diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py`
		b98ac44	`index be50b47603aa..e6638aee2244 100644`
		b98ac44	`--- a/Lib/test/test_urlparse.py`
		b98ac44	`+++ b/Lib/test/test_urlparse.py`
		b98ac44	`@@ -1,3 +1,5 @@`
		b98ac44	`+import sys`
		b98ac44	`+import unicodedata`
		b98ac44	`import unittest`
		b98ac44	`import urllib.parse`
		b98ac44
		b98ac44	`@@ -984,6 +986,27 @@ def test_all(self):`
		b98ac44	`expected.append(name)`
		b98ac44	`self.assertCountEqual(urllib.parse.__all__, expected)`
		b98ac44
		b98ac44	`+ def test_urlsplit_normalization(self):`
		b98ac44	`+ # Certain characters should never occur in the netloc,`
		b98ac44	`+ # including under normalization.`
		b98ac44	`+ # Ensure that ALL of them are detected and cause an error`
		b98ac44	`+ illegal_chars = '/:#?@'`
		b98ac44	`+ hex_chars = {'{:04X}'.format(ord(c)) for c in illegal_chars}`
		b98ac44	`+ denorm_chars = [`
		b98ac44	`+ c for c in map(chr, range(128, sys.maxunicode))`
		b98ac44	`+ if (hex_chars & set(unicodedata.decomposition(c).split()))`
		b98ac44	`+ and c not in illegal_chars`
		b98ac44	`+ ]`
		b98ac44	`+ # Sanity check that we found at least one such character`
		b98ac44	`+ self.assertIn('\u2100', denorm_chars)`
		b98ac44	`+ self.assertIn('\uFF03', denorm_chars)`
		b98ac44	`+`
		b98ac44	`+ for scheme in ["http", "https", "ftp"]:`
		b98ac44	`+ for c in denorm_chars:`
		b98ac44	`+ url = "{}://netloc{}false.netloc/path".format(scheme, c)`
		b98ac44	`+ with self.subTest(url=url, char='{:04X}'.format(ord(c))):`
		b98ac44	`+ with self.assertRaises(ValueError):`
		b98ac44	`+ urllib.parse.urlsplit(url)`
		b98ac44
		b98ac44	`class Utility_Tests(unittest.TestCase):`
		b98ac44	`"""Testcase to test the various utility functions in the urllib."""`
		b98ac44	`diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py`
		b98ac44	`index 85e68c8b42c7..7b06f4d71d67 100644`
		b98ac44	`--- a/Lib/urllib/parse.py`
		b98ac44	`+++ b/Lib/urllib/parse.py`
		b98ac44	`@@ -391,6 +391,21 @@ def _splitnetloc(url, start=0):`
		b98ac44	`delim = min(delim, wdelim) # use earliest delim position`
		b98ac44	`return url[start:delim], url[delim:] # return (domain, rest)`
		b98ac44
		b98ac44	`+def _checknetloc(netloc):`
		b98ac44	`+ if not netloc or not any(ord(c) > 127 for c in netloc):`
		b98ac44	`+ return`
		b98ac44	`+ # looking for characters like \u2100 that expand to 'a/c'`
		b98ac44	`+ # IDNA uses NFKC equivalence, so normalize for this check`
		b98ac44	`+ import unicodedata`
		b98ac44	`+ netloc2 = unicodedata.normalize('NFKC', netloc)`
		b98ac44	`+ if netloc == netloc2:`
		b98ac44	`+ return`
		b98ac44	`+ _, _, netloc = netloc.rpartition('@') # anything to the left of '@' is okay`
		b98ac44	`+ for c in '/?#@:':`
		b98ac44	`+ if c in netloc2:`
		b98ac44	`+ raise ValueError("netloc '" + netloc2 + "' contains invalid " +`
		b98ac44	`+ "characters under NFKC normalization")`
		b98ac44	`+`
		b98ac44	`def urlsplit(url, scheme='', allow_fragments=True):`
		b98ac44	`"""Parse a URL into 5 components:`
		b98ac44	`<scheme>://<netloc>/<path>?<query>#<fragment>`
		b98ac44	`@@ -420,6 +435,7 @@ def urlsplit(url, scheme='', allow_fragments=True):`
		b98ac44	`url, fragment = url.split('#', 1)`
		b98ac44	`if '?' in url:`
		b98ac44	`url, query = url.split('?', 1)`
		b98ac44	`+ _checknetloc(netloc)`
		b98ac44	`v = SplitResult(scheme, netloc, url, query, fragment)`
		b98ac44	`_parse_cache[key] = v`
		b98ac44	`return _coerce_result(v)`
		b98ac44	`@@ -443,6 +459,7 @@ def urlsplit(url, scheme='', allow_fragments=True):`
		b98ac44	`url, fragment = url.split('#', 1)`
		b98ac44	`if '?' in url:`
		b98ac44	`url, query = url.split('?', 1)`
		b98ac44	`+ _checknetloc(netloc)`
		b98ac44	`v = SplitResult(scheme, netloc, url, query, fragment)`
		b98ac44	`_parse_cache[key] = v`
		b98ac44	`return _coerce_result(v)`
		b98ac44	`diff --git a/Misc/NEWS.d/next/Security/2019-03-06-09-38-40.bpo-36216.6q1m4a.rst b/Misc/NEWS.d/next/Security/2019-03-06-09-38-40.bpo-36216.6q1m4a.rst`
		b98ac44	`new file mode 100644`
		b98ac44	`index 000000000000..5546394157f9`
		b98ac44	`--- /dev/null`
		b98ac44	`+++ b/Misc/NEWS.d/next/Security/2019-03-06-09-38-40.bpo-36216.6q1m4a.rst`
		b98ac44	`@@ -0,0 +1,3 @@`
		b98ac44	`+Changes urlsplit() to raise ValueError when the URL contains characters that`
		b98ac44	`+decompose under IDNA encoding (NFKC-normalization) into characters that`
		b98ac44	`+affect how the URL is parsed.`
		ce3d003	`diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py`
		ce3d003	`index 0faf2bb..d0365ec 100644`
		ce3d003	`--- a/Lib/test/test_urlparse.py`
		ce3d003	`+++ b/Lib/test/test_urlparse.py`
		ce3d003	`@@ -1011,6 +1011,12 @@ class UrlParseTestCase(unittest.TestCase):`
		ce3d003	`self.assertIn('\u2100', denorm_chars)`
		ce3d003	`self.assertIn('\uFF03', denorm_chars)`
		ce3d003
		ce3d003	`+ # bpo-36742: Verify port separators are ignored when they`
		ce3d003	`+ # existed prior to decomposition`
		ce3d003	`+ urllib.parse.urlsplit('http://\u30d5\u309a:80')`
		ce3d003	`+ with self.assertRaises(ValueError):`
		ce3d003	`+ urllib.parse.urlsplit('http://\u30d5\u309a\ufe1380')`
		ce3d003	`+`
		ce3d003	`for scheme in ["http", "https", "ftp"]:`
		ce3d003	`for c in denorm_chars:`
		ce3d003	`url = "{}://netloc{}false.netloc/path".format(scheme, c)`
		ce3d003	`diff --git a/Lib/urllib/parse.py b/Lib/urllib/parse.py`
		ce3d003	`index 8b6c9b1..e2f7b69 100644`
		ce3d003	`--- a/Lib/urllib/parse.py`
		ce3d003	`+++ b/Lib/urllib/parse.py`
		ce3d003	`@@ -402,13 +402,16 @@ def _checknetloc(netloc):`
		ce3d003	`# looking for characters like \u2100 that expand to 'a/c'`
		ce3d003	`# IDNA uses NFKC equivalence, so normalize for this check`
		ce3d003	`import unicodedata`
		ce3d003	`- netloc2 = unicodedata.normalize('NFKC', netloc)`
		ce3d003	`- if netloc == netloc2:`
		ce3d003	`+ n = netloc.rpartition('@')[2] # ignore anything to the left of '@'`
		ce3d003	`+ n = n.replace(':', '') # ignore characters already included`
		ce3d003	`+ n = n.replace('#', '') # but not the surrounding text`
		ce3d003	`+ n = n.replace('?', '')`
		ce3d003	`+ netloc2 = unicodedata.normalize('NFKC', n)`
		ce3d003	`+ if n == netloc2:`
		ce3d003	`return`
		ce3d003	`- _, _, netloc = netloc.rpartition('@') # anything to the left of '@' is okay`
		ce3d003	`for c in '/?#@:':`
		ce3d003	`if c in netloc2:`
		ce3d003	`- raise ValueError("netloc '" + netloc2 + "' contains invalid " +`
		ce3d003	`+ raise ValueError("netloc '" + netloc + "' contains invalid " +`
		ce3d003	`"characters under NFKC normalization")`
		ce3d003
		ce3d003	`def urlsplit(url, scheme='', allow_fragments=True):`

rpms / python3

Source Code

Blame 00320-CVE-2019-9636.patch