04e327e
From 7e200e0763f5b71c199aaf98bd5588f291585619 Mon Sep 17 00:00:00 2001
04e327e
From: =?UTF-8?q?Miro=20Hron=C4=8Dok?= <miro@hroncok.cz>
04e327e
Date: Tue, 7 May 2019 17:28:47 +0200
04e327e
Subject: [PATCH] bpo-30458: Disallow control chars in http URLs. (GH-12755)
04e327e
 (GH-13154)
04e327e
MIME-Version: 1.0
04e327e
Content-Type: text/plain; charset=UTF-8
04e327e
Content-Transfer-Encoding: 8bit
04e327e
04e327e
Disallow control chars in http URLs in urllib.urlopen.  This addresses a potential security problem for applications that do not sanity check their URLs where http request headers could be injected.
04e327e
04e327e
Disable https related urllib tests on a build without ssl (GH-13032)
04e327e
These tests require an SSL enabled build. Skip these tests when python is built without SSL to fix test failures.
04e327e
04e327e
Use http.client.InvalidURL instead of ValueError as the new error case's exception. (GH-13044)
04e327e
04e327e
Backport Co-Authored-By: Miro HronĨok <miro@hroncok.cz>
04e327e
---
04e327e
 Lib/http/client.py                            | 15 ++++++
04e327e
 Lib/test/test_urllib.py                       | 53 +++++++++++++++++++
04e327e
 Lib/test/test_xmlrpc.py                       |  7 ++-
04e327e
 .../2019-04-10-08-53-30.bpo-30458.51E-DA.rst  |  1 +
04e327e
 4 files changed, 75 insertions(+), 1 deletion(-)
04e327e
 create mode 100644 Misc/NEWS.d/next/Security/2019-04-10-08-53-30.bpo-30458.51E-DA.rst
04e327e
04e327e
diff --git a/Lib/http/client.py b/Lib/http/client.py
04e327e
index 1de151c38e..2afd452fe3 100644
04e327e
--- a/Lib/http/client.py
04e327e
+++ b/Lib/http/client.py
04e327e
@@ -140,6 +140,16 @@ _MAXHEADERS = 100
04e327e
 _is_legal_header_name = re.compile(rb'[^:\s][^:\r\n]*').fullmatch
04e327e
 _is_illegal_header_value = re.compile(rb'\n(?![ \t])|\r(?![ \t\n])').search
04e327e
 
04e327e
+# These characters are not allowed within HTTP URL paths.
04e327e
+#  See https://tools.ietf.org/html/rfc3986#section-3.3 and the
04e327e
+#  https://tools.ietf.org/html/rfc3986#appendix-A pchar definition.
04e327e
+# Prevents CVE-2019-9740.  Includes control characters such as \r\n.
04e327e
+# We don't restrict chars above \x7f as putrequest() limits us to ASCII.
04e327e
+_contains_disallowed_url_pchar_re = re.compile('[\x00-\x20\x7f]')
04e327e
+# Arguably only these _should_ allowed:
04e327e
+#  _is_allowed_url_pchars_re = re.compile(r"^[/!$&'()*+,;=:@%a-zA-Z0-9._~-]+$")
04e327e
+# We are more lenient for assumed real world compatibility purposes.
04e327e
+
04e327e
 # We always set the Content-Length header for these methods because some
04e327e
 # servers will otherwise respond with a 411
04e327e
 _METHODS_EXPECTING_BODY = {'PATCH', 'POST', 'PUT'}
04e327e
@@ -1101,6 +1111,11 @@ class HTTPConnection:
04e327e
         self._method = method
04e327e
         if not url:
04e327e
             url = '/'
04e327e
+        # Prevent CVE-2019-9740.
04e327e
+        match = _contains_disallowed_url_pchar_re.search(url)
04e327e
+        if match:
04e327e
+            raise InvalidURL(f"URL can't contain control characters. {url!r} "
04e327e
+                             f"(found at least {match.group()!r})")
04e327e
         request = '%s %s %s' % (method, url, self._http_vsn_str)
04e327e
 
04e327e
         # Non-ASCII characters should have been eliminated earlier
04e327e
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
04e327e
index 2ac73b58d8..7214492eca 100644
04e327e
--- a/Lib/test/test_urllib.py
04e327e
+++ b/Lib/test/test_urllib.py
04e327e
@@ -329,6 +329,59 @@ class urlopen_HttpTests(unittest.TestCase, FakeHTTPMixin, FakeFTPMixin):
04e327e
         finally:
04e327e
             self.unfakehttp()
04e327e
 
04e327e
+    @unittest.skipUnless(ssl, "ssl module required")
04e327e
+    def test_url_with_control_char_rejected(self):
04e327e
+        for char_no in list(range(0, 0x21)) + [0x7f]:
04e327e
+            char = chr(char_no)
04e327e
+            schemeless_url = f"//localhost:7777/test{char}/"
04e327e
+            self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
04e327e
+            try:
04e327e
+                # We explicitly test urllib.request.urlopen() instead of the top
04e327e
+                # level 'def urlopen()' function defined in this... (quite ugly)
04e327e
+                # test suite.  They use different url opening codepaths.  Plain
04e327e
+                # urlopen uses FancyURLOpener which goes via a codepath that
04e327e
+                # calls urllib.parse.quote() on the URL which makes all of the
04e327e
+                # above attempts at injection within the url _path_ safe.
04e327e
+                escaped_char_repr = repr(char).replace('\\', r'\\')
04e327e
+                InvalidURL = http.client.InvalidURL
04e327e
+                with self.assertRaisesRegex(
04e327e
+                    InvalidURL, f"contain control.*{escaped_char_repr}"):
04e327e
+                    urllib.request.urlopen(f"http:{schemeless_url}")
04e327e
+                with self.assertRaisesRegex(
04e327e
+                    InvalidURL, f"contain control.*{escaped_char_repr}"):
04e327e
+                    urllib.request.urlopen(f"https:{schemeless_url}")
04e327e
+                # This code path quotes the URL so there is no injection.
04e327e
+                resp = urlopen(f"http:{schemeless_url}")
04e327e
+                self.assertNotIn(char, resp.geturl())
04e327e
+            finally:
04e327e
+                self.unfakehttp()
04e327e
+
04e327e
+    @unittest.skipUnless(ssl, "ssl module required")
04e327e
+    def test_url_with_newline_header_injection_rejected(self):
04e327e
+        self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
04e327e
+        host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
04e327e
+        schemeless_url = "//" + host + ":8080/test/?test=a"
04e327e
+        try:
04e327e
+            # We explicitly test urllib.request.urlopen() instead of the top
04e327e
+            # level 'def urlopen()' function defined in this... (quite ugly)
04e327e
+            # test suite.  They use different url opening codepaths.  Plain
04e327e
+            # urlopen uses FancyURLOpener which goes via a codepath that
04e327e
+            # calls urllib.parse.quote() on the URL which makes all of the
04e327e
+            # above attempts at injection within the url _path_ safe.
04e327e
+            InvalidURL = http.client.InvalidURL
04e327e
+            with self.assertRaisesRegex(
04e327e
+                InvalidURL, r"contain control.*\\r.*(found at least . .)"):
04e327e
+                urllib.request.urlopen(f"http:{schemeless_url}")
04e327e
+            with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
04e327e
+                urllib.request.urlopen(f"https:{schemeless_url}")
04e327e
+            # This code path quotes the URL so there is no injection.
04e327e
+            resp = urlopen(f"http:{schemeless_url}")
04e327e
+            self.assertNotIn(' ', resp.geturl())
04e327e
+            self.assertNotIn('\r', resp.geturl())
04e327e
+            self.assertNotIn('\n', resp.geturl())
04e327e
+        finally:
04e327e
+            self.unfakehttp()
04e327e
+
04e327e
     def test_read_0_9(self):
04e327e
         # "0.9" response accepted (but not "simple responses" without
04e327e
         # a status line)
04e327e
diff --git a/Lib/test/test_xmlrpc.py b/Lib/test/test_xmlrpc.py
04e327e
index 32263f7f0b..0e002ec4ef 100644
04e327e
--- a/Lib/test/test_xmlrpc.py
04e327e
+++ b/Lib/test/test_xmlrpc.py
04e327e
@@ -945,7 +945,12 @@ class SimpleServerTestCase(BaseServerTestCase):
04e327e
     def test_partial_post(self):
04e327e
         # Check that a partial POST doesn't make the server loop: issue #14001.
04e327e
         conn = http.client.HTTPConnection(ADDR, PORT)
04e327e
-        conn.request('POST', '/RPC2 HTTP/1.0\r\nContent-Length: 100\r\n\r\nbye')
04e327e
+        conn.send('POST /RPC2 HTTP/1.0\r\n'
04e327e
+                  'Content-Length: 100\r\n\r\n'
04e327e
+                  'bye HTTP/1.1\r\n'
04e327e
+                  f'Host: {ADDR}:{PORT}\r\n'
04e327e
+                  'Accept-Encoding: identity\r\n'
04e327e
+                  'Content-Length: 0\r\n\r\n'.encode('ascii'))
04e327e
         conn.close()
04e327e
 
04e327e
     def test_context_manager(self):
04e327e
diff --git a/Misc/NEWS.d/next/Security/2019-04-10-08-53-30.bpo-30458.51E-DA.rst b/Misc/NEWS.d/next/Security/2019-04-10-08-53-30.bpo-30458.51E-DA.rst
04e327e
new file mode 100644
04e327e
index 0000000000..ed8027fb4d
04e327e
--- /dev/null
04e327e
+++ b/Misc/NEWS.d/next/Security/2019-04-10-08-53-30.bpo-30458.51E-DA.rst
04e327e
@@ -0,0 +1 @@
04e327e
+Address CVE-2019-9740 by disallowing URL paths with embedded whitespace or control characters through into the underlying http client request.  Such potentially malicious header injection URLs now cause an http.client.InvalidURL exception to be raised.
04e327e
-- 
04e327e
2.21.0
04e327e