From 83fc70159b24f5b11a5ef87c9b05c2cf4c7faeba Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Sat, 14 Mar 2020 15:35:52 -0700 Subject: [PATCH] bpo-38576: Disallow control characters in hostnames in http.client (GH-18995) (GH-19002) Add host validation for control characters for more CVE-2019-18348 protection. (cherry picked from commit 9165addc22d05e776a54319a8531ebd0b2fe01ef) Co-authored-by: Ashwin Ramaswami <aramaswamis@gmail.com> --- Lib/http/client.py | 10 ++++++ Lib/test/test_httplib.py | 13 ++++++- Lib/test/test_urllib.py | 36 +++++++++++++++++-- .../2020-03-14-14-57-44.bpo-38576.OowwQn.rst | 1 + 4 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst diff --git a/Lib/http/client.py b/Lib/http/client.py index d4821f1a96e07..c0ac7db6f40a0 100644 --- a/Lib/http/client.py +++ b/Lib/http/client.py @@ -858,6 +858,8 @@ def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, (self.host, self.port) = self._get_hostport(host, port) + self._validate_host(self.host) + # This is stored as an instance variable to allow unit # tests to replace it with a suitable mockup self._create_connection = socket.create_connection @@ -1215,6 +1217,14 @@ def _validate_path(self, url): raise InvalidURL(f"URL can't contain control characters. {url!r} " f"(found at least {match.group()!r})") + def _validate_host(self, host): + """Validate a host so it doesn't contain control characters.""" + # Prevent CVE-2019-18348. + match = _contains_disallowed_url_pchar_re.search(host) + if match: + raise InvalidURL(f"URL can't contain control characters. {host!r} " + f"(found at least {match.group()!r})") + def putheader(self, header, *values): """Send a request header line to the server. diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py index 14d42d483773c..fcd9231666ede 100644 --- a/Lib/test/test_httplib.py +++ b/Lib/test/test_httplib.py @@ -1132,7 +1132,7 @@ def run_server(): thread.join() self.assertEqual(result, b"proxied data\n") - def test_putrequest_override_validation(self): + def test_putrequest_override_domain_validation(self): """ It should be possible to override the default validation behavior in putrequest (bpo-38216). @@ -1145,6 +1145,17 @@ def _validate_path(self, url): conn.sock = FakeSocket('') conn.putrequest('GET', '/\x00') + def test_putrequest_override_host_validation(self): + class UnsafeHTTPConnection(client.HTTPConnection): + def _validate_host(self, url): + pass + + conn = UnsafeHTTPConnection('example.com\r\n') + conn.sock = FakeSocket('') + # set skip_host so a ValueError is not raised upon adding the + # invalid URL as the value of the "Host:" header + conn.putrequest('GET', '/', skip_host=1) + def test_putrequest_override_encoding(self): """ It should be possible to override the default encoding diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py index 0061a5297cb35..ddf425fd8d4b5 100644 --- a/Lib/test/test_urllib.py +++ b/Lib/test/test_urllib.py @@ -331,7 +331,7 @@ def test_willclose(self): self.unfakehttp() @unittest.skipUnless(ssl, "ssl module required") - def test_url_with_control_char_rejected(self): + def test_url_path_with_control_char_rejected(self): for char_no in list(range(0, 0x21)) + [0x7f]: char = chr(char_no) schemeless_url = f"//localhost:7777/test{char}/" @@ -358,7 +358,7 @@ def test_url_with_control_char_rejected(self): self.unfakehttp() @unittest.skipUnless(ssl, "ssl module required") - def test_url_with_newline_header_injection_rejected(self): + def test_url_path_with_newline_header_injection_rejected(self): self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123" schemeless_url = "//" + host + ":8080/test/?test=a" @@ -383,6 +383,38 @@ def test_url_with_newline_header_injection_rejected(self): finally: self.unfakehttp() + @unittest.skipUnless(ssl, "ssl module required") + def test_url_host_with_control_char_rejected(self): + for char_no in list(range(0, 0x21)) + [0x7f]: + char = chr(char_no) + schemeless_url = f"//localhost{char}/test/" + self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") + try: + escaped_char_repr = repr(char).replace('\\', r'\\') + InvalidURL = http.client.InvalidURL + with self.assertRaisesRegex( + InvalidURL, f"contain control.*{escaped_char_repr}"): + urlopen(f"http:{schemeless_url}") + with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"): + urlopen(f"https:{schemeless_url}") + finally: + self.unfakehttp() + + @unittest.skipUnless(ssl, "ssl module required") + def test_url_host_with_newline_header_injection_rejected(self): + self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.") + host = "localhost\r\nX-injected: header\r\n" + schemeless_url = "//" + host + ":8080/test/?test=a" + try: + InvalidURL = http.client.InvalidURL + with self.assertRaisesRegex( + InvalidURL, r"contain control.*\\r"): + urlopen(f"http:{schemeless_url}") + with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"): + urlopen(f"https:{schemeless_url}") + finally: + self.unfakehttp() + def test_read_0_9(self): # "0.9" response accepted (but not "simple responses" without # a status line) diff --git a/Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst b/Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst new file mode 100644 index 0000000000000..34b8af28988fa --- /dev/null +++ b/Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst @@ -0,0 +1 @@ +Disallow control characters in hostnames in http.client, addressing CVE-2019-18348. Such potentially malicious header injection URLs now cause a InvalidURL to be raised. \ No newline at end of file