1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
|
From 83fc70159b24f5b11a5ef87c9b05c2cf4c7faeba Mon Sep 17 00:00:00 2001
From: "Miss Islington (bot)"
<31488909+miss-islington@users.noreply.github.com>
Date: Sat, 14 Mar 2020 15:35:52 -0700
Subject: [PATCH] bpo-38576: Disallow control characters in hostnames in
http.client (GH-18995) (GH-19002)
Add host validation for control characters for more CVE-2019-18348 protection.
(cherry picked from commit 9165addc22d05e776a54319a8531ebd0b2fe01ef)
Co-authored-by: Ashwin Ramaswami <aramaswamis@gmail.com>
---
Lib/http/client.py | 10 ++++++
Lib/test/test_httplib.py | 13 ++++++-
Lib/test/test_urllib.py | 36 +++++++++++++++++--
.../2020-03-14-14-57-44.bpo-38576.OowwQn.rst | 1 +
4 files changed, 57 insertions(+), 3 deletions(-)
create mode 100644 Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst
diff --git a/Lib/http/client.py b/Lib/http/client.py
index d4821f1a96e07..c0ac7db6f40a0 100644
--- a/Lib/http/client.py
+++ b/Lib/http/client.py
@@ -858,6 +858,8 @@ def __init__(self, host, port=None, timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
(self.host, self.port) = self._get_hostport(host, port)
+ self._validate_host(self.host)
+
# This is stored as an instance variable to allow unit
# tests to replace it with a suitable mockup
self._create_connection = socket.create_connection
@@ -1215,6 +1217,14 @@ def _validate_path(self, url):
raise InvalidURL(f"URL can't contain control characters. {url!r} "
f"(found at least {match.group()!r})")
+ def _validate_host(self, host):
+ """Validate a host so it doesn't contain control characters."""
+ # Prevent CVE-2019-18348.
+ match = _contains_disallowed_url_pchar_re.search(host)
+ if match:
+ raise InvalidURL(f"URL can't contain control characters. {host!r} "
+ f"(found at least {match.group()!r})")
+
def putheader(self, header, *values):
"""Send a request header line to the server.
diff --git a/Lib/test/test_httplib.py b/Lib/test/test_httplib.py
index 14d42d483773c..fcd9231666ede 100644
--- a/Lib/test/test_httplib.py
+++ b/Lib/test/test_httplib.py
@@ -1132,7 +1132,7 @@ def run_server():
thread.join()
self.assertEqual(result, b"proxied data\n")
- def test_putrequest_override_validation(self):
+ def test_putrequest_override_domain_validation(self):
"""
It should be possible to override the default validation
behavior in putrequest (bpo-38216).
@@ -1145,6 +1145,17 @@ def _validate_path(self, url):
conn.sock = FakeSocket('')
conn.putrequest('GET', '/\x00')
+ def test_putrequest_override_host_validation(self):
+ class UnsafeHTTPConnection(client.HTTPConnection):
+ def _validate_host(self, url):
+ pass
+
+ conn = UnsafeHTTPConnection('example.com\r\n')
+ conn.sock = FakeSocket('')
+ # set skip_host so a ValueError is not raised upon adding the
+ # invalid URL as the value of the "Host:" header
+ conn.putrequest('GET', '/', skip_host=1)
+
def test_putrequest_override_encoding(self):
"""
It should be possible to override the default encoding
diff --git a/Lib/test/test_urllib.py b/Lib/test/test_urllib.py
index 0061a5297cb35..ddf425fd8d4b5 100644
--- a/Lib/test/test_urllib.py
+++ b/Lib/test/test_urllib.py
@@ -331,7 +331,7 @@ def test_willclose(self):
self.unfakehttp()
@unittest.skipUnless(ssl, "ssl module required")
- def test_url_with_control_char_rejected(self):
+ def test_url_path_with_control_char_rejected(self):
for char_no in list(range(0, 0x21)) + [0x7f]:
char = chr(char_no)
schemeless_url = f"//localhost:7777/test{char}/"
@@ -358,7 +358,7 @@ def test_url_with_control_char_rejected(self):
self.unfakehttp()
@unittest.skipUnless(ssl, "ssl module required")
- def test_url_with_newline_header_injection_rejected(self):
+ def test_url_path_with_newline_header_injection_rejected(self):
self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
host = "localhost:7777?a=1 HTTP/1.1\r\nX-injected: header\r\nTEST: 123"
schemeless_url = "//" + host + ":8080/test/?test=a"
@@ -383,6 +383,38 @@ def test_url_with_newline_header_injection_rejected(self):
finally:
self.unfakehttp()
+ @unittest.skipUnless(ssl, "ssl module required")
+ def test_url_host_with_control_char_rejected(self):
+ for char_no in list(range(0, 0x21)) + [0x7f]:
+ char = chr(char_no)
+ schemeless_url = f"//localhost{char}/test/"
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
+ try:
+ escaped_char_repr = repr(char).replace('\\', r'\\')
+ InvalidURL = http.client.InvalidURL
+ with self.assertRaisesRegex(
+ InvalidURL, f"contain control.*{escaped_char_repr}"):
+ urlopen(f"http:{schemeless_url}")
+ with self.assertRaisesRegex(InvalidURL, f"contain control.*{escaped_char_repr}"):
+ urlopen(f"https:{schemeless_url}")
+ finally:
+ self.unfakehttp()
+
+ @unittest.skipUnless(ssl, "ssl module required")
+ def test_url_host_with_newline_header_injection_rejected(self):
+ self.fakehttp(b"HTTP/1.1 200 OK\r\n\r\nHello.")
+ host = "localhost\r\nX-injected: header\r\n"
+ schemeless_url = "//" + host + ":8080/test/?test=a"
+ try:
+ InvalidURL = http.client.InvalidURL
+ with self.assertRaisesRegex(
+ InvalidURL, r"contain control.*\\r"):
+ urlopen(f"http:{schemeless_url}")
+ with self.assertRaisesRegex(InvalidURL, r"contain control.*\\n"):
+ urlopen(f"https:{schemeless_url}")
+ finally:
+ self.unfakehttp()
+
def test_read_0_9(self):
# "0.9" response accepted (but not "simple responses" without
# a status line)
diff --git a/Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst b/Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst
new file mode 100644
index 0000000000000..34b8af28988fa
--- /dev/null
+++ b/Misc/NEWS.d/next/Security/2020-03-14-14-57-44.bpo-38576.OowwQn.rst
@@ -0,0 +1 @@
+Disallow control characters in hostnames in http.client, addressing CVE-2019-18348. Such potentially malicious header injection URLs now cause a InvalidURL to be raised.
\ No newline at end of file
|