Skip to content

Commit a3f9f40

Browse files
rickpriceicanhasmath
authored and committed
CVE-2023-27043 The email module of Python through 3.11.3 incorrectly parses e-mail addresses that contain a special character
1 parent ef6788f commit a3f9f40

File tree

5 files changed

+374
-21
lines changed

5 files changed

+374
-21
lines changed

Doc/library/email.utils.rst

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,18 @@ of the new API.
6060
begins with angle brackets, they are stripped off.
6161

6262

63-
.. function:: parseaddr(address)
63+
.. function:: parseaddr(address, *, strict=True)
6464

6565
Parse address -- which should be the value of some address-containing field such
6666
as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
6767
*email address* parts. Returns a tuple of that information, unless the parse
6868
fails, in which case a 2-tuple of ``('', '')`` is returned.
6969

70+
If *strict* is true, use a strict parser which rejects malformed inputs.
71+
72+
.. versionchanged:: 3.8.20
73+
Add *strict* optional parameter and reject malformed inputs by default.
74+
7075

7176
.. function:: formataddr(pair, charset='utf-8')
7277

@@ -84,12 +89,15 @@ of the new API.
8489
Added the *charset* option.
8590

8691

87-
.. function:: getaddresses(fieldvalues)
92+
.. function:: getaddresses(fieldvalues, *, strict=True)
8893

8994
This function returns a list of 2-tuples of the form returned by ``parseaddr()``.
9095
*fieldvalues* is a sequence of header field values as might be returned by
91-
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
92-
example that gets all the recipients of a message::
96+
:meth:`Message.get_all <email.message.Message.get_all>`.
97+
98+
If *strict* is true, use a strict parser which rejects malformed inputs.
99+
100+
Here's a simple example that gets all the recipients of a message::
93101

94102
from email.utils import getaddresses
95103

@@ -99,6 +107,9 @@ of the new API.
99107
resent_ccs = msg.get_all('resent-cc', [])
100108
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
101109

110+
.. versionchanged:: 3.8.20
111+
Add *strict* optional parameter and reject malformed inputs by default.
112+
102113

103114
.. function:: parsedate(date)
104115

Doc/whatsnew/3.7.rst

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2650,6 +2650,19 @@ post-handshake TLS encrypted data. Security issue reported as
26502650
<https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2023-40217>`_ by Aapo
26512651
Oksman. Patch by Gregory P. Smith.
26522652

2653+
Notable security feature in 3.7.17.7
2654+
====================================
2655+
2656+
* :func:`email.utils.getaddresses` and :func:`email.utils.parseaddr` now return
2657+
``('', '')`` 2-tuples in more situations where invalid email addresses are
2658+
encountered, instead of potentially inaccurate values.
2659+
An optional *strict* parameter was added to these two functions:
2660+
use ``strict=False`` to get the old behavior, accepting malformed inputs.
2661+
``getattr(email.utils, 'supports_strict_parsing', False)`` can be used to
2662+
check if the *strict* paramater is available.
2663+
(Contributed by Thomas Dwyer and Victor Stinner for :gh:`102988` to improve
2664+
the CVE-2023-27043 fix.)
2665+
26532666
Notable changes in 3.7.17.4
26542667
===========================
26552668

Lib/email/utils.py

Lines changed: 142 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
specialsre = re.compile(r'[][\\()<>@,:;".]')
4949
escapesre = re.compile(r'[\\"]')
5050

51+
5152
def _has_surrogates(s):
5253
"""Return True if s contains surrogate-escaped binary data."""
5354
# This check is based on the fact that unless there are surrogates, utf8
@@ -106,12 +107,127 @@ def formataddr(pair, charset='utf-8'):
106107
return address
107108

108109

110+
def _iter_escaped_chars(addr):
111+
pos = 0
112+
escape = False
113+
for pos, ch in enumerate(addr):
114+
if escape:
115+
yield (pos, '\\' + ch)
116+
escape = False
117+
elif ch == '\\':
118+
escape = True
119+
else:
120+
yield (pos, ch)
121+
if escape:
122+
yield (pos, '\\')
123+
124+
125+
def _strip_quoted_realnames(addr):
126+
"""Strip real names between quotes."""
127+
if '"' not in addr:
128+
# Fast path
129+
return addr
130+
131+
start = 0
132+
open_pos = None
133+
result = []
134+
for pos, ch in _iter_escaped_chars(addr):
135+
if ch == '"':
136+
if open_pos is None:
137+
open_pos = pos
138+
else:
139+
if start != open_pos:
140+
result.append(addr[start:open_pos])
141+
start = pos + 1
142+
open_pos = None
143+
144+
if start < len(addr):
145+
result.append(addr[start:])
146+
147+
return ''.join(result)
109148

110-
def getaddresses(fieldvalues):
111-
"""Return a list of (REALNAME, EMAIL) for each fieldvalue."""
112-
all = COMMASPACE.join(fieldvalues)
113-
a = _AddressList(all)
114-
return a.addresslist
149+
150+
supports_strict_parsing = True
151+
152+
def getaddresses(fieldvalues, *, strict=True):
153+
"""Return a list of (REALNAME, EMAIL) or ('','') for each fieldvalue.
154+
155+
When parsing fails for a fieldvalue, a 2-tuple of ('', '') is returned in
156+
its place.
157+
158+
If strict is true, use a strict parser which rejects malformed inputs.
159+
"""
160+
161+
# If strict is true and the resulting list of parsed addresses is longer
162+
# than the number of fieldvalues in the input list, a parsing error has
163+
# occurred and consequently a list containing a single empty 2-tuple [('',
164+
# '')] is returned in its place. This is done to avoid invalid output.
165+
#
166+
# Malformed input: getaddresses(['[email protected] <[email protected]>'])
167+
# Invalid output: [('', '[email protected]'), ('', '[email protected]')]
168+
# Safe output: [('', '')]
169+
170+
if not strict:
171+
all = COMMASPACE.join(str(v) for v in fieldvalues)
172+
a = _AddressList(all)
173+
return a.addresslist
174+
175+
fieldvalues = [str(v) for v in fieldvalues]
176+
fieldvalues = _pre_parse_validation(fieldvalues)
177+
addr = COMMASPACE.join(fieldvalues)
178+
a = _AddressList(addr)
179+
result = _post_parse_validation(a.addresslist)
180+
181+
# Treat output as invalid if the number of addresses is not equal to the
182+
# expected number of addresses.
183+
n = 0
184+
for v in fieldvalues:
185+
# When a comma is used in the Real Name part it is not a delimiter.
186+
# So strip those out before counting the commas.
187+
v = _strip_quoted_realnames(v)
188+
# Expected number of addresses: 1 + number of commas
189+
n += 1 + v.count(',')
190+
if len(result) != n:
191+
return [('', '')]
192+
193+
return result
194+
195+
196+
def _check_parenthesis(addr):
197+
# Ignore parentheses in quoted real names.
198+
addr = _strip_quoted_realnames(addr)
199+
200+
opens = 0
201+
for pos, ch in _iter_escaped_chars(addr):
202+
if ch == '(':
203+
opens += 1
204+
elif ch == ')':
205+
opens -= 1
206+
if opens < 0:
207+
return False
208+
return (opens == 0)
209+
210+
211+
def _pre_parse_validation(email_header_fields):
212+
accepted_values = []
213+
for v in email_header_fields:
214+
if not _check_parenthesis(v):
215+
v = "('', '')"
216+
accepted_values.append(v)
217+
218+
return accepted_values
219+
220+
221+
def _post_parse_validation(parsed_email_header_tuples):
222+
accepted_values = []
223+
# The parser would have parsed a correctly formatted domain-literal
224+
# The existence of an [ after parsing indicates a parsing failure
225+
for v in parsed_email_header_tuples:
226+
if '[' in v[1]:
227+
v = ('', '')
228+
accepted_values.append(v)
229+
230+
return accepted_values
115231

116232

117233
def _format_timetuple_and_zone(timetuple, zone):
@@ -202,16 +318,33 @@ def parsedate_to_datetime(data):
202318
tzinfo=datetime.timezone(datetime.timedelta(seconds=tz)))
203319

204320

205-
def parseaddr(addr):
321+
def parseaddr(addr, *, strict=True):
206322
"""
207323
Parse addr into its constituent realname and email address parts.
208324
209325
Return a tuple of realname and email address, unless the parse fails, in
210326
which case return a 2-tuple of ('', '').
327+
328+
If strict is True, use a strict parser which rejects malformed inputs.
211329
"""
212-
addrs = _AddressList(addr).addresslist
213-
if not addrs:
214-
return '', ''
330+
if not strict:
331+
addrs = _AddressList(addr).addresslist
332+
if not addrs:
333+
return ('', '')
334+
return addrs[0]
335+
336+
if isinstance(addr, list):
337+
addr = addr[0]
338+
339+
if not isinstance(addr, str):
340+
return ('', '')
341+
342+
addr = _pre_parse_validation([addr])[0]
343+
addrs = _post_parse_validation(_AddressList(addr).addresslist)
344+
345+
if not addrs or len(addrs) > 1:
346+
return ('', '')
347+
215348
return addrs[0]
216349

217350

0 commit comments

Comments
 (0)