diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index ba416e7fa6e3fe..08651af9dbe132 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -29,7 +29,8 @@
starttagopen = re.compile('<[a-zA-Z]')
endtagopen = re.compile('[a-zA-Z]')
piclose = re.compile('>')
-commentclose = re.compile(r'--\s*>')
+commentclose = re.compile(r'--!?>')
+commentabruptclose = re.compile(r'-?>')
# Note:
# 1) if you change tagfind/attrfind remember to update locatestarttagend too;
# 2) if you change tagfind/attrfind and/or locatestarttagend the parser will
@@ -309,6 +310,21 @@ def parse_html_declaration(self, i):
else:
return self.parse_bogus_comment(i)
+ # Internal -- parse comment, return length or -1 if not terminated
+ # see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
+ def parse_comment(self, i, report=True):
+ rawdata = self.rawdata
+ assert rawdata.startswith('"
''
''
+ ''
''
+ # abrupt-closing-of-empty-comment
+ ''
+ ''
''
''
- '')
+ ''
+ ''
+ ''
+ ''
+ ''
+ # nested-comment
+ ' -->'
+ ''
+ ''
+ )
expected = [('comment', " I'm a valid comment "),
('comment', 'me too!'),
('comment', '--'),
+ ('comment', '-'),
+ ('comment', ''),
+ ('comment', ''),
('comment', ''),
('comment', '--I have many hyphens--'),
('comment', ' I have a > in the middle '),
- ('comment', ' and I have -- in the middle! ')]
+ ('comment', ' and I have -- in the middle! '),
+ ('comment', 'incorrectly-closed-comment'),
+ ('comment', ''),
+ ('comment', '--!'),
+ ('comment', '-- >'),
+ ('comment', ' '),
+ ('comment', '`` now
+ends the comment. ``-- >`` no longer ends the comment. Support abnormally
+ended empty comments ``<!-->`` and ``<!--->``.