aboutsummaryrefslogtreecommitdiff
path: root/Lib/html
diff options
context:
space:
mode:
author: Ezio Melotti <ezio.melotti@gmail.com> 2011-10-28 13:21:09 +0300
committer: Ezio Melotti <ezio.melotti@gmail.com> 2011-10-28 13:21:09 +0300
commit: f50ffa94abe67c6ef5e615198af15f72e7cd2a9b (patch)
tree: d37cac5185ec2d8de23e854ff6dddd0bfb54cb49 /Lib/html
parent: Merge heads. (diff)
download: cpython-f50ffa94abe67c6ef5e615198af15f72e7cd2a9b.tar.gz
cpython-f50ffa94abe67c6ef5e615198af15f72e7cd2a9b.tar.bz2
cpython-f50ffa94abe67c6ef5e615198af15f72e7cd2a9b.zip
#13273: fix a bug that prevented HTMLParser from properly detecting some tags when strict=False.
Diffstat (limited to 'Lib/html')
-rw-r--r-- Lib/html/parser.py 5
1 file changed, 2 insertions, 3 deletions
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index a6d5be94fa3..affaf7344fa 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -30,7 +30,7 @@ attrfind = re.compile(
r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
attrfind_tolerant = re.compile(
- r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
+ r',?\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
r'(\'[^\']*\'|"[^"]*"|[^>\s]*))?')
locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
@@ -277,12 +277,11 @@ class HTMLParser(_markupbase.ParserBase):
assert match, 'unexpected call to parse_starttag()'
k = match.end()
self.lasttag = tag = rawdata[i+1:k].lower()
-
while k < endpos:
if self.strict:
m = attrfind.match(rawdata, k)
else:
- m = attrfind_tolerant.search(rawdata, k)
+ m = attrfind_tolerant.match(rawdata, k)
if not m:
break
attrname, rest, attrvalue = m.group(1, 2, 3)