diff options
author | Ezio Melotti <ezio.melotti@gmail.com> | 2011-10-28 13:21:09 +0300 |
---|---|---|
committer | Ezio Melotti <ezio.melotti@gmail.com> | 2011-10-28 13:21:09 +0300 |
commit | f50ffa94abe67c6ef5e615198af15f72e7cd2a9b (patch) | |
tree | d37cac5185ec2d8de23e854ff6dddd0bfb54cb49 /Lib/html | |
parent | Merge heads. (diff) | |
download | cpython-f50ffa94abe67c6ef5e615198af15f72e7cd2a9b.tar.gz cpython-f50ffa94abe67c6ef5e615198af15f72e7cd2a9b.tar.bz2 cpython-f50ffa94abe67c6ef5e615198af15f72e7cd2a9b.zip |
#13273: fix a bug that prevented HTMLParser from properly detecting some tags when strict=False.
Diffstat (limited to 'Lib/html')
-rw-r--r-- | Lib/html/parser.py | 5 |
1 file changed, 2 insertions, 3 deletions
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index a6d5be94fa3..affaf7344fa 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -30,7 +30,7 @@ attrfind = re.compile(
     r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
     r'(\'[^\']*\'|"[^"]*"|[^\s"\'=<>`]*))?')
 attrfind_tolerant = re.compile(
-    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
+    r',?\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
     r'(\'[^\']*\'|"[^"]*"|[^>\s]*))?')
 locatestarttagend = re.compile(r"""
   <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
@@ -277,12 +277,11 @@ class HTMLParser(_markupbase.ParserBase):
         assert match, 'unexpected call to parse_starttag()'
         k = match.end()
         self.lasttag = tag = rawdata[i+1:k].lower()
-
         while k < endpos:
             if self.strict:
                 m = attrfind.match(rawdata, k)
             else:
-                m = attrfind_tolerant.search(rawdata, k)
+                m = attrfind_tolerant.match(rawdata, k)
             if not m:
                 break
             attrname, rest, attrvalue = m.group(1, 2, 3)