diff options
author | Ezio Melotti <ezio.melotti@gmail.com> | 2015-09-06 21:44:45 +0300 |
---|---|---|
committer | Ezio Melotti <ezio.melotti@gmail.com> | 2015-09-06 21:44:45 +0300 |
commit | 20a2c6482e28a2ca8d257ba646f2b8ead4837387 (patch) | |
tree | 92fc8ee593df66a6df5839130f1ab5be161aec46 /Lib/html | |
parent | Fix, refactor and extend tests for shutil.make_archive(). (diff) | |
parent | #23144: Make sure that HTMLParser.feed() returns all the data, even when conv... (diff) | |
download | cpython-20a2c6482e28a2ca8d257ba646f2b8ead4837387.tar.gz cpython-20a2c6482e28a2ca8d257ba646f2b8ead4837387.tar.bz2 cpython-20a2c6482e28a2ca8d257ba646f2b8ead4837387.zip |
#23144: merge with 3.4.
Diffstat (limited to 'Lib/html')
-rw-r--r-- | Lib/html/parser.py | 10 |
1 files changed, 9 insertions, 1 deletions
```diff
diff --git a/Lib/html/parser.py b/Lib/html/parser.py
index 390d4ccc488..43e6411b735 100644
--- a/Lib/html/parser.py
+++ b/Lib/html/parser.py
@@ -139,7 +139,15 @@ class HTMLParser(_markupbase.ParserBase):
             if self.convert_charrefs and not self.cdata_elem:
                 j = rawdata.find('<', i)
                 if j < 0:
-                    if not end:
+                    # if we can't find the next <, either we are at the end
+                    # or there's more text incoming. If the latter is True,
+                    # we can't pass the text to handle_data in case we have
+                    # a charref cut in half at end. Try to determine if
+                    # this is the case before proceding by looking for an
+                    # & near the end and see if it's followed by a space or ;.
+                    amppos = rawdata.rfind('&', max(i, n-34))
+                    if (amppos >= 0 and
+                        not re.compile(r'[\s;]').search(rawdata, amppos)):
                         break  # wait till we get all the text
                     j = n
             else:
```