diff options
Diffstat (limited to 'Lib/email/_header_value_parser.py')
-rw-r--r-- | Lib/email/_header_value_parser.py | 21 |
1 files changed, 21 insertions, 0 deletions
diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 34969ab5915..35d746aa508 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -96,6 +96,18 @@ EXTENDED_ATTRIBUTE_ENDS = ATTRIBUTE_ENDS - set('%') def quote_string(value): return '"'+str(value).replace('\\', '\\\\').replace('"', r'\"')+'"' +# Match a RFC 2047 word, looks like =?utf-8?q?someword?= +rfc2047_matcher = re.compile(r''' + =\? # literal =? + [^?]* # charset + \? # literal ? + [qQbB] # literal 'q' or 'b', case insensitive + \? # literal ? + .*? # encoded word + \?= # literal ?= +''', re.VERBOSE | re.MULTILINE) + + # # TokenList and its subclasses # @@ -1052,6 +1064,10 @@ def get_encoded_word(value): _validate_xtext(vtext) ew.append(vtext) text = ''.join(remainder) + # Encoded words should be followed by a WS + if value and value[0] not in WSP: + ew.defects.append(errors.InvalidHeaderDefect( + "missing trailing whitespace after encoded-word")) return ew, value def get_unstructured(value): @@ -1104,6 +1120,11 @@ def get_unstructured(value): unstructured.append(token) continue tok, *remainder = _wsp_splitter(value, 1) + # Split in the middle of an atom if there is a rfc2047 encoded word + # which does not have WSP on both sides. The defect will be registered + # the next time through the loop. + if rfc2047_matcher.search(tok): + tok, *remainder = value.partition('=?') vtext = ValueTerminal(tok, 'vtext') _validate_xtext(vtext) unstructured.append(vtext) |