diff options
author | Michael Orlitzky <mjo@gentoo.org> | 2018-03-24 12:12:23 -0400 |
---|---|---|
committer | Michael Orlitzky <mjo@gentoo.org> | 2018-03-24 12:12:23 -0400 |
commit | 300f46c1c52cc79238e88dba28241a6e78525966 (patch) | |
tree | f4c8ad2563adb5d29c7a87264dc94aaba439dc11 /dev-python | |
parent | dev-perl/Template-DBI: Mark stable (diff) | |
download | gentoo-300f46c1c52cc79238e88dba28241a6e78525966.tar.gz gentoo-300f46c1c52cc79238e88dba28241a6e78525966.tar.bz2 gentoo-300f46c1c52cc79238e88dba28241a6e78525966.zip |
dev-python/pyzor: fix the binary stdin patch to work with v1.0.0.
In my previous commit (adding -r1), I applied a new patch that I've
submitted upstream to address a unicode crash with python-3.x. That
patch applies cleanly against v1.0.0, but won't actually work: the
get_binary_stdin() function it uses exists only in upstream's git
master branch.
To make the patch work (and to fix some other small issues), I've
included the rest of the client changes between v1.0.0 and git
master. There are very few of them -- all python-3.x fixes -- so this
should not be too objectionable.
Bug: https://bugs.gentoo.org/643692
Package-Manager: Portage-2.3.24, Repoman-2.3.6
Diffstat (limited to 'dev-python')
-rw-r--r-- | dev-python/pyzor/files/read-stdin-as-binary-in-get_input_msg.patch | 94 |
1 files changed, 67 insertions, 27 deletions
```diff
diff --git a/dev-python/pyzor/files/read-stdin-as-binary-in-get_input_msg.patch b/dev-python/pyzor/files/read-stdin-as-binary-in-get_input_msg.patch
index 81668e369377..03031a976690 100644
--- a/dev-python/pyzor/files/read-stdin-as-binary-in-get_input_msg.patch
+++ b/dev-python/pyzor/files/read-stdin-as-binary-in-get_input_msg.patch
@@ -1,45 +1,85 @@
-From 6332a429ed415187599ecce7d8a169ee19f0bbe5 Mon Sep 17 00:00:00 2001
+From 66225b32d2774cf37fa7f702f7eb26cd94094482 Mon Sep 17 00:00:00 2001
 From: Michael Orlitzky <michael@orlitzky.com>
-Date: Sun, 4 Mar 2018 17:34:33 -0500
-Subject: [PATCH 1/1] scripts/pyzor: read stdin as binary in _get_input_msg().
+Date: Sun, 4 Mar 2018 17:27:01 -0500
+Subject: [PATCH 1/1] scripts/pyzor: replace the client with the git (+ issue
+ 64 fix) version.
 
-Reading stdin in python-3.x is done as text, with a best-guess
-encoding. But this can go awry: for example, if an iso-8859-1 message
-is passed in and if python guesses the "utf-8" encoding, then read()
-will fail with a UnicodeDecodeError on non-ASCII characters. For
-example, the "copyright" symbol is a single byte 0xa9 in iso-8859-1,
-and the utf-8 decoder can't handle it:
-
-  UnicodeDecodeError: 'utf-8' codec can't decode byte 0xa9... invalid
-  start byte
-
-Instead -- and as was done in python-2.x -- we can read stdin as
-binary using the new get_binary_stdin() function. Afterwards, we use
-email.message_from_bytes() instead of the email.message_from_file()
-constructor to parse the byte data. The resulting function is able to
-correctly parse these messages.
-
-Closes: https://github.com/SpamExperts/pyzor/issues/64
 ---
- scripts/pyzor | 5 ++++-
- 1 file changed, 4 insertions(+), 1 deletion(-)
+ scripts/pyzor | 33 +++++++++++++++++++++++++++------
+ 1 file changed, 27 insertions(+), 6 deletions(-)
 
 diff --git a/scripts/pyzor b/scripts/pyzor
-index 567a7f9..1ba632f 100755
+index 19b1d21..86c6f7d 100755
 --- a/scripts/pyzor
 +++ b/scripts/pyzor
-@@ -171,7 +171,10 @@ def _get_input_digests(dummy):
+@@ -17,9 +17,9 @@ import tempfile
+ import threading
+ 
+ try:
+-    import ConfigParser
+-except ImportError:
+     import configparser as ConfigParser
++except ImportError:
++    import ConfigParser
+ 
+ import pyzor.digest
+ import pyzor.client
+@@ -110,7 +110,7 @@ def load_configuration():
+     config = ConfigParser.ConfigParser()
+     # Set the defaults.
+     config.add_section("client")
+-    for key, value in defaults.iteritems():
++    for key, value in defaults.items():
+         config.set("client", key, value)
+     # Override with the configuration.
+     config.read(os.path.join(options.homedir, "config"))
+@@ -171,14 +171,35 @@ def _get_input_digests(dummy):
  
  
  def _get_input_msg(digester):
 -    msg = email.message_from_file(sys.stdin)
-+    # Read and process stdin as bytes because we don't know its
-+    # encoding. Python-3.x will try to guess -- and can sometimes
-+    # guess wrong -- leading to decoding errors in read().
 +    msg = email.message_from_bytes(get_binary_stdin().read())
      digested = digester(msg).value
      yield digested
  
+ 
++def _is_binary_reader(stream, default=False):
++    try:
++        return isinstance(stream.read(0), bytes)
++    except Exception:
++        return default
++
++
++def get_binary_stdin():
++    # sys.stdin might or might not be binary in some extra cases. By
++    # default it's obviously non binary which is the core of the
++    # problem but the docs recommend changing it to binary for such
++    # cases so we need to deal with it.
++    is_binary = _is_binary_reader(sys.stdin, False)
++    if is_binary:
++        return sys.stdin
++    buf = getattr(sys.stdin, 'buffer', None)
++    if buf is not None and _is_binary_reader(buf, True):
++        return buf
++    raise RuntimeError('Did not manage to get binary stdin')
++
++
+ def _get_input_mbox(digester):
+     tfile = tempfile.NamedTemporaryFile()
+-    tfile.write(sys.stdin.read().encode("utf8"))
++    tfile.write(get_binary_stdin().read())
+     tfile.seek(0)
+     mbox = mailbox.mbox(tfile.name)
+     for msg in mbox:
+@@ -372,7 +393,7 @@ def genkey(client, servers, config, hash_func=hashlib.sha1):
+         return False
+     # pylint: disable-msg=W0612
+     salt = "".join([chr(random.randint(0, 255))
+-                    for unused in xrange(hash_func(b"").digest_size)])
++                    for unused in range(hash_func(b"").digest_size)])
+     if sys.version_info >= (3, 0):
+         salt = salt.encode("utf8")
+     salt_digest = hash_func(salt)
 -- 
 2.13.6
 
```