diff options
author | Sam James <sam@gentoo.org> | 2023-05-05 06:00:25 +0100 |
---|---|---|
committer | Sam James <sam@gentoo.org> | 2023-05-05 06:03:26 +0100 |
commit | ca1b8de7e3bc7ae01c0c46a98081def7e2612628 (patch) | |
tree | 3d741e140ed6aa5033f031bfa1e090283ec9f039 /app-backup/tarsnap/files | |
parent | app-backup/tarsnap: add 1.0.40 (diff) | |
download | gentoo-ca1b8de7e3bc7ae01c0c46a98081def7e2612628.tar.gz gentoo-ca1b8de7e3bc7ae01c0c46a98081def7e2612628.tar.bz2 gentoo-ca1b8de7e3bc7ae01c0c46a98081def7e2612628.zip |
app-backup/tarsnap: backport strict aliasing/UB fix
Doing this in a new revision to allow easy testing compared w/ 1.0.40.
Signed-off-by: Sam James <sam@gentoo.org>
Diffstat (limited to 'app-backup/tarsnap/files')
-rw-r--r-- | app-backup/tarsnap/files/tarsnap-1.0.40-strict-aliasing-fix.patch | 158 |
1 files changed, 158 insertions, 0 deletions
diff --git a/app-backup/tarsnap/files/tarsnap-1.0.40-strict-aliasing-fix.patch b/app-backup/tarsnap/files/tarsnap-1.0.40-strict-aliasing-fix.patch new file mode 100644 index 000000000000..d168d99222e8 --- /dev/null +++ b/app-backup/tarsnap/files/tarsnap-1.0.40-strict-aliasing-fix.patch @@ -0,0 +1,158 @@ +https://github.com/Tarsnap/tarsnap/commit/ca40c06f290fb8298dc2e583303d45b58878f37b +https://github.com/Tarsnap/tarsnap/commit/4af6d8350ab53d0f1f3104ce3d9072c2d5f9ef7a + +From 4af6d8350ab53d0f1f3104ce3d9072c2d5f9ef7a Mon Sep 17 00:00:00 2001 +From: Graham Percival <gperciva@tarsnap.com> +Date: Fri, 1 Apr 2022 16:58:43 -0700 +Subject: [PATCH] scrypt: Fix strict aliasing + +The original scrypt code treated its data as blobs of bytes, accessing +them in whatever manner was convenient from time to time: + +* as 32-bit words or vectors thereof for the purpose of the Salsa20/8 core +* in machine-word-sized chunks (aka. size_t) for block copy and xor operations +* as 32-bit words for the Integerify function. + +This worked fine at the time, but newer compilers apply strict aliasing rules +which allow them to assume that e.g. data accessed as a uint32_t is not the +same as data accessed as a size_t, resulting in miscompilation. + +Note that in recent versions of scrypt (after 2015-07-18; versions 1.2.0 and +later) such miscompilation should be detected by the built-in runtime testing. + +To avoid aliasing problems, the generic scrypt code now operates on uint32_t +throughout while the SSE2-enabled scrypt code operates on __m128i throughout. + +Experimentally, we found that memcpy() speeds up blkcpy() in the plain C +case, but slowed it down in the _sse2.c case (probably because memcpy +can make use of vector instructions internally, but cannot assume that +it will always have a multiple of 16 bytes, as we do). +--- a/lib/crypto/crypto_scrypt_smix.c ++++ b/lib/crypto/crypto_scrypt_smix.c +@@ -27,39 +27,32 @@ + * online backup system. + */ + #include <stdint.h> ++#include <string.h> + + #include "sysendian.h" + + #include "crypto_scrypt_smix.h" + +-static void blkcpy(void *, const void *, size_t); +-static void blkxor(void *, const void *, size_t); ++static void blkcpy(uint32_t *, const uint32_t *, size_t); ++static void blkxor(uint32_t *, const uint32_t *, size_t); + static void salsa20_8(uint32_t[16]); + static void blockmix_salsa8(const uint32_t *, uint32_t *, uint32_t *, size_t); +-static uint64_t integerify(const void *, size_t); ++static uint64_t integerify(const uint32_t *, size_t); + + static void +-blkcpy(void * dest, const void * src, size_t len) ++blkcpy(uint32_t * dest, const uint32_t * src, size_t len) + { +- size_t * D = dest; +- const size_t * S = src; +- size_t L = len / sizeof(size_t); +- size_t i; + +- for (i = 0; i < L; i++) +- D[i] = S[i]; ++ memcpy(dest, src, len); + } + + static void +-blkxor(void * dest, const void * src, size_t len) ++blkxor(uint32_t * dest, const uint32_t * src, size_t len) + { +- size_t * D = dest; +- const size_t * S = src; +- size_t L = len / sizeof(size_t); + size_t i; + +- for (i = 0; i < L; i++) +- D[i] ^= S[i]; ++ for (i = 0; i < len / 4; i++) ++ dest[i] ^= src[i]; + } + + /** +@@ -145,9 +138,9 @@ blockmix_salsa8(const uint32_t * Bin, uint32_t * Bout, uint32_t * X, size_t r) + * Return the result of parsing B_{2r-1} as a little-endian integer. + */ + static uint64_t +-integerify(const void * B, size_t r) ++integerify(const uint32_t * B, size_t r) + { +- const uint32_t * X = (const void *)((uintptr_t)(B) + (2 * r - 1) * 64); ++ const uint32_t * X = B + (2 * r - 1) * 16; + + return (((uint64_t)(X[1]) << 32) + X[0]); + } +--- a/lib/crypto/crypto_scrypt_smix_sse2.c ++++ b/lib/crypto/crypto_scrypt_smix_sse2.c +@@ -36,34 +36,30 @@ + + #include "crypto_scrypt_smix_sse2.h" + +-static void blkcpy(void *, const void *, size_t); +-static void blkxor(void *, const void *, size_t); ++static void blkcpy(__m128i *, const __m128i *, size_t); ++static void blkxor(__m128i *, const __m128i *, size_t); + static void salsa20_8(__m128i[4]); + static void blockmix_salsa8(const __m128i *, __m128i *, __m128i *, size_t); +-static uint64_t integerify(const void *, size_t); ++static uint64_t integerify(const __m128i *, size_t); + + static void +-blkcpy(void * dest, const void * src, size_t len) ++blkcpy(__m128i * dest, const __m128i * src, size_t len) + { +- __m128i * D = dest; +- const __m128i * S = src; + size_t L = len / 16; + size_t i; + + for (i = 0; i < L; i++) +- D[i] = S[i]; ++ dest[i] = src[i]; + } + + static void +-blkxor(void * dest, const void * src, size_t len) ++blkxor(__m128i * dest, const __m128i * src, size_t len) + { +- __m128i * D = dest; +- const __m128i * S = src; + size_t L = len / 16; + size_t i; + + for (i = 0; i < L; i++) +- D[i] = _mm_xor_si128(D[i], S[i]); ++ dest[i] = _mm_xor_si128(dest[i], src[i]); + } + + /** +@@ -168,11 +164,18 @@ blockmix_salsa8(const __m128i * Bin, __m128i * Bout, __m128i * X, size_t r) + * Note that B's layout is permuted compared to the generic implementation. + */ + static uint64_t +-integerify(const void * B, size_t r) ++integerify(const __m128i * B, size_t r) + { +- const uint32_t * X = (const void *)((uintptr_t)(B) + (2 * r - 1) * 64); ++ const __m128i * X = B + (2*r - 1) * 4; ++ uint32_t X0, X13; + +- return (((uint64_t)(X[13]) << 32) + X[0]); ++ /* Get the first 32-bit element in X[0]. */ ++ X0 = (uint32_t)_mm_cvtsi128_si32(X[0]); ++ ++ /* Get the second 32-bit element in X[3]. */ ++ X13 = (uint32_t)_mm_cvtsi128_si32(_mm_srli_si128(X[3], 4)); ++ ++ return (((uint64_t)(X13) << 32) + X0); + } + + /** |