crypto: arm/blake2s - fix for big endian
author Eric Biggers <ebiggers@google.com>
Wed, 10 Mar 2021 07:27:26 +0000 (23:27 -0800)
committer Herbert Xu <herbert@gondor.apana.org.au>
Fri, 19 Mar 2021 10:59:45 +0000 (21:59 +1100)
The new ARM BLAKE2s code doesn't work correctly (fails the self-tests)
in big endian kernel builds because it doesn't swap the endianness of
the message words when loading them.  Fix this.

Fixes: 5172d322d34c ("crypto: arm/blake2s - add ARM scalar optimized BLAKE2s")
Signed-off-by: Eric Biggers <ebiggers@google.com>
Acked-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/arm/crypto/blake2s-core.S

index bed897e9a181a1ae0c7104fdd7b73e34e4f74975..86345751bbf3a3d8a7e3af72684985833cf6734b 100644 (file)
@@ -8,6 +8,7 @@
  */
 
 #include <linux/linkage.h>
+#include <asm/assembler.h>
 
        // Registers used to hold message words temporarily.  There aren't
        // enough ARM registers to hold the whole message block, so we have to
 #endif
 .endm
 
+.macro _le32_bswap     a, tmp          // swap byte order of a; no-op unless __ARMEB__
+#ifdef __ARMEB__
+       rev_l           \a, \tmp        // rev_l is defined outside this view (presumably asm/assembler.h)
+#endif
+.endm
+
+.macro _le32_bswap_8x  a, b, c, d, e, f, g, h,  tmp    // _le32_bswap on each of a-h, sharing tmp
+       _le32_bswap     \a, \tmp
+       _le32_bswap     \b, \tmp
+       _le32_bswap     \c, \tmp
+       _le32_bswap     \d, \tmp
+       _le32_bswap     \e, \tmp
+       _le32_bswap     \f, \tmp
+       _le32_bswap     \g, \tmp
+       _le32_bswap     \h, \tmp
+.endm
+
 // Execute a quarter-round of BLAKE2s by mixing two columns or two diagonals.
 // (a0, b0, c0, d0) and (a1, b1, c1, d1) give the registers containing the two
 // columns/diagonals.  s0-s1 are the word offsets to the message words the first
@@ -180,8 +198,10 @@ ENTRY(blake2s_compress_arch)
        tst             r1, #3
        bne             .Lcopy_block_misaligned
        ldmia           r1!, {r2-r9}
+       _le32_bswap_8x  r2, r3, r4, r5, r6, r7, r8, r9,  r14
        stmia           r12!, {r2-r9}
        ldmia           r1!, {r2-r9}
+       _le32_bswap_8x  r2, r3, r4, r5, r6, r7, r8, r9,  r14
        stmia           r12, {r2-r9}
 .Lcopy_block_done:
        str             r1, [sp, #68]           // Update message pointer
@@ -268,6 +288,7 @@ ENTRY(blake2s_compress_arch)
 1:
 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
        ldr             r3, [r1], #4
+       _le32_bswap     r3, r4
 #else
        ldrb            r3, [r1, #0]
        ldrb            r4, [r1, #1]