crypto: arm64/aes-ce - Simplify round key load sequence
author Ard Biesheuvel <ardb@kernel.org>
Mon, 15 Apr 2024 13:04:26 +0000 (15:04 +0200)
committer Herbert Xu <herbert@gondor.apana.org.au>
Fri, 26 Apr 2024 09:26:09 +0000 (17:26 +0800)
Tweak the round key logic so that the keys can be loaded with a single
branchless sequence of overlapping loads. This is shorter and
simpler, and puts the conditional branches based on the key size further
apart, which might benefit microarchitectures that cannot record taken
branches at every instruction. For these branches, use test-bit-branch
instructions that don't clobber the condition flags.

Note that none of this has any impact on performance, positive or
otherwise (and the branch prediction improvement would only apply to
AES-192, which nobody uses). It does make for nicer code, though.

While at it, use \@ to generate the labels inside the macros, which is
more robust than using fixed numbers, which could clash inadvertently.
Also, bring aes-neon.S in line with these changes, including the switch
to test-and-branch instructions, to avoid surprises in the future when
we might start relying on the condition flags being preserved in the
chaining mode wrappers in aes-modes.S.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
arch/arm64/crypto/aes-ce.S
arch/arm64/crypto/aes-neon.S

index 1dc5bbbfeed238e91c949607f509a82da7ce77e5..b262eaa9170c357124cf44deab3ef512be9c1e8a 100644 (file)
        .endm
 
        /* preload all round keys */
-       .macro          load_round_keys, rounds, rk
-       cmp             \rounds, #12
-       blo             2222f           /* 128 bits */
-       beq             1111f           /* 192 bits */
-       ld1             {v17.4s-v18.4s}, [\rk], #32
-1111:  ld1             {v19.4s-v20.4s}, [\rk], #32
-2222:  ld1             {v21.4s-v24.4s}, [\rk], #64
-       ld1             {v25.4s-v28.4s}, [\rk], #64
-       ld1             {v29.4s-v31.4s}, [\rk]
+       .macro          load_round_keys, rk, nr, tmp
+       add             \tmp, \rk, \nr, sxtw #4
+       sub             \tmp, \tmp, #160
+       ld1             {v17.4s-v20.4s}, [\rk]
+       ld1             {v21.4s-v24.4s}, [\tmp], #64
+       ld1             {v25.4s-v28.4s}, [\tmp], #64
+       ld1             {v29.4s-v31.4s}, [\tmp]
        .endm
 
        /* prepare for encryption with key in rk[] */
        .macro          enc_prepare, rounds, rk, temp
-       mov             \temp, \rk
-       load_round_keys \rounds, \temp
+       load_round_keys \rk, \rounds, \temp
        .endm
 
        /* prepare for encryption (again) but with new key in rk[] */
        .macro          enc_switch_key, rounds, rk, temp
-       mov             \temp, \rk
-       load_round_keys \rounds, \temp
+       load_round_keys \rk, \rounds, \temp
        .endm
 
        /* prepare for decryption with key in rk[] */
        .macro          dec_prepare, rounds, rk, temp
-       mov             \temp, \rk
-       load_round_keys \rounds, \temp
+       load_round_keys \rk, \rounds, \temp
        .endm
 
        .macro          do_enc_Nx, de, mc, k, i0, i1, i2, i3, i4
 
        /* up to 5 interleaved blocks */
        .macro          do_block_Nx, enc, rounds, i0, i1, i2, i3, i4
-       cmp             \rounds, #12
-       blo             2222f           /* 128 bits */
-       beq             1111f           /* 192 bits */
+       tbz             \rounds, #2, .L\@       /* 128 bits */
        round_Nx        \enc, v17, \i0, \i1, \i2, \i3, \i4
        round_Nx        \enc, v18, \i0, \i1, \i2, \i3, \i4
-1111:  round_Nx        \enc, v19, \i0, \i1, \i2, \i3, \i4
+       tbz             \rounds, #1, .L\@       /* 192 bits */
+       round_Nx        \enc, v19, \i0, \i1, \i2, \i3, \i4
        round_Nx        \enc, v20, \i0, \i1, \i2, \i3, \i4
-2222:  .irp            key, v21, v22, v23, v24, v25, v26, v27, v28, v29
+.L\@:  .irp            key, v21, v22, v23, v24, v25, v26, v27, v28, v29
        round_Nx        \enc, \key, \i0, \i1, \i2, \i3, \i4
        .endr
        fin_round_Nx    \enc, v30, v31, \i0, \i1, \i2, \i3, \i4
index 9de7fbc797af7932693c5af9727cf17b0e1e91f9..3a8961b6ea517441c086e845db13ce788421d570 100644 (file)
        ld1             {v15.4s}, [\rk]
        add             \rkp, \rk, #16
        mov             \i, \rounds
-1111:  eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
+.La\@: eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
        movi            v15.16b, #0x40
        tbl             \in\().16b, {\in\().16b}, v13.16b       /* ShiftRows */
        sub_bytes       \in
-       subs            \i, \i, #1
+       sub             \i, \i, #1
        ld1             {v15.4s}, [\rkp], #16
-       beq             2222f
+       cbz             \i, .Lb\@
        mix_columns     \in, \enc
-       b               1111b
-2222:  eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
+       b               .La\@
+.Lb\@: eor             \in\().16b, \in\().16b, v15.16b         /* ^round key */
        .endm
 
        .macro          encrypt_block, in, rounds, rk, rkp, i
        ld1             {v15.4s}, [\rk]
        add             \rkp, \rk, #16
        mov             \i, \rounds
-1111:  eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
+.La\@: eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
        eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
        eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
        eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */
        tbl             \in2\().16b, {\in2\().16b}, v13.16b     /* ShiftRows */
        tbl             \in3\().16b, {\in3\().16b}, v13.16b     /* ShiftRows */
        sub_bytes_4x    \in0, \in1, \in2, \in3
-       subs            \i, \i, #1
+       sub             \i, \i, #1
        ld1             {v15.4s}, [\rkp], #16
-       beq             2222f
+       cbz             \i, .Lb\@
        mix_columns_2x  \in0, \in1, \enc
        mix_columns_2x  \in2, \in3, \enc
-       b               1111b
-2222:  eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
+       b               .La\@
+.Lb\@: eor             \in0\().16b, \in0\().16b, v15.16b       /* ^round key */
        eor             \in1\().16b, \in1\().16b, v15.16b       /* ^round key */
        eor             \in2\().16b, \in2\().16b, v15.16b       /* ^round key */
        eor             \in3\().16b, \in3\().16b, v15.16b       /* ^round key */