lib/xor: make xor prototypes more friendly to compiler vectorization
author Ard Biesheuvel <ardb@kernel.org>
Sat, 5 Feb 2022 15:23:45 +0000 (16:23 +0100)
committer Herbert Xu <herbert@gondor.apana.org.au>
Fri, 11 Feb 2022 09:39:39 +0000 (20:39 +1100)
Modern compilers are perfectly capable of extracting parallelism from
the XOR routines, provided that the prototypes reflect the nature of the
input accurately, in particular, the fact that the input vectors are
expected not to overlap. This is not documented explicitly, but is
implied by the interchangeability of the various C routines, some of
which use temporary variables while others don't: this means that these
routines only behave identically for non-overlapping inputs.

So let's decorate these input vectors with the __restrict modifier,
which informs the compiler that there is no overlap. While at it, make
the input-only vectors pointer-to-const as well.

Tested-by: Nathan Chancellor <nathan@kernel.org>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Link: https://github.com/ClangBuiltLinux/linux/issues/563
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
17 files changed:
arch/alpha/include/asm/xor.h
arch/arm/include/asm/xor.h
arch/arm64/include/asm/xor.h
arch/arm64/lib/xor-neon.c
arch/ia64/include/asm/xor.h
arch/powerpc/include/asm/xor_altivec.h
arch/powerpc/lib/xor_vmx.c
arch/powerpc/lib/xor_vmx.h
arch/powerpc/lib/xor_vmx_glue.c
arch/s390/lib/xor.c
arch/sparc/include/asm/xor_32.h
arch/sparc/include/asm/xor_64.h
arch/x86/include/asm/xor.h
arch/x86/include/asm/xor_32.h
arch/x86/include/asm/xor_avx.h
include/asm-generic/xor.h
include/linux/raid/xor.h

index 5aeb4fb3cb7cbfcdcf298cc6cc3b7df3539ecda4..e0de0c233ab923f477ae03302015199bf9935050 100644 (file)
@@ -5,24 +5,43 @@
  * Optimized RAID-5 checksumming functions for alpha EV5 and EV6
  */
 
-extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *);
-extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *,
-                       unsigned long *);
-extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *,
-                       unsigned long *, unsigned long *);
-extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *,
-                       unsigned long *, unsigned long *, unsigned long *);
+extern void
+xor_alpha_2(unsigned long bytes, unsigned long * __restrict p1,
+           const unsigned long * __restrict p2);
+extern void
+xor_alpha_3(unsigned long bytes, unsigned long * __restrict p1,
+           const unsigned long * __restrict p2,
+           const unsigned long * __restrict p3);
+extern void
+xor_alpha_4(unsigned long bytes, unsigned long * __restrict p1,
+           const unsigned long * __restrict p2,
+           const unsigned long * __restrict p3,
+           const unsigned long * __restrict p4);
+extern void
+xor_alpha_5(unsigned long bytes, unsigned long * __restrict p1,
+           const unsigned long * __restrict p2,
+           const unsigned long * __restrict p3,
+           const unsigned long * __restrict p4,
+           const unsigned long * __restrict p5);
 
-extern void xor_alpha_prefetch_2(unsigned long, unsigned long *,
-                                unsigned long *);
-extern void xor_alpha_prefetch_3(unsigned long, unsigned long *,
-                                unsigned long *, unsigned long *);
-extern void xor_alpha_prefetch_4(unsigned long, unsigned long *,
-                                unsigned long *, unsigned long *,
-                                unsigned long *);
-extern void xor_alpha_prefetch_5(unsigned long, unsigned long *,
-                                unsigned long *, unsigned long *,
-                                unsigned long *, unsigned long *);
+extern void
+xor_alpha_prefetch_2(unsigned long bytes, unsigned long * __restrict p1,
+                    const unsigned long * __restrict p2);
+extern void
+xor_alpha_prefetch_3(unsigned long bytes, unsigned long * __restrict p1,
+                    const unsigned long * __restrict p2,
+                    const unsigned long * __restrict p3);
+extern void
+xor_alpha_prefetch_4(unsigned long bytes, unsigned long * __restrict p1,
+                    const unsigned long * __restrict p2,
+                    const unsigned long * __restrict p3,
+                    const unsigned long * __restrict p4);
+extern void
+xor_alpha_prefetch_5(unsigned long bytes, unsigned long * __restrict p1,
+                    const unsigned long * __restrict p2,
+                    const unsigned long * __restrict p3,
+                    const unsigned long * __restrict p4,
+                    const unsigned long * __restrict p5);
 
 asm("                                                          \n\
        .text                                                   \n\
index aefddec79286a1419ba48ab5078f593c48b823a3..669cad5194d3d55587831e6e9937138b8f24308c 100644 (file)
@@ -44,7 +44,8 @@
                : "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4))
 
 static void
-xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_arm4regs_2(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2)
 {
        unsigned int lines = bytes / sizeof(unsigned long) / 4;
        register unsigned int a1 __asm__("r4");
@@ -64,8 +65,9 @@ xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-               unsigned long *p3)
+xor_arm4regs_3(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2,
+              const unsigned long * __restrict p3)
 {
        unsigned int lines = bytes / sizeof(unsigned long) / 4;
        register unsigned int a1 __asm__("r4");
@@ -86,8 +88,10 @@ xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-               unsigned long *p3, unsigned long *p4)
+xor_arm4regs_4(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2,
+              const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4)
 {
        unsigned int lines = bytes / sizeof(unsigned long) / 2;
        register unsigned int a1 __asm__("r8");
@@ -105,8 +109,11 @@ xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_arm4regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-               unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_arm4regs_5(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2,
+              const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4,
+              const unsigned long * __restrict p5)
 {
        unsigned int lines = bytes / sizeof(unsigned long) / 2;
        register unsigned int a1 __asm__("r8");
@@ -146,7 +153,8 @@ static struct xor_block_template xor_block_arm4regs = {
 extern struct xor_block_template const xor_block_neon_inner;
 
 static void
-xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+          const unsigned long * __restrict p2)
 {
        if (in_interrupt()) {
                xor_arm4regs_2(bytes, p1, p2);
@@ -158,8 +166,9 @@ xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-               unsigned long *p3)
+xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+          const unsigned long * __restrict p2,
+          const unsigned long * __restrict p3)
 {
        if (in_interrupt()) {
                xor_arm4regs_3(bytes, p1, p2, p3);
@@ -171,8 +180,10 @@ xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-               unsigned long *p3, unsigned long *p4)
+xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+          const unsigned long * __restrict p2,
+          const unsigned long * __restrict p3,
+          const unsigned long * __restrict p4)
 {
        if (in_interrupt()) {
                xor_arm4regs_4(bytes, p1, p2, p3, p4);
@@ -184,8 +195,11 @@ xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-               unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+          const unsigned long * __restrict p2,
+          const unsigned long * __restrict p3,
+          const unsigned long * __restrict p4,
+          const unsigned long * __restrict p5)
 {
        if (in_interrupt()) {
                xor_arm4regs_5(bytes, p1, p2, p3, p4, p5);
index 947f6a4f1aa0af556a1c951b4e7ba812afa18c21..befcd8a7abc98d369473016a30cbd8c29e46f514 100644 (file)
@@ -16,7 +16,8 @@
 extern struct xor_block_template const xor_block_inner_neon;
 
 static void
-xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+          const unsigned long * __restrict p2)
 {
        kernel_neon_begin();
        xor_block_inner_neon.do_2(bytes, p1, p2);
@@ -24,8 +25,9 @@ xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-               unsigned long *p3)
+xor_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+          const unsigned long * __restrict p2,
+          const unsigned long * __restrict p3)
 {
        kernel_neon_begin();
        xor_block_inner_neon.do_3(bytes, p1, p2, p3);
@@ -33,8 +35,10 @@ xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-               unsigned long *p3, unsigned long *p4)
+xor_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+          const unsigned long * __restrict p2,
+          const unsigned long * __restrict p3,
+          const unsigned long * __restrict p4)
 {
        kernel_neon_begin();
        xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4);
@@ -42,8 +46,11 @@ xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-               unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+          const unsigned long * __restrict p2,
+          const unsigned long * __restrict p3,
+          const unsigned long * __restrict p4,
+          const unsigned long * __restrict p5)
 {
        kernel_neon_begin();
        xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5);
index d189cf4e70ea62b77ff8edcb31d506eeeafa8e42..96b171995d198fe9278e615a3a04588c50594880 100644 (file)
@@ -10,8 +10,8 @@
 #include <linux/module.h>
 #include <asm/neon-intrinsics.h>
 
-void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1,
-       unsigned long *p2)
+void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1,
+       const unsigned long * __restrict p2)
 {
        uint64_t *dp1 = (uint64_t *)p1;
        uint64_t *dp2 = (uint64_t *)p2;
@@ -37,8 +37,9 @@ void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1,
        } while (--lines > 0);
 }
 
-void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1,
-       unsigned long *p2, unsigned long *p3)
+void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1,
+       const unsigned long * __restrict p2,
+       const unsigned long * __restrict p3)
 {
        uint64_t *dp1 = (uint64_t *)p1;
        uint64_t *dp2 = (uint64_t *)p2;
@@ -72,8 +73,10 @@ void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1,
        } while (--lines > 0);
 }
 
-void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1,
-       unsigned long *p2, unsigned long *p3, unsigned long *p4)
+void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1,
+       const unsigned long * __restrict p2,
+       const unsigned long * __restrict p3,
+       const unsigned long * __restrict p4)
 {
        uint64_t *dp1 = (uint64_t *)p1;
        uint64_t *dp2 = (uint64_t *)p2;
@@ -115,9 +118,11 @@ void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1,
        } while (--lines > 0);
 }
 
-void xor_arm64_neon_5(unsigned long bytes, unsigned long *p1,
-       unsigned long *p2, unsigned long *p3,
-       unsigned long *p4, unsigned long *p5)
+void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1,
+       const unsigned long * __restrict p2,
+       const unsigned long * __restrict p3,
+       const unsigned long * __restrict p4,
+       const unsigned long * __restrict p5)
 {
        uint64_t *dp1 = (uint64_t *)p1;
        uint64_t *dp2 = (uint64_t *)p2;
@@ -186,8 +191,10 @@ static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r)
        return res;
 }
 
-static void xor_arm64_eor3_3(unsigned long bytes, unsigned long *p1,
-                            unsigned long *p2, unsigned long *p3)
+static void xor_arm64_eor3_3(unsigned long bytes,
+       unsigned long * __restrict p1,
+       const unsigned long * __restrict p2,
+       const unsigned long * __restrict p3)
 {
        uint64_t *dp1 = (uint64_t *)p1;
        uint64_t *dp2 = (uint64_t *)p2;
@@ -219,9 +226,11 @@ static void xor_arm64_eor3_3(unsigned long bytes, unsigned long *p1,
        } while (--lines > 0);
 }
 
-static void xor_arm64_eor3_4(unsigned long bytes, unsigned long *p1,
-                            unsigned long *p2, unsigned long *p3,
-                            unsigned long *p4)
+static void xor_arm64_eor3_4(unsigned long bytes,
+       unsigned long * __restrict p1,
+       const unsigned long * __restrict p2,
+       const unsigned long * __restrict p3,
+       const unsigned long * __restrict p4)
 {
        uint64_t *dp1 = (uint64_t *)p1;
        uint64_t *dp2 = (uint64_t *)p2;
@@ -261,9 +270,12 @@ static void xor_arm64_eor3_4(unsigned long bytes, unsigned long *p1,
        } while (--lines > 0);
 }
 
-static void xor_arm64_eor3_5(unsigned long bytes, unsigned long *p1,
-                            unsigned long *p2, unsigned long *p3,
-                            unsigned long *p4, unsigned long *p5)
+static void xor_arm64_eor3_5(unsigned long bytes,
+       unsigned long * __restrict p1,
+       const unsigned long * __restrict p2,
+       const unsigned long * __restrict p3,
+       const unsigned long * __restrict p4,
+       const unsigned long * __restrict p5)
 {
        uint64_t *dp1 = (uint64_t *)p1;
        uint64_t *dp2 = (uint64_t *)p2;
index 673051bf9d7daeec20983f60b4877cc20b6bae2c..6785f70d3208bc5559582e759a13c131b87da852 100644 (file)
@@ -4,13 +4,20 @@
  */
 
 
-extern void xor_ia64_2(unsigned long, unsigned long *, unsigned long *);
-extern void xor_ia64_3(unsigned long, unsigned long *, unsigned long *,
-                      unsigned long *);
-extern void xor_ia64_4(unsigned long, unsigned long *, unsigned long *,
-                      unsigned long *, unsigned long *);
-extern void xor_ia64_5(unsigned long, unsigned long *, unsigned long *,
-                      unsigned long *, unsigned long *, unsigned long *);
+extern void xor_ia64_2(unsigned long bytes, unsigned long * __restrict p1,
+                      const unsigned long * __restrict p2);
+extern void xor_ia64_3(unsigned long bytes, unsigned long * __restrict p1,
+                      const unsigned long * __restrict p2,
+                      const unsigned long * __restrict p3);
+extern void xor_ia64_4(unsigned long bytes, unsigned long * __restrict p1,
+                      const unsigned long * __restrict p2,
+                      const unsigned long * __restrict p3,
+                      const unsigned long * __restrict p4);
+extern void xor_ia64_5(unsigned long bytes, unsigned long * __restrict p1,
+                      const unsigned long * __restrict p2,
+                      const unsigned long * __restrict p3,
+                      const unsigned long * __restrict p4,
+                      const unsigned long * __restrict p5);
 
 static struct xor_block_template xor_block_ia64 = {
        .name = "ia64",
index 6ca923510b5971edd4c68a10046351c82e274358..294620a25f8025127d471f27345c96ce58d458b2 100644 (file)
@@ -3,17 +3,20 @@
 #define _ASM_POWERPC_XOR_ALTIVEC_H
 
 #ifdef CONFIG_ALTIVEC
-
-void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
-                  unsigned long *v2_in);
-void xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
-                  unsigned long *v2_in, unsigned long *v3_in);
-void xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
-                  unsigned long *v2_in, unsigned long *v3_in,
-                  unsigned long *v4_in);
-void xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
-                  unsigned long *v2_in, unsigned long *v3_in,
-                  unsigned long *v4_in, unsigned long *v5_in);
+void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+                  const unsigned long * __restrict p2);
+void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+                  const unsigned long * __restrict p2,
+                  const unsigned long * __restrict p3);
+void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+                  const unsigned long * __restrict p2,
+                  const unsigned long * __restrict p3,
+                  const unsigned long * __restrict p4);
+void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+                  const unsigned long * __restrict p2,
+                  const unsigned long * __restrict p3,
+                  const unsigned long * __restrict p4,
+                  const unsigned long * __restrict p5);
 
 #endif
 #endif /* _ASM_POWERPC_XOR_ALTIVEC_H */
index 54e61979e80e5553115b2e144f49089e3676ab8e..aab49d056d1883a42c8fffd7a3d5c3549c8d73f9 100644 (file)
@@ -49,8 +49,9 @@ typedef vector signed char unative_t;
                V1##_3 = vec_xor(V1##_3, V2##_3);       \
        } while (0)
 
-void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
-                    unsigned long *v2_in)
+void __xor_altivec_2(unsigned long bytes,
+                    unsigned long * __restrict v1_in,
+                    const unsigned long * __restrict v2_in)
 {
        DEFINE(v1);
        DEFINE(v2);
@@ -67,8 +68,10 @@ void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
        } while (--lines > 0);
 }
 
-void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
-                    unsigned long *v2_in, unsigned long *v3_in)
+void __xor_altivec_3(unsigned long bytes,
+                    unsigned long * __restrict v1_in,
+                    const unsigned long * __restrict v2_in,
+                    const unsigned long * __restrict v3_in)
 {
        DEFINE(v1);
        DEFINE(v2);
@@ -89,9 +92,11 @@ void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
        } while (--lines > 0);
 }
 
-void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
-                    unsigned long *v2_in, unsigned long *v3_in,
-                    unsigned long *v4_in)
+void __xor_altivec_4(unsigned long bytes,
+                    unsigned long * __restrict v1_in,
+                    const unsigned long * __restrict v2_in,
+                    const unsigned long * __restrict v3_in,
+                    const unsigned long * __restrict v4_in)
 {
        DEFINE(v1);
        DEFINE(v2);
@@ -116,9 +121,12 @@ void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
        } while (--lines > 0);
 }
 
-void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
-                    unsigned long *v2_in, unsigned long *v3_in,
-                    unsigned long *v4_in, unsigned long *v5_in)
+void __xor_altivec_5(unsigned long bytes,
+                    unsigned long * __restrict v1_in,
+                    const unsigned long * __restrict v2_in,
+                    const unsigned long * __restrict v3_in,
+                    const unsigned long * __restrict v4_in,
+                    const unsigned long * __restrict v5_in)
 {
        DEFINE(v1);
        DEFINE(v2);
index 5c2b0839b1794018bd05b66c4ab31f79c84addd2..573c41d90dac5297f0eac9859f5b718931f2f1b3 100644 (file)
@@ -6,16 +6,17 @@
  * outside of the enable/disable altivec block.
  */
 
-void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
-                            unsigned long *v2_in);
-
-void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in,
-                            unsigned long *v2_in, unsigned long *v3_in);
-
-void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in,
-                            unsigned long *v2_in, unsigned long *v3_in,
-                            unsigned long *v4_in);
-
-void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in,
-                            unsigned long *v2_in, unsigned long *v3_in,
-                            unsigned long *v4_in, unsigned long *v5_in);
+void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+                    const unsigned long * __restrict p2);
+void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+                    const unsigned long * __restrict p2,
+                    const unsigned long * __restrict p3);
+void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+                    const unsigned long * __restrict p2,
+                    const unsigned long * __restrict p3,
+                    const unsigned long * __restrict p4);
+void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+                    const unsigned long * __restrict p2,
+                    const unsigned long * __restrict p3,
+                    const unsigned long * __restrict p4,
+                    const unsigned long * __restrict p5);
index 80dba916c3674e0524c87bef30ab878f721d40e1..35d917ece4d1e4db7d5b33af0e693a588b4c0a20 100644 (file)
 #include <asm/xor_altivec.h>
 #include "xor_vmx.h"
 
-void xor_altivec_2(unsigned long bytes, unsigned long *v1_in,
-                  unsigned long *v2_in)
+void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1,
+                  const unsigned long * __restrict p2)
 {
        preempt_disable();
        enable_kernel_altivec();
-       __xor_altivec_2(bytes, v1_in, v2_in);
+       __xor_altivec_2(bytes, p1, p2);
        disable_kernel_altivec();
        preempt_enable();
 }
 EXPORT_SYMBOL(xor_altivec_2);
 
-void xor_altivec_3(unsigned long bytes,  unsigned long *v1_in,
-                  unsigned long *v2_in, unsigned long *v3_in)
+void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1,
+                  const unsigned long * __restrict p2,
+                  const unsigned long * __restrict p3)
 {
        preempt_disable();
        enable_kernel_altivec();
-       __xor_altivec_3(bytes, v1_in, v2_in, v3_in);
+       __xor_altivec_3(bytes, p1, p2, p3);
        disable_kernel_altivec();
        preempt_enable();
 }
 EXPORT_SYMBOL(xor_altivec_3);
 
-void xor_altivec_4(unsigned long bytes,  unsigned long *v1_in,
-                  unsigned long *v2_in, unsigned long *v3_in,
-                  unsigned long *v4_in)
+void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1,
+                  const unsigned long * __restrict p2,
+                  const unsigned long * __restrict p3,
+                  const unsigned long * __restrict p4)
 {
        preempt_disable();
        enable_kernel_altivec();
-       __xor_altivec_4(bytes, v1_in, v2_in, v3_in, v4_in);
+       __xor_altivec_4(bytes, p1, p2, p3, p4);
        disable_kernel_altivec();
        preempt_enable();
 }
 EXPORT_SYMBOL(xor_altivec_4);
 
-void xor_altivec_5(unsigned long bytes,  unsigned long *v1_in,
-                  unsigned long *v2_in, unsigned long *v3_in,
-                  unsigned long *v4_in, unsigned long *v5_in)
+void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1,
+                  const unsigned long * __restrict p2,
+                  const unsigned long * __restrict p3,
+                  const unsigned long * __restrict p4,
+                  const unsigned long * __restrict p5)
 {
        preempt_disable();
        enable_kernel_altivec();
-       __xor_altivec_5(bytes, v1_in, v2_in, v3_in, v4_in, v5_in);
+       __xor_altivec_5(bytes, p1, p2, p3, p4, p5);
        disable_kernel_altivec();
        preempt_enable();
 }
index a963c3d8ad0d94b2152bb58893539f3c077b863f..fb924a8041dc775272c07722aff9cecada6ac833 100644 (file)
@@ -11,7 +11,8 @@
 #include <linux/raid/xor.h>
 #include <asm/xor.h>
 
-static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1,
+                    const unsigned long * __restrict p2)
 {
        asm volatile(
                "       larl    1,2f\n"
@@ -32,8 +33,9 @@ static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
                : "0", "1", "cc", "memory");
 }
 
-static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-                    unsigned long *p3)
+static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1,
+                    const unsigned long * __restrict p2,
+                    const unsigned long * __restrict p3)
 {
        asm volatile(
                "       larl    1,2f\n"
@@ -58,8 +60,10 @@ static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
                : : "0", "1", "cc", "memory");
 }
 
-static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-                    unsigned long *p3, unsigned long *p4)
+static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1,
+                    const unsigned long * __restrict p2,
+                    const unsigned long * __restrict p3,
+                    const unsigned long * __restrict p4)
 {
        asm volatile(
                "       larl    1,2f\n"
@@ -88,8 +92,11 @@ static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
                : : "0", "1", "cc", "memory");
 }
 
-static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-                    unsigned long *p3, unsigned long *p4, unsigned long *p5)
+static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1,
+                    const unsigned long * __restrict p2,
+                    const unsigned long * __restrict p3,
+                    const unsigned long * __restrict p4,
+                    const unsigned long * __restrict p5)
 {
        asm volatile(
                "       larl    1,2f\n"
index 3e5af37e4b9cd93fbfa267dee163a673cbfd8cb0..0351813cf3af5af8d3dec9a599e7b0f3ae31a8ba 100644 (file)
@@ -13,7 +13,8 @@
  */
 
 static void
-sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+sparc_2(unsigned long bytes, unsigned long * __restrict p1,
+       const unsigned long * __restrict p2)
 {
        int lines = bytes / (sizeof (long)) / 8;
 
@@ -50,8 +51,9 @@ sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-       unsigned long *p3)
+sparc_3(unsigned long bytes, unsigned long * __restrict p1,
+       const unsigned long * __restrict p2,
+       const unsigned long * __restrict p3)
 {
        int lines = bytes / (sizeof (long)) / 8;
 
@@ -101,8 +103,10 @@ sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-       unsigned long *p3, unsigned long *p4)
+sparc_4(unsigned long bytes, unsigned long * __restrict p1,
+       const unsigned long * __restrict p2,
+       const unsigned long * __restrict p3,
+       const unsigned long * __restrict p4)
 {
        int lines = bytes / (sizeof (long)) / 8;
 
@@ -165,8 +169,11 @@ sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-sparc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-       unsigned long *p3, unsigned long *p4, unsigned long *p5)
+sparc_5(unsigned long bytes, unsigned long * __restrict p1,
+       const unsigned long * __restrict p2,
+       const unsigned long * __restrict p3,
+       const unsigned long * __restrict p4,
+       const unsigned long * __restrict p5)
 {
        int lines = bytes / (sizeof (long)) / 8;
 
index 16169f3edcd5be60287f68bb83cbf14d911ed4a3..caaddea8ad79dd577876f3837582f189d572eb74 100644 (file)
 
 #include <asm/spitfire.h>
 
-void xor_vis_2(unsigned long, unsigned long *, unsigned long *);
-void xor_vis_3(unsigned long, unsigned long *, unsigned long *,
-              unsigned long *);
-void xor_vis_4(unsigned long, unsigned long *, unsigned long *,
-              unsigned long *, unsigned long *);
-void xor_vis_5(unsigned long, unsigned long *, unsigned long *,
-              unsigned long *, unsigned long *, unsigned long *);
+void xor_vis_2(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2);
+void xor_vis_3(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2,
+              const unsigned long * __restrict p3);
+void xor_vis_4(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2,
+              const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4);
+void xor_vis_5(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2,
+              const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4,
+              const unsigned long * __restrict p5);
 
 /* XXX Ugh, write cheetah versions... -DaveM */
 
@@ -30,13 +37,20 @@ static struct xor_block_template xor_block_VIS = {
         .do_5  = xor_vis_5,
 };
 
-void xor_niagara_2(unsigned long, unsigned long *, unsigned long *);
-void xor_niagara_3(unsigned long, unsigned long *, unsigned long *,
-                  unsigned long *);
-void xor_niagara_4(unsigned long, unsigned long *, unsigned long *,
-                  unsigned long *, unsigned long *);
-void xor_niagara_5(unsigned long, unsigned long *, unsigned long *,
-                  unsigned long *, unsigned long *, unsigned long *);
+void xor_niagara_2(unsigned long bytes, unsigned long * __restrict p1,
+                  const unsigned long * __restrict p2);
+void xor_niagara_3(unsigned long bytes, unsigned long * __restrict p1,
+                  const unsigned long * __restrict p2,
+                  const unsigned long * __restrict p3);
+void xor_niagara_4(unsigned long bytes, unsigned long * __restrict p1,
+                  const unsigned long * __restrict p2,
+                  const unsigned long * __restrict p3,
+                  const unsigned long * __restrict p4);
+void xor_niagara_5(unsigned long bytes, unsigned long * __restrict p1,
+                  const unsigned long * __restrict p2,
+                  const unsigned long * __restrict p3,
+                  const unsigned long * __restrict p4,
+                  const unsigned long * __restrict p5);
 
 static struct xor_block_template xor_block_niagara = {
         .name  = "Niagara",
index 2ee95a7769e60bb3fd78f9d4d4c34a26775de6ef..7b0307acc4103c897736fa0fcf5a003e4d1165bd 100644 (file)
@@ -57,7 +57,8 @@
                                        op(i + 3, 3)
 
 static void
-xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_sse_2(unsigned long bytes, unsigned long * __restrict p1,
+         const unsigned long * __restrict p2)
 {
        unsigned long lines = bytes >> 8;
 
@@ -108,7 +109,8 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2)
 {
        unsigned long lines = bytes >> 8;
 
@@ -142,8 +144,9 @@ xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-         unsigned long *p3)
+xor_sse_3(unsigned long bytes, unsigned long * __restrict p1,
+         const unsigned long * __restrict p2,
+         const unsigned long * __restrict p3)
 {
        unsigned long lines = bytes >> 8;
 
@@ -201,8 +204,9 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-              unsigned long *p3)
+xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2,
+              const unsigned long * __restrict p3)
 {
        unsigned long lines = bytes >> 8;
 
@@ -238,8 +242,10 @@ xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-         unsigned long *p3, unsigned long *p4)
+xor_sse_4(unsigned long bytes, unsigned long * __restrict p1,
+         const unsigned long * __restrict p2,
+         const unsigned long * __restrict p3,
+         const unsigned long * __restrict p4)
 {
        unsigned long lines = bytes >> 8;
 
@@ -304,8 +310,10 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-              unsigned long *p3, unsigned long *p4)
+xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2,
+              const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4)
 {
        unsigned long lines = bytes >> 8;
 
@@ -343,8 +351,11 @@ xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-         unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_sse_5(unsigned long bytes, unsigned long * __restrict p1,
+         const unsigned long * __restrict p2,
+         const unsigned long * __restrict p3,
+         const unsigned long * __restrict p4,
+         const unsigned long * __restrict p5)
 {
        unsigned long lines = bytes >> 8;
 
@@ -416,8 +427,11 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-              unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2,
+              const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4,
+              const unsigned long * __restrict p5)
 {
        unsigned long lines = bytes >> 8;
 
index 67ceb790e63972d3e3b06461454bc689d5814f61..7a6b9474591e75cf57a3a2fb5cbe6396312f769c 100644 (file)
@@ -21,7 +21,8 @@
 #include <asm/fpu/api.h>
 
 static void
-xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
+             const unsigned long * __restrict p2)
 {
        unsigned long lines = bytes >> 7;
 
@@ -64,8 +65,9 @@ xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-             unsigned long *p3)
+xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
+             const unsigned long * __restrict p2,
+             const unsigned long * __restrict p3)
 {
        unsigned long lines = bytes >> 7;
 
@@ -113,8 +115,10 @@ xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-             unsigned long *p3, unsigned long *p4)
+xor_pII_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
+             const unsigned long * __restrict p2,
+             const unsigned long * __restrict p3,
+             const unsigned long * __restrict p4)
 {
        unsigned long lines = bytes >> 7;
 
@@ -168,8 +172,11 @@ xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 
 
 static void
-xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-             unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
+             const unsigned long * __restrict p2,
+             const unsigned long * __restrict p3,
+             const unsigned long * __restrict p4,
+             const unsigned long * __restrict p5)
 {
        unsigned long lines = bytes >> 7;
 
@@ -248,7 +255,8 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 #undef BLOCK
 
 static void
-xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1,
+            const unsigned long * __restrict p2)
 {
        unsigned long lines = bytes >> 6;
 
@@ -295,8 +303,9 @@ xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-            unsigned long *p3)
+xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1,
+            const unsigned long * __restrict p2,
+            const unsigned long * __restrict p3)
 {
        unsigned long lines = bytes >> 6;
 
@@ -352,8 +361,10 @@ xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-            unsigned long *p3, unsigned long *p4)
+xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1,
+            const unsigned long * __restrict p2,
+            const unsigned long * __restrict p3,
+            const unsigned long * __restrict p4)
 {
        unsigned long lines = bytes >> 6;
 
@@ -418,8 +429,11 @@ xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-            unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1,
+            const unsigned long * __restrict p2,
+            const unsigned long * __restrict p3,
+            const unsigned long * __restrict p4,
+            const unsigned long * __restrict p5)
 {
        unsigned long lines = bytes >> 6;
 
index 0c4e5b5e3852bdaa16e4eb1199614f8f31c1bdb5..7f81dd5897f417866d9554443029cf899f43df2a 100644 (file)
@@ -26,7 +26,8 @@
                BLOCK4(8) \
                BLOCK4(12)
 
-static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1)
+static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0,
+                     const unsigned long * __restrict p1)
 {
        unsigned long lines = bytes >> 9;
 
@@ -52,8 +53,9 @@ do { \
        kernel_fpu_end();
 }
 
-static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1,
-       unsigned long *p2)
+static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0,
+                     const unsigned long * __restrict p1,
+                     const unsigned long * __restrict p2)
 {
        unsigned long lines = bytes >> 9;
 
@@ -82,8 +84,10 @@ do { \
        kernel_fpu_end();
 }
 
-static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1,
-       unsigned long *p2, unsigned long *p3)
+static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0,
+                     const unsigned long * __restrict p1,
+                     const unsigned long * __restrict p2,
+                     const unsigned long * __restrict p3)
 {
        unsigned long lines = bytes >> 9;
 
@@ -115,8 +119,11 @@ do { \
        kernel_fpu_end();
 }
 
-static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1,
-       unsigned long *p2, unsigned long *p3, unsigned long *p4)
+static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0,
+            const unsigned long * __restrict p1,
+            const unsigned long * __restrict p2,
+            const unsigned long * __restrict p3,
+            const unsigned long * __restrict p4)
 {
        unsigned long lines = bytes >> 9;
 
index b62a2a56a4d4976a383c30a57cafd8246e784df1..44509d48fca21ec835a695a8cfdc6d7161d64863 100644 (file)
@@ -8,7 +8,8 @@
 #include <linux/prefetch.h>
 
 static void
-xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_8regs_2(unsigned long bytes, unsigned long * __restrict p1,
+           const unsigned long * __restrict p2)
 {
        long lines = bytes / (sizeof (long)) / 8;
 
@@ -27,8 +28,9 @@ xor_8regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-           unsigned long *p3)
+xor_8regs_3(unsigned long bytes, unsigned long * __restrict p1,
+           const unsigned long * __restrict p2,
+           const unsigned long * __restrict p3)
 {
        long lines = bytes / (sizeof (long)) / 8;
 
@@ -48,8 +50,10 @@ xor_8regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-           unsigned long *p3, unsigned long *p4)
+xor_8regs_4(unsigned long bytes, unsigned long * __restrict p1,
+           const unsigned long * __restrict p2,
+           const unsigned long * __restrict p3,
+           const unsigned long * __restrict p4)
 {
        long lines = bytes / (sizeof (long)) / 8;
 
@@ -70,8 +74,11 @@ xor_8regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-           unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_8regs_5(unsigned long bytes, unsigned long * __restrict p1,
+           const unsigned long * __restrict p2,
+           const unsigned long * __restrict p3,
+           const unsigned long * __restrict p4,
+           const unsigned long * __restrict p5)
 {
        long lines = bytes / (sizeof (long)) / 8;
 
@@ -93,7 +100,8 @@ xor_8regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_32regs_2(unsigned long bytes, unsigned long * __restrict p1,
+            const unsigned long * __restrict p2)
 {
        long lines = bytes / (sizeof (long)) / 8;
 
@@ -129,8 +137,9 @@ xor_32regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-           unsigned long *p3)
+xor_32regs_3(unsigned long bytes, unsigned long * __restrict p1,
+            const unsigned long * __restrict p2,
+            const unsigned long * __restrict p3)
 {
        long lines = bytes / (sizeof (long)) / 8;
 
@@ -175,8 +184,10 @@ xor_32regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-           unsigned long *p3, unsigned long *p4)
+xor_32regs_4(unsigned long bytes, unsigned long * __restrict p1,
+            const unsigned long * __restrict p2,
+            const unsigned long * __restrict p3,
+            const unsigned long * __restrict p4)
 {
        long lines = bytes / (sizeof (long)) / 8;
 
@@ -230,8 +241,11 @@ xor_32regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-           unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_32regs_5(unsigned long bytes, unsigned long * __restrict p1,
+            const unsigned long * __restrict p2,
+            const unsigned long * __restrict p3,
+            const unsigned long * __restrict p4,
+            const unsigned long * __restrict p5)
 {
        long lines = bytes / (sizeof (long)) / 8;
 
@@ -294,7 +308,8 @@ xor_32regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_8regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
+             const unsigned long * __restrict p2)
 {
        long lines = bytes / (sizeof (long)) / 8 - 1;
        prefetchw(p1);
@@ -320,8 +335,9 @@ xor_8regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-           unsigned long *p3)
+xor_8regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
+             const unsigned long * __restrict p2,
+             const unsigned long * __restrict p3)
 {
        long lines = bytes / (sizeof (long)) / 8 - 1;
        prefetchw(p1);
@@ -350,8 +366,10 @@ xor_8regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-           unsigned long *p3, unsigned long *p4)
+xor_8regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
+             const unsigned long * __restrict p2,
+             const unsigned long * __restrict p3,
+             const unsigned long * __restrict p4)
 {
        long lines = bytes / (sizeof (long)) / 8 - 1;
 
@@ -384,8 +402,11 @@ xor_8regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-           unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_8regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
+             const unsigned long * __restrict p2,
+             const unsigned long * __restrict p3,
+             const unsigned long * __restrict p4,
+             const unsigned long * __restrict p5)
 {
        long lines = bytes / (sizeof (long)) / 8 - 1;
 
@@ -421,7 +442,8 @@ xor_8regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
+xor_32regs_p_2(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2)
 {
        long lines = bytes / (sizeof (long)) / 8 - 1;
 
@@ -466,8 +488,9 @@ xor_32regs_p_2(unsigned long bytes, unsigned long *p1, unsigned long *p2)
 }
 
 static void
-xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-           unsigned long *p3)
+xor_32regs_p_3(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2,
+              const unsigned long * __restrict p3)
 {
        long lines = bytes / (sizeof (long)) / 8 - 1;
 
@@ -523,8 +546,10 @@ xor_32regs_p_3(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-           unsigned long *p3, unsigned long *p4)
+xor_32regs_p_4(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2,
+              const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4)
 {
        long lines = bytes / (sizeof (long)) / 8 - 1;
 
@@ -591,8 +616,11 @@ xor_32regs_p_4(unsigned long bytes, unsigned long *p1, unsigned long *p2,
 }
 
 static void
-xor_32regs_p_5(unsigned long bytes, unsigned long *p1, unsigned long *p2,
-           unsigned long *p3, unsigned long *p4, unsigned long *p5)
+xor_32regs_p_5(unsigned long bytes, unsigned long * __restrict p1,
+              const unsigned long * __restrict p2,
+              const unsigned long * __restrict p3,
+              const unsigned long * __restrict p4,
+              const unsigned long * __restrict p5)
 {
        long lines = bytes / (sizeof (long)) / 8 - 1;
 
index 2a9fee8ddae3f961b52d26ba79c1fe6ae78fc73e..51b811b623224fd3c9d19288a04b9ed6921c0f49 100644 (file)
@@ -11,13 +11,20 @@ struct xor_block_template {
         struct xor_block_template *next;
         const char *name;
         int speed;
-       void (*do_2)(unsigned long, unsigned long *, unsigned long *);
-       void (*do_3)(unsigned long, unsigned long *, unsigned long *,
-                    unsigned long *);
-       void (*do_4)(unsigned long, unsigned long *, unsigned long *,
-                    unsigned long *, unsigned long *);
-       void (*do_5)(unsigned long, unsigned long *, unsigned long *,
-                    unsigned long *, unsigned long *, unsigned long *);
+       void (*do_2)(unsigned long, unsigned long * __restrict,
+                    const unsigned long * __restrict);
+       void (*do_3)(unsigned long, unsigned long * __restrict,
+                    const unsigned long * __restrict,
+                    const unsigned long * __restrict);
+       void (*do_4)(unsigned long, unsigned long * __restrict,
+                    const unsigned long * __restrict,
+                    const unsigned long * __restrict,
+                    const unsigned long * __restrict);
+       void (*do_5)(unsigned long, unsigned long * __restrict,
+                    const unsigned long * __restrict,
+                    const unsigned long * __restrict,
+                    const unsigned long * __restrict,
+                    const unsigned long * __restrict);
 };
 
 #endif