riscv: Add vector extension XOR implementation
Author:     Greentime Hu <greentime.hu@sifive.com>
AuthorDate: Mon, 15 Jan 2024 05:59:22 +0000 (05:59 +0000)
Commit:     Palmer Dabbelt <palmer@rivosinc.com>
CommitDate: Tue, 16 Jan 2024 15:13:55 +0000 (07:13 -0800)
This patch adds support for vector-optimized XOR. It has been tested in QEMU.
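
For reference, these routines are not called directly; they are reached through
the kernel's generic XOR template framework. A minimal caller-side sketch
(illustrative only, not part of this patch; example_xor_into() is a hypothetical
helper, and the "rvv" template is only used if it wins the boot-time benchmark):

  /*
   * Illustrative only: xor_blocks() (declared in include/linux/raid/xor.h)
   * dispatches to whichever xor_block_template won the boot-time benchmark,
   * which may be the "rvv" template added by this patch.
   */
  #include <linux/raid/xor.h>

  static void example_xor_into(void *dst, void *src, unsigned int bytes)
  {
          void *srcs[] = { src };

          /* dst[i] ^= src[i] for 'bytes' bytes, using the fastest template */
          xor_blocks(1, bytes, dst, srcs);
  }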

Co-developed-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Han-Kuan Chen <hankuan.chen@sifive.com>
Signed-off-by: Greentime Hu <greentime.hu@sifive.com>
Signed-off-by: Andy Chiu <andy.chiu@sifive.com>
Tested-by: Björn Töpel <bjorn@rivosinc.com>
Tested-by: Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
Link: https://lore.kernel.org/r/20240115055929.4736-4-andy.chiu@sifive.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
arch/riscv/include/asm/asm-prototypes.h
arch/riscv/include/asm/xor.h [new file with mode: 0644]
arch/riscv/lib/Makefile
arch/riscv/lib/xor.S [new file with mode: 0644]

diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
index 36b955c762ba08e92ca0441ee8fbae9219c1f2fa..6db1a9bbff4ce35d1e05261094bb2efa2ef311a1 100644
--- a/arch/riscv/include/asm/asm-prototypes.h
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -9,6 +9,24 @@ long long __lshrti3(long long a, int b);
 long long __ashrti3(long long a, int b);
 long long __ashlti3(long long a, int b);
 
+#ifdef CONFIG_RISCV_ISA_V
+
+void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
+                const unsigned long *__restrict p2);
+void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1,
+                const unsigned long *__restrict p2,
+                const unsigned long *__restrict p3);
+void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1,
+                const unsigned long *__restrict p2,
+                const unsigned long *__restrict p3,
+                const unsigned long *__restrict p4);
+void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1,
+                const unsigned long *__restrict p2,
+                const unsigned long *__restrict p3,
+                const unsigned long *__restrict p4,
+                const unsigned long *__restrict p5);
+
+#endif /* CONFIG_RISCV_ISA_V */
 
 #define DECLARE_DO_ERROR_INFO(name)    asmlinkage void name(struct pt_regs *regs)
 
diff --git a/arch/riscv/include/asm/xor.h b/arch/riscv/include/asm/xor.h
new file mode 100644
index 0000000..9601186
--- /dev/null
+++ b/arch/riscv/include/asm/xor.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+
+#include <linux/hardirq.h>
+#include <asm-generic/xor.h>
+#ifdef CONFIG_RISCV_ISA_V
+#include <asm/vector.h>
+#include <asm/switch_to.h>
+#include <asm/asm-prototypes.h>
+
+static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
+                        const unsigned long *__restrict p2)
+{
+       kernel_vector_begin();
+       xor_regs_2_(bytes, p1, p2);
+       kernel_vector_end();
+}
+
+static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1,
+                        const unsigned long *__restrict p2,
+                        const unsigned long *__restrict p3)
+{
+       kernel_vector_begin();
+       xor_regs_3_(bytes, p1, p2, p3);
+       kernel_vector_end();
+}
+
+static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1,
+                        const unsigned long *__restrict p2,
+                        const unsigned long *__restrict p3,
+                        const unsigned long *__restrict p4)
+{
+       kernel_vector_begin();
+       xor_regs_4_(bytes, p1, p2, p3, p4);
+       kernel_vector_end();
+}
+
+static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1,
+                        const unsigned long *__restrict p2,
+                        const unsigned long *__restrict p3,
+                        const unsigned long *__restrict p4,
+                        const unsigned long *__restrict p5)
+{
+       kernel_vector_begin();
+       xor_regs_5_(bytes, p1, p2, p3, p4, p5);
+       kernel_vector_end();
+}
+
+static struct xor_block_template xor_block_rvv = {
+       .name = "rvv",
+       .do_2 = xor_vector_2,
+       .do_3 = xor_vector_3,
+       .do_4 = xor_vector_4,
+       .do_5 = xor_vector_5
+};
+
+#undef XOR_TRY_TEMPLATES
+#define XOR_TRY_TEMPLATES           \
+       do {        \
+               xor_speed(&xor_block_8regs);    \
+               xor_speed(&xor_block_32regs);    \
+               if (has_vector()) { \
+                       xor_speed(&xor_block_rvv);\
+               } \
+       } while (0)
+#endif
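
The XOR_TRY_TEMPLATES hook above is consumed by the boot-time calibration code
in crypto/xor.c, which benchmarks every registered template and keeps the
fastest one. A simplified sketch of that flow (illustrative only, not the
actual kernel source; the example_* names are hypothetical):

  #include <asm/xor.h>    /* provides XOR_TRY_TEMPLATES and xor_block_rvv */

  static struct xor_block_template *example_fastest;

  static int example_measure_speed(struct xor_block_template *tmpl)
  {
          /* A real implementation would time tmpl->do_2() over scratch buffers. */
          return 0;
  }

  static void example_do_xor_speed(struct xor_block_template *tmpl)
  {
          tmpl->speed = example_measure_speed(tmpl);
          if (!example_fastest || tmpl->speed > example_fastest->speed)
                  example_fastest = tmpl;
  }

  static void example_calibrate_xor_blocks(void)
  {
  #define xor_speed(tmpl) example_do_xor_speed(tmpl)
          XOR_TRY_TEMPLATES;      /* includes xor_speed(&xor_block_rvv) when has_vector() */
  #undef xor_speed
  }

On hardware without the vector extension, has_vector() is false and only the
generic 8regs/32regs templates are benchmarked.
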
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 26cb2502ecf8969b76a47e874f6be9e90219887b..494f9cd1a00c0b83619f622d381e86c5529fd80d 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -11,3 +11,4 @@ lib-$(CONFIG_64BIT)   += tishift.o
 lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
 
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+lib-$(CONFIG_RISCV_ISA_V)      += xor.o
diff --git a/arch/riscv/lib/xor.S b/arch/riscv/lib/xor.S
new file mode 100644
index 0000000..b28f243
--- /dev/null
+++ b/arch/riscv/lib/xor.S
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+#include <linux/linkage.h>
+#include <linux/export.h>
+#include <asm/asm.h>
+
+SYM_FUNC_START(xor_regs_2_)
+       vsetvli a3, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a3
+       vxor.vv v16, v0, v8
+       add a2, a2, a3
+       vse8.v v16, (a1)
+       add a1, a1, a3
+       bnez a0, xor_regs_2_
+       ret
+SYM_FUNC_END(xor_regs_2_)
+EXPORT_SYMBOL(xor_regs_2_)
+
+SYM_FUNC_START(xor_regs_3_)
+       vsetvli a4, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a4
+       vxor.vv v0, v0, v8
+       vle8.v v16, (a3)
+       add a2, a2, a4
+       vxor.vv v16, v0, v16
+       add a3, a3, a4
+       vse8.v v16, (a1)
+       add a1, a1, a4
+       bnez a0, xor_regs_3_
+       ret
+SYM_FUNC_END(xor_regs_3_)
+EXPORT_SYMBOL(xor_regs_3_)
+
+SYM_FUNC_START(xor_regs_4_)
+       vsetvli a5, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a5
+       vxor.vv v0, v0, v8
+       vle8.v v16, (a3)
+       add a2, a2, a5
+       vxor.vv v0, v0, v16
+       vle8.v v24, (a4)
+       add a3, a3, a5
+       vxor.vv v16, v0, v24
+       add a4, a4, a5
+       vse8.v v16, (a1)
+       add a1, a1, a5
+       bnez a0, xor_regs_4_
+       ret
+SYM_FUNC_END(xor_regs_4_)
+EXPORT_SYMBOL(xor_regs_4_)
+
+SYM_FUNC_START(xor_regs_5_)
+       vsetvli a6, a0, e8, m8, ta, ma
+       vle8.v v0, (a1)
+       vle8.v v8, (a2)
+       sub a0, a0, a6
+       vxor.vv v0, v0, v8
+       vle8.v v16, (a3)
+       add a2, a2, a6
+       vxor.vv v0, v0, v16
+       vle8.v v24, (a4)
+       add a3, a3, a6
+       vxor.vv v0, v0, v24
+       vle8.v v8, (a5)
+       add a4, a4, a6
+       vxor.vv v16, v0, v8
+       add a5, a5, a6
+       vse8.v v16, (a1)
+       add a1, a1, a6
+       bnez a0, xor_regs_5_
+       ret
+SYM_FUNC_END(xor_regs_5_)
+EXPORT_SYMBOL(xor_regs_5_)
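
Each routine above is a strip-mined loop: vsetvli asks the hardware how many
e8 elements fit in eight grouped vector registers (m8), the loads, XORs, and
store process that many bytes, the pointers and the remaining byte count in a0
advance by the same amount, and the routine branches back to its own entry
until a0 reaches zero. In rough C, xor_regs_2_ behaves like the following
sketch (illustrative only; example_vsetvli() is a hypothetical stand-in for
the vsetvli instruction, and VLEN=128 is an assumption):

  /*
   * Hypothetical stand-in for "vsetvli rd, a0, e8, m8": returns how many
   * bytes the vector unit will process this iteration (at most 'bytes').
   * Assumes VLEN=128 bits, so VLENB (16 bytes) * LMUL (8) = 128 bytes.
   */
  static unsigned long example_vsetvli(unsigned long bytes)
  {
          unsigned long max_chunk = 128;

          return bytes < max_chunk ? bytes : max_chunk;
  }

  static void example_xor_regs_2(unsigned long bytes, unsigned char *p1,
                                 const unsigned char *p2)
  {
          while (bytes) {
                  /* chunk = bytes handled by one pass of the vector loop */
                  unsigned long chunk = example_vsetvli(bytes);

                  for (unsigned long i = 0; i < chunk; i++)
                          p1[i] ^= p2[i];         /* vle8.v + vxor.vv + vse8.v */

                  p1 += chunk;                    /* add a1, a1, a3 */
                  p2 += chunk;                    /* add a2, a2, a3 */
                  bytes -= chunk;                 /* sub a0, a0, a3 */
          }
  }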