s390/checksum: provide csum_partial_copy_nocheck()

author Heiko Carstens <hca@linux.ibm.com>

Sat, 3 Feb 2024 10:45:23 +0000 (11:45 +0100)

committer Heiko Carstens <hca@linux.ibm.com>

Fri, 16 Feb 2024 13:30:17 +0000 (14:30 +0100)
author Heiko Carstens <hca@linux.ibm.com>
Sat, 3 Feb 2024 10:45:23 +0000 (11:45 +0100)
committer Heiko Carstens <hca@linux.ibm.com>
Fri, 16 Feb 2024 13:30:17 +0000 (14:30 +0100)
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h

index 00095cc20afa634275832dd0713004f26da3055c..b89159591ca08d7caf6248ef270d325772fdeb85 100644 (file)
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -32,6 +32,9 @@ static inline __wsum cksm(const void *buff, int len, __wsum sum)
  
  __wsum csum_partial(const void *buff, int len, __wsum sum);
  
+#define _HAVE_ARCH_CSUM_AND_COPY
+__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
+
  /*
   * Fold a partial checksum without adding pseudo headers.
   */
diff --git a/arch/s390/include/asm/fpu-insn-asm.h b/arch/s390/include/asm/fpu-insn-asm.h

index aaf42c513a21d4c3d7105d57f25272f866670d52..02ccfe46050a08ce94a5eef63f4703c0d029f7ca 100644 (file)
--- a/arch/s390/include/asm/fpu-insn-asm.h
+++ b/arch/s390/include/asm/fpu-insn-asm.h
@@ -531,6 +531,16 @@
         MRXBOPC 0, 0x37, v1
  .endm
  
+/*
+ * VECTOR STORE WITH LENGTH
+ *
+ * Operands: v - vector register, gr - general register, disp - displacement,
+ * base - base register. The instruction is emitted as raw data instead of
+ * by mnemonic: VX_NUM/GR_NUM translate the register names into the numbers
+ * v1, b2 and r3, which are then packed into the instruction halfwords.
+ */
+.macro VSTL    v, gr, disp, base
+       VX_NUM  v1, \v
+       GR_NUM  b2, \base
+       GR_NUM  r3, \gr
+       /* First halfword: 0xE7 in the high byte, low 4 bits of v1, then r3 */
+       .word   0xE700 | ((v1&15) << 4) | r3
+       /* Second halfword: base register in the top 4 bits, then disp */
+       .word   (b2 << 12) | (\disp)
+       /* NOTE(review): presumably emits the last halfword (RXB, opcode 0x3f) */
+       MRXBOPC 0, 0x3f, v1
+.endm
+
  /* Vector integer instructions */
  
  /* VECTOR AND */
diff --git a/arch/s390/include/asm/fpu-insn.h b/arch/s390/include/asm/fpu-insn.h

index 7e9997fa45d36e30e9b800434de1228aa10d97c8..35c4fbe0bdd6ee170d870d41d6971dbaa5e434db 100644 (file)
--- a/arch/s390/include/asm/fpu-insn.h
+++ b/arch/s390/include/asm/fpu-insn.h
@@ -241,6 +241,64 @@ static __always_inline void fpu_vlvgf(u8 v, u32 val, u16 index)
  
  #ifdef CONFIG_CC_IS_CLANG
  
+/*
+ * fpu_vst - issue VST to store vector register @v1 to the memory at @vxr
+ * (variant used when CONFIG_CC_IS_CLANG is defined).
+ *
+ * @v1:  vector register number; passed with the "I" constraint, so it has
+ *       to be a compile-time constant.
+ * @vxr: destination; a write of sizeof(__vector128) bytes is reported via
+ *       instrument_write() before the store.
+ *       NOTE(review): declared const although it is the asm output operand;
+ *       confirm this is intentional.
+ *
+ * The address of the operand is loaded into general register 1 with "la"
+ * and VST is issued with displacement 0 and register 1, hence the "1"
+ * clobber.
+ */
+static __always_inline void fpu_vst(u8 v1, const void *vxr)
+{
+       instrument_write(vxr, sizeof(__vector128));
+       asm volatile("\n"
+               "       la      1,%[vxr]\n"
+               "       VST     %[v1],0,,1\n"
+               : [vxr] "=R" (*(__vector128 *)vxr)
+               : [v1] "I" (v1)
+               : "memory", "1");
+}
+
+#else /* CONFIG_CC_IS_CLANG */
+
+/*
+ * fpu_vst - issue VST to store vector register @v1 to the memory at @vxr
+ * (variant used when CONFIG_CC_IS_CLANG is not defined).
+ *
+ * @v1:  vector register number; passed with the "I" constraint, so it has
+ *       to be a compile-time constant.
+ * @vxr: destination; a write of sizeof(__vector128) bytes is reported via
+ *       instrument_write() before the store.
+ *       NOTE(review): declared const although it is the asm output operand;
+ *       confirm this is intentional.
+ *
+ * The "Q" memory operand is handed to VST in two pieces via the %O and %R
+ * operand modifiers (presumably displacement and base register; verify
+ * against the compiler documentation), so no scratch register is needed.
+ */
+static __always_inline void fpu_vst(u8 v1, const void *vxr)
+{
+       instrument_write(vxr, sizeof(__vector128));
+       asm volatile("VST       %[v1],%O[vxr],,%R[vxr]\n"
+                    : [vxr] "=Q" (*(__vector128 *)vxr)
+                    : [v1] "I" (v1)
+                    : "memory");
+}
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+#ifdef CONFIG_CC_IS_CLANG
+
+/*
+ * fpu_vstl - issue VSTL (VECTOR STORE WITH LENGTH) to store part of vector
+ * register @v1 to the memory at @vxr (variant used when CONFIG_CC_IS_CLANG
+ * is defined).
+ *
+ * @v1:    vector register number; passed with the "I" constraint, so it has
+ *         to be a compile-time constant.
+ * @index: passed to the instruction in a general register. The write
+ *         reported to instrument_write() is min(@index + 1,
+ *         sizeof(__vector128)) bytes, i.e. @index is treated as the highest
+ *         byte index that gets stored.
+ * @vxr:   destination.
+ *         NOTE(review): declared const although it is the asm output
+ *         operand; confirm this is intentional.
+ *
+ * The address of the operand is loaded into general register 1 with "la"
+ * and VSTL is issued with displacement 0 and register 1, hence the "1"
+ * clobber.
+ */
+static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
+{
+       unsigned int size;
+
+       size = min(index + 1, sizeof(__vector128));
+       instrument_write(vxr, size);
+       asm volatile("\n"
+               "       la      1,%[vxr]\n"
+               "       VSTL    %[v1],%[index],0,1\n"
+               : [vxr] "=R" (*(u8 *)vxr)
+               : [index] "d" (index), [v1] "I" (v1)
+               : "memory", "1");
+}
+
+#else /* CONFIG_CC_IS_CLANG */
+
+/*
+ * fpu_vstl - issue VSTL (VECTOR STORE WITH LENGTH) to store part of vector
+ * register @v1 to the memory at @vxr (variant used when CONFIG_CC_IS_CLANG
+ * is not defined).
+ *
+ * @v1:    vector register number; passed with the "I" constraint, so it has
+ *         to be a compile-time constant.
+ * @index: passed to the instruction in a general register. The write
+ *         reported to instrument_write() is min(@index + 1,
+ *         sizeof(__vector128)) bytes, i.e. @index is treated as the highest
+ *         byte index that gets stored.
+ * @vxr:   destination.
+ *         NOTE(review): declared const although it is the asm output
+ *         operand; confirm this is intentional.
+ *
+ * The "Q" memory operand is handed to VSTL in two pieces via the %O and %R
+ * operand modifiers (presumably displacement and base register; verify
+ * against the compiler documentation), so no scratch register is needed.
+ */
+static __always_inline void fpu_vstl(u8 v1, u32 index, const void *vxr)
+{
+       unsigned int size;
+
+       size = min(index + 1, sizeof(__vector128));
+       instrument_write(vxr, size);
+       asm volatile("VSTL      %[v1],%[index],%O[vxr],%R[vxr]\n"
+                    : [vxr] "=Q" (*(u8 *)vxr)
+                    : [index] "d" (index), [v1] "I" (v1)
+                    : "memory");
+}
+
+#endif /* CONFIG_CC_IS_CLANG */
+
+#ifdef CONFIG_CC_IS_CLANG
+
  #define fpu_vstm(_v1, _v3, _vxrs)                                      \
  ({                                                                     \
         unsigned int size = ((_v3) - (_v1) + 1) * sizeof(__vector128);  \
diff --git a/arch/s390/lib/csum-partial.c b/arch/s390/lib/csum-partial.c

index 3ea009cbc3b797ce3a0e6d0f1e2ca9084b5022fc..458abd9bac70256ca0bcdda48b9b6cbdf2efa4b9 100644 (file)
--- a/arch/s390/lib/csum-partial.c
+++ b/arch/s390/lib/csum-partial.c
@@ -5,8 +5,8 @@
  #include <asm/fpu.h>
  
  /*
- * Computes the checksum of a memory block at buff, length len,
- * and adds in "sum" (32-bit).
+ * Computes the checksum of a memory block at src, length len,
+ * and adds in "sum" (32-bit). If copy is true, the block is also copied to dst.
   *
   * Returns a 32-bit number suitable for feeding into itself
   * or csum_tcpudp_magic.
@@ -14,43 +14,60 @@
   * This function must be called with even lengths, except
   * for the last fragment, which may be odd.
   *
- * It's best to have buff aligned on a 64-bit boundary.
+ * It's best to have src and dst aligned on a 64-bit boundary.
   */
-__wsum csum_partial(const void *buff, int len, __wsum sum)
+static __always_inline __wsum csum_copy(void *dst, const void *src, int len, __wsum sum, bool copy)
  {
         DECLARE_KERNEL_FPU_ONSTACK8(vxstate);
  
-       if (!cpu_has_vx())
-               return cksm(buff, len, sum);
+       if (!cpu_has_vx()) {
+               if (copy)
+                       memcpy(dst, src, len);
+               /*
+                * Always checksum the source: csum_partial() calls this
+                * with copy == false and dst == NULL, and after the
+                * memcpy() above src and dst hold the same bytes anyway.
+                */
+               return cksm(src, len, sum);
+       }
         kernel_fpu_begin(&vxstate, KERNEL_VXR_V16V23);
         fpu_vlvgf(16, (__force u32)sum, 1);
         fpu_vzero(17);
         fpu_vzero(18);
         fpu_vzero(19);
         while (len >= 64) {
-               fpu_vlm(20, 23, buff);
+               fpu_vlm(20, 23, src);
+               if (copy) {
+                       fpu_vstm(20, 23, dst);
+                       dst += 64;
+               }
                 fpu_vcksm(16, 20, 16);
                 fpu_vcksm(17, 21, 17);
                 fpu_vcksm(18, 22, 18);
                 fpu_vcksm(19, 23, 19);
-               buff += 64;
+               src += 64;
                 len -= 64;
         }
         while (len >= 32) {
-               fpu_vlm(20, 21, buff);
+               fpu_vlm(20, 21, src);
+               if (copy) {
+                       fpu_vstm(20, 21, dst);
+                       dst += 32;
+               }
                 fpu_vcksm(16, 20, 16);
                 fpu_vcksm(17, 21, 17);
-               buff += 32;
+               src += 32;
                 len -= 32;
         }
         while (len >= 16) {
-               fpu_vl(20, buff);
+               fpu_vl(20, src);
+               if (copy) {
+                       fpu_vst(20, dst);
+                       dst += 16;
+               }
                 fpu_vcksm(16, 20, 16);
-               buff += 16;
+               src += 16;
                 len -= 16;
         }
         if (len) {
-               fpu_vll(20, len - 1, buff);
+               fpu_vll(20, len - 1, src);
+               if (copy)
+                       fpu_vstl(20, len - 1, dst);
                 fpu_vcksm(16, 20, 16);
         }
         fpu_vcksm(18, 19, 18);
@@ -60,4 +77,15 @@ __wsum csum_partial(const void *buff, int len, __wsum sum)
         kernel_fpu_end(&vxstate, KERNEL_VXR_V16V23);
         return sum;
  }
+
+/*
+ * csum_partial - compute the checksum of @len bytes at @buff and add in
+ * @sum.
+ *
+ * Thin wrapper around csum_copy() with copying disabled; no destination
+ * buffer is supplied (dst is passed as NULL).
+ */
+__wsum csum_partial(const void *buff, int len, __wsum sum)
+{
+       return csum_copy(NULL, buff, len, sum, false);
+}
  EXPORT_SYMBOL(csum_partial);
+
+/*
+ * csum_partial_copy_nocheck - copy @len bytes from @src to @dst and return
+ * the checksum of the data.
+ *
+ * Thin wrapper around csum_copy() with copying enabled and an initial sum
+ * of 0.
+ */
+__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len)
+{
+       return csum_copy(dst, src, len, 0, true);
+}
+EXPORT_SYMBOL(csum_partial_copy_nocheck);
author	Heiko Carstens <hca@linux.ibm.com>
	Sat, 3 Feb 2024 10:45:23 +0000 (11:45 +0100)
committer	Heiko Carstens <hca@linux.ibm.com>
	Fri, 16 Feb 2024 13:30:17 +0000 (14:30 +0100)
arch/s390/include/asm/checksum.h		patch \| blob \| history
arch/s390/include/asm/fpu-insn-asm.h		patch \| blob \| history
arch/s390/include/asm/fpu-insn.h		patch \| blob \| history
arch/s390/lib/csum-partial.c		patch \| blob \| history