LoongArch: Select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
author Xi Ruoyao <xry111@xry111.site>
Tue, 14 May 2024 04:24:18 +0000 (12:24 +0800)
committer Huacai Chen <chenhuacai@loongson.cn>
Tue, 14 May 2024 04:24:18 +0000 (12:24 +0800)
This allows compiling a full 128-bit product of two 64-bit integers as a
mul/mulh pair, instead of a nasty long sequence of 20+ instructions.

However, after selecting ARCH_SUPPORTS_INT128, when optimizing for size
the compiler generates calls to __ashlti3, __ashrti3, and __lshrti3 for
shifting __int128 values, causing a link failure:

    loongarch64-unknown-linux-gnu-ld: kernel/sched/fair.o: in
    function `mul_u64_u32_shr':
    <PATH>/include/linux/math64.h:161:(.text+0x5e4): undefined
    reference to `__lshrti3'

So provide the implementation of these functions if ARCH_SUPPORTS_INT128.

Closes: https://lore.kernel.org/loongarch/CAAhV-H5EZ=7OF7CSiYyZ8_+wWuenpo=K2WT8-6mAT4CvzUC_4g@mail.gmail.com/
Signed-off-by: Xi Ruoyao <xry111@xry111.site>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
arch/loongarch/Kconfig
arch/loongarch/include/asm/asm-prototypes.h
arch/loongarch/lib/Makefile
arch/loongarch/lib/tishift.S [new file with mode: 0644]

index 1355ec0c69ccac333fe09860e3ac80f907f5b91c..335a98b28167cbffaee4845ed6e8a1a493924bdf 100644 (file)
@@ -57,6 +57,7 @@ config LOONGARCH
        select ARCH_SUPPORTS_ACPI
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_SUPPORTS_HUGETLBFS
+       select ARCH_SUPPORTS_INT128 if CC_HAS_INT128
        select ARCH_SUPPORTS_LTO_CLANG
        select ARCH_SUPPORTS_LTO_CLANG_THIN
        select ARCH_SUPPORTS_NUMA_BALANCING
index cf8e1a4e7c19dad55c6adc9a56820b02e0fae977..51f224bcfc654228ae423e9a066b25b35102a5b9 100644 (file)
@@ -6,3 +6,9 @@
 #include <asm/page.h>
 #include <asm/ftrace.h>
 #include <asm-generic/asm-prototypes.h>
+
+#ifdef CONFIG_ARCH_SUPPORTS_INT128 /* 128-bit shift helpers; bodies are in arch/loongarch/lib/tishift.S */
+__int128_t __ashlti3(__int128_t a, int b); /* left shift: a << b */
+__int128_t __ashrti3(__int128_t a, int b); /* arithmetic right shift: a >> b, filling with the sign bit */
+__int128_t __lshrti3(__int128_t a, int b); /* logical right shift: a >> b, filling with zeros */
+#endif /* CONFIG_ARCH_SUPPORTS_INT128 */
index a77bf160bfc4246fa34cc666f037ace2eaf01996..ccea3bbd4353134fb115fc547f742f9f5e420204 100644 (file)
@@ -6,6 +6,8 @@
 lib-y  += delay.o memset.o memcpy.o memmove.o \
           clear_user.o copy_user.o csum.o dump_tlb.o unaligned.o
 
+obj-$(CONFIG_ARCH_SUPPORTS_INT128) += tishift.o
+
 obj-$(CONFIG_CPU_HAS_LSX) += xor_simd.o xor_simd_glue.o
 
 obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/loongarch/lib/tishift.S b/arch/loongarch/lib/tishift.S
new file mode 100644 (file)
index 0000000..fa1d310
--- /dev/null
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <asm/asmmacro.h>
+#include <linux/export.h>
+#include <linux/linkage.h>
+
+SYM_FUNC_START(__ashlti3)      /* 128-bit left shift, branch-free: a1:a0 (hi:lo) <<= n, n = a2; only bits 6:0 of n are used */
+       srli.d  t2, a0, 1       /* t2 = lo >> 1: first half of a two-step shift that stays valid when (n & 63) == 0 */
+       nor     t3, zero, a2    /* t3 = ~n; the register shifts below use only its low 6 bits, i.e. 63 - (n & 63) */
+       sll.d   t1, a1, a2      /* t1 = hi << (n & 63) */
+       srl.d   t2, t2, t3      /* t2 = lo >> (64 - (n & 63)), or 0 when (n & 63) == 0: bits carried from lo into hi */
+       andi    t0, a2, 64      /* t0 != 0 iff bit 6 of n is set, i.e. the shift is 64 or more */
+       sll.d   a0, a0, a2      /* a0 = lo << (n & 63) */
+       or      t1, t2, t1      /* t1 = high word of the result when the shift is below 64 */
+       maskeqz a1, a0, t0      /* a1 = t0 ? (lo << (n & 63)) : 0  -- high word when the shift is 64 or more */
+       masknez a0, a0, t0      /* a0 = t0 ? 0 : (lo << n)         -- low word is zero when the shift is 64 or more */
+       masknez t0, t1, t0      /* t0 = t0 ? 0 : t1                -- keep the below-64 high word only in that case */
+       or      a1, t0, a1      /* merge: exactly one of the two high-word candidates survived the masking */
+       jr      ra              /* result returned in a1:a0 */
+SYM_FUNC_END(__ashlti3)
+EXPORT_SYMBOL(__ashlti3)
+
+SYM_FUNC_START(__ashrti3)      /* 128-bit arithmetic right shift, branch-free: a1:a0 (hi:lo) >>= n, n = a2; only bits 6:0 of n are used */
+       nor     t3, zero, a2    /* t3 = ~n; the register shifts below use only its low 6 bits, i.e. 63 - (n & 63) */
+       slli.d  t2, a1, 1       /* t2 = hi << 1: first half of a two-step shift that stays valid when (n & 63) == 0 */
+       srl.d   t1, a0, a2      /* t1 = lo >> (n & 63), zero-filling */
+       sll.d   t2, t2, t3      /* t2 = hi << (64 - (n & 63)), or 0 when (n & 63) == 0: bits carried from hi into lo */
+       andi    t0, a2, 64      /* t0 != 0 iff bit 6 of n is set, i.e. the shift is 64 or more */
+       or      t1, t2, t1      /* t1 = low word of the result when the shift is below 64 */
+       sra.d   a2, a1, a2      /* a2 = hi >> (n & 63), sign-filling; overwrites n, which is not read again */
+       srai.d  a1, a1, 63      /* a1 = sign bit of hi replicated into all 64 bits */
+       maskeqz a0, a2, t0      /* a0 = t0 ? (hi >> (n & 63)) : 0  -- low word when the shift is 64 or more */
+       maskeqz a1, a1, t0      /* a1 = t0 ? sign fill : 0         -- high word when the shift is 64 or more */
+       masknez a2, a2, t0      /* a2 = t0 ? 0 : (hi >> n)         -- high word when the shift is below 64 */
+       masknez t0, t1, t0      /* t0 = t0 ? 0 : t1                -- low word when the shift is below 64 */
+       or      a1, a1, a2      /* merge high word: one of the two operands was masked to zero */
+       or      a0, t0, a0      /* merge low word the same way */
+       jr      ra              /* result returned in a1:a0 */
+SYM_FUNC_END(__ashrti3)
+EXPORT_SYMBOL(__ashrti3)
+
+SYM_FUNC_START(__lshrti3)      /* 128-bit logical right shift, branch-free: a1:a0 (hi:lo) >>= n, n = a2; only bits 6:0 of n are used */
+       slli.d  t2, a1, 1       /* t2 = hi << 1: first half of a two-step shift that stays valid when (n & 63) == 0 */
+       nor     t3, zero, a2    /* t3 = ~n; the register shifts below use only its low 6 bits, i.e. 63 - (n & 63) */
+       srl.d   t1, a0, a2      /* t1 = lo >> (n & 63) */
+       sll.d   t2, t2, t3      /* t2 = hi << (64 - (n & 63)), or 0 when (n & 63) == 0: bits carried from hi into lo */
+       andi    t0, a2, 64      /* t0 != 0 iff bit 6 of n is set, i.e. the shift is 64 or more */
+       srl.d   a1, a1, a2      /* a1 = hi >> (n & 63), zero-filling */
+       or      t1, t2, t1      /* t1 = low word of the result when the shift is below 64 */
+       maskeqz a0, a1, t0      /* a0 = t0 ? (hi >> (n & 63)) : 0  -- low word when the shift is 64 or more */
+       masknez a1, a1, t0      /* a1 = t0 ? 0 : (hi >> n)         -- high word is zero when the shift is 64 or more */
+       masknez t0, t1, t0      /* t0 = t0 ? 0 : t1                -- keep the below-64 low word only in that case */
+       or      a0, t0, a0      /* merge: exactly one of the two low-word candidates survived the masking */
+       jr      ra              /* result returned in a1:a0 */
+SYM_FUNC_END(__lshrti3)
+EXPORT_SYMBOL(__lshrti3)