RISC-V: hwprobe: Support probing of misaligned access performance
author Evan Green <evan@rivosinc.com>
Fri, 7 Apr 2023 23:11:01 +0000 (16:11 -0700)
committer Palmer Dabbelt <palmer@rivosinc.com>
Tue, 18 Apr 2023 22:48:16 +0000 (15:48 -0700)
This allows userspace to select various routines to use based on the
performance of misaligned access on the target hardware.

Rather than adding DT bindings, this change taps into the alternatives
mechanism used to probe CPU errata. Add a new function pointer alongside
the vendor-specific errata_patch_func() that probes for desirable errata
(otherwise known as "features"). Unlike the errata_patch_func(), this
function is called on each CPU as it comes up, so it can save
feature information per-CPU.

The T-head C906 has fast unaligned access, both as defined by GCC [1],
and in performing a basic benchmark, which determined that byte copies
are >50% slower than a misaligned word copy of the same data size (source
for this test at [2]):

bytecopy size f000 count 50000 offset 0 took 31664899 us
wordcopy size f000 count 50000 offset 0 took 5180919 us
wordcopy size f000 count 50000 offset 1 took 13416949 us

[1] https://github.com/gcc-mirror/gcc/blob/master/gcc/config/riscv/riscv.cc#L353
[2] https://pastebin.com/EPXvDHSW

Co-developed-by: Palmer Dabbelt <palmer@rivosinc.com>
Signed-off-by: Evan Green <evan@rivosinc.com>
Reviewed-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
Tested-by: Heiko Stuebner <heiko.stuebner@vrull.eu>
Reviewed-by: Conor Dooley <conor.dooley@microchip.com>
Reviewed-by: Paul Walmsley <paul.walmsley@sifive.com>
Link: https://lore.kernel.org/r/20230407231103.2622178-5-evan@rivosinc.com
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Documentation/riscv/hwprobe.rst
arch/riscv/errata/thead/errata.c
arch/riscv/include/asm/alternative.h
arch/riscv/include/asm/cpufeature.h
arch/riscv/include/asm/hwprobe.h
arch/riscv/include/uapi/asm/hwprobe.h
arch/riscv/kernel/alternative.c
arch/riscv/kernel/cpufeature.c
arch/riscv/kernel/smpboot.c
arch/riscv/kernel/sys_riscv.c

index 945d44683c40c63610c33528029ad7130d51b09e..9f0dd62dcb5db66536c1b7272ae73be45888e2b6 100644 (file)
@@ -63,3 +63,24 @@ The following keys are defined:
 
   * :c:macro:`RISCV_HWPROBE_IMA_C`: The C extension is supported, as defined
     by version 2.2 of the RISC-V ISA manual.
+
+* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
+  information about the selected set of processors.
+
+  * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
+    accesses is unknown.
+
+  * :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are
+    emulated via software, either in or below the kernel.  These accesses are
+    always extremely slow.
+
+  * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are supported
+    in hardware, but are slower than the corresponding aligned access
+    sequences.
+
+  * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are supported
+    in hardware and are faster than the corresponding aligned access
+    sequences.
+
+  * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are
+    not supported at all and will generate a misaligned address fault.
index 3b96a06d3c54461b4e5ad22046fb945d5e5cd106..5b6d62586a8bc737cf5279cf3278aed770974a15 100644 (file)
@@ -11,7 +11,9 @@
 #include <linux/uaccess.h>
 #include <asm/alternative.h>
 #include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
 #include <asm/errata_list.h>
+#include <asm/hwprobe.h>
 #include <asm/patch.h>
 #include <asm/vendorid_list.h>
 
@@ -115,3 +117,11 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al
        if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
                local_flush_icache_all();
 }
+
+/*
+ * T-Head vendor feature probe.
+ *
+ * Records RISCV_HWPROBE_MISALIGNED_FAST in the per-CPU
+ * misaligned_access_speed slot of @cpu when both @archid and @impid are
+ * zero; any other ID combination leaves the slot untouched.
+ *
+ * NOTE(review): presumably the all-zero IDs identify the C906 mentioned in
+ * the commit message -- confirm against the vendor documentation.
+ */
+void __init_or_module thead_feature_probe_func(unsigned int cpu,
+                                              unsigned long archid,
+                                              unsigned long impid)
+{
+       if (archid != 0 || impid != 0)
+               return;
+
+       per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST;
+}
index b8648d4f2ac1a8c492d741feda0e3d07d10e716c..b5774e24d4a3927ea0f108af0c4faf7a83e4e5ac 100644 (file)
@@ -28,6 +28,7 @@
 #define ALT_OLD_PTR(a)                 __ALT_PTR(a, old_offset)
 #define ALT_ALT_PTR(a)                 __ALT_PTR(a, alt_offset)
 
+void __init probe_vendor_features(unsigned int cpu);
 void __init apply_boot_alternatives(void);
 void __init apply_early_boot_alternatives(void);
 void apply_module_alternatives(void *start, size_t length);
@@ -55,11 +56,15 @@ void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
                             unsigned long archid, unsigned long impid,
                             unsigned int stage);
 
+void thead_feature_probe_func(unsigned int cpu, unsigned long archid,
+                             unsigned long impid);
+
 void riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end,
                                 unsigned int stage);
 
 #else /* CONFIG_RISCV_ALTERNATIVE */
 
+static inline void probe_vendor_features(unsigned int cpu) { }
 static inline void apply_boot_alternatives(void) { }
 static inline void apply_early_boot_alternatives(void) { }
 static inline void apply_module_alternatives(void *start, size_t length) { }
index 66ebaae449c8cbe8bc2cb85254d3bc06f9d452ee..808d5403f2ac1c31accf3c8f083664956e84748f 100644 (file)
@@ -18,4 +18,6 @@ struct riscv_cpuinfo {
 
 DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
 
+DECLARE_PER_CPU(long, misaligned_access_speed);
+
 #endif
index d717c80a64fff89996cb5d8a87780b72fc429a24..78936f4ff513307a59e62768838ae19a738f69d1 100644 (file)
@@ -8,6 +8,6 @@
 
 #include <uapi/asm/hwprobe.h>
 
-#define RISCV_HWPROBE_MAX_KEY 4
+#define RISCV_HWPROBE_MAX_KEY 5
 
 #endif
index 398e08f7e083c3be5de0321e09c4d7c8cef83d75..8d745a4ad8a2c3e60ba09c0a93497711bd56f6e8 100644 (file)
@@ -25,6 +25,13 @@ struct riscv_hwprobe {
 #define RISCV_HWPROBE_KEY_IMA_EXT_0    4
 #define                RISCV_HWPROBE_IMA_FD            (1 << 0)
 #define                RISCV_HWPROBE_IMA_C             (1 << 1)
+#define RISCV_HWPROBE_KEY_CPUPERF_0    5 /* bitmask of performance information for the selected CPUs */
+#define                RISCV_HWPROBE_MISALIGNED_UNKNOWN        (0 << 0) /* misaligned access performance is unknown */
+#define                RISCV_HWPROBE_MISALIGNED_EMULATED       (1 << 0) /* emulated in software, in or below the kernel */
+#define                RISCV_HWPROBE_MISALIGNED_SLOW           (2 << 0) /* supported in hardware, but slow */
+#define                RISCV_HWPROBE_MISALIGNED_FAST           (3 << 0) /* supported in hardware, and fast */
+#define                RISCV_HWPROBE_MISALIGNED_UNSUPPORTED    (4 << 0) /* not supported; raises a misaligned address fault */
+#define                RISCV_HWPROBE_MISALIGNED_MASK           (7 << 0) /* bits that hold the MISALIGNED_* value above */
 /* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
 
 #endif
index 2354c69dc7d1e6ae47e126b5739bac7ba9bee10a..fc65c9293ac562e80a6ebaf30209b4b11e47912e 100644 (file)
@@ -27,6 +27,8 @@ struct cpu_manufacturer_info_t {
        void (*patch_func)(struct alt_entry *begin, struct alt_entry *end,
                                  unsigned long archid, unsigned long impid,
                                  unsigned int stage);
+       void (*feature_probe_func)(unsigned int cpu, unsigned long archid,
+                                  unsigned long impid);
 };
 
 static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info)
@@ -41,6 +43,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
        cpu_mfr_info->imp_id = sbi_get_mimpid();
 #endif
 
+       cpu_mfr_info->feature_probe_func = NULL;
        switch (cpu_mfr_info->vendor_id) {
 #ifdef CONFIG_ERRATA_SIFIVE
        case SIFIVE_VENDOR_ID:
@@ -50,6 +53,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
 #ifdef CONFIG_ERRATA_THEAD
        case THEAD_VENDOR_ID:
                cpu_mfr_info->patch_func = thead_errata_patch_func;
+               cpu_mfr_info->feature_probe_func = thead_feature_probe_func;
                break;
 #endif
        default:
@@ -139,6 +143,20 @@ void riscv_alternative_fix_offsets(void *alt_ptr, unsigned int len,
        }
 }
 
+/*
+ * Called on each CPU as it starts.
+ *
+ * Fills in the manufacturer info and, when the vendor registered a
+ * feature_probe_func, invokes it with @cpu plus the architecture and
+ * implementation IDs that were read. Vendors without a probe hook are
+ * silently skipped.
+ *
+ * NOTE(review): the prototype in alternative.h is marked __init while
+ * smp_callin() also calls this function -- confirm that no CPU can be
+ * brought up after init memory has been released.
+ */
+void __init_or_module probe_vendor_features(unsigned int cpu)
+{
+       struct cpu_manufacturer_info_t mfr;
+
+       riscv_fill_cpu_mfr_info(&mfr);
+       if (mfr.feature_probe_func)
+               mfr.feature_probe_func(cpu, mfr.arch_id, mfr.imp_id);
+}
+
 /*
  * This is called very early in the boot process (directly after we run
  * a feature detect on the boot CPU). No need to worry about other CPUs
@@ -193,6 +211,7 @@ void __init apply_boot_alternatives(void)
        /* If called on non-boot cpu things could go wrong */
        WARN_ON(smp_processor_id() != 0);
 
+       probe_vendor_features(0);
        _apply_alternatives((struct alt_entry *)__alt_start,
                            (struct alt_entry *)__alt_end,
                            RISCV_ALTERNATIVES_BOOT);
index 59d58ee0f68d6db2fa8799a96f849edd99d793e7..8bbc89351050e2d74983c780458d4c2ca248ad7c 100644 (file)
@@ -30,6 +30,9 @@ unsigned long elf_hwcap __read_mostly;
 /* Host ISA bitmap */
 static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
 
+/* Performance information */
+DEFINE_PER_CPU(long, misaligned_access_speed);
+
 /**
  * riscv_isa_extension_base() - Get base extension word
  *
index ddb2afba6d25558c26748f8f63dd1dc1702b5e89..2867c12c3d1681fe509a1b50ee8c866ed6891ba1 100644 (file)
@@ -168,6 +168,7 @@ asmlinkage __visible void smp_callin(void)
        notify_cpu_starting(curr_cpuid);
        numa_add_cpu(curr_cpuid);
        set_cpu_online(curr_cpuid, 1);
+       probe_vendor_features(curr_cpuid);
 
        /*
         * Remote TLB flushes are ignored while the CPU is offline, so emit
index 5ca567cef14204af3bf67fc9f396c36156498656..55389e7595f60d9a6f0c934cdbdeb9a88954e530 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <linux/syscalls.h>
 #include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
 #include <asm/hwprobe.h>
 #include <asm/sbi.h>
 #include <asm/switch_to.h>
@@ -117,6 +118,29 @@ static void hwprobe_arch_id(struct riscv_hwprobe *pair,
        pair->value = id;
 }
 
+/*
+ * Report the misaligned access speed shared by every CPU in @cpus.
+ *
+ * Returns the common per-CPU misaligned_access_speed value when all CPUs
+ * in the mask agree. Returns RISCV_HWPROBE_MISALIGNED_UNKNOWN when the
+ * mask is empty or as soon as two CPUs disagree.
+ */
+static u64 hwprobe_misaligned(const struct cpumask *cpus)
+{
+       const u64 unset = -1ULL;        /* sentinel: no CPU examined yet */
+       u64 common = unset;
+       int cpu;
+
+       for_each_cpu(cpu, cpus) {
+               int speed = per_cpu(misaligned_access_speed, cpu);
+
+               /* The first CPU seen establishes the reference value. */
+               if (common == unset) {
+                       common = speed;
+                       continue;
+               }
+
+               /* Mixed speeds cannot be summarised by a single value. */
+               if (common != speed)
+                       return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+       }
+
+       return common == unset ? RISCV_HWPROBE_MISALIGNED_UNKNOWN : common;
+}
+
 static void hwprobe_one_pair(struct riscv_hwprobe *pair,
                             const struct cpumask *cpus)
 {
@@ -146,6 +170,10 @@ static void hwprobe_one_pair(struct riscv_hwprobe *pair,
 
                break;
 
+       case RISCV_HWPROBE_KEY_CPUPERF_0:
+               pair->value = hwprobe_misaligned(cpus);
+               break;
+
        /*
         * For forward compatibility, unknown keys don't fail the whole
         * call, but get their element key set to -1 and value set to 0