x86/fred: FRED entry/exit and dispatch code
authorH. Peter Anvin (Intel) <hpa@zytor.com>
Sat, 9 Dec 2023 21:42:14 +0000 (13:42 -0800)
committerBorislav Petkov (AMD) <bp@alien8.de>
Wed, 31 Jan 2024 21:02:31 +0000 (22:02 +0100)
The code to actually handle kernel and event entry/exit using
FRED. It is split up into two files thus:

 - entry_64_fred.S contains the actual entrypoints and exit code, and
   saves and restores registers.

 - entry_fred.c contains the two-level event dispatch code for FRED.
   The first-level dispatch is on the event type, and the second-level
   is on the event vector.

  [ bp: Fold in an allmodconfig clang build fix:
    https://lore.kernel.org/r/20240129064521.5168-1-xin3.li@intel.com
    and a CONFIG_IA32_EMULATION=n build fix:
    https://lore.kernel.org/r/20240127093728.1323-3-xin3.li@intel.com]

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Originally-by: Megha Dey <megha.dey@intel.com>
Co-developed-by: Xin Li <xin3.li@intel.com>
Signed-off-by: H. Peter Anvin (Intel) <hpa@zytor.com>
Signed-off-by: Xin Li <xin3.li@intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
Tested-by: Shan Kang <shan.kang@intel.com>
Link: https://lore.kernel.org/r/20231209214214.2932-1-xin3.li@intel.com
arch/x86/entry/Makefile
arch/x86/entry/entry_64_fred.S [new file with mode: 0644]
arch/x86/entry/entry_fred.c [new file with mode: 0644]
arch/x86/include/asm/asm-prototypes.h
arch/x86/include/asm/fred.h
arch/x86/include/asm/ia32.h

index ca2fe186994b0a653a525583e18b90b0715f3fa7..c93e7f5c2a065233a04637702d5df401553ed6cc 100644 (file)
@@ -18,6 +18,9 @@ obj-y                         += vdso/
 obj-y                          += vsyscall/
 
 obj-$(CONFIG_PREEMPTION)       += thunk_$(BITS).o
+CFLAGS_entry_fred.o            += -fno-stack-protector
+CFLAGS_REMOVE_entry_fred.o     += -pg $(CC_FLAGS_FTRACE)
+obj-$(CONFIG_X86_FRED)         += entry_64_fred.o entry_fred.o
+
 obj-$(CONFIG_IA32_EMULATION)   += entry_64_compat.o syscall_32.o
 obj-$(CONFIG_X86_X32_ABI)      += syscall_x32.o
-
diff --git a/arch/x86/entry/entry_64_fred.S b/arch/x86/entry/entry_64_fred.S
new file mode 100644 (file)
index 0000000..c1ddaf6
--- /dev/null
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * The actual FRED entry points.
+ */
+
+#include <asm/fred.h>
+
+#include "calling.h"
+
+       .code64
+       .section .noinstr.text, "ax"
+
+.macro FRED_ENTER
+       UNWIND_HINT_END_OF_STACK
+       ENDBR
+       PUSH_AND_CLEAR_REGS
+       movq    %rsp, %rdi      /* %rdi -> pt_regs */
+.endm
+
+.macro FRED_EXIT
+       UNWIND_HINT_REGS
+       POP_REGS
+.endm
+
+/*
+ * The new RIP value that FRED event delivery establishes is
+ * IA32_FRED_CONFIG & ~FFFH for events that occur in ring 3.
+ * Thus the FRED ring 3 entry point must be 4K page aligned.
+ */
+       .align 4096
+
+SYM_CODE_START_NOALIGN(asm_fred_entrypoint_user)
+       FRED_ENTER
+       call    fred_entry_from_user
+       FRED_EXIT
+       ERETU
+SYM_CODE_END(asm_fred_entrypoint_user)
+
+/*
+ * The new RIP value that FRED event delivery establishes is
+ * (IA32_FRED_CONFIG & ~FFFH) + 256 for events that occur in
+ * ring 0, i.e., asm_fred_entrypoint_user + 256.
+ */
+       .org asm_fred_entrypoint_user + 256, 0xcc
+SYM_CODE_START_NOALIGN(asm_fred_entrypoint_kernel)
+       FRED_ENTER
+       call    fred_entry_from_kernel
+       FRED_EXIT
+       ERETS
+SYM_CODE_END(asm_fred_entrypoint_kernel)
diff --git a/arch/x86/entry/entry_fred.c b/arch/x86/entry/entry_fred.c
new file mode 100644 (file)
index 0000000..125b623
--- /dev/null
@@ -0,0 +1,245 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * The FRED specific kernel/user entry functions which are invoked from
+ * assembly code and dispatch to the associated handlers.
+ */
+#include <linux/kernel.h>
+#include <linux/kdebug.h>
+#include <linux/nospec.h>
+
+#include <asm/desc.h>
+#include <asm/fred.h>
+#include <asm/idtentry.h>
+#include <asm/syscall.h>
+#include <asm/trapnr.h>
+#include <asm/traps.h>
+
+/* FRED EVENT_TYPE_OTHER vector numbers */
+#define FRED_SYSCALL                   1
+#define FRED_SYSENTER                  2
+
+static noinstr void fred_bad_type(struct pt_regs *regs, unsigned long error_code)
+{
+       irqentry_state_t irq_state = irqentry_nmi_enter(regs);
+
+       instrumentation_begin();
+
+       /* Panic on events from a high stack level */
+       if (regs->fred_cs.sl > 0) {
+               pr_emerg("PANIC: invalid or fatal FRED event; event type %u "
+                        "vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n",
+                        regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax,
+                        fred_event_data(regs), regs->cs, regs->ip);
+               die("invalid or fatal FRED event", regs, regs->orig_ax);
+               panic("invalid or fatal FRED event");
+       } else {
+               unsigned long flags = oops_begin();
+               int sig = SIGKILL;
+
+               pr_alert("BUG: invalid or fatal FRED event; event type %u "
+                        "vector %u error 0x%lx aux 0x%lx at %04x:%016lx\n",
+                        regs->fred_ss.type, regs->fred_ss.vector, regs->orig_ax,
+                        fred_event_data(regs), regs->cs, regs->ip);
+
+               if (__die("Invalid or fatal FRED event", regs, regs->orig_ax))
+                       sig = 0;
+
+               oops_end(flags, regs, sig);
+       }
+
+       instrumentation_end();
+       irqentry_nmi_exit(regs, irq_state);
+}
+
+static noinstr void fred_intx(struct pt_regs *regs)
+{
+       switch (regs->fred_ss.vector) {
+       /* Opcode 0xcd, 0x3, NOT INT3 (opcode 0xcc) */
+       case X86_TRAP_BP:
+               return exc_int3(regs);
+
+       /* Opcode 0xcd, 0x4, NOT INTO (opcode 0xce) */
+       case X86_TRAP_OF:
+               return exc_overflow(regs);
+
+#ifdef CONFIG_IA32_EMULATION
+       /* INT80 */
+       case IA32_SYSCALL_VECTOR:
+               if (ia32_enabled())
+                       return int80_emulation(regs);
+               fallthrough;
+#endif
+
+       default:
+               return exc_general_protection(regs, 0);
+       }
+}
+
+static __always_inline void fred_other(struct pt_regs *regs)
+{
+       /* The compiler can fold these conditions into a single test */
+       if (likely(regs->fred_ss.vector == FRED_SYSCALL && regs->fred_ss.lm)) {
+               regs->orig_ax = regs->ax;
+               regs->ax = -ENOSYS;
+               do_syscall_64(regs, regs->orig_ax);
+               return;
+       } else if (ia32_enabled() &&
+                  likely(regs->fred_ss.vector == FRED_SYSENTER && !regs->fred_ss.lm)) {
+               regs->orig_ax = regs->ax;
+               regs->ax = -ENOSYS;
+               do_fast_syscall_32(regs);
+               return;
+       } else {
+               exc_invalid_op(regs);
+               return;
+       }
+}
+
+#define SYSVEC(_vector, _function) [_vector - FIRST_SYSTEM_VECTOR] = fred_sysvec_##_function
+
+static idtentry_t sysvec_table[NR_SYSTEM_VECTORS] __ro_after_init = {
+       SYSVEC(ERROR_APIC_VECTOR,               error_interrupt),
+       SYSVEC(SPURIOUS_APIC_VECTOR,            spurious_apic_interrupt),
+       SYSVEC(LOCAL_TIMER_VECTOR,              apic_timer_interrupt),
+       SYSVEC(X86_PLATFORM_IPI_VECTOR,         x86_platform_ipi),
+
+       SYSVEC(RESCHEDULE_VECTOR,               reschedule_ipi),
+       SYSVEC(CALL_FUNCTION_SINGLE_VECTOR,     call_function_single),
+       SYSVEC(CALL_FUNCTION_VECTOR,            call_function),
+       SYSVEC(REBOOT_VECTOR,                   reboot),
+
+       SYSVEC(THRESHOLD_APIC_VECTOR,           threshold),
+       SYSVEC(DEFERRED_ERROR_VECTOR,           deferred_error),
+       SYSVEC(THERMAL_APIC_VECTOR,             thermal),
+
+       SYSVEC(IRQ_WORK_VECTOR,                 irq_work),
+
+       SYSVEC(POSTED_INTR_VECTOR,              kvm_posted_intr_ipi),
+       SYSVEC(POSTED_INTR_WAKEUP_VECTOR,       kvm_posted_intr_wakeup_ipi),
+       SYSVEC(POSTED_INTR_NESTED_VECTOR,       kvm_posted_intr_nested_ipi),
+};
+
+static noinstr void fred_extint(struct pt_regs *regs)
+{
+       unsigned int vector = regs->fred_ss.vector;
+       unsigned int index = array_index_nospec(vector - FIRST_SYSTEM_VECTOR,
+                                               NR_SYSTEM_VECTORS);
+
+       if (WARN_ON_ONCE(vector < FIRST_EXTERNAL_VECTOR))
+               return;
+
+       if (likely(vector >= FIRST_SYSTEM_VECTOR)) {
+               irqentry_state_t state = irqentry_enter(regs);
+
+               instrumentation_begin();
+               sysvec_table[index](regs);
+               instrumentation_end();
+               irqentry_exit(regs, state);
+       } else {
+               common_interrupt(regs, vector);
+       }
+}
+
+static noinstr void fred_hwexc(struct pt_regs *regs, unsigned long error_code)
+{
+       /* Optimize for #PF. That's the only exception which matters performance wise */
+       if (likely(regs->fred_ss.vector == X86_TRAP_PF))
+               return exc_page_fault(regs, error_code);
+
+       switch (regs->fred_ss.vector) {
+       case X86_TRAP_DE: return exc_divide_error(regs);
+       case X86_TRAP_DB: return fred_exc_debug(regs);
+       case X86_TRAP_BR: return exc_bounds(regs);
+       case X86_TRAP_UD: return exc_invalid_op(regs);
+       case X86_TRAP_NM: return exc_device_not_available(regs);
+       case X86_TRAP_DF: return exc_double_fault(regs, error_code);
+       case X86_TRAP_TS: return exc_invalid_tss(regs, error_code);
+       case X86_TRAP_NP: return exc_segment_not_present(regs, error_code);
+       case X86_TRAP_SS: return exc_stack_segment(regs, error_code);
+       case X86_TRAP_GP: return exc_general_protection(regs, error_code);
+       case X86_TRAP_MF: return exc_coprocessor_error(regs);
+       case X86_TRAP_AC: return exc_alignment_check(regs, error_code);
+       case X86_TRAP_XF: return exc_simd_coprocessor_error(regs);
+
+#ifdef CONFIG_X86_MCE
+       case X86_TRAP_MC: return fred_exc_machine_check(regs);
+#endif
+#ifdef CONFIG_INTEL_TDX_GUEST
+       case X86_TRAP_VE: return exc_virtualization_exception(regs);
+#endif
+#ifdef CONFIG_X86_CET
+       case X86_TRAP_CP: return exc_control_protection(regs, error_code);
+#endif
+       default: return fred_bad_type(regs, error_code);
+       }
+
+}
+
+static noinstr void fred_swexc(struct pt_regs *regs, unsigned long error_code)
+{
+       switch (regs->fred_ss.vector) {
+       case X86_TRAP_BP: return exc_int3(regs);
+       case X86_TRAP_OF: return exc_overflow(regs);
+       default: return fred_bad_type(regs, error_code);
+       }
+}
+
+__visible noinstr void fred_entry_from_user(struct pt_regs *regs)
+{
+       unsigned long error_code = regs->orig_ax;
+
+       /* Invalidate orig_ax so that syscall_get_nr() works correctly */
+       regs->orig_ax = -1;
+
+       switch (regs->fred_ss.type) {
+       case EVENT_TYPE_EXTINT:
+               return fred_extint(regs);
+       case EVENT_TYPE_NMI:
+               if (likely(regs->fred_ss.vector == X86_TRAP_NMI))
+                       return fred_exc_nmi(regs);
+               break;
+       case EVENT_TYPE_HWEXC:
+               return fred_hwexc(regs, error_code);
+       case EVENT_TYPE_SWINT:
+               return fred_intx(regs);
+       case EVENT_TYPE_PRIV_SWEXC:
+               if (likely(regs->fred_ss.vector == X86_TRAP_DB))
+                       return fred_exc_debug(regs);
+               break;
+       case EVENT_TYPE_SWEXC:
+               return fred_swexc(regs, error_code);
+       case EVENT_TYPE_OTHER:
+               return fred_other(regs);
+       default: break;
+       }
+
+       return fred_bad_type(regs, error_code);
+}
+
+__visible noinstr void fred_entry_from_kernel(struct pt_regs *regs)
+{
+       unsigned long error_code = regs->orig_ax;
+
+       /* Invalidate orig_ax so that syscall_get_nr() works correctly */
+       regs->orig_ax = -1;
+
+       switch (regs->fred_ss.type) {
+       case EVENT_TYPE_EXTINT:
+               return fred_extint(regs);
+       case EVENT_TYPE_NMI:
+               if (likely(regs->fred_ss.vector == X86_TRAP_NMI))
+                       return fred_exc_nmi(regs);
+               break;
+       case EVENT_TYPE_HWEXC:
+               return fred_hwexc(regs, error_code);
+       case EVENT_TYPE_PRIV_SWEXC:
+               if (likely(regs->fred_ss.vector == X86_TRAP_DB))
+                       return fred_exc_debug(regs);
+               break;
+       case EVENT_TYPE_SWEXC:
+               return fred_swexc(regs, error_code);
+       default: break;
+       }
+
+       return fred_bad_type(regs, error_code);
+}
index b1a98fa38828e2589416fe4445ed001831009bb0..076bf8dee70264f63d2a4842bca4be9c5f587f7e 100644 (file)
@@ -12,6 +12,7 @@
 #include <asm/special_insns.h>
 #include <asm/preempt.h>
 #include <asm/asm.h>
+#include <asm/fred.h>
 #include <asm/gsseg.h>
 
 #ifndef CONFIG_X86_CMPXCHG64
index f514fdb5a39f735266e479c31a126d3b995fa887..16a64ffecbf8d5512a7c3247b81900e2f5523dd3 100644 (file)
@@ -60,6 +60,12 @@ static __always_inline unsigned long fred_event_data(struct pt_regs *regs)
        return fred_info(regs)->edata;
 }
 
+void asm_fred_entrypoint_user(void);
+void asm_fred_entrypoint_kernel(void);
+
+__visible void fred_entry_from_user(struct pt_regs *regs);
+__visible void fred_entry_from_kernel(struct pt_regs *regs);
+
 #else /* CONFIG_X86_FRED */
 static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; }
 #endif /* CONFIG_X86_FRED */
index c7ef6ea2fa993c78864f460dace8ef86ad246b16..4212c00c9708d49253d8ebe3699578f14476408a 100644 (file)
@@ -69,7 +69,7 @@ extern void ia32_pick_mmap_layout(struct mm_struct *mm);
 
 extern bool __ia32_enabled;
 
-static inline bool ia32_enabled(void)
+static __always_inline bool ia32_enabled(void)
 {
        return __ia32_enabled;
 }
@@ -81,7 +81,7 @@ static inline void ia32_disable(void)
 
 #else /* !CONFIG_IA32_EMULATION */
 
-static inline bool ia32_enabled(void)
+static __always_inline bool ia32_enabled(void)
 {
        return IS_ENABLED(CONFIG_X86_32);
 }