s390: add support for BEAR enhancement facility
author Sven Schnelle <svens@linux.ibm.com>
Wed, 7 Apr 2021 07:20:17 +0000 (09:20 +0200)
committer Vasily Gorbik <gor@linux.ibm.com>
Tue, 26 Oct 2021 13:21:29 +0000 (15:21 +0200)
The Breaking-Event-Address-Register (BEAR) stores the address of the last
breaking-event instruction. Breaking events are usually instructions that
change the program flow - for example branches, and instructions that modify
the address in the PSW, like lpswe. This is useful for debugging wild
branches, because one can easily figure out where the wild branch
originated.

What is problematic is that lpswe itself is considered a breaking event and
therefore overwrites BEAR on kernel exit. The BEAR enhancement facility adds
new instructions that allow saving and restoring BEAR, as well as an lpswey
instruction that doesn't cause a breaking event. This makes it possible to
save BEAR on kernel entry and restore it on exit to user space.
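
Conceptually, the entry/exit handling becomes the following (illustrative
sketch only, using the STBEAR/LBEAR/LPSWEY mnemonics; the actual entry.S
code below uses raw .insn encodings and wraps them in ALTERNATIVE so they
are only executed when facility 193 is installed):

	# kernel entry (syscall/interrupt): save BEAR before any branch
	# in kernel code can overwrite it
	stbear	__LC_LAST_BREAK
	...
	# kernel exit: restore the saved value and return to user space
	# with lpswey, which does not count as a breaking event
	lbear	__PT_LAST_BREAK(%r11)
	lmg	%r0,%r15,__PT_R0(%r11)
	lpswey	__LC_RETURN_PSW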

Signed-off-by: Sven Schnelle <svens@linux.ibm.com>
Reviewed-by: Heiko Carstens <hca@linux.ibm.com>
Signed-off-by: Vasily Gorbik <gor@linux.ibm.com>
arch/s390/include/asm/cpu.h
arch/s390/include/asm/lowcore.h
arch/s390/kernel/asm-offsets.c
arch/s390/kernel/entry.S
arch/s390/kernel/irq.c
arch/s390/kernel/process.c
arch/s390/kernel/setup.c
arch/s390/kernel/syscall.c
arch/s390/kernel/traps.c
arch/s390/mm/dump_pagetables.c
arch/s390/mm/vmem.c

diff --git a/arch/s390/include/asm/cpu.h b/arch/s390/include/asm/cpu.h
index 62228a884e0632b6f0bd7d83e266862bb34e78e9..26c710cd34859f5452daedbeea3b1557a5956208 100644
@@ -12,6 +12,7 @@
 #ifndef __ASSEMBLY__
 
 #include <linux/types.h>
+#include <linux/jump_label.h>
 
 struct cpuid
 {
@@ -21,5 +22,7 @@ struct cpuid
        unsigned int unused  : 16;
 } __attribute__ ((packed, aligned(8)));
 
+DECLARE_STATIC_KEY_FALSE(cpu_has_bear);
+
 #endif /* __ASSEMBLY__ */
 #endif /* _ASM_S390_CPU_H */
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 1129a1e93e80d6d2b2ab47a2d6efd5421d7b4395..1262f5003acfff7b912d96d2350a40f5cff997cf 100644
@@ -93,9 +93,10 @@ struct lowcore {
        psw_t   return_psw;                     /* 0x0290 */
        psw_t   return_mcck_psw;                /* 0x02a0 */
 
+       __u64   last_break;                     /* 0x02b0 */
+
        /* CPU accounting and timing values. */
-       __u64   sys_enter_timer;                /* 0x02b0 */
-       __u8    pad_0x02b8[0x02c0-0x02b8];      /* 0x02b8 */
+       __u64   sys_enter_timer;                /* 0x02b8 */
        __u64   mcck_enter_timer;               /* 0x02c0 */
        __u64   exit_timer;                     /* 0x02c8 */
        __u64   user_timer;                     /* 0x02d0 */
@@ -188,7 +189,7 @@ struct lowcore {
        __u32   tod_progreg_save_area;          /* 0x1324 */
        __u32   cpu_timer_save_area[2];         /* 0x1328 */
        __u32   clock_comp_save_area[2];        /* 0x1330 */
-       __u8    pad_0x1338[0x1340-0x1338];      /* 0x1338 */
+       __u64   last_break_save_area;           /* 0x1338 */
        __u32   access_regs_save_area[16];      /* 0x1340 */
        __u64   cregs_save_area[16];            /* 0x1380 */
        __u8    pad_0x1400[0x1800-0x1400];      /* 0x1400 */
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 44eb79e4299e1d721fb673c7d72e9e2ff47f6fcf..28177e4f52cc3acaf9b51b73207d4200e3e0403d 100644
@@ -35,6 +35,7 @@ int main(void)
        OFFSET(__PT_ORIG_GPR2, pt_regs, orig_gpr2);
        OFFSET(__PT_FLAGS, pt_regs, flags);
        OFFSET(__PT_CR1, pt_regs, cr1);
+       OFFSET(__PT_LAST_BREAK, pt_regs, last_break);
        DEFINE(__PT_SIZE, sizeof(struct pt_regs));
        BLANK();
        /* stack_frame offsets */
@@ -127,6 +128,7 @@ int main(void)
        OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count);
        OFFSET(__LC_GMAP, lowcore, gmap);
        OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline);
+       OFFSET(__LC_LAST_BREAK, lowcore, last_break);
        /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
        OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
        /* hardware defined lowcore locations 0x1000 - 0x18ff */
@@ -140,6 +142,7 @@ int main(void)
        OFFSET(__LC_TOD_PROGREG_SAVE_AREA, lowcore, tod_progreg_save_area);
        OFFSET(__LC_CPU_TIMER_SAVE_AREA, lowcore, cpu_timer_save_area);
        OFFSET(__LC_CLOCK_COMP_SAVE_AREA, lowcore, clock_comp_save_area);
+       OFFSET(__LC_LAST_BREAK_SAVE_AREA, lowcore, last_break_save_area);
        OFFSET(__LC_AREGS_SAVE_AREA, lowcore, access_regs_save_area);
        OFFSET(__LC_CREGS_SAVE_AREA, lowcore, cregs_save_area);
        OFFSET(__LC_PGM_TDB, lowcore, pgm_tdb);
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 4c9b967290ae059ae4bb486e05946aa6792842f2..01bae1d51113b2409a78549f63266327447d0870 100644
@@ -52,6 +52,22 @@ STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
 
 _LPP_OFFSET    = __LC_LPP
 
+       .macro STBEAR address
+       ALTERNATIVE "", ".insn  s,0xb2010000,\address", 193
+       .endm
+
+       .macro LBEAR address
+       ALTERNATIVE "", ".insn  s,0xb2000000,\address", 193
+       .endm
+
+       .macro LPSWEY address,lpswe
+       ALTERNATIVE "b \lpswe", ".insn siy,0xeb0000000071,\address,0", 193
+       .endm
+
+       .macro MBEAR reg
+       ALTERNATIVE "", __stringify(mvc __PT_LAST_BREAK(8,\reg),__LC_LAST_BREAK), 193
+       .endm
+
        .macro  CHECK_STACK savearea
 #ifdef CONFIG_CHECK_STACK
        tml     %r15,STACK_SIZE - CONFIG_STACK_GUARD
@@ -302,6 +318,7 @@ ENTRY(system_call)
        BPOFF
        lghi    %r14,0
 .Lsysc_per:
+       STBEAR  __LC_LAST_BREAK
        lctlg   %c1,%c1,__LC_KERNEL_ASCE
        lg      %r12,__LC_CURRENT
        lg      %r15,__LC_KERNEL_STACK
@@ -321,14 +338,16 @@ ENTRY(system_call)
        xgr     %r11,%r11
        la      %r2,STACK_FRAME_OVERHEAD(%r15)  # pointer to pt_regs
        mvc     __PT_R8(64,%r2),__LC_SAVE_AREA_SYNC
+       MBEAR   %r2
        lgr     %r3,%r14
        brasl   %r14,__do_syscall
        lctlg   %c1,%c1,__LC_USER_ASCE
        mvc     __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
        BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
+       LBEAR   STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
        lmg     %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
        stpt    __LC_EXIT_TIMER
-       b       __LC_RETURN_LPSWE
+       LPSWEY  __LC_RETURN_PSW,__LC_RETURN_LPSWE
 ENDPROC(system_call)
 
 #
@@ -340,9 +359,10 @@ ENTRY(ret_from_fork)
        lctlg   %c1,%c1,__LC_USER_ASCE
        mvc     __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
        BPEXIT __TI_flags(%r12),_TIF_ISOLATE_BP
+       LBEAR   STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
        lmg     %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
        stpt    __LC_EXIT_TIMER
-       b       __LC_RETURN_LPSWE
+       LPSWEY  __LC_RETURN_PSW,__LC_RETURN_LPSWE
 ENDPROC(ret_from_fork)
 
 /*
@@ -382,6 +402,7 @@ ENTRY(pgm_check_handler)
        xc      __SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
        stmg    %r0,%r7,__PT_R0(%r11)
        mvc     __PT_R8(64,%r11),__LC_SAVE_AREA_SYNC
+       mvc     __PT_LAST_BREAK(8,%r11),__LC_PGM_LAST_BREAK
        stmg    %r8,%r9,__PT_PSW(%r11)
 
        # clear user controlled registers to prevent speculative use
@@ -401,8 +422,9 @@ ENTRY(pgm_check_handler)
        stpt    __LC_EXIT_TIMER
 .Lpgm_exit_kernel:
        mvc     __LC_RETURN_PSW(16),STACK_FRAME_OVERHEAD+__PT_PSW(%r15)
+       LBEAR   STACK_FRAME_OVERHEAD+__PT_LAST_BREAK(%r15)
        lmg     %r0,%r15,STACK_FRAME_OVERHEAD+__PT_R0(%r15)
-       b       __LC_RETURN_LPSWE
+       LPSWEY  __LC_RETURN_PSW,__LC_RETURN_LPSWE
 
 #
 # single stepped system call
@@ -412,7 +434,8 @@ ENTRY(pgm_check_handler)
        larl    %r14,.Lsysc_per
        stg     %r14,__LC_RETURN_PSW+8
        lghi    %r14,1
-       lpswe   __LC_RETURN_PSW         # branch to .Lsysc_per
+       LBEAR   __LC_PGM_LAST_BREAK
+       LPSWEY  __LC_RETURN_PSW,__LC_RETURN_LPSWE # branch to .Lsysc_per
 ENDPROC(pgm_check_handler)
 
 /*
@@ -422,6 +445,7 @@ ENDPROC(pgm_check_handler)
 ENTRY(\name)
        STCK    __LC_INT_CLOCK
        stpt    __LC_SYS_ENTER_TIMER
+       STBEAR  __LC_LAST_BREAK
        BPOFF
        stmg    %r8,%r15,__LC_SAVE_AREA_ASYNC
        lg      %r12,__LC_CURRENT
@@ -453,6 +477,7 @@ ENTRY(\name)
        xgr     %r10,%r10
        xc      __PT_FLAGS(8,%r11),__PT_FLAGS(%r11)
        mvc     __PT_R8(64,%r11),__LC_SAVE_AREA_ASYNC
+       MBEAR   %r11
        stmg    %r8,%r9,__PT_PSW(%r11)
        tm      %r8,0x0001              # coming from user space?
        jno     1f
@@ -465,8 +490,9 @@ ENTRY(\name)
        lctlg   %c1,%c1,__LC_USER_ASCE
        BPEXIT  __TI_flags(%r12),_TIF_ISOLATE_BP
        stpt    __LC_EXIT_TIMER
-2:     lmg     %r0,%r15,__PT_R0(%r11)
-       b       __LC_RETURN_LPSWE
+2:     LBEAR   __PT_LAST_BREAK(%r11)
+       lmg     %r0,%r15,__PT_R0(%r11)
+       LPSWEY  __LC_RETURN_PSW,__LC_RETURN_LPSWE
 ENDPROC(\name)
 .endm
 
@@ -505,6 +531,7 @@ ENTRY(mcck_int_handler)
        BPOFF
        la      %r1,4095                # validate r1
        spt     __LC_CPU_TIMER_SAVE_AREA-4095(%r1)      # validate cpu timer
+       LBEAR   __LC_LAST_BREAK_SAVE_AREA-4095(%r1)             # validate bear
        lmg     %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# validate gprs
        lg      %r12,__LC_CURRENT
        lmg     %r8,%r9,__LC_MCK_OLD_PSW
@@ -591,8 +618,10 @@ ENTRY(mcck_int_handler)
        jno     0f
        BPEXIT  __TI_flags(%r12),_TIF_ISOLATE_BP
        stpt    __LC_EXIT_TIMER
-0:     lmg     %r11,%r15,__PT_R11(%r11)
-       b       __LC_RETURN_MCCK_LPSWE
+0:     ALTERNATIVE "", __stringify(lghi %r12,__LC_LAST_BREAK_SAVE_AREA),193
+       LBEAR   0(%r12)
+       lmg     %r11,%r15,__PT_R11(%r11)
+       LPSWEY  __LC_RETURN_MCCK_PSW,__LC_RETURN_MCCK_LPSWE
 
 .Lmcck_panic:
        /*
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 3a3145c4a3ba4dfd612d1f8ecb1adbd701a82a4b..0df83ecaa2e0c0c6e94199fe33b4f6db64baa798 100644
@@ -140,8 +140,11 @@ void noinstr do_io_irq(struct pt_regs *regs)
 
        irq_enter();
 
-       if (user_mode(regs))
+       if (user_mode(regs)) {
                update_timer_sys();
+               if (static_branch_likely(&cpu_has_bear))
+                       current->thread.last_break = regs->last_break;
+       }
 
        from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit;
        if (from_idle)
@@ -171,8 +174,11 @@ void noinstr do_ext_irq(struct pt_regs *regs)
 
        irq_enter();
 
-       if (user_mode(regs))
+       if (user_mode(regs)) {
                update_timer_sys();
+               if (static_branch_likely(&cpu_has_bear))
+                       current->thread.last_break = regs->last_break;
+       }
 
        regs->int_code = S390_lowcore.ext_int_code_addr;
        regs->int_parm = S390_lowcore.ext_params;
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 350e94d0cac23a4645ca5fcb7b63b46e662f55c2..e6b9b4753fd3f69094be71b37bc638485712e0df 100644
@@ -141,7 +141,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
                frame->childregs.gprs[10] = arg;
                frame->childregs.gprs[11] = (unsigned long)do_exit;
                frame->childregs.orig_gpr2 = -1;
-
+               frame->childregs.last_break = 1;
                return 0;
        }
        frame->childregs = *current_pt_regs();
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 860a4e6ebaf953d883b26a4f2aa04e76a9131745..e738a45057aca572b50708ed704291f271dad6f0 100644
@@ -174,6 +174,8 @@ unsigned long MODULES_END;
 struct lowcore *lowcore_ptr[NR_CPUS];
 EXPORT_SYMBOL(lowcore_ptr);
 
+DEFINE_STATIC_KEY_FALSE(cpu_has_bear);
+
 /*
  * The Write Back bit position in the physaddr is given by the SLPC PCI.
  * Leaving the mask zero always uses write through which is safe
@@ -1038,6 +1040,9 @@ void __init setup_arch(char **cmdline_p)
        smp_detect_cpus();
        topology_init_early();
 
+       if (test_facility(193))
+               static_branch_enable(&cpu_has_bear);
+
        /*
         * Create kernel page tables and switch to virtual addressing.
         */
diff --git a/arch/s390/kernel/syscall.c b/arch/s390/kernel/syscall.c
index 8fe2d23b64f439fd92400b8cd04ecfac68f95bf5..dc2355c623d6ea1edf86c0e5f137db89bf9b07b7 100644
@@ -154,6 +154,8 @@ void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
        regs->psw = S390_lowcore.svc_old_psw;
        regs->int_code = S390_lowcore.svc_int_code;
        update_timer_sys();
+       if (static_branch_likely(&cpu_has_bear))
+               current->thread.last_break = regs->last_break;
 
        local_irq_enable();
        regs->orig_gpr2 = regs->gprs[2];
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index d32a6ee7b0dd30b344075384d2b50503bac47b09..6c6f7dcce1a510b67b772ed5798e9481767e3db0 100644
@@ -300,7 +300,6 @@ static void (*pgm_check_table[128])(struct pt_regs *regs);
 
 void noinstr __do_pgm_check(struct pt_regs *regs)
 {
-       unsigned long last_break = S390_lowcore.pgm_last_break;
        unsigned int trapnr;
        irqentry_state_t state;
 
@@ -311,10 +310,11 @@ void noinstr __do_pgm_check(struct pt_regs *regs)
 
        if (user_mode(regs)) {
                update_timer_sys();
-               if (last_break < 4096)
-                       last_break = 1;
-               current->thread.last_break = last_break;
-               regs->last_break = last_break;
+               if (!static_branch_likely(&cpu_has_bear)) {
+                       if (regs->last_break < 4096)
+                               regs->last_break = 1;
+               }
+               current->thread.last_break = regs->last_break;
        }
 
        if (S390_lowcore.pgm_code & 0x0200) {
diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c
index 0b0c8c2849530c4f0294068c8325c57db77f9dba..9f9af5298dd6e33bdd7f4f698ed8e81ac074d29f 100644
@@ -8,6 +8,7 @@
 #include <linux/kasan.h>
 #include <asm/ptdump.h>
 #include <asm/kasan.h>
+#include <asm/nospec-branch.h>
 #include <asm/sections.h>
 
 static unsigned long max_addr;
@@ -116,8 +117,13 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr)
                return;
        if (st->current_prot & _PAGE_NOEXEC)
                return;
-       /* The first lowcore page is currently still W+X. */
-       if (addr == PAGE_SIZE)
+       /*
+        * The first lowcore page is W+X if spectre mitigations are using
+        * trampolines or the BEAR enhancements facility is not installed,
+        * in which case we have two lpswe instructions in lowcore that need
+        * to be executable.
+        */
+       if (addr == PAGE_SIZE && (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)))
                return;
        WARN_ONCE(1, "s390/mm: Found insecure W+X mapping at address %pS\n",
                  (void *)st->start_address);
@@ -203,7 +209,9 @@ void ptdump_check_wx(void)
        if (st.wx_pages)
                pr_warn("Checked W+X mappings: FAILED, %lu W+X pages found\n", st.wx_pages);
        else
-               pr_info("Checked W+X mappings: passed, no unexpected W+X pages found\n");
+               pr_info("Checked W+X mappings: passed, no %sW+X pages found\n",
+                       (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) ?
+                       "unexpected " : "");
 }
 #endif /* CONFIG_DEBUG_WX */
 
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
index 2b1c6d916cf9c651325da83569df9830f5aed0db..7d9705eeb02f1f530dbebdf4b1d7c9351d2e4bf3 100644
@@ -13,6 +13,7 @@
 #include <linux/hugetlb.h>
 #include <linux/slab.h>
 #include <asm/cacheflush.h>
+#include <asm/nospec-branch.h>
 #include <asm/pgalloc.h>
 #include <asm/setup.h>
 #include <asm/tlbflush.h>
@@ -584,8 +585,13 @@ void __init vmem_map_init(void)
        __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
                     SET_MEMORY_RO | SET_MEMORY_X);
 
-       /* we need lowcore executable for our LPSWE instructions */
-       set_memory_x(0, 1);
+       if (nospec_uses_trampoline() || !static_key_enabled(&cpu_has_bear)) {
+               /*
+                * Lowcore must be executable for LPSWE
+                * and expoline trampoline branch instructions.
+                */
+               set_memory_x(0, 1);
+       }
 
        pr_info("Write protected kernel read-only data: %luk\n",
                (unsigned long)(__end_rodata - _stext) >> 10);