CFLAGS_sclp_early_core.o += -I$(srctree)/drivers/s390/char
-obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
+obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o vmem.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
obj-y += version.o pgm_check_info.o ctype.o ipl_data.o machine_kexec_reloc.o
obj-$(findstring y, $(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) $(CONFIG_PGSTE)) += uv.o
struct vmlinux_info {
unsigned long default_lma;
- void (*entry)(void);
+ unsigned long entry;
unsigned long image_size; /* does not include .bss */
unsigned long bss_size; /* uncompressed image .bss size */
unsigned long bootdata_off;
unsigned long rela_dyn_start;
unsigned long rela_dyn_end;
unsigned long amode31_size;
+ unsigned long init_mm_off;
+ unsigned long swapper_pg_dir_off;
+ unsigned long invalid_pg_dir_off;
};
void startup_kernel(void);
void sclp_early_setup_buffer(void);
void print_pgm_check_info(void);
unsigned long get_random_base(unsigned long safe_addr);
+void setup_vmem(unsigned long online_end, unsigned long asce_limit);
void __printf(1, 2) decompressor_printk(const char *fmt, ...);
void error(char *m);
#include <asm/diag.h>
#include <asm/uv.h>
#include <asm/abs_lowcore.h>
+#include <asm/mem_detect.h>
#include "decompressor.h"
#include "boot.h"
#include "uv.h"
#endif
}
-static void setup_kernel_memory_layout(void)
+static unsigned long setup_kernel_memory_layout(void)
{
unsigned long vmemmap_start;
+ unsigned long asce_limit;
unsigned long rte_size;
unsigned long pages;
unsigned long vmax;
vmalloc_size > _REGION2_SIZE ||
vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN >
_REGION2_SIZE) {
- vmax = _REGION1_SIZE;
+ asce_limit = _REGION1_SIZE;
rte_size = _REGION2_SIZE;
} else {
- vmax = _REGION2_SIZE;
+ asce_limit = _REGION2_SIZE;
rte_size = _REGION3_SIZE;
}
/*
* secure storage limit, so that any vmalloc allocation
* we do could be used to back secure guest storage.
*/
- vmax = adjust_to_uv_max(vmax);
+ vmax = adjust_to_uv_max(asce_limit);
#ifdef CONFIG_KASAN
/* force vmalloc and modules below kasan shadow */
vmax = min(vmax, KASAN_SHADOW_START);
/* make sure vmemmap doesn't overlay with vmalloc area */
VMALLOC_START = max(vmemmap_start + vmemmap_size, VMALLOC_START);
vmemmap = (struct page *)vmemmap_start;
+
+ return asce_limit;
}
/*
vmlinux.rela_dyn_start += offset;
vmlinux.rela_dyn_end += offset;
vmlinux.dynsym_start += offset;
+ vmlinux.init_mm_off += offset;
+ vmlinux.swapper_pg_dir_off += offset;
+ vmlinux.invalid_pg_dir_off += offset;
}
static unsigned long reserve_amode31(unsigned long safe_addr)
{
unsigned long random_lma;
unsigned long safe_addr;
+ unsigned long asce_limit;
+ unsigned long online_end;
void *img;
+ psw_t psw;
detect_facilities();
sanitize_prot_virt_host();
setup_ident_map_size(detect_memory());
setup_vmalloc_size();
- setup_kernel_memory_layout();
+ asce_limit = setup_kernel_memory_layout();
+ online_end = min(get_mem_detect_end(), ident_map_size);
if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_enabled) {
random_lma = get_random_base(safe_addr);
} else if (__kaslr_offset)
memcpy((void *)vmlinux.default_lma, img, vmlinux.image_size);
+ /*
+ * The order of the following operations is important:
+ *
+ * - handle_relocs() must follow clear_bss_section() to establish static
+ * memory references to data in .bss to be used by setup_vmem()
+ * (i.e init_mm.pgd)
+ *
+ * - setup_vmem() must follow handle_relocs() to be able using
+ * static memory references to data in .bss (i.e init_mm.pgd)
+ *
+ * - copy_bootdata() must follow setup_vmem() to propagate changes to
+ * bootdata made by setup_vmem()
+ */
clear_bss_section();
- copy_bootdata();
handle_relocs(__kaslr_offset);
+ setup_vmem(online_end, asce_limit);
+ copy_bootdata();
if (__kaslr_offset) {
/*
if (IS_ENABLED(CONFIG_KERNEL_UNCOMPRESSED))
memset(img, 0, vmlinux.image_size);
}
- vmlinux.entry();
+
+ /*
+ * Jump to the decompressed kernel entry point and switch DAT mode on.
+ */
+ psw.addr = vmlinux.entry;
+ psw.mask = PSW_KERNEL_BITS;
+ __load_psw(psw);
}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/sched/task.h>
+#include <linux/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/facility.h>
+#include <asm/sections.h>
+#include <asm/mem_detect.h>
+#include "decompressor.h"
+#include "boot.h"
+
+#define init_mm (*(struct mm_struct *)vmlinux.init_mm_off)
+#define swapper_pg_dir vmlinux.swapper_pg_dir_off
+#define invalid_pg_dir vmlinux.invalid_pg_dir_off
+
+unsigned long __bootdata_preserved(s390_invalid_asce);
+unsigned long __bootdata(pgalloc_pos);
+unsigned long __bootdata(pgalloc_end);
+unsigned long __bootdata(pgalloc_low);
+
+static void boot_check_oom(void)
+{
+ if (pgalloc_pos < pgalloc_low)
+ error("out of memory on boot\n");
+}
+
+static void pgtable_populate_begin(unsigned long online_end)
+{
+ unsigned long initrd_end;
+ unsigned long kernel_end;
+
+ kernel_end = vmlinux.default_lma + vmlinux.image_size + vmlinux.bss_size;
+ pgalloc_low = round_up(kernel_end, PAGE_SIZE);
+ if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
+ initrd_end = round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
+ pgalloc_low = max(pgalloc_low, initrd_end);
+ }
+
+ pgalloc_end = round_down(online_end, PAGE_SIZE);
+ pgalloc_pos = pgalloc_end;
+
+ boot_check_oom();
+}
+
+static void *boot_alloc_pages(unsigned int order)
+{
+ unsigned long size = PAGE_SIZE << order;
+
+ pgalloc_pos -= size;
+ pgalloc_pos = round_down(pgalloc_pos, size);
+
+ boot_check_oom();
+
+ return (void *)pgalloc_pos;
+}
+
+static void *boot_crst_alloc(unsigned long val)
+{
+ unsigned long *table;
+
+ table = boot_alloc_pages(CRST_ALLOC_ORDER);
+ if (table)
+ crst_table_init(table, val);
+ return table;
+}
+
+static pte_t *boot_pte_alloc(void)
+{
+ static void *pte_leftover;
+ pte_t *pte;
+
+ BUILD_BUG_ON(_PAGE_TABLE_SIZE * 2 != PAGE_SIZE);
+
+ if (!pte_leftover) {
+ pte_leftover = boot_alloc_pages(0);
+ pte = pte_leftover + _PAGE_TABLE_SIZE;
+ } else {
+ pte = pte_leftover;
+ pte_leftover = NULL;
+ }
+ memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
+ return pte;
+}
+
+static bool can_large_pud(pud_t *pu_dir, unsigned long addr, unsigned long end)
+{
+ return machine.has_edat2 &&
+ IS_ALIGNED(addr, PUD_SIZE) && (end - addr) >= PUD_SIZE;
+}
+
+static bool can_large_pmd(pmd_t *pm_dir, unsigned long addr, unsigned long end)
+{
+ return machine.has_edat1 &&
+ IS_ALIGNED(addr, PMD_SIZE) && (end - addr) >= PMD_SIZE;
+}
+
+static void pgtable_pte_populate(pmd_t *pmd, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ pte_t *pte, entry;
+
+ pte = pte_offset_kernel(pmd, addr);
+ for (; addr < end; addr += PAGE_SIZE, pte++) {
+ if (pte_none(*pte)) {
+ entry = __pte(__pa(addr));
+ entry = set_pte_bit(entry, PAGE_KERNEL_EXEC);
+ set_pte(pte, entry);
+ }
+ }
+}
+
+static void pgtable_pmd_populate(pud_t *pud, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ pmd_t *pmd, entry;
+ pte_t *pte;
+
+ pmd = pmd_offset(pud, addr);
+ for (; addr < end; addr = next, pmd++) {
+ next = pmd_addr_end(addr, end);
+ if (pmd_none(*pmd)) {
+ if (can_large_pmd(pmd, addr, next)) {
+ entry = __pmd(__pa(addr));
+ entry = set_pmd_bit(entry, SEGMENT_KERNEL_EXEC);
+ set_pmd(pmd, entry);
+ continue;
+ }
+ pte = boot_pte_alloc();
+ pmd_populate(&init_mm, pmd, pte);
+ } else if (pmd_large(*pmd)) {
+ continue;
+ }
+ pgtable_pte_populate(pmd, addr, next);
+ }
+}
+
+static void pgtable_pud_populate(p4d_t *p4d, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ pud_t *pud, entry;
+ pmd_t *pmd;
+
+ pud = pud_offset(p4d, addr);
+ for (; addr < end; addr = next, pud++) {
+ next = pud_addr_end(addr, end);
+ if (pud_none(*pud)) {
+ if (can_large_pud(pud, addr, next)) {
+ entry = __pud(__pa(addr));
+ entry = set_pud_bit(entry, REGION3_KERNEL_EXEC);
+ set_pud(pud, entry);
+ continue;
+ }
+ pmd = boot_crst_alloc(_SEGMENT_ENTRY_EMPTY);
+ pud_populate(&init_mm, pud, pmd);
+ } else if (pud_large(*pud)) {
+ continue;
+ }
+ pgtable_pmd_populate(pud, addr, next);
+ }
+}
+
+static void pgtable_p4d_populate(pgd_t *pgd, unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ p4d_t *p4d;
+ pud_t *pud;
+
+ p4d = p4d_offset(pgd, addr);
+ for (; addr < end; addr = next, p4d++) {
+ next = p4d_addr_end(addr, end);
+ if (p4d_none(*p4d)) {
+ pud = boot_crst_alloc(_REGION3_ENTRY_EMPTY);
+ p4d_populate(&init_mm, p4d, pud);
+ }
+ pgtable_pud_populate(p4d, addr, next);
+ }
+}
+
+static void pgtable_populate(unsigned long addr, unsigned long end)
+{
+ unsigned long next;
+ pgd_t *pgd;
+ p4d_t *p4d;
+
+ pgd = pgd_offset(&init_mm, addr);
+ for (; addr < end; addr = next, pgd++) {
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(*pgd)) {
+ p4d = boot_crst_alloc(_REGION2_ENTRY_EMPTY);
+ pgd_populate(&init_mm, pgd, p4d);
+ }
+ pgtable_p4d_populate(pgd, addr, next);
+ }
+}
+
+/*
+ * The pgtables are located in the range [pgalloc_pos, pgalloc_end).
+ * That range must stay intact and is later reserved in the memblock.
+ * Therefore pgtable_populate(pgalloc_pos, pgalloc_end) is needed to
+ * finalize pgalloc_pos pointer. However that call can decrease the
+ * value of pgalloc_pos pointer itself. Therefore, pgtable_populate()
+ * needs to be called repeatedly until pgtables are complete and
+ * pgalloc_pos does not grow left anymore.
+ */
+static void pgtable_populate_end(void)
+{
+ unsigned long pgalloc_end_curr = pgalloc_end;
+ unsigned long pgalloc_pos_prev;
+
+ do {
+ pgalloc_pos_prev = pgalloc_pos;
+ pgtable_populate(pgalloc_pos, pgalloc_end_curr);
+ pgalloc_end_curr = pgalloc_pos_prev;
+ } while (pgalloc_pos < pgalloc_pos_prev);
+}
+
+void setup_vmem(unsigned long online_end, unsigned long asce_limit)
+{
+ unsigned long asce_type;
+ unsigned long asce_bits;
+
+ if (asce_limit == _REGION1_SIZE) {
+ asce_type = _REGION2_ENTRY_EMPTY;
+ asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
+ } else {
+ asce_type = _REGION3_ENTRY_EMPTY;
+ asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+ }
+ s390_invalid_asce = invalid_pg_dir | _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+
+ crst_table_init((unsigned long *)swapper_pg_dir, asce_type);
+ crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
+
+ /*
+ * To allow prefixing the lowcore must be mapped with 4KB pages.
+ * To prevent creation of a large page at address 0 first map
+ * the lowcore and create the identity mapping only afterwards.
+ *
+ * No further pgtable_populate() calls are allowed after the value
+ * of pgalloc_pos finalized with a call to pgtable_populate_end().
+ */
+ pgtable_populate_begin(online_end);
+ pgtable_populate(0, sizeof(struct lowcore));
+ pgtable_populate(0, online_end);
+ pgtable_populate_end();
+
+ S390_lowcore.kernel_asce = swapper_pg_dir | asce_bits;
+ S390_lowcore.user_asce = s390_invalid_asce;
+
+ __ctl_load(S390_lowcore.kernel_asce, 1, 1);
+ __ctl_load(S390_lowcore.user_asce, 7, 7);
+ __ctl_load(S390_lowcore.kernel_asce, 13, 13);
+
+ init_mm.context.asce = S390_lowcore.kernel_asce;
+}
#define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
extern void kasan_early_init(void);
-extern void kasan_copy_shadow_mapping(void);
-extern void kasan_free_early_identity(void);
/*
* Estimate kasan memory requirements, which it will reserve
}
#else
static inline void kasan_early_init(void) { }
-static inline void kasan_copy_shadow_mapping(void) { }
-static inline void kasan_free_early_identity(void) { }
static inline unsigned long kasan_estimate_memory_needs(unsigned long physmem) { return 0; }
#endif
#include <asm/uv.h>
extern pgd_t swapper_pg_dir[];
+extern pgd_t invalid_pg_dir[];
extern void paging_init(void);
extern unsigned long s390_invalid_asce;
#ifndef __ASSEMBLY__
#define PSW_KERNEL_BITS (PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_HOME | \
- PSW_MASK_EA | PSW_MASK_BA)
+ PSW_MASK_EA | PSW_MASK_BA | PSW_MASK_DAT)
#define PSW_USER_BITS (PSW_MASK_DAT | PSW_MASK_IO | PSW_MASK_EXT | \
PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_MASK_MCHECK | \
PSW_MASK_PSTATE | PSW_ASC_PRIMARY)
extern int noexec_disabled;
extern unsigned long ident_map_size;
+extern unsigned long pgalloc_pos;
+extern unsigned long pgalloc_end;
+extern unsigned long pgalloc_low;
/* The Write Back bit position in the physaddr is given by the SLPC PCI */
extern unsigned long mio_wb_bit_mask;
#include <linux/uaccess.h>
#include <linux/kernel.h>
#include <asm/asm-extable.h>
+#include <linux/memblock.h>
#include <asm/diag.h>
#include <asm/ebcdic.h>
#include <asm/ipl.h>
psw_t psw;
psw.addr = (unsigned long)early_pgm_check_handler;
- psw.mask = PSW_MASK_BASE | PSW_DEFAULT_KEY | PSW_MASK_EA | PSW_MASK_BA;
- if (IS_ENABLED(CONFIG_KASAN))
- psw.mask |= PSW_MASK_DAT;
+ psw.mask = PSW_KERNEL_BITS;
S390_lowcore.program_new_psw = psw;
S390_lowcore.preempt_count = INIT_PREEMPT_COUNT;
}
unsigned long psw_mask;
/* Wait for external, I/O or machine check interrupt. */
- psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT | PSW_MASK_DAT |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ psw_mask = PSW_KERNEL_BITS | PSW_MASK_WAIT |
+ PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
clear_cpu_flag(CIF_NOHZ_DELAY);
/* psw_idle() returns with interrupts disabled. */
if (unlikely(args->fn)) {
/* kernel thread */
memset(&frame->childregs, 0, sizeof(struct pt_regs));
- frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
- PSW_MASK_IO | PSW_MASK_EXT | PSW_MASK_MCHECK;
+ frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_IO |
+ PSW_MASK_EXT | PSW_MASK_MCHECK;
frame->childregs.psw.addr =
(unsigned long)__ret_from_fork;
frame->childregs.gprs[9] = (unsigned long)args->fn;
unsigned long __bootdata(ident_map_size);
struct mem_detect_info __bootdata(mem_detect);
struct initrd_data __bootdata(initrd_data);
+unsigned long __bootdata(pgalloc_pos);
+unsigned long __bootdata(pgalloc_end);
+unsigned long __bootdata(pgalloc_low);
unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata(__amode31_base);
call_on_stack_noreturn(rest_init, stack);
}
-static void __init setup_lowcore_dat_off(void)
+static void __init setup_lowcore(void)
{
- unsigned long int_psw_mask = PSW_KERNEL_BITS;
- struct lowcore *abs_lc, *lc;
+ struct lowcore *lc, *abs_lc;
unsigned long mcck_stack;
unsigned long flags;
- if (IS_ENABLED(CONFIG_KASAN))
- int_psw_mask |= PSW_MASK_DAT;
-
/*
* Setup lowcore for boot cpu
*/
panic("%s: Failed to allocate %zu bytes align=%zx\n",
__func__, sizeof(*lc), sizeof(*lc));
- lc->restart_psw.mask = PSW_KERNEL_BITS;
- lc->restart_psw.addr = (unsigned long) restart_int_handler;
- lc->external_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
+ lc->restart_psw.addr = __pa(restart_int_handler);
+ lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->external_new_psw.addr = (unsigned long) ext_int_handler;
- lc->svc_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->svc_new_psw.addr = (unsigned long) system_call;
- lc->program_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
- lc->mcck_new_psw.mask = int_psw_mask;
+ lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
- lc->io_new_psw.mask = int_psw_mask | PSW_MASK_MCHECK;
+ lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
lc->io_new_psw.addr = (unsigned long) io_int_handler;
lc->clock_comparator = clock_comparator_max;
lc->nodat_stack = ((unsigned long) &init_thread_union)
lc->restart_fn = (unsigned long) do_restart;
lc->restart_data = 0;
lc->restart_source = -1U;
-
- abs_lc = get_abs_lowcore(&flags);
- abs_lc->restart_stack = lc->restart_stack;
- abs_lc->restart_fn = lc->restart_fn;
- abs_lc->restart_data = lc->restart_data;
- abs_lc->restart_source = lc->restart_source;
- abs_lc->restart_psw = lc->restart_psw;
- abs_lc->mcesad = lc->mcesad;
- put_abs_lowcore(abs_lc, flags);
+ __ctl_store(lc->cregs_save_area, 0, 15);
mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
if (!mcck_stack)
lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
lc->preempt_count = PREEMPT_DISABLED;
+ lc->kernel_asce = S390_lowcore.kernel_asce;
+ lc->user_asce = S390_lowcore.user_asce;
+
+ abs_lc = get_abs_lowcore(&flags);
+ abs_lc->restart_stack = lc->restart_stack;
+ abs_lc->restart_fn = lc->restart_fn;
+ abs_lc->restart_data = lc->restart_data;
+ abs_lc->restart_source = lc->restart_source;
+ abs_lc->restart_psw = lc->restart_psw;
+ abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
+ memcpy(abs_lc->cregs_save_area, lc->cregs_save_area, sizeof(abs_lc->cregs_save_area));
+ abs_lc->program_new_psw = lc->program_new_psw;
+ abs_lc->mcesad = lc->mcesad;
+ put_abs_lowcore(abs_lc, flags);
set_prefix(__pa(lc));
lowcore_ptr[0] = lc;
-}
-
-static void __init setup_lowcore_dat_on(void)
-{
- struct lowcore *abs_lc;
- unsigned long flags;
-
- __ctl_clear_bit(0, 28);
- S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.mcck_new_psw.mask |= PSW_MASK_DAT;
- S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
- __ctl_set_bit(0, 28);
- __ctl_store(S390_lowcore.cregs_save_area, 0, 15);
if (abs_lowcore_map(0, lowcore_ptr[0], true))
panic("Couldn't setup absolute lowcore");
abs_lowcore_mapped = true;
- abs_lc = get_abs_lowcore(&flags);
- abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
- abs_lc->program_new_psw = S390_lowcore.program_new_psw;
- memcpy(abs_lc->cregs_save_area, S390_lowcore.cregs_save_area,
- sizeof(abs_lc->cregs_save_area));
- put_abs_lowcore(abs_lc, flags);
}
static struct resource code_resource = {
#endif
+/*
+ * Reserve page tables created by decompressor
+ */
+static void __init reserve_pgtables(void)
+{
+ memblock_reserve(pgalloc_pos, pgalloc_end - pgalloc_pos);
+}
+
/*
* Reserve memory for kdump kernel to be loaded with kexec
*/
setup_control_program_code();
/* Do some memory reservations *before* memory is added to memblock */
+ reserve_pgtables();
reserve_kernel();
reserve_initrd();
reserve_certificate_list();
#endif
setup_resources();
- setup_lowcore_dat_off();
+ setup_lowcore();
smp_fill_possible_mask();
cpu_detect_mhz_feature();
cpu_init();
static_branch_enable(&cpu_has_bear);
/*
- * Create kernel page tables and switch to virtual addressing.
+ * Create kernel page tables.
*/
paging_init();
memcpy_real_init();
* After paging_init created the kernel page table, the new PSWs
* in lowcore can now run with DAT enabled.
*/
- setup_lowcore_dat_on();
#ifdef CONFIG_CRASH_DUMP
smp_save_dump_ipl_cpu();
#endif
lc = lowcore_ptr[pcpu - pcpu_devices];
source_cpu = stap();
- __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+
if (pcpu->address == source_cpu) {
call_on_stack(2, stack, void, __pcpu_delegate,
pcpu_delegate_fn *, func, void *, data);
int cpu;
/* Disable all interrupts/machine checks */
- __load_psw_mask(PSW_KERNEL_BITS | PSW_MASK_DAT);
+ __load_psw_mask(PSW_KERNEL_BITS);
trace_hardirqs_off();
debug_set_critical();
QUAD(__rela_dyn_start) /* rela_dyn_start */
QUAD(__rela_dyn_end) /* rela_dyn_end */
QUAD(_eamode31 - _samode31) /* amode31_size */
+ QUAD(init_mm)
+ QUAD(swapper_pg_dir)
+ QUAD(invalid_pg_dir)
} :NONE
/* Debugging sections. */
#include <linux/virtio_config.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(".bss..swapper_pg_dir");
-static pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
+pgd_t invalid_pg_dir[PTRS_PER_PGD] __section(".bss..invalid_pg_dir");
-unsigned long s390_invalid_asce;
+unsigned long __bootdata_preserved(s390_invalid_asce);
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
void __init paging_init(void)
{
unsigned long max_zone_pfns[MAX_NR_ZONES];
- unsigned long pgd_type, asce_bits;
- psw_t psw;
-
- s390_invalid_asce = (unsigned long)invalid_pg_dir;
- s390_invalid_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
- crst_table_init((unsigned long *)invalid_pg_dir, _REGION3_ENTRY_EMPTY);
- init_mm.pgd = swapper_pg_dir;
- if (VMALLOC_END > _REGION2_SIZE) {
- asce_bits = _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
- pgd_type = _REGION2_ENTRY_EMPTY;
- } else {
- asce_bits = _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
- pgd_type = _REGION3_ENTRY_EMPTY;
- }
- init_mm.context.asce = (__pa(init_mm.pgd) & PAGE_MASK) | asce_bits;
- S390_lowcore.kernel_asce = init_mm.context.asce;
- S390_lowcore.user_asce = s390_invalid_asce;
- crst_table_init((unsigned long *) init_mm.pgd, pgd_type);
- vmem_map_init();
- kasan_copy_shadow_mapping();
-
- /* enable virtual mapping in kernel mode */
- __ctl_load(S390_lowcore.kernel_asce, 1, 1);
- __ctl_load(S390_lowcore.user_asce, 7, 7);
- __ctl_load(S390_lowcore.kernel_asce, 13, 13);
- psw.mask = __extract_psw();
- psw_bits(psw).dat = 1;
- psw_bits(psw).as = PSW_BITS_AS_HOME;
- __load_psw_mask(psw.mask);
- kasan_free_early_identity();
+ vmem_map_init();
sparse_init();
zone_dma_bits = 31;
memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
// SPDX-License-Identifier: GPL-2.0
#include <linux/kasan.h>
#include <linux/sched/task.h>
-#include <linux/memblock.h>
#include <linux/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/kasan.h>
static unsigned long segment_pos __initdata;
static unsigned long segment_low __initdata;
-static unsigned long pgalloc_pos __initdata;
-static unsigned long pgalloc_low __initdata;
-static unsigned long pgalloc_freeable __initdata;
static bool has_edat __initdata;
static bool has_nx __initdata;
#define __sha(x) ((unsigned long)kasan_mem_to_shadow((void *)x))
-static pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
-
static void __init kasan_early_panic(const char *reason)
{
sclp_early_printk("The Linux kernel failed to boot with the KernelAddressSanitizer:\n");
}
}
-static void __init kasan_set_pgd(pgd_t *pgd, unsigned long asce_type)
-{
- unsigned long asce_bits;
-
- asce_bits = asce_type | _ASCE_TABLE_LENGTH;
- S390_lowcore.kernel_asce = (__pa(pgd) & PAGE_MASK) | asce_bits;
- S390_lowcore.user_asce = S390_lowcore.kernel_asce;
-
- __ctl_load(S390_lowcore.kernel_asce, 1, 1);
- __ctl_load(S390_lowcore.kernel_asce, 7, 7);
- __ctl_load(S390_lowcore.kernel_asce, 13, 13);
-}
-
-static void __init kasan_enable_dat(void)
-{
- psw_t psw;
-
- psw.mask = __extract_psw();
- psw_bits(psw).dat = 1;
- psw_bits(psw).as = PSW_BITS_AS_HOME;
- __load_psw_mask(psw.mask);
-}
-
static void __init kasan_early_detect_facilities(void)
{
if (test_facility(8)) {
p4d_t p4d_z = __p4d(__pa(kasan_early_shadow_pud) | _REGION2_ENTRY);
unsigned long untracked_end = MODULES_VADDR;
unsigned long shadow_alloc_size;
- unsigned long initrd_end;
unsigned long memsize;
kasan_early_detect_facilities();
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_START, P4D_SIZE));
BUILD_BUG_ON(!IS_ALIGNED(KASAN_SHADOW_END, P4D_SIZE));
- crst_table_init((unsigned long *)early_pg_dir, _REGION2_ENTRY_EMPTY);
/* init kasan zero shadow */
- crst_table_init((unsigned long *)kasan_early_shadow_p4d,
- p4d_val(p4d_z));
- crst_table_init((unsigned long *)kasan_early_shadow_pud,
- pud_val(pud_z));
- crst_table_init((unsigned long *)kasan_early_shadow_pmd,
- pmd_val(pmd_z));
+ crst_table_init((unsigned long *)kasan_early_shadow_p4d, p4d_val(p4d_z));
+ crst_table_init((unsigned long *)kasan_early_shadow_pud, pud_val(pud_z));
+ crst_table_init((unsigned long *)kasan_early_shadow_pmd, pmd_val(pmd_z));
memset64((u64 *)kasan_early_shadow_pte, pte_val(pte_z), PTRS_PER_PTE);
shadow_alloc_size = memsize >> KASAN_SHADOW_SCALE_SHIFT;
- pgalloc_low = round_up((unsigned long)_end, _SEGMENT_SIZE);
- if (IS_ENABLED(CONFIG_BLK_DEV_INITRD)) {
- initrd_end =
- round_up(initrd_data.start + initrd_data.size, _SEGMENT_SIZE);
- pgalloc_low = max(pgalloc_low, initrd_end);
- }
if (pgalloc_low + shadow_alloc_size > memsize)
kasan_early_panic("out of memory during initialisation\n");
if (has_edat) {
- segment_pos = round_down(memsize, _SEGMENT_SIZE);
+ segment_pos = round_down(pgalloc_pos, _SEGMENT_SIZE);
segment_low = segment_pos - shadow_alloc_size;
+ segment_low = round_down(segment_low, _SEGMENT_SIZE);
pgalloc_pos = segment_low;
- } else {
- pgalloc_pos = memsize;
}
- init_mm.pgd = early_pg_dir;
/*
* Current memory layout:
* +- 0 -------------+ +- shadow start -+
POPULATE_ZERO_SHADOW);
kasan_early_pgtable_populate(__sha(MODULES_END), __sha(_REGION1_SIZE),
POPULATE_ZERO_SHADOW);
- /* memory allocated for identity mapping structs will be freed later */
- pgalloc_freeable = pgalloc_pos;
- /* populate identity mapping */
- kasan_early_pgtable_populate(0, memsize, POPULATE_ONE2ONE);
- kasan_set_pgd(early_pg_dir, _ASCE_TYPE_REGION2);
- kasan_enable_dat();
/* enable kasan */
init_task.kasan_depth = 0;
- memblock_reserve(pgalloc_pos, memsize - pgalloc_pos);
sclp_early_printk("KernelAddressSanitizer initialized\n");
}
-
-void __init kasan_copy_shadow_mapping(void)
-{
- /*
- * At this point we are still running on early pages setup early_pg_dir,
- * while swapper_pg_dir has just been initialized with identity mapping.
- * Carry over shadow memory region from early_pg_dir to swapper_pg_dir.
- */
-
- pgd_t *pg_dir_src;
- pgd_t *pg_dir_dst;
- p4d_t *p4_dir_src;
- p4d_t *p4_dir_dst;
-
- pg_dir_src = pgd_offset_raw(early_pg_dir, KASAN_SHADOW_START);
- pg_dir_dst = pgd_offset_raw(init_mm.pgd, KASAN_SHADOW_START);
- p4_dir_src = p4d_offset(pg_dir_src, KASAN_SHADOW_START);
- p4_dir_dst = p4d_offset(pg_dir_dst, KASAN_SHADOW_START);
- memcpy(p4_dir_dst, p4_dir_src,
- (KASAN_SHADOW_SIZE >> P4D_SHIFT) * sizeof(p4d_t));
-}
-
-void __init kasan_free_early_identity(void)
-{
- memblock_phys_free(pgalloc_pos, pgalloc_freeable - pgalloc_pos);
-}
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
+#include <linux/sort.h>
#include <asm/cacheflush.h>
#include <asm/nospec-branch.h>
#include <asm/pgalloc.h>
mutex_unlock(&vmem_mutex);
}
+static int __init memblock_region_cmp(const void *a, const void *b)
+{
+ const struct memblock_region *r1 = a;
+ const struct memblock_region *r2 = b;
+
+ if (r1->base < r2->base)
+ return -1;
+ if (r1->base > r2->base)
+ return 1;
+ return 0;
+}
+
+static void __init memblock_region_swap(void *a, void *b, int size)
+{
+ struct memblock_region *r1 = a;
+ struct memblock_region *r2 = b;
+ struct memblock_region swap;
+
+ swap = *r1;
+ *r1 = *r2;
+ *r2 = swap;
+}
+
/*
* map whole physical memory to virtual memory (identity mapping)
* we reserve enough space in the vmalloc area for vmemmap to hotplug
*/
void __init vmem_map_init(void)
{
+ struct memblock_region memory_rwx_regions[] = {
+ {
+ .base = 0,
+ .size = sizeof(struct lowcore),
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ {
+ .base = __pa(_stext),
+ .size = _etext - _stext,
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ {
+ .base = __pa(_sinittext),
+ .size = _einittext - _sinittext,
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ {
+ .base = __stext_amode31,
+ .size = __etext_amode31 - __stext_amode31,
+ .flags = MEMBLOCK_NONE,
+#ifdef CONFIG_NUMA
+ .nid = NUMA_NO_NODE,
+#endif
+ },
+ };
+ struct memblock_type memory_rwx = {
+ .regions = memory_rwx_regions,
+ .cnt = ARRAY_SIZE(memory_rwx_regions),
+ .max = ARRAY_SIZE(memory_rwx_regions),
+ };
phys_addr_t base, end;
u64 i;
- for_each_mem_range(i, &base, &end)
- vmem_add_range(base, end - base);
+ /*
+ * Set RW+NX attribute on all memory, except regions enumerated with
+ * memory_rwx exclude type. These regions need different attributes,
+ * which are enforced afterwards.
+ *
+ * __for_each_mem_range() iterate and exclude types should be sorted.
+ * The relative location of _stext and _sinittext is hardcoded in the
+ * linker script. However a location of __stext_amode31 and the kernel
+ * image itself are chosen dynamically. Thus, sort the exclude type.
+ */
+ sort(&memory_rwx_regions,
+ ARRAY_SIZE(memory_rwx_regions), sizeof(memory_rwx_regions[0]),
+ memblock_region_cmp, memblock_region_swap);
+ __for_each_mem_range(i, &memblock.memory, &memory_rwx,
+ NUMA_NO_NODE, MEMBLOCK_NONE, &base, &end, NULL) {
+ __set_memory((unsigned long)__va(base),
+ (end - base) >> PAGE_SHIFT,
+ SET_MEMORY_RW | SET_MEMORY_NX);
+ }
+
__set_memory((unsigned long)_stext,
(unsigned long)(_etext - _stext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
__set_memory((unsigned long)_sinittext,
(unsigned long)(_einittext - _sinittext) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
- __set_memory(__stext_amode31, (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
+ __set_memory(__stext_amode31,
+ (__etext_amode31 - __stext_amode31) >> PAGE_SHIFT,
SET_MEMORY_RO | SET_MEMORY_X);
- /* lowcore requires 4k mapping for real addresses / prefixing */
- set_memory_4k(0, LC_PAGES);
-
/* lowcore must be executable for LPSWE */
- if (!static_key_enabled(&cpu_has_bear))
- set_memory_x(0, 1);
+ if (static_key_enabled(&cpu_has_bear))
+ set_memory_nx(0, 1);
+ set_memory_nx(PAGE_SIZE, 1);
pr_info("Write protected kernel read-only data: %luk\n",
(unsigned long)(__end_rodata - _stext) >> 10);