From: Christophe Leroy Date: Tue, 29 Oct 2019 12:13:58 +0000 (+0000) Subject: powerpc/kexec: Move kexec files into a dedicated subdir. X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=793b08e2efff3ec020c5c5861d00ed394fcdd488;p=linux.git powerpc/kexec: Move kexec files into a dedicated subdir. arch/powerpc/kernel/ contains 8 files dedicated to kexec. Move them into a dedicated subdirectory. Signed-off-by: Christophe Leroy [mpe: Move to a/p/kexec, drop the 'machine' naming and use 'core' instead] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/afbef97ec6a978574a5cf91a4441000e0a9da42a.1572351221.git.christophe.leroy@c-s.fr --- diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild index 51e6908323ad5..5e2f9eaa3ee7d 100644 --- a/arch/powerpc/Kbuild +++ b/arch/powerpc/Kbuild @@ -14,4 +14,5 @@ obj-$(CONFIG_XMON) += xmon/ obj-$(CONFIG_KVM) += kvm/ obj-$(CONFIG_PERF_EVENTS) += perf/ +obj-$(CONFIG_KEXEC_CORE) += kexec/ obj-$(CONFIG_KEXEC_FILE) += purgatory/ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index fadbc1eb25861..c1df4e5188298 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -5,9 +5,6 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' -# Disable clang warning for using setjmp without setjmp.h header -CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header) - ifdef CONFIG_PPC64 CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) endif @@ -82,7 +79,6 @@ obj-$(CONFIG_FA_DUMP) += fadump.o obj-$(CONFIG_PRESERVE_FA_DUMP) += fadump.o ifdef CONFIG_PPC32 obj-$(CONFIG_E500) += idle_e500.o -obj-$(CONFIG_KEXEC_CORE) += kexec_relocate_32.o endif obj-$(CONFIG_PPC_BOOK3S_32) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o obj-$(CONFIG_TAU) += tau_6xx.o @@ -126,14 +122,6 @@ pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \ pci-common.o pci_of_scan.o obj-$(CONFIG_PCI_MSI) += msi.o -obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \ - machine_kexec_$(BITS).o -obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o -ifdef CONFIG_HAVE_IMA_KEXEC -ifdef CONFIG_IMA -obj-y += ima_kexec.o -endif -endif obj-$(CONFIG_AUDIT) += audit.o obj64-$(CONFIG_AUDIT) += compat_audit.o @@ -168,12 +156,6 @@ obj-$(CONFIG_PPC_SECVAR_SYSFS) += secvar-sysfs.o GCOV_PROFILE_prom_init.o := n KCOV_INSTRUMENT_prom_init.o := n UBSAN_SANITIZE_prom_init.o := n -GCOV_PROFILE_machine_kexec_64.o := n -KCOV_INSTRUMENT_machine_kexec_64.o := n -UBSAN_SANITIZE_machine_kexec_64.o := n -GCOV_PROFILE_machine_kexec_32.o := n -KCOV_INSTRUMENT_machine_kexec_32.o := n -UBSAN_SANITIZE_machine_kexec_32.o := n GCOV_PROFILE_kprobes.o := n KCOV_INSTRUMENT_kprobes.o := n UBSAN_SANITIZE_kprobes.o := n diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c deleted file mode 100644 index d488311efab1f..0000000000000 --- a/arch/powerpc/kernel/crash.c +++ /dev/null @@ -1,374 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Architecture specific (PPC64) functions for kexec based crash dumps. - * - * Copyright (C) 2005, IBM Corp. - * - * Created by: Haren Myneni - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -/* - * The primary CPU waits a while for all secondary CPUs to enter. This is to - * avoid sending an IPI if the secondary CPUs are entering - * crash_kexec_secondary on their own (eg via a system reset). - * - * The secondary timeout has to be longer than the primary. Both timeouts are - * in milliseconds. - */ -#define PRIMARY_TIMEOUT 500 -#define SECONDARY_TIMEOUT 1000 - -#define IPI_TIMEOUT 10000 -#define REAL_MODE_TIMEOUT 10000 - -static int time_to_dump; -/* - * crash_wake_offline should be set to 1 by platforms that intend to wake - * up offline cpus prior to jumping to a kdump kernel. Currently powernv - * sets it to 1, since we want to avoid things from happening when an - * offline CPU wakes up due to something like an HMI (malfunction error), - * which propagates to all threads. - */ -int crash_wake_offline; - -#define CRASH_HANDLER_MAX 3 -/* List of shutdown handles */ -static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX]; -static DEFINE_SPINLOCK(crash_handlers_lock); - -static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; -static int crash_shutdown_cpu = -1; - -static int handle_fault(struct pt_regs *regs) -{ - if (crash_shutdown_cpu == smp_processor_id()) - longjmp(crash_shutdown_buf, 1); - return 0; -} - -#ifdef CONFIG_SMP - -static atomic_t cpus_in_crash; -void crash_ipi_callback(struct pt_regs *regs) -{ - static cpumask_t cpus_state_saved = CPU_MASK_NONE; - - int cpu = smp_processor_id(); - - hard_irq_disable(); - if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { - crash_save_cpu(regs, cpu); - cpumask_set_cpu(cpu, &cpus_state_saved); - } - - atomic_inc(&cpus_in_crash); - smp_mb__after_atomic(); - - /* - * Starting the kdump boot. - * This barrier is needed to make sure that all CPUs are stopped. - */ - while (!time_to_dump) - cpu_relax(); - - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(1, 1); - -#ifdef CONFIG_PPC64 - kexec_smp_wait(); -#else - for (;;); /* FIXME */ -#endif - - /* NOTREACHED */ -} - -static void crash_kexec_prepare_cpus(int cpu) -{ - unsigned int msecs; - unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ - int tries = 0; - int (*old_handler)(struct pt_regs *regs); - - printk(KERN_EMERG "Sending IPI to other CPUs\n"); - - if (crash_wake_offline) - ncpus = num_present_cpus() - 1; - - crash_send_ipi(crash_ipi_callback); - smp_wmb(); - -again: - /* - * FIXME: Until we will have the way to stop other CPUs reliably, - * the crash CPU will send an IPI and wait for other CPUs to - * respond. - */ - msecs = IPI_TIMEOUT; - while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0)) - mdelay(1); - - /* Would it be better to replace the trap vector here? */ - - if (atomic_read(&cpus_in_crash) >= ncpus) { - printk(KERN_EMERG "IPI complete\n"); - return; - } - - printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", - ncpus - atomic_read(&cpus_in_crash)); - - /* - * If we have a panic timeout set then we can't wait indefinitely - * for someone to activate system reset. We also give up on the - * second time through if system reset fail to work. - */ - if ((panic_timeout > 0) || (tries > 0)) - return; - - /* - * A system reset will cause all CPUs to take an 0x100 exception. - * The primary CPU returns here via setjmp, and the secondary - * CPUs reexecute the crash_kexec_secondary path. - */ - old_handler = __debugger; - __debugger = handle_fault; - crash_shutdown_cpu = smp_processor_id(); - - if (setjmp(crash_shutdown_buf) == 0) { - printk(KERN_EMERG "Activate system reset (dumprestart) " - "to stop other cpu(s)\n"); - - /* - * A system reset will force all CPUs to execute the - * crash code again. We need to reset cpus_in_crash so we - * wait for everyone to do this. - */ - atomic_set(&cpus_in_crash, 0); - smp_mb(); - - while (atomic_read(&cpus_in_crash) < ncpus) - cpu_relax(); - } - - crash_shutdown_cpu = -1; - __debugger = old_handler; - - tries++; - goto again; -} - -/* - * This function will be called by secondary cpus. - */ -void crash_kexec_secondary(struct pt_regs *regs) -{ - unsigned long flags; - int msecs = SECONDARY_TIMEOUT; - - local_irq_save(flags); - - /* Wait for the primary crash CPU to signal its progress */ - while (crashing_cpu < 0) { - if (--msecs < 0) { - /* No response, kdump image may not have been loaded */ - local_irq_restore(flags); - return; - } - - mdelay(1); - } - - crash_ipi_callback(regs); -} - -#else /* ! CONFIG_SMP */ - -static void crash_kexec_prepare_cpus(int cpu) -{ - /* - * move the secondaries to us so that we can copy - * the new kernel 0-0x100 safely - * - * do this if kexec in setup.c ? - */ -#ifdef CONFIG_PPC64 - smp_release_cpus(); -#else - /* FIXME */ -#endif -} - -void crash_kexec_secondary(struct pt_regs *regs) -{ -} -#endif /* CONFIG_SMP */ - -/* wait for all the CPUs to hit real mode but timeout if they don't come in */ -#if defined(CONFIG_SMP) && defined(CONFIG_PPC64) -static void __maybe_unused crash_kexec_wait_realmode(int cpu) -{ - unsigned int msecs; - int i; - - msecs = REAL_MODE_TIMEOUT; - for (i=0; i < nr_cpu_ids && msecs > 0; i++) { - if (i == cpu) - continue; - - while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) { - barrier(); - if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0)) - break; - msecs--; - mdelay(1); - } - } - mb(); -} -#else -static inline void crash_kexec_wait_realmode(int cpu) {} -#endif /* CONFIG_SMP && CONFIG_PPC64 */ - -/* - * Register a function to be called on shutdown. Only use this if you - * can't reset your device in the second kernel. - */ -int crash_shutdown_register(crash_shutdown_t handler) -{ - unsigned int i, rc; - - spin_lock(&crash_handlers_lock); - for (i = 0 ; i < CRASH_HANDLER_MAX; i++) - if (!crash_shutdown_handles[i]) { - /* Insert handle at first empty entry */ - crash_shutdown_handles[i] = handler; - rc = 0; - break; - } - - if (i == CRASH_HANDLER_MAX) { - printk(KERN_ERR "Crash shutdown handles full, " - "not registered.\n"); - rc = 1; - } - - spin_unlock(&crash_handlers_lock); - return rc; -} -EXPORT_SYMBOL(crash_shutdown_register); - -int crash_shutdown_unregister(crash_shutdown_t handler) -{ - unsigned int i, rc; - - spin_lock(&crash_handlers_lock); - for (i = 0 ; i < CRASH_HANDLER_MAX; i++) - if (crash_shutdown_handles[i] == handler) - break; - - if (i == CRASH_HANDLER_MAX) { - printk(KERN_ERR "Crash shutdown handle not found\n"); - rc = 1; - } else { - /* Shift handles down */ - for (; i < (CRASH_HANDLER_MAX - 1); i++) - crash_shutdown_handles[i] = - crash_shutdown_handles[i+1]; - /* - * Reset last entry to NULL now that it has been shifted down, - * this will allow new handles to be added here. - */ - crash_shutdown_handles[i] = NULL; - rc = 0; - } - - spin_unlock(&crash_handlers_lock); - return rc; -} -EXPORT_SYMBOL(crash_shutdown_unregister); - -void default_machine_crash_shutdown(struct pt_regs *regs) -{ - unsigned int i; - int (*old_handler)(struct pt_regs *regs); - - /* - * This function is only called after the system - * has panicked or is otherwise in a critical state. - * The minimum amount of code to allow a kexec'd kernel - * to run successfully needs to happen here. - * - * In practice this means stopping other cpus in - * an SMP system. - * The kernel is broken so disable interrupts. - */ - hard_irq_disable(); - - /* - * Make a note of crashing cpu. Will be used in machine_kexec - * such that another IPI will not be sent. - */ - crashing_cpu = smp_processor_id(); - - /* - * If we came in via system reset, wait a while for the secondary - * CPUs to enter. - */ - if (TRAP(regs) == 0x100) - mdelay(PRIMARY_TIMEOUT); - - crash_kexec_prepare_cpus(crashing_cpu); - - crash_save_cpu(regs, crashing_cpu); - - time_to_dump = 1; - - crash_kexec_wait_realmode(crashing_cpu); - - machine_kexec_mask_interrupts(); - - /* - * Call registered shutdown routines safely. Swap out - * __debugger_fault_handler, and replace on exit. - */ - old_handler = __debugger_fault_handler; - __debugger_fault_handler = handle_fault; - crash_shutdown_cpu = smp_processor_id(); - for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) { - if (setjmp(crash_shutdown_buf) == 0) { - /* - * Insert syncs and delay to ensure - * instructions in the dangerous region don't - * leak away from this protected region. - */ - asm volatile("sync; isync"); - /* dangerous region */ - crash_shutdown_handles[i](); - asm volatile("sync; isync"); - } - } - crash_shutdown_cpu = -1; - __debugger_fault_handler = old_handler; - - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(1, 0); -} diff --git a/arch/powerpc/kernel/ima_kexec.c b/arch/powerpc/kernel/ima_kexec.c deleted file mode 100644 index 720e50e490b61..0000000000000 --- a/arch/powerpc/kernel/ima_kexec.c +++ /dev/null @@ -1,219 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2016 IBM Corporation - * - * Authors: - * Thiago Jung Bauermann - */ - -#include -#include -#include -#include -#include - -static int get_addr_size_cells(int *addr_cells, int *size_cells) -{ - struct device_node *root; - - root = of_find_node_by_path("/"); - if (!root) - return -EINVAL; - - *addr_cells = of_n_addr_cells(root); - *size_cells = of_n_size_cells(root); - - of_node_put(root); - - return 0; -} - -static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr, - size_t *size) -{ - int ret, addr_cells, size_cells; - - ret = get_addr_size_cells(&addr_cells, &size_cells); - if (ret) - return ret; - - if (len < 4 * (addr_cells + size_cells)) - return -ENOENT; - - *addr = of_read_number(prop, addr_cells); - *size = of_read_number(prop + 4 * addr_cells, size_cells); - - return 0; -} - -/** - * ima_get_kexec_buffer - get IMA buffer from the previous kernel - * @addr: On successful return, set to point to the buffer contents. - * @size: On successful return, set to the buffer size. - * - * Return: 0 on success, negative errno on error. - */ -int ima_get_kexec_buffer(void **addr, size_t *size) -{ - int ret, len; - unsigned long tmp_addr; - size_t tmp_size; - const void *prop; - - prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len); - if (!prop) - return -ENOENT; - - ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size); - if (ret) - return ret; - - *addr = __va(tmp_addr); - *size = tmp_size; - - return 0; -} - -/** - * ima_free_kexec_buffer - free memory used by the IMA buffer - */ -int ima_free_kexec_buffer(void) -{ - int ret; - unsigned long addr; - size_t size; - struct property *prop; - - prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL); - if (!prop) - return -ENOENT; - - ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size); - if (ret) - return ret; - - ret = of_remove_property(of_chosen, prop); - if (ret) - return ret; - - return memblock_free(addr, size); - -} - -/** - * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt - * - * The IMA measurement buffer is of no use to a subsequent kernel, so we always - * remove it from the device tree. - */ -void remove_ima_buffer(void *fdt, int chosen_node) -{ - int ret, len; - unsigned long addr; - size_t size; - const void *prop; - - prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len); - if (!prop) - return; - - ret = do_get_kexec_buffer(prop, len, &addr, &size); - fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer"); - if (ret) - return; - - ret = delete_fdt_mem_rsv(fdt, addr, size); - if (!ret) - pr_debug("Removed old IMA buffer reservation.\n"); -} - -#ifdef CONFIG_IMA_KEXEC -/** - * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer - * - * Architectures should use this function to pass on the IMA buffer - * information to the next kernel. - * - * Return: 0 on success, negative errno on error. - */ -int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr, - size_t size) -{ - image->arch.ima_buffer_addr = load_addr; - image->arch.ima_buffer_size = size; - - return 0; -} - -static int write_number(void *p, u64 value, int cells) -{ - if (cells == 1) { - u32 tmp; - - if (value > U32_MAX) - return -EINVAL; - - tmp = cpu_to_be32(value); - memcpy(p, &tmp, sizeof(tmp)); - } else if (cells == 2) { - u64 tmp; - - tmp = cpu_to_be64(value); - memcpy(p, &tmp, sizeof(tmp)); - } else - return -EINVAL; - - return 0; -} - -/** - * setup_ima_buffer - add IMA buffer information to the fdt - * @image: kexec image being loaded. - * @fdt: Flattened device tree for the next kernel. - * @chosen_node: Offset to the chosen node. - * - * Return: 0 on success, or negative errno on error. - */ -int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node) -{ - int ret, addr_cells, size_cells, entry_size; - u8 value[16]; - - remove_ima_buffer(fdt, chosen_node); - if (!image->arch.ima_buffer_size) - return 0; - - ret = get_addr_size_cells(&addr_cells, &size_cells); - if (ret) - return ret; - - entry_size = 4 * (addr_cells + size_cells); - - if (entry_size > sizeof(value)) - return -EINVAL; - - ret = write_number(value, image->arch.ima_buffer_addr, addr_cells); - if (ret) - return ret; - - ret = write_number(value + 4 * addr_cells, image->arch.ima_buffer_size, - size_cells); - if (ret) - return ret; - - ret = fdt_setprop(fdt, chosen_node, "linux,ima-kexec-buffer", value, - entry_size); - if (ret < 0) - return -EINVAL; - - ret = fdt_add_mem_rsv(fdt, image->arch.ima_buffer_addr, - image->arch.ima_buffer_size); - if (ret) - return -EINVAL; - - pr_debug("IMA buffer at 0x%llx, size = 0x%zx\n", - image->arch.ima_buffer_addr, image->arch.ima_buffer_size); - - return 0; -} -#endif /* CONFIG_IMA_KEXEC */ diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c deleted file mode 100644 index 3072fd6dbe94a..0000000000000 --- a/arch/powerpc/kernel/kexec_elf_64.c +++ /dev/null @@ -1,125 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Load ELF vmlinux file for the kexec_file_load syscall. - * - * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) - * Copyright (C) 2004 IBM Corp. - * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) - * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) - * Copyright (C) 2016 IBM Corporation - * - * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c. - * Heavily modified for the kernel by - * Thiago Jung Bauermann . - */ - -#define pr_fmt(fmt) "kexec_elf: " fmt - -#include -#include -#include -#include -#include -#include -#include - -static void *elf64_load(struct kimage *image, char *kernel_buf, - unsigned long kernel_len, char *initrd, - unsigned long initrd_len, char *cmdline, - unsigned long cmdline_len) -{ - int ret; - unsigned int fdt_size; - unsigned long kernel_load_addr; - unsigned long initrd_load_addr = 0, fdt_load_addr; - void *fdt; - const void *slave_code; - struct elfhdr ehdr; - struct kexec_elf_info elf_info; - struct kexec_buf kbuf = { .image = image, .buf_min = 0, - .buf_max = ppc64_rma_size }; - struct kexec_buf pbuf = { .image = image, .buf_min = 0, - .buf_max = ppc64_rma_size, .top_down = true, - .mem = KEXEC_BUF_MEM_UNKNOWN }; - - ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); - if (ret) - goto out; - - ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr); - if (ret) - goto out; - - pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr); - - ret = kexec_load_purgatory(image, &pbuf); - if (ret) { - pr_err("Loading purgatory failed.\n"); - goto out; - } - - pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem); - - if (initrd != NULL) { - kbuf.buffer = initrd; - kbuf.bufsz = kbuf.memsz = initrd_len; - kbuf.buf_align = PAGE_SIZE; - kbuf.top_down = false; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - ret = kexec_add_buffer(&kbuf); - if (ret) - goto out; - initrd_load_addr = kbuf.mem; - - pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr); - } - - fdt_size = fdt_totalsize(initial_boot_params) * 2; - fdt = kmalloc(fdt_size, GFP_KERNEL); - if (!fdt) { - pr_err("Not enough memory for the device tree.\n"); - ret = -ENOMEM; - goto out; - } - ret = fdt_open_into(initial_boot_params, fdt, fdt_size); - if (ret < 0) { - pr_err("Error setting up the new device tree.\n"); - ret = -EINVAL; - goto out; - } - - ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline); - if (ret) - goto out; - - fdt_pack(fdt); - - kbuf.buffer = fdt; - kbuf.bufsz = kbuf.memsz = fdt_size; - kbuf.buf_align = PAGE_SIZE; - kbuf.top_down = true; - kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; - ret = kexec_add_buffer(&kbuf); - if (ret) - goto out; - fdt_load_addr = kbuf.mem; - - pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr); - - slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset; - ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr, - fdt_load_addr); - if (ret) - pr_err("Error setting up the purgatory.\n"); - -out: - kexec_free_elf_info(&elf_info); - - /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */ - return ret ? ERR_PTR(ret) : fdt; -} - -const struct kexec_file_ops kexec_elf64_ops = { - .probe = kexec_elf_probe, - .load = elf64_load, -}; diff --git a/arch/powerpc/kernel/kexec_relocate_32.S b/arch/powerpc/kernel/kexec_relocate_32.S deleted file mode 100644 index 8a8b4887c879c..0000000000000 --- a/arch/powerpc/kernel/kexec_relocate_32.S +++ /dev/null @@ -1,500 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * This file contains kexec low-level functions. - * - * Copyright (C) 2002-2003 Eric Biederman - * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz - * PPC44x port. Copyright (C) 2011, IBM Corporation - * Author: Suzuki Poulose - */ - -#include -#include -#include -#include -#include - - .text - - /* - * Must be relocatable PIC code callable as a C function. - */ - .globl relocate_new_kernel -relocate_new_kernel: - /* r3 = page_list */ - /* r4 = reboot_code_buffer */ - /* r5 = start_address */ - -#ifdef CONFIG_FSL_BOOKE - - mr r29, r3 - mr r30, r4 - mr r31, r5 - -#define ENTRY_MAPPING_KEXEC_SETUP -#include "fsl_booke_entry_mapping.S" -#undef ENTRY_MAPPING_KEXEC_SETUP - - mr r3, r29 - mr r4, r30 - mr r5, r31 - - li r0, 0 -#elif defined(CONFIG_44x) - - /* Save our parameters */ - mr r29, r3 - mr r30, r4 - mr r31, r5 - -#ifdef CONFIG_PPC_47x - /* Check for 47x cores */ - mfspr r3,SPRN_PVR - srwi r3,r3,16 - cmplwi cr0,r3,PVR_476FPE@h - beq setup_map_47x - cmplwi cr0,r3,PVR_476@h - beq setup_map_47x - cmplwi cr0,r3,PVR_476_ISS@h - beq setup_map_47x -#endif /* CONFIG_PPC_47x */ - -/* - * Code for setting up 1:1 mapping for PPC440x for KEXEC - * - * We cannot switch off the MMU on PPC44x. - * So we: - * 1) Invalidate all the mappings except the one we are running from. - * 2) Create a tmp mapping for our code in the other address space(TS) and - * jump to it. Invalidate the entry we started in. - * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS. - * 4) Jump to the 1:1 mapping in original TS. - * 5) Invalidate the tmp mapping. - * - * - Based on the kexec support code for FSL BookE - * - */ - - /* - * Load the PID with kernel PID (0). - * Also load our MSR_IS and TID to MMUCR for TLB search. - */ - li r3, 0 - mtspr SPRN_PID, r3 - mfmsr r4 - andi. r4,r4,MSR_IS@l - beq wmmucr - oris r3,r3,PPC44x_MMUCR_STS@h -wmmucr: - mtspr SPRN_MMUCR,r3 - sync - - /* - * Invalidate all the TLB entries except the current entry - * where we are running from - */ - bl 0f /* Find our address */ -0: mflr r5 /* Make it accessible */ - tlbsx r23,0,r5 /* Find entry we are in */ - li r4,0 /* Start at TLB entry 0 */ - li r3,0 /* Set PAGEID inval value */ -1: cmpw r23,r4 /* Is this our entry? */ - beq skip /* If so, skip the inval */ - tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ -skip: - addi r4,r4,1 /* Increment */ - cmpwi r4,64 /* Are we done? */ - bne 1b /* If not, repeat */ - isync - - /* Create a temp mapping and jump to it */ - andi. r6, r23, 1 /* Find the index to use */ - addi r24, r6, 1 /* r24 will contain 1 or 2 */ - - mfmsr r9 /* get the MSR */ - rlwinm r5, r9, 27, 31, 31 /* Extract the MSR[IS] */ - xori r7, r5, 1 /* Use the other address space */ - - /* Read the current mapping entries */ - tlbre r3, r23, PPC44x_TLB_PAGEID - tlbre r4, r23, PPC44x_TLB_XLAT - tlbre r5, r23, PPC44x_TLB_ATTRIB - - /* Save our current XLAT entry */ - mr r25, r4 - - /* Extract the TLB PageSize */ - li r10, 1 /* r10 will hold PageSize */ - rlwinm r11, r3, 0, 24, 27 /* bits 24-27 */ - - /* XXX: As of now we use 256M, 4K pages */ - cmpwi r11, PPC44x_TLB_256M - bne tlb_4k - rotlwi r10, r10, 28 /* r10 = 256M */ - b write_out -tlb_4k: - cmpwi r11, PPC44x_TLB_4K - bne default - rotlwi r10, r10, 12 /* r10 = 4K */ - b write_out -default: - rotlwi r10, r10, 10 /* r10 = 1K */ - -write_out: - /* - * Write out the tmp 1:1 mapping for this code in other address space - * Fixup EPN = RPN , TS=other address space - */ - insrwi r3, r7, 1, 23 /* Bit 23 is TS for PAGEID field */ - - /* Write out the tmp mapping entries */ - tlbwe r3, r24, PPC44x_TLB_PAGEID - tlbwe r4, r24, PPC44x_TLB_XLAT - tlbwe r5, r24, PPC44x_TLB_ATTRIB - - subi r11, r10, 1 /* PageOffset Mask = PageSize - 1 */ - not r10, r11 /* Mask for PageNum */ - - /* Switch to other address space in MSR */ - insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ - - bl 1f -1: mflr r8 - addi r8, r8, (2f-1b) /* Find the target offset */ - - /* Jump to the tmp mapping */ - mtspr SPRN_SRR0, r8 - mtspr SPRN_SRR1, r9 - rfi - -2: - /* Invalidate the entry we were executing from */ - li r3, 0 - tlbwe r3, r23, PPC44x_TLB_PAGEID - - /* attribute fields. rwx for SUPERVISOR mode */ - li r5, 0 - ori r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) - - /* Create 1:1 mapping in 256M pages */ - xori r7, r7, 1 /* Revert back to Original TS */ - - li r8, 0 /* PageNumber */ - li r6, 3 /* TLB Index, start at 3 */ - -next_tlb: - rotlwi r3, r8, 28 /* Create EPN (bits 0-3) */ - mr r4, r3 /* RPN = EPN */ - ori r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */ - insrwi r3, r7, 1, 23 /* Set TS from r7 */ - - tlbwe r3, r6, PPC44x_TLB_PAGEID /* PageID field : EPN, V, SIZE */ - tlbwe r4, r6, PPC44x_TLB_XLAT /* Address translation : RPN */ - tlbwe r5, r6, PPC44x_TLB_ATTRIB /* Attributes */ - - addi r8, r8, 1 /* Increment PN */ - addi r6, r6, 1 /* Increment TLB Index */ - cmpwi r8, 8 /* Are we done ? */ - bne next_tlb - isync - - /* Jump to the new mapping 1:1 */ - li r9,0 - insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ - - bl 1f -1: mflr r8 - and r8, r8, r11 /* Get our offset within page */ - addi r8, r8, (2f-1b) - - and r5, r25, r10 /* Get our target PageNum */ - or r8, r8, r5 /* Target jump address */ - - mtspr SPRN_SRR0, r8 - mtspr SPRN_SRR1, r9 - rfi -2: - /* Invalidate the tmp entry we used */ - li r3, 0 - tlbwe r3, r24, PPC44x_TLB_PAGEID - sync - b ppc44x_map_done - -#ifdef CONFIG_PPC_47x - - /* 1:1 mapping for 47x */ - -setup_map_47x: - - /* - * Load the kernel pid (0) to PID and also to MMUCR[TID]. - * Also set the MSR IS->MMUCR STS - */ - li r3, 0 - mtspr SPRN_PID, r3 /* Set PID */ - mfmsr r4 /* Get MSR */ - andi. r4, r4, MSR_IS@l /* TS=1? */ - beq 1f /* If not, leave STS=0 */ - oris r3, r3, PPC47x_MMUCR_STS@h /* Set STS=1 */ -1: mtspr SPRN_MMUCR, r3 /* Put MMUCR */ - sync - - /* Find the entry we are running from */ - bl 2f -2: mflr r23 - tlbsx r23, 0, r23 - tlbre r24, r23, 0 /* TLB Word 0 */ - tlbre r25, r23, 1 /* TLB Word 1 */ - tlbre r26, r23, 2 /* TLB Word 2 */ - - - /* - * Invalidates all the tlb entries by writing to 256 RPNs(r4) - * of 4k page size in all 4 ways (0-3 in r3). - * This would invalidate the entire UTLB including the one we are - * running from. However the shadow TLB entries would help us - * to continue the execution, until we flush them (rfi/isync). - */ - addis r3, 0, 0x8000 /* specify the way */ - addi r4, 0, 0 /* TLB Word0 = (EPN=0, VALID = 0) */ - addi r5, 0, 0 - b clear_utlb_entry - - /* Align the loop to speed things up. from head_44x.S */ - .align 6 - -clear_utlb_entry: - - tlbwe r4, r3, 0 - tlbwe r5, r3, 1 - tlbwe r5, r3, 2 - addis r3, r3, 0x2000 /* Increment the way */ - cmpwi r3, 0 - bne clear_utlb_entry - addis r3, 0, 0x8000 - addis r4, r4, 0x100 /* Increment the EPN */ - cmpwi r4, 0 - bne clear_utlb_entry - - /* Create the entries in the other address space */ - mfmsr r5 - rlwinm r7, r5, 27, 31, 31 /* Get the TS (Bit 26) from MSR */ - xori r7, r7, 1 /* r7 = !TS */ - - insrwi r24, r7, 1, 21 /* Change the TS in the saved TLB word 0 */ - - /* - * write out the TLB entries for the tmp mapping - * Use way '0' so that we could easily invalidate it later. - */ - lis r3, 0x8000 /* Way '0' */ - - tlbwe r24, r3, 0 - tlbwe r25, r3, 1 - tlbwe r26, r3, 2 - - /* Update the msr to the new TS */ - insrwi r5, r7, 1, 26 - - bl 1f -1: mflr r6 - addi r6, r6, (2f-1b) - - mtspr SPRN_SRR0, r6 - mtspr SPRN_SRR1, r5 - rfi - - /* - * Now we are in the tmp address space. - * Create a 1:1 mapping for 0-2GiB in the original TS. - */ -2: - li r3, 0 - li r4, 0 /* TLB Word 0 */ - li r5, 0 /* TLB Word 1 */ - li r6, 0 - ori r6, r6, PPC47x_TLB2_S_RWX /* TLB word 2 */ - - li r8, 0 /* PageIndex */ - - xori r7, r7, 1 /* revert back to original TS */ - -write_utlb: - rotlwi r5, r8, 28 /* RPN = PageIndex * 256M */ - /* ERPN = 0 as we don't use memory above 2G */ - - mr r4, r5 /* EPN = RPN */ - ori r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M) - insrwi r4, r7, 1, 21 /* Insert the TS to Word 0 */ - - tlbwe r4, r3, 0 /* Write out the entries */ - tlbwe r5, r3, 1 - tlbwe r6, r3, 2 - addi r8, r8, 1 - cmpwi r8, 8 /* Have we completed ? */ - bne write_utlb - - /* make sure we complete the TLB write up */ - isync - - /* - * Prepare to jump to the 1:1 mapping. - * 1) Extract page size of the tmp mapping - * DSIZ = TLB_Word0[22:27] - * 2) Calculate the physical address of the address - * to jump to. - */ - rlwinm r10, r24, 0, 22, 27 - - cmpwi r10, PPC47x_TLB0_4K - bne 0f - li r10, 0x1000 /* r10 = 4k */ - bl 1f - -0: - /* Defaults to 256M */ - lis r10, 0x1000 - - bl 1f -1: mflr r4 - addi r4, r4, (2f-1b) /* virtual address of 2f */ - - subi r11, r10, 1 /* offsetmask = Pagesize - 1 */ - not r10, r11 /* Pagemask = ~(offsetmask) */ - - and r5, r25, r10 /* Physical page */ - and r6, r4, r11 /* offset within the current page */ - - or r5, r5, r6 /* Physical address for 2f */ - - /* Switch the TS in MSR to the original one */ - mfmsr r8 - insrwi r8, r7, 1, 26 - - mtspr SPRN_SRR1, r8 - mtspr SPRN_SRR0, r5 - rfi - -2: - /* Invalidate the tmp mapping */ - lis r3, 0x8000 /* Way '0' */ - - clrrwi r24, r24, 12 /* Clear the valid bit */ - tlbwe r24, r3, 0 - tlbwe r25, r3, 1 - tlbwe r26, r3, 2 - - /* Make sure we complete the TLB write and flush the shadow TLB */ - isync - -#endif - -ppc44x_map_done: - - - /* Restore the parameters */ - mr r3, r29 - mr r4, r30 - mr r5, r31 - - li r0, 0 -#else - li r0, 0 - - /* - * Set Machine Status Register to a known status, - * switch the MMU off and jump to 1: in a single step. - */ - - mr r8, r0 - ori r8, r8, MSR_RI|MSR_ME - mtspr SPRN_SRR1, r8 - addi r8, r4, 1f - relocate_new_kernel - mtspr SPRN_SRR0, r8 - sync - rfi - -1: -#endif - /* from this point address translation is turned off */ - /* and interrupts are disabled */ - - /* set a new stack at the bottom of our page... */ - /* (not really needed now) */ - addi r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */ - stw r0, 0(r1) - - /* Do the copies */ - li r6, 0 /* checksum */ - mr r0, r3 - b 1f - -0: /* top, read another word for the indirection page */ - lwzu r0, 4(r3) - -1: - /* is it a destination page? (r8) */ - rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */ - beq 2f - - rlwinm r8, r0, 0, 0, 19 /* clear kexec flags, page align */ - b 0b - -2: /* is it an indirection page? (r3) */ - rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */ - beq 2f - - rlwinm r3, r0, 0, 0, 19 /* clear kexec flags, page align */ - subi r3, r3, 4 - b 0b - -2: /* are we done? */ - rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */ - beq 2f - b 3f - -2: /* is it a source page? (r9) */ - rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */ - beq 0b - - rlwinm r9, r0, 0, 0, 19 /* clear kexec flags, page align */ - - li r7, PAGE_SIZE / 4 - mtctr r7 - subi r9, r9, 4 - subi r8, r8, 4 -9: - lwzu r0, 4(r9) /* do the copy */ - xor r6, r6, r0 - stwu r0, 4(r8) - dcbst 0, r8 - sync - icbi 0, r8 - bdnz 9b - - addi r9, r9, 4 - addi r8, r8, 4 - b 0b - -3: - - /* To be certain of avoiding problems with self-modifying code - * execute a serializing instruction here. - */ - isync - sync - - mfspr r3, SPRN_PIR /* current core we are running on */ - mr r4, r5 /* load physical address of chunk called */ - - /* jump to the entry point, usually the setup routine */ - mtlr r5 - blrl - -1: b 1b - -relocate_new_kernel_end: - - .globl relocate_new_kernel_size -relocate_new_kernel_size: - .long relocate_new_kernel_end - relocate_new_kernel diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c deleted file mode 100644 index 078fe3d76feb6..0000000000000 --- a/arch/powerpc/kernel/machine_kexec.c +++ /dev/null @@ -1,280 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Code to handle transition of Linux booting another kernel. - * - * Copyright (C) 2002-2003 Eric Biederman - * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz - * Copyright (C) 2005 IBM Corporation. - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -void machine_kexec_mask_interrupts(void) { - unsigned int i; - struct irq_desc *desc; - - for_each_irq_desc(i, desc) { - struct irq_chip *chip; - - chip = irq_desc_get_chip(desc); - if (!chip) - continue; - - if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) - chip->irq_eoi(&desc->irq_data); - - if (chip->irq_mask) - chip->irq_mask(&desc->irq_data); - - if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) - chip->irq_disable(&desc->irq_data); - } -} - -void machine_crash_shutdown(struct pt_regs *regs) -{ - default_machine_crash_shutdown(regs); -} - -/* - * Do what every setup is needed on image and the - * reboot code buffer to allow us to avoid allocations - * later. - */ -int machine_kexec_prepare(struct kimage *image) -{ - if (ppc_md.machine_kexec_prepare) - return ppc_md.machine_kexec_prepare(image); - else - return default_machine_kexec_prepare(image); -} - -void machine_kexec_cleanup(struct kimage *image) -{ -} - -void arch_crash_save_vmcoreinfo(void) -{ - -#ifdef CONFIG_NEED_MULTIPLE_NODES - VMCOREINFO_SYMBOL(node_data); - VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); -#endif -#ifndef CONFIG_NEED_MULTIPLE_NODES - VMCOREINFO_SYMBOL(contig_page_data); -#endif -#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP) - VMCOREINFO_SYMBOL(vmemmap_list); - VMCOREINFO_SYMBOL(mmu_vmemmap_psize); - VMCOREINFO_SYMBOL(mmu_psize_defs); - VMCOREINFO_STRUCT_SIZE(vmemmap_backing); - VMCOREINFO_OFFSET(vmemmap_backing, list); - VMCOREINFO_OFFSET(vmemmap_backing, phys); - VMCOREINFO_OFFSET(vmemmap_backing, virt_addr); - VMCOREINFO_STRUCT_SIZE(mmu_psize_def); - VMCOREINFO_OFFSET(mmu_psize_def, shift); -#endif - vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); -} - -/* - * Do not allocate memory (or fail in any way) in machine_kexec(). - * We are past the point of no return, committed to rebooting now. - */ -void machine_kexec(struct kimage *image) -{ - int save_ftrace_enabled; - - save_ftrace_enabled = __ftrace_enabled_save(); - this_cpu_disable_ftrace(); - - if (ppc_md.machine_kexec) - ppc_md.machine_kexec(image); - else - default_machine_kexec(image); - - this_cpu_enable_ftrace(); - __ftrace_enabled_restore(save_ftrace_enabled); - - /* Fall back to normal restart if we're still alive. */ - machine_restart(NULL); - for(;;); -} - -void __init reserve_crashkernel(void) -{ - unsigned long long crash_size, crash_base; - int ret; - - /* use common parsing */ - ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), - &crash_size, &crash_base); - if (ret == 0 && crash_size > 0) { - crashk_res.start = crash_base; - crashk_res.end = crash_base + crash_size - 1; - } - - if (crashk_res.end == crashk_res.start) { - crashk_res.start = crashk_res.end = 0; - return; - } - - /* We might have got these values via the command line or the - * device tree, either way sanitise them now. */ - - crash_size = resource_size(&crashk_res); - -#ifndef CONFIG_NONSTATIC_KERNEL - if (crashk_res.start != KDUMP_KERNELBASE) - printk("Crash kernel location must be 0x%x\n", - KDUMP_KERNELBASE); - - crashk_res.start = KDUMP_KERNELBASE; -#else - if (!crashk_res.start) { -#ifdef CONFIG_PPC64 - /* - * On 64bit we split the RMO in half but cap it at half of - * a small SLB (128MB) since the crash kernel needs to place - * itself and some stacks to be in the first segment. - */ - crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); -#else - crashk_res.start = KDUMP_KERNELBASE; -#endif - } - - crash_base = PAGE_ALIGN(crashk_res.start); - if (crash_base != crashk_res.start) { - printk("Crash kernel base must be aligned to 0x%lx\n", - PAGE_SIZE); - crashk_res.start = crash_base; - } - -#endif - crash_size = PAGE_ALIGN(crash_size); - crashk_res.end = crashk_res.start + crash_size - 1; - - /* The crash region must not overlap the current kernel */ - if (overlaps_crashkernel(__pa(_stext), _end - _stext)) { - printk(KERN_WARNING - "Crash kernel can not overlap current kernel\n"); - crashk_res.start = crashk_res.end = 0; - return; - } - - /* Crash kernel trumps memory limit */ - if (memory_limit && memory_limit <= crashk_res.end) { - memory_limit = crashk_res.end + 1; - printk("Adjusted memory limit for crashkernel, now 0x%llx\n", - memory_limit); - } - - printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " - "for crashkernel (System RAM: %ldMB)\n", - (unsigned long)(crash_size >> 20), - (unsigned long)(crashk_res.start >> 20), - (unsigned long)(memblock_phys_mem_size() >> 20)); - - if (!memblock_is_region_memory(crashk_res.start, crash_size) || - memblock_reserve(crashk_res.start, crash_size)) { - pr_err("Failed to reserve memory for crashkernel!\n"); - crashk_res.start = crashk_res.end = 0; - return; - } -} - -int overlaps_crashkernel(unsigned long start, unsigned long size) -{ - return (start + size) > crashk_res.start && start <= crashk_res.end; -} - -/* Values we need to export to the second kernel via the device tree. */ -static phys_addr_t kernel_end; -static phys_addr_t crashk_base; -static phys_addr_t crashk_size; -static unsigned long long mem_limit; - -static struct property kernel_end_prop = { - .name = "linux,kernel-end", - .length = sizeof(phys_addr_t), - .value = &kernel_end, -}; - -static struct property crashk_base_prop = { - .name = "linux,crashkernel-base", - .length = sizeof(phys_addr_t), - .value = &crashk_base -}; - -static struct property crashk_size_prop = { - .name = "linux,crashkernel-size", - .length = sizeof(phys_addr_t), - .value = &crashk_size, -}; - -static struct property memory_limit_prop = { - .name = "linux,memory-limit", - .length = sizeof(unsigned long long), - .value = &mem_limit, -}; - -#define cpu_to_be_ulong __PASTE(cpu_to_be, BITS_PER_LONG) - -static void __init export_crashk_values(struct device_node *node) -{ - /* There might be existing crash kernel properties, but we can't - * be sure what's in them, so remove them. */ - of_remove_property(node, of_find_property(node, - "linux,crashkernel-base", NULL)); - of_remove_property(node, of_find_property(node, - "linux,crashkernel-size", NULL)); - - if (crashk_res.start != 0) { - crashk_base = cpu_to_be_ulong(crashk_res.start), - of_add_property(node, &crashk_base_prop); - crashk_size = cpu_to_be_ulong(resource_size(&crashk_res)); - of_add_property(node, &crashk_size_prop); - } - - /* - * memory_limit is required by the kexec-tools to limit the - * crash regions to the actual memory used. - */ - mem_limit = cpu_to_be_ulong(memory_limit); - of_update_property(node, &memory_limit_prop); -} - -static int __init kexec_setup(void) -{ - struct device_node *node; - - node = of_find_node_by_path("/chosen"); - if (!node) - return -ENOENT; - - /* remove any stale properties so ours can be found */ - of_remove_property(node, of_find_property(node, kernel_end_prop.name, NULL)); - - /* information needed by userspace when using default_machine_kexec */ - kernel_end = cpu_to_be_ulong(__pa(_end)); - of_add_property(node, &kernel_end_prop); - - export_crashk_values(node); - - of_node_put(node); - return 0; -} -late_initcall(kexec_setup); diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c deleted file mode 100644 index bf9f1f906d646..0000000000000 --- a/arch/powerpc/kernel/machine_kexec_32.c +++ /dev/null @@ -1,69 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * PPC32 code to handle Linux booting another kernel. - * - * Copyright (C) 2002-2003 Eric Biederman - * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz - * Copyright (C) 2005 IBM Corporation. - */ - -#include -#include -#include -#include -#include -#include - -typedef void (*relocate_new_kernel_t)( - unsigned long indirection_page, - unsigned long reboot_code_buffer, - unsigned long start_address) __noreturn; - -/* - * This is a generic machine_kexec function suitable at least for - * non-OpenFirmware embedded platforms. - * It merely copies the image relocation code to the control page and - * jumps to it. - * A platform specific function may just call this one. - */ -void default_machine_kexec(struct kimage *image) -{ - extern const unsigned int relocate_new_kernel_size; - unsigned long page_list; - unsigned long reboot_code_buffer, reboot_code_buffer_phys; - relocate_new_kernel_t rnk; - - /* Interrupts aren't acceptable while we reboot */ - local_irq_disable(); - - /* mask each interrupt so we are in a more sane state for the - * kexec kernel */ - machine_kexec_mask_interrupts(); - - page_list = image->head; - - /* we need both effective and real address here */ - reboot_code_buffer = - (unsigned long)page_address(image->control_code_page); - reboot_code_buffer_phys = virt_to_phys((void *)reboot_code_buffer); - - /* copy our kernel relocation code to the control code page */ - memcpy((void *)reboot_code_buffer, relocate_new_kernel, - relocate_new_kernel_size); - - flush_icache_range(reboot_code_buffer, - reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE); - printk(KERN_INFO "Bye!\n"); - - if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x)) - relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start); - - /* now call it */ - rnk = (relocate_new_kernel_t) reboot_code_buffer; - (*rnk)(page_list, reboot_code_buffer_phys, image->start); -} - -int default_machine_kexec_prepare(struct kimage *image) -{ - return 0; -} diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c deleted file mode 100644 index 04a7cba58eff4..0000000000000 --- a/arch/powerpc/kernel/machine_kexec_64.c +++ /dev/null @@ -1,417 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * PPC64 code to handle Linux booting another kernel. - * - * Copyright (C) 2004-2005, IBM Corp. - * - * Created by: Milton D Miller II - */ - - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include /* _end */ -#include -#include -#include -#include -#include -#include - -int default_machine_kexec_prepare(struct kimage *image) -{ - int i; - unsigned long begin, end; /* limits of segment */ - unsigned long low, high; /* limits of blocked memory range */ - struct device_node *node; - const unsigned long *basep; - const unsigned int *sizep; - - /* - * Since we use the kernel fault handlers and paging code to - * handle the virtual mode, we must make sure no destination - * overlaps kernel static data or bss. - */ - for (i = 0; i < image->nr_segments; i++) - if (image->segment[i].mem < __pa(_end)) - return -ETXTBSY; - - /* We also should not overwrite the tce tables */ - for_each_node_by_type(node, "pci") { - basep = of_get_property(node, "linux,tce-base", NULL); - sizep = of_get_property(node, "linux,tce-size", NULL); - if (basep == NULL || sizep == NULL) - continue; - - low = *basep; - high = low + (*sizep); - - for (i = 0; i < image->nr_segments; i++) { - begin = image->segment[i].mem; - end = begin + image->segment[i].memsz; - - if ((begin < high) && (end > low)) - return -ETXTBSY; - } - } - - return 0; -} - -static void copy_segments(unsigned long ind) -{ - unsigned long entry; - unsigned long *ptr; - void *dest; - void *addr; - - /* - * We rely on kexec_load to create a lists that properly - * initializes these pointers before they are used. - * We will still crash if the list is wrong, but at least - * the compiler will be quiet. - */ - ptr = NULL; - dest = NULL; - - for (entry = ind; !(entry & IND_DONE); entry = *ptr++) { - addr = __va(entry & PAGE_MASK); - - switch (entry & IND_FLAGS) { - case IND_DESTINATION: - dest = addr; - break; - case IND_INDIRECTION: - ptr = addr; - break; - case IND_SOURCE: - copy_page(dest, addr); - dest += PAGE_SIZE; - } - } -} - -void kexec_copy_flush(struct kimage *image) -{ - long i, nr_segments = image->nr_segments; - struct kexec_segment ranges[KEXEC_SEGMENT_MAX]; - - /* save the ranges on the stack to efficiently flush the icache */ - memcpy(ranges, image->segment, sizeof(ranges)); - - /* - * After this call we may not use anything allocated in dynamic - * memory, including *image. - * - * Only globals and the stack are allowed. - */ - copy_segments(image->head); - - /* - * we need to clear the icache for all dest pages sometime, - * including ones that were in place on the original copy - */ - for (i = 0; i < nr_segments; i++) - flush_icache_range((unsigned long)__va(ranges[i].mem), - (unsigned long)__va(ranges[i].mem + ranges[i].memsz)); -} - -#ifdef CONFIG_SMP - -static int kexec_all_irq_disabled = 0; - -static void kexec_smp_down(void *arg) -{ - local_irq_disable(); - hard_irq_disable(); - - mb(); /* make sure our irqs are disabled before we say they are */ - get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; - while(kexec_all_irq_disabled == 0) - cpu_relax(); - mb(); /* make sure all irqs are disabled before this */ - hw_breakpoint_disable(); - /* - * Now every CPU has IRQs off, we can clear out any pending - * IPIs and be sure that no more will come in after this. - */ - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(0, 1); - - kexec_smp_wait(); - /* NOTREACHED */ -} - -static void kexec_prepare_cpus_wait(int wait_state) -{ - int my_cpu, i, notified=-1; - - hw_breakpoint_disable(); - my_cpu = get_cpu(); - /* Make sure each CPU has at least made it to the state we need. - * - * FIXME: There is a (slim) chance of a problem if not all of the CPUs - * are correctly onlined. If somehow we start a CPU on boot with RTAS - * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in - * time, the boot CPU will timeout. If it does eventually execute - * stuff, the secondary will start up (paca_ptrs[]->cpu_start was - * written) and get into a peculiar state. - * If the platform supports smp_ops->take_timebase(), the secondary CPU - * will probably be spinning in there. If not (i.e. pseries), the - * secondary will continue on and try to online itself/idle/etc. If it - * survives that, we need to find these - * possible-but-not-online-but-should-be CPUs and chaperone them into - * kexec_smp_wait(). - */ - for_each_online_cpu(i) { - if (i == my_cpu) - continue; - - while (paca_ptrs[i]->kexec_state < wait_state) { - barrier(); - if (i != notified) { - printk(KERN_INFO "kexec: waiting for cpu %d " - "(physical %d) to enter %i state\n", - i, paca_ptrs[i]->hw_cpu_id, wait_state); - notified = i; - } - } - } - mb(); -} - -/* - * We need to make sure each present CPU is online. The next kernel will scan - * the device tree and assume primary threads are online and query secondary - * threads via RTAS to online them if required. If we don't online primary - * threads, they will be stuck. However, we also online secondary threads as we - * may be using 'cede offline'. In this case RTAS doesn't see the secondary - * threads as offline -- and again, these CPUs will be stuck. - * - * So, we online all CPUs that should be running, including secondary threads. - */ -static void wake_offline_cpus(void) -{ - int cpu = 0; - - for_each_present_cpu(cpu) { - if (!cpu_online(cpu)) { - printk(KERN_INFO "kexec: Waking offline cpu %d.\n", - cpu); - WARN_ON(cpu_up(cpu)); - } - } -} - -static void kexec_prepare_cpus(void) -{ - wake_offline_cpus(); - smp_call_function(kexec_smp_down, NULL, /* wait */0); - local_irq_disable(); - hard_irq_disable(); - - mb(); /* make sure IRQs are disabled before we say they are */ - get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; - - kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF); - /* we are sure every CPU has IRQs off at this point */ - kexec_all_irq_disabled = 1; - - /* - * Before removing MMU mappings make sure all CPUs have entered real - * mode: - */ - kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE); - - /* after we tell the others to go down */ - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(0, 0); - - put_cpu(); -} - -#else /* ! SMP */ - -static void kexec_prepare_cpus(void) -{ - /* - * move the secondarys to us so that we can copy - * the new kernel 0-0x100 safely - * - * do this if kexec in setup.c ? - * - * We need to release the cpus if we are ever going from an - * UP to an SMP kernel. - */ - smp_release_cpus(); - if (ppc_md.kexec_cpu_down) - ppc_md.kexec_cpu_down(0, 0); - local_irq_disable(); - hard_irq_disable(); -} - -#endif /* SMP */ - -/* - * kexec thread structure and stack. - * - * We need to make sure that this is 16384-byte aligned due to the - * way process stacks are handled. It also must be statically allocated - * or allocated as part of the kimage, because everything else may be - * overwritten when we copy the kexec image. We piggyback on the - * "init_task" linker section here to statically allocate a stack. - * - * We could use a smaller stack if we don't care about anything using - * current, but that audit has not been performed. - */ -static union thread_union kexec_stack __init_task_data = - { }; - -/* - * For similar reasons to the stack above, the kexecing CPU needs to be on a - * static PACA; we switch to kexec_paca. - */ -struct paca_struct kexec_paca; - -/* Our assembly helper, in misc_64.S */ -extern void kexec_sequence(void *newstack, unsigned long start, - void *image, void *control, - void (*clear_all)(void), - bool copy_with_mmu_off) __noreturn; - -/* too late to fail here */ -void default_machine_kexec(struct kimage *image) -{ - bool copy_with_mmu_off; - - /* prepare control code if any */ - - /* - * If the kexec boot is the normal one, need to shutdown other cpus - * into our wait loop and quiesce interrupts. - * Otherwise, in the case of crashed mode (crashing_cpu >= 0), - * stopping other CPUs and collecting their pt_regs is done before - * using debugger IPI. - */ - - if (!kdump_in_progress()) - kexec_prepare_cpus(); - - printk("kexec: Starting switchover sequence.\n"); - - /* switch to a staticly allocated stack. Based on irq stack code. - * We setup preempt_count to avoid using VMX in memcpy. - * XXX: the task struct will likely be invalid once we do the copy! - */ - current_thread_info()->flags = 0; - current_thread_info()->preempt_count = HARDIRQ_OFFSET; - - /* We need a static PACA, too; copy this CPU's PACA over and switch to - * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using - * non-static data. - */ - memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct)); - kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL; -#ifdef CONFIG_PPC_PSERIES - kexec_paca.lppaca_ptr = NULL; -#endif - - if (is_secure_guest() && !(image->preserve_context || - image->type == KEXEC_TYPE_CRASH)) { - uv_unshare_all_pages(); - printk("kexec: Unshared all shared pages.\n"); - } - - paca_ptrs[kexec_paca.paca_index] = &kexec_paca; - - setup_paca(&kexec_paca); - - /* - * The lppaca should be unregistered at this point so the HV won't - * touch it. In the case of a crash, none of the lppacas are - * unregistered so there is not much we can do about it here. - */ - - /* - * On Book3S, the copy must happen with the MMU off if we are either - * using Radix page tables or we are not in an LPAR since we can - * overwrite the page tables while copying. - * - * In an LPAR, we keep the MMU on otherwise we can't access beyond - * the RMA. On BookE there is no real MMU off mode, so we have to - * keep it enabled as well (but then we have bolted TLB entries). - */ -#ifdef CONFIG_PPC_BOOK3E - copy_with_mmu_off = false; -#else - copy_with_mmu_off = radix_enabled() || - !(firmware_has_feature(FW_FEATURE_LPAR) || - firmware_has_feature(FW_FEATURE_PS3_LV1)); -#endif - - /* Some things are best done in assembly. Finding globals with - * a toc is easier in C, so pass in what we can. - */ - kexec_sequence(&kexec_stack, image->start, image, - page_address(image->control_code_page), - mmu_cleanup_all, copy_with_mmu_off); - /* NOTREACHED */ -} - -#ifdef CONFIG_PPC_BOOK3S_64 -/* Values we need to export to the second kernel via the device tree. */ -static unsigned long htab_base; -static unsigned long htab_size; - -static struct property htab_base_prop = { - .name = "linux,htab-base", - .length = sizeof(unsigned long), - .value = &htab_base, -}; - -static struct property htab_size_prop = { - .name = "linux,htab-size", - .length = sizeof(unsigned long), - .value = &htab_size, -}; - -static int __init export_htab_values(void) -{ - struct device_node *node; - - /* On machines with no htab htab_address is NULL */ - if (!htab_address) - return -ENODEV; - - node = of_find_node_by_path("/chosen"); - if (!node) - return -ENODEV; - - /* remove any stale propertys so ours can be found */ - of_remove_property(node, of_find_property(node, htab_base_prop.name, NULL)); - of_remove_property(node, of_find_property(node, htab_size_prop.name, NULL)); - - htab_base = cpu_to_be64(__pa(htab_address)); - of_add_property(node, &htab_base_prop); - htab_size = cpu_to_be64(htab_size_bytes); - of_add_property(node, &htab_size_prop); - - of_node_put(node); - return 0; -} -late_initcall(export_htab_values); -#endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c deleted file mode 100644 index 143c91724617e..0000000000000 --- a/arch/powerpc/kernel/machine_kexec_file_64.c +++ /dev/null @@ -1,254 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * ppc64 code to implement the kexec_file_load syscall - * - * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) - * Copyright (C) 2004 IBM Corp. - * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation - * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) - * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) - * Copyright (C) 2016 IBM Corporation - * - * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c. - * Heavily modified for the kernel by - * Thiago Jung Bauermann . - */ - -#include -#include -#include -#include -#include - -#define SLAVE_CODE_SIZE 256 - -const struct kexec_file_ops * const kexec_file_loaders[] = { - &kexec_elf64_ops, - NULL -}; - -int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, - unsigned long buf_len) -{ - /* We don't support crash kernels yet. */ - if (image->type == KEXEC_TYPE_CRASH) - return -EOPNOTSUPP; - - return kexec_image_probe_default(image, buf, buf_len); -} - -/** - * setup_purgatory - initialize the purgatory's global variables - * @image: kexec image. - * @slave_code: Slave code for the purgatory. - * @fdt: Flattened device tree for the next kernel. - * @kernel_load_addr: Address where the kernel is loaded. - * @fdt_load_addr: Address where the flattened device tree is loaded. - * - * Return: 0 on success, or negative errno on error. - */ -int setup_purgatory(struct kimage *image, const void *slave_code, - const void *fdt, unsigned long kernel_load_addr, - unsigned long fdt_load_addr) -{ - unsigned int *slave_code_buf, master_entry; - int ret; - - slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL); - if (!slave_code_buf) - return -ENOMEM; - - /* Get the slave code from the new kernel and put it in purgatory. */ - ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", - slave_code_buf, SLAVE_CODE_SIZE, - true); - if (ret) { - kfree(slave_code_buf); - return ret; - } - - master_entry = slave_code_buf[0]; - memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE); - slave_code_buf[0] = master_entry; - ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", - slave_code_buf, SLAVE_CODE_SIZE, - false); - kfree(slave_code_buf); - - ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr, - sizeof(kernel_load_addr), false); - if (ret) - return ret; - ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr, - sizeof(fdt_load_addr), false); - if (ret) - return ret; - - return 0; -} - -/** - * delete_fdt_mem_rsv - delete memory reservation with given address and size - * - * Return: 0 on success, or negative errno on error. - */ -int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size) -{ - int i, ret, num_rsvs = fdt_num_mem_rsv(fdt); - - for (i = 0; i < num_rsvs; i++) { - uint64_t rsv_start, rsv_size; - - ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size); - if (ret) { - pr_err("Malformed device tree.\n"); - return -EINVAL; - } - - if (rsv_start == start && rsv_size == size) { - ret = fdt_del_mem_rsv(fdt, i); - if (ret) { - pr_err("Error deleting device tree reservation.\n"); - return -EINVAL; - } - - return 0; - } - } - - return -ENOENT; -} - -/* - * setup_new_fdt - modify /chosen and memory reservation for the next kernel - * @image: kexec image being loaded. - * @fdt: Flattened device tree for the next kernel. - * @initrd_load_addr: Address where the next initrd will be loaded. - * @initrd_len: Size of the next initrd, or 0 if there will be none. - * @cmdline: Command line for the next kernel, or NULL if there will - * be none. - * - * Return: 0 on success, or negative errno on error. - */ -int setup_new_fdt(const struct kimage *image, void *fdt, - unsigned long initrd_load_addr, unsigned long initrd_len, - const char *cmdline) -{ - int ret, chosen_node; - const void *prop; - - /* Remove memory reservation for the current device tree. */ - ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params), - fdt_totalsize(initial_boot_params)); - if (ret == 0) - pr_debug("Removed old device tree reservation.\n"); - else if (ret != -ENOENT) - return ret; - - chosen_node = fdt_path_offset(fdt, "/chosen"); - if (chosen_node == -FDT_ERR_NOTFOUND) { - chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), - "chosen"); - if (chosen_node < 0) { - pr_err("Error creating /chosen.\n"); - return -EINVAL; - } - } else if (chosen_node < 0) { - pr_err("Malformed device tree: error reading /chosen.\n"); - return -EINVAL; - } - - /* Did we boot using an initrd? */ - prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL); - if (prop) { - uint64_t tmp_start, tmp_end, tmp_size; - - tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop)); - - prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL); - if (!prop) { - pr_err("Malformed device tree.\n"); - return -EINVAL; - } - tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop)); - - /* - * kexec reserves exact initrd size, while firmware may - * reserve a multiple of PAGE_SIZE, so check for both. - */ - tmp_size = tmp_end - tmp_start; - ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size); - if (ret == -ENOENT) - ret = delete_fdt_mem_rsv(fdt, tmp_start, - round_up(tmp_size, PAGE_SIZE)); - if (ret == 0) - pr_debug("Removed old initrd reservation.\n"); - else if (ret != -ENOENT) - return ret; - - /* If there's no new initrd, delete the old initrd's info. */ - if (initrd_len == 0) { - ret = fdt_delprop(fdt, chosen_node, - "linux,initrd-start"); - if (ret) { - pr_err("Error deleting linux,initrd-start.\n"); - return -EINVAL; - } - - ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end"); - if (ret) { - pr_err("Error deleting linux,initrd-end.\n"); - return -EINVAL; - } - } - } - - if (initrd_len) { - ret = fdt_setprop_u64(fdt, chosen_node, - "linux,initrd-start", - initrd_load_addr); - if (ret < 0) - goto err; - - /* initrd-end is the first address after the initrd image. */ - ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end", - initrd_load_addr + initrd_len); - if (ret < 0) - goto err; - - ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len); - if (ret) { - pr_err("Error reserving initrd memory: %s\n", - fdt_strerror(ret)); - return -EINVAL; - } - } - - if (cmdline != NULL) { - ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline); - if (ret < 0) - goto err; - } else { - ret = fdt_delprop(fdt, chosen_node, "bootargs"); - if (ret && ret != -FDT_ERR_NOTFOUND) { - pr_err("Error deleting bootargs.\n"); - return -EINVAL; - } - } - - ret = setup_ima_buffer(image, fdt, chosen_node); - if (ret) { - pr_err("Error setting up the new device tree.\n"); - return ret; - } - - ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0); - if (ret) - goto err; - - return 0; - -err: - pr_err("Error setting up the new device tree.\n"); - return -EINVAL; -} diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile new file mode 100644 index 0000000000000..16c1c5a195198 --- /dev/null +++ b/arch/powerpc/kexec/Makefile @@ -0,0 +1,25 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the linux kernel. +# + +# Disable clang warning for using setjmp without setjmp.h header +CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header) + +obj-y += core.o crash.o core_$(BITS).o + +obj-$(CONFIG_PPC32) += relocate_32.o + +obj-$(CONFIG_KEXEC_FILE) += file_load.o elf_$(BITS).o + +ifdef CONFIG_HAVE_IMA_KEXEC +ifdef CONFIG_IMA +obj-y += ima.o +endif +endif + + +# Disable GCOV, KCOV & sanitizers in odd or sensitive code +GCOV_PROFILE_core_$(BITS).o := n +KCOV_INSTRUMENT_core_$(BITS).o := n +UBSAN_SANITIZE_core_$(BITS).o := n diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c new file mode 100644 index 0000000000000..078fe3d76feb6 --- /dev/null +++ b/arch/powerpc/kexec/core.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Code to handle transition of Linux booting another kernel. + * + * Copyright (C) 2002-2003 Eric Biederman + * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz + * Copyright (C) 2005 IBM Corporation. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +void machine_kexec_mask_interrupts(void) { + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) + chip->irq_eoi(&desc->irq_data); + + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } +} + +void machine_crash_shutdown(struct pt_regs *regs) +{ + default_machine_crash_shutdown(regs); +} + +/* + * Do what every setup is needed on image and the + * reboot code buffer to allow us to avoid allocations + * later. + */ +int machine_kexec_prepare(struct kimage *image) +{ + if (ppc_md.machine_kexec_prepare) + return ppc_md.machine_kexec_prepare(image); + else + return default_machine_kexec_prepare(image); +} + +void machine_kexec_cleanup(struct kimage *image) +{ +} + +void arch_crash_save_vmcoreinfo(void) +{ + +#ifdef CONFIG_NEED_MULTIPLE_NODES + VMCOREINFO_SYMBOL(node_data); + VMCOREINFO_LENGTH(node_data, MAX_NUMNODES); +#endif +#ifndef CONFIG_NEED_MULTIPLE_NODES + VMCOREINFO_SYMBOL(contig_page_data); +#endif +#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP) + VMCOREINFO_SYMBOL(vmemmap_list); + VMCOREINFO_SYMBOL(mmu_vmemmap_psize); + VMCOREINFO_SYMBOL(mmu_psize_defs); + VMCOREINFO_STRUCT_SIZE(vmemmap_backing); + VMCOREINFO_OFFSET(vmemmap_backing, list); + VMCOREINFO_OFFSET(vmemmap_backing, phys); + VMCOREINFO_OFFSET(vmemmap_backing, virt_addr); + VMCOREINFO_STRUCT_SIZE(mmu_psize_def); + VMCOREINFO_OFFSET(mmu_psize_def, shift); +#endif + vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); +} + +/* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. + */ +void machine_kexec(struct kimage *image) +{ + int save_ftrace_enabled; + + save_ftrace_enabled = __ftrace_enabled_save(); + this_cpu_disable_ftrace(); + + if (ppc_md.machine_kexec) + ppc_md.machine_kexec(image); + else + default_machine_kexec(image); + + this_cpu_enable_ftrace(); + __ftrace_enabled_restore(save_ftrace_enabled); + + /* Fall back to normal restart if we're still alive. */ + machine_restart(NULL); + for(;;); +} + +void __init reserve_crashkernel(void) +{ + unsigned long long crash_size, crash_base; + int ret; + + /* use common parsing */ + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), + &crash_size, &crash_base); + if (ret == 0 && crash_size > 0) { + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; + } + + if (crashk_res.end == crashk_res.start) { + crashk_res.start = crashk_res.end = 0; + return; + } + + /* We might have got these values via the command line or the + * device tree, either way sanitise them now. */ + + crash_size = resource_size(&crashk_res); + +#ifndef CONFIG_NONSTATIC_KERNEL + if (crashk_res.start != KDUMP_KERNELBASE) + printk("Crash kernel location must be 0x%x\n", + KDUMP_KERNELBASE); + + crashk_res.start = KDUMP_KERNELBASE; +#else + if (!crashk_res.start) { +#ifdef CONFIG_PPC64 + /* + * On 64bit we split the RMO in half but cap it at half of + * a small SLB (128MB) since the crash kernel needs to place + * itself and some stacks to be in the first segment. + */ + crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2)); +#else + crashk_res.start = KDUMP_KERNELBASE; +#endif + } + + crash_base = PAGE_ALIGN(crashk_res.start); + if (crash_base != crashk_res.start) { + printk("Crash kernel base must be aligned to 0x%lx\n", + PAGE_SIZE); + crashk_res.start = crash_base; + } + +#endif + crash_size = PAGE_ALIGN(crash_size); + crashk_res.end = crashk_res.start + crash_size - 1; + + /* The crash region must not overlap the current kernel */ + if (overlaps_crashkernel(__pa(_stext), _end - _stext)) { + printk(KERN_WARNING + "Crash kernel can not overlap current kernel\n"); + crashk_res.start = crashk_res.end = 0; + return; + } + + /* Crash kernel trumps memory limit */ + if (memory_limit && memory_limit <= crashk_res.end) { + memory_limit = crashk_res.end + 1; + printk("Adjusted memory limit for crashkernel, now 0x%llx\n", + memory_limit); + } + + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " + "for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(crash_size >> 20), + (unsigned long)(crashk_res.start >> 20), + (unsigned long)(memblock_phys_mem_size() >> 20)); + + if (!memblock_is_region_memory(crashk_res.start, crash_size) || + memblock_reserve(crashk_res.start, crash_size)) { + pr_err("Failed to reserve memory for crashkernel!\n"); + crashk_res.start = crashk_res.end = 0; + return; + } +} + +int overlaps_crashkernel(unsigned long start, unsigned long size) +{ + return (start + size) > crashk_res.start && start <= crashk_res.end; +} + +/* Values we need to export to the second kernel via the device tree. */ +static phys_addr_t kernel_end; +static phys_addr_t crashk_base; +static phys_addr_t crashk_size; +static unsigned long long mem_limit; + +static struct property kernel_end_prop = { + .name = "linux,kernel-end", + .length = sizeof(phys_addr_t), + .value = &kernel_end, +}; + +static struct property crashk_base_prop = { + .name = "linux,crashkernel-base", + .length = sizeof(phys_addr_t), + .value = &crashk_base +}; + +static struct property crashk_size_prop = { + .name = "linux,crashkernel-size", + .length = sizeof(phys_addr_t), + .value = &crashk_size, +}; + +static struct property memory_limit_prop = { + .name = "linux,memory-limit", + .length = sizeof(unsigned long long), + .value = &mem_limit, +}; + +#define cpu_to_be_ulong __PASTE(cpu_to_be, BITS_PER_LONG) + +static void __init export_crashk_values(struct device_node *node) +{ + /* There might be existing crash kernel properties, but we can't + * be sure what's in them, so remove them. */ + of_remove_property(node, of_find_property(node, + "linux,crashkernel-base", NULL)); + of_remove_property(node, of_find_property(node, + "linux,crashkernel-size", NULL)); + + if (crashk_res.start != 0) { + crashk_base = cpu_to_be_ulong(crashk_res.start), + of_add_property(node, &crashk_base_prop); + crashk_size = cpu_to_be_ulong(resource_size(&crashk_res)); + of_add_property(node, &crashk_size_prop); + } + + /* + * memory_limit is required by the kexec-tools to limit the + * crash regions to the actual memory used. + */ + mem_limit = cpu_to_be_ulong(memory_limit); + of_update_property(node, &memory_limit_prop); +} + +static int __init kexec_setup(void) +{ + struct device_node *node; + + node = of_find_node_by_path("/chosen"); + if (!node) + return -ENOENT; + + /* remove any stale properties so ours can be found */ + of_remove_property(node, of_find_property(node, kernel_end_prop.name, NULL)); + + /* information needed by userspace when using default_machine_kexec */ + kernel_end = cpu_to_be_ulong(__pa(_end)); + of_add_property(node, &kernel_end_prop); + + export_crashk_values(node); + + of_node_put(node); + return 0; +} +late_initcall(kexec_setup); diff --git a/arch/powerpc/kexec/core_32.c b/arch/powerpc/kexec/core_32.c new file mode 100644 index 0000000000000..bf9f1f906d646 --- /dev/null +++ b/arch/powerpc/kexec/core_32.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PPC32 code to handle Linux booting another kernel. + * + * Copyright (C) 2002-2003 Eric Biederman + * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz + * Copyright (C) 2005 IBM Corporation. + */ + +#include +#include +#include +#include +#include +#include + +typedef void (*relocate_new_kernel_t)( + unsigned long indirection_page, + unsigned long reboot_code_buffer, + unsigned long start_address) __noreturn; + +/* + * This is a generic machine_kexec function suitable at least for + * non-OpenFirmware embedded platforms. + * It merely copies the image relocation code to the control page and + * jumps to it. + * A platform specific function may just call this one. + */ +void default_machine_kexec(struct kimage *image) +{ + extern const unsigned int relocate_new_kernel_size; + unsigned long page_list; + unsigned long reboot_code_buffer, reboot_code_buffer_phys; + relocate_new_kernel_t rnk; + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + + /* mask each interrupt so we are in a more sane state for the + * kexec kernel */ + machine_kexec_mask_interrupts(); + + page_list = image->head; + + /* we need both effective and real address here */ + reboot_code_buffer = + (unsigned long)page_address(image->control_code_page); + reboot_code_buffer_phys = virt_to_phys((void *)reboot_code_buffer); + + /* copy our kernel relocation code to the control code page */ + memcpy((void *)reboot_code_buffer, relocate_new_kernel, + relocate_new_kernel_size); + + flush_icache_range(reboot_code_buffer, + reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE); + printk(KERN_INFO "Bye!\n"); + + if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x)) + relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start); + + /* now call it */ + rnk = (relocate_new_kernel_t) reboot_code_buffer; + (*rnk)(page_list, reboot_code_buffer_phys, image->start); +} + +int default_machine_kexec_prepare(struct kimage *image) +{ + return 0; +} diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c new file mode 100644 index 0000000000000..04a7cba58eff4 --- /dev/null +++ b/arch/powerpc/kexec/core_64.c @@ -0,0 +1,417 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * PPC64 code to handle Linux booting another kernel. + * + * Copyright (C) 2004-2005, IBM Corp. + * + * Created by: Milton D Miller II + */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include /* _end */ +#include +#include +#include +#include +#include +#include + +int default_machine_kexec_prepare(struct kimage *image) +{ + int i; + unsigned long begin, end; /* limits of segment */ + unsigned long low, high; /* limits of blocked memory range */ + struct device_node *node; + const unsigned long *basep; + const unsigned int *sizep; + + /* + * Since we use the kernel fault handlers and paging code to + * handle the virtual mode, we must make sure no destination + * overlaps kernel static data or bss. + */ + for (i = 0; i < image->nr_segments; i++) + if (image->segment[i].mem < __pa(_end)) + return -ETXTBSY; + + /* We also should not overwrite the tce tables */ + for_each_node_by_type(node, "pci") { + basep = of_get_property(node, "linux,tce-base", NULL); + sizep = of_get_property(node, "linux,tce-size", NULL); + if (basep == NULL || sizep == NULL) + continue; + + low = *basep; + high = low + (*sizep); + + for (i = 0; i < image->nr_segments; i++) { + begin = image->segment[i].mem; + end = begin + image->segment[i].memsz; + + if ((begin < high) && (end > low)) + return -ETXTBSY; + } + } + + return 0; +} + +static void copy_segments(unsigned long ind) +{ + unsigned long entry; + unsigned long *ptr; + void *dest; + void *addr; + + /* + * We rely on kexec_load to create a lists that properly + * initializes these pointers before they are used. + * We will still crash if the list is wrong, but at least + * the compiler will be quiet. + */ + ptr = NULL; + dest = NULL; + + for (entry = ind; !(entry & IND_DONE); entry = *ptr++) { + addr = __va(entry & PAGE_MASK); + + switch (entry & IND_FLAGS) { + case IND_DESTINATION: + dest = addr; + break; + case IND_INDIRECTION: + ptr = addr; + break; + case IND_SOURCE: + copy_page(dest, addr); + dest += PAGE_SIZE; + } + } +} + +void kexec_copy_flush(struct kimage *image) +{ + long i, nr_segments = image->nr_segments; + struct kexec_segment ranges[KEXEC_SEGMENT_MAX]; + + /* save the ranges on the stack to efficiently flush the icache */ + memcpy(ranges, image->segment, sizeof(ranges)); + + /* + * After this call we may not use anything allocated in dynamic + * memory, including *image. + * + * Only globals and the stack are allowed. + */ + copy_segments(image->head); + + /* + * we need to clear the icache for all dest pages sometime, + * including ones that were in place on the original copy + */ + for (i = 0; i < nr_segments; i++) + flush_icache_range((unsigned long)__va(ranges[i].mem), + (unsigned long)__va(ranges[i].mem + ranges[i].memsz)); +} + +#ifdef CONFIG_SMP + +static int kexec_all_irq_disabled = 0; + +static void kexec_smp_down(void *arg) +{ + local_irq_disable(); + hard_irq_disable(); + + mb(); /* make sure our irqs are disabled before we say they are */ + get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; + while(kexec_all_irq_disabled == 0) + cpu_relax(); + mb(); /* make sure all irqs are disabled before this */ + hw_breakpoint_disable(); + /* + * Now every CPU has IRQs off, we can clear out any pending + * IPIs and be sure that no more will come in after this. + */ + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(0, 1); + + kexec_smp_wait(); + /* NOTREACHED */ +} + +static void kexec_prepare_cpus_wait(int wait_state) +{ + int my_cpu, i, notified=-1; + + hw_breakpoint_disable(); + my_cpu = get_cpu(); + /* Make sure each CPU has at least made it to the state we need. + * + * FIXME: There is a (slim) chance of a problem if not all of the CPUs + * are correctly onlined. If somehow we start a CPU on boot with RTAS + * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in + * time, the boot CPU will timeout. If it does eventually execute + * stuff, the secondary will start up (paca_ptrs[]->cpu_start was + * written) and get into a peculiar state. + * If the platform supports smp_ops->take_timebase(), the secondary CPU + * will probably be spinning in there. If not (i.e. pseries), the + * secondary will continue on and try to online itself/idle/etc. If it + * survives that, we need to find these + * possible-but-not-online-but-should-be CPUs and chaperone them into + * kexec_smp_wait(). + */ + for_each_online_cpu(i) { + if (i == my_cpu) + continue; + + while (paca_ptrs[i]->kexec_state < wait_state) { + barrier(); + if (i != notified) { + printk(KERN_INFO "kexec: waiting for cpu %d " + "(physical %d) to enter %i state\n", + i, paca_ptrs[i]->hw_cpu_id, wait_state); + notified = i; + } + } + } + mb(); +} + +/* + * We need to make sure each present CPU is online. The next kernel will scan + * the device tree and assume primary threads are online and query secondary + * threads via RTAS to online them if required. If we don't online primary + * threads, they will be stuck. However, we also online secondary threads as we + * may be using 'cede offline'. In this case RTAS doesn't see the secondary + * threads as offline -- and again, these CPUs will be stuck. + * + * So, we online all CPUs that should be running, including secondary threads. + */ +static void wake_offline_cpus(void) +{ + int cpu = 0; + + for_each_present_cpu(cpu) { + if (!cpu_online(cpu)) { + printk(KERN_INFO "kexec: Waking offline cpu %d.\n", + cpu); + WARN_ON(cpu_up(cpu)); + } + } +} + +static void kexec_prepare_cpus(void) +{ + wake_offline_cpus(); + smp_call_function(kexec_smp_down, NULL, /* wait */0); + local_irq_disable(); + hard_irq_disable(); + + mb(); /* make sure IRQs are disabled before we say they are */ + get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF; + + kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF); + /* we are sure every CPU has IRQs off at this point */ + kexec_all_irq_disabled = 1; + + /* + * Before removing MMU mappings make sure all CPUs have entered real + * mode: + */ + kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE); + + /* after we tell the others to go down */ + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(0, 0); + + put_cpu(); +} + +#else /* ! SMP */ + +static void kexec_prepare_cpus(void) +{ + /* + * move the secondarys to us so that we can copy + * the new kernel 0-0x100 safely + * + * do this if kexec in setup.c ? + * + * We need to release the cpus if we are ever going from an + * UP to an SMP kernel. + */ + smp_release_cpus(); + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(0, 0); + local_irq_disable(); + hard_irq_disable(); +} + +#endif /* SMP */ + +/* + * kexec thread structure and stack. + * + * We need to make sure that this is 16384-byte aligned due to the + * way process stacks are handled. It also must be statically allocated + * or allocated as part of the kimage, because everything else may be + * overwritten when we copy the kexec image. We piggyback on the + * "init_task" linker section here to statically allocate a stack. + * + * We could use a smaller stack if we don't care about anything using + * current, but that audit has not been performed. + */ +static union thread_union kexec_stack __init_task_data = + { }; + +/* + * For similar reasons to the stack above, the kexecing CPU needs to be on a + * static PACA; we switch to kexec_paca. + */ +struct paca_struct kexec_paca; + +/* Our assembly helper, in misc_64.S */ +extern void kexec_sequence(void *newstack, unsigned long start, + void *image, void *control, + void (*clear_all)(void), + bool copy_with_mmu_off) __noreturn; + +/* too late to fail here */ +void default_machine_kexec(struct kimage *image) +{ + bool copy_with_mmu_off; + + /* prepare control code if any */ + + /* + * If the kexec boot is the normal one, need to shutdown other cpus + * into our wait loop and quiesce interrupts. + * Otherwise, in the case of crashed mode (crashing_cpu >= 0), + * stopping other CPUs and collecting their pt_regs is done before + * using debugger IPI. + */ + + if (!kdump_in_progress()) + kexec_prepare_cpus(); + + printk("kexec: Starting switchover sequence.\n"); + + /* switch to a staticly allocated stack. Based on irq stack code. + * We setup preempt_count to avoid using VMX in memcpy. + * XXX: the task struct will likely be invalid once we do the copy! + */ + current_thread_info()->flags = 0; + current_thread_info()->preempt_count = HARDIRQ_OFFSET; + + /* We need a static PACA, too; copy this CPU's PACA over and switch to + * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using + * non-static data. + */ + memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct)); + kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL; +#ifdef CONFIG_PPC_PSERIES + kexec_paca.lppaca_ptr = NULL; +#endif + + if (is_secure_guest() && !(image->preserve_context || + image->type == KEXEC_TYPE_CRASH)) { + uv_unshare_all_pages(); + printk("kexec: Unshared all shared pages.\n"); + } + + paca_ptrs[kexec_paca.paca_index] = &kexec_paca; + + setup_paca(&kexec_paca); + + /* + * The lppaca should be unregistered at this point so the HV won't + * touch it. In the case of a crash, none of the lppacas are + * unregistered so there is not much we can do about it here. + */ + + /* + * On Book3S, the copy must happen with the MMU off if we are either + * using Radix page tables or we are not in an LPAR since we can + * overwrite the page tables while copying. + * + * In an LPAR, we keep the MMU on otherwise we can't access beyond + * the RMA. On BookE there is no real MMU off mode, so we have to + * keep it enabled as well (but then we have bolted TLB entries). + */ +#ifdef CONFIG_PPC_BOOK3E + copy_with_mmu_off = false; +#else + copy_with_mmu_off = radix_enabled() || + !(firmware_has_feature(FW_FEATURE_LPAR) || + firmware_has_feature(FW_FEATURE_PS3_LV1)); +#endif + + /* Some things are best done in assembly. Finding globals with + * a toc is easier in C, so pass in what we can. + */ + kexec_sequence(&kexec_stack, image->start, image, + page_address(image->control_code_page), + mmu_cleanup_all, copy_with_mmu_off); + /* NOTREACHED */ +} + +#ifdef CONFIG_PPC_BOOK3S_64 +/* Values we need to export to the second kernel via the device tree. */ +static unsigned long htab_base; +static unsigned long htab_size; + +static struct property htab_base_prop = { + .name = "linux,htab-base", + .length = sizeof(unsigned long), + .value = &htab_base, +}; + +static struct property htab_size_prop = { + .name = "linux,htab-size", + .length = sizeof(unsigned long), + .value = &htab_size, +}; + +static int __init export_htab_values(void) +{ + struct device_node *node; + + /* On machines with no htab htab_address is NULL */ + if (!htab_address) + return -ENODEV; + + node = of_find_node_by_path("/chosen"); + if (!node) + return -ENODEV; + + /* remove any stale propertys so ours can be found */ + of_remove_property(node, of_find_property(node, htab_base_prop.name, NULL)); + of_remove_property(node, of_find_property(node, htab_size_prop.name, NULL)); + + htab_base = cpu_to_be64(__pa(htab_address)); + of_add_property(node, &htab_base_prop); + htab_size = cpu_to_be64(htab_size_bytes); + of_add_property(node, &htab_size_prop); + + of_node_put(node); + return 0; +} +late_initcall(export_htab_values); +#endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c new file mode 100644 index 0000000000000..d488311efab1f --- /dev/null +++ b/arch/powerpc/kexec/crash.c @@ -0,0 +1,374 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Architecture specific (PPC64) functions for kexec based crash dumps. + * + * Copyright (C) 2005, IBM Corp. + * + * Created by: Haren Myneni + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* + * The primary CPU waits a while for all secondary CPUs to enter. This is to + * avoid sending an IPI if the secondary CPUs are entering + * crash_kexec_secondary on their own (eg via a system reset). + * + * The secondary timeout has to be longer than the primary. Both timeouts are + * in milliseconds. + */ +#define PRIMARY_TIMEOUT 500 +#define SECONDARY_TIMEOUT 1000 + +#define IPI_TIMEOUT 10000 +#define REAL_MODE_TIMEOUT 10000 + +static int time_to_dump; +/* + * crash_wake_offline should be set to 1 by platforms that intend to wake + * up offline cpus prior to jumping to a kdump kernel. Currently powernv + * sets it to 1, since we want to avoid things from happening when an + * offline CPU wakes up due to something like an HMI (malfunction error), + * which propagates to all threads. + */ +int crash_wake_offline; + +#define CRASH_HANDLER_MAX 3 +/* List of shutdown handles */ +static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX]; +static DEFINE_SPINLOCK(crash_handlers_lock); + +static unsigned long crash_shutdown_buf[JMP_BUF_LEN]; +static int crash_shutdown_cpu = -1; + +static int handle_fault(struct pt_regs *regs) +{ + if (crash_shutdown_cpu == smp_processor_id()) + longjmp(crash_shutdown_buf, 1); + return 0; +} + +#ifdef CONFIG_SMP + +static atomic_t cpus_in_crash; +void crash_ipi_callback(struct pt_regs *regs) +{ + static cpumask_t cpus_state_saved = CPU_MASK_NONE; + + int cpu = smp_processor_id(); + + hard_irq_disable(); + if (!cpumask_test_cpu(cpu, &cpus_state_saved)) { + crash_save_cpu(regs, cpu); + cpumask_set_cpu(cpu, &cpus_state_saved); + } + + atomic_inc(&cpus_in_crash); + smp_mb__after_atomic(); + + /* + * Starting the kdump boot. + * This barrier is needed to make sure that all CPUs are stopped. + */ + while (!time_to_dump) + cpu_relax(); + + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(1, 1); + +#ifdef CONFIG_PPC64 + kexec_smp_wait(); +#else + for (;;); /* FIXME */ +#endif + + /* NOTREACHED */ +} + +static void crash_kexec_prepare_cpus(int cpu) +{ + unsigned int msecs; + unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */ + int tries = 0; + int (*old_handler)(struct pt_regs *regs); + + printk(KERN_EMERG "Sending IPI to other CPUs\n"); + + if (crash_wake_offline) + ncpus = num_present_cpus() - 1; + + crash_send_ipi(crash_ipi_callback); + smp_wmb(); + +again: + /* + * FIXME: Until we will have the way to stop other CPUs reliably, + * the crash CPU will send an IPI and wait for other CPUs to + * respond. + */ + msecs = IPI_TIMEOUT; + while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0)) + mdelay(1); + + /* Would it be better to replace the trap vector here? */ + + if (atomic_read(&cpus_in_crash) >= ncpus) { + printk(KERN_EMERG "IPI complete\n"); + return; + } + + printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n", + ncpus - atomic_read(&cpus_in_crash)); + + /* + * If we have a panic timeout set then we can't wait indefinitely + * for someone to activate system reset. We also give up on the + * second time through if system reset fail to work. + */ + if ((panic_timeout > 0) || (tries > 0)) + return; + + /* + * A system reset will cause all CPUs to take an 0x100 exception. + * The primary CPU returns here via setjmp, and the secondary + * CPUs reexecute the crash_kexec_secondary path. + */ + old_handler = __debugger; + __debugger = handle_fault; + crash_shutdown_cpu = smp_processor_id(); + + if (setjmp(crash_shutdown_buf) == 0) { + printk(KERN_EMERG "Activate system reset (dumprestart) " + "to stop other cpu(s)\n"); + + /* + * A system reset will force all CPUs to execute the + * crash code again. We need to reset cpus_in_crash so we + * wait for everyone to do this. + */ + atomic_set(&cpus_in_crash, 0); + smp_mb(); + + while (atomic_read(&cpus_in_crash) < ncpus) + cpu_relax(); + } + + crash_shutdown_cpu = -1; + __debugger = old_handler; + + tries++; + goto again; +} + +/* + * This function will be called by secondary cpus. + */ +void crash_kexec_secondary(struct pt_regs *regs) +{ + unsigned long flags; + int msecs = SECONDARY_TIMEOUT; + + local_irq_save(flags); + + /* Wait for the primary crash CPU to signal its progress */ + while (crashing_cpu < 0) { + if (--msecs < 0) { + /* No response, kdump image may not have been loaded */ + local_irq_restore(flags); + return; + } + + mdelay(1); + } + + crash_ipi_callback(regs); +} + +#else /* ! CONFIG_SMP */ + +static void crash_kexec_prepare_cpus(int cpu) +{ + /* + * move the secondaries to us so that we can copy + * the new kernel 0-0x100 safely + * + * do this if kexec in setup.c ? + */ +#ifdef CONFIG_PPC64 + smp_release_cpus(); +#else + /* FIXME */ +#endif +} + +void crash_kexec_secondary(struct pt_regs *regs) +{ +} +#endif /* CONFIG_SMP */ + +/* wait for all the CPUs to hit real mode but timeout if they don't come in */ +#if defined(CONFIG_SMP) && defined(CONFIG_PPC64) +static void __maybe_unused crash_kexec_wait_realmode(int cpu) +{ + unsigned int msecs; + int i; + + msecs = REAL_MODE_TIMEOUT; + for (i=0; i < nr_cpu_ids && msecs > 0; i++) { + if (i == cpu) + continue; + + while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) { + barrier(); + if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0)) + break; + msecs--; + mdelay(1); + } + } + mb(); +} +#else +static inline void crash_kexec_wait_realmode(int cpu) {} +#endif /* CONFIG_SMP && CONFIG_PPC64 */ + +/* + * Register a function to be called on shutdown. Only use this if you + * can't reset your device in the second kernel. + */ +int crash_shutdown_register(crash_shutdown_t handler) +{ + unsigned int i, rc; + + spin_lock(&crash_handlers_lock); + for (i = 0 ; i < CRASH_HANDLER_MAX; i++) + if (!crash_shutdown_handles[i]) { + /* Insert handle at first empty entry */ + crash_shutdown_handles[i] = handler; + rc = 0; + break; + } + + if (i == CRASH_HANDLER_MAX) { + printk(KERN_ERR "Crash shutdown handles full, " + "not registered.\n"); + rc = 1; + } + + spin_unlock(&crash_handlers_lock); + return rc; +} +EXPORT_SYMBOL(crash_shutdown_register); + +int crash_shutdown_unregister(crash_shutdown_t handler) +{ + unsigned int i, rc; + + spin_lock(&crash_handlers_lock); + for (i = 0 ; i < CRASH_HANDLER_MAX; i++) + if (crash_shutdown_handles[i] == handler) + break; + + if (i == CRASH_HANDLER_MAX) { + printk(KERN_ERR "Crash shutdown handle not found\n"); + rc = 1; + } else { + /* Shift handles down */ + for (; i < (CRASH_HANDLER_MAX - 1); i++) + crash_shutdown_handles[i] = + crash_shutdown_handles[i+1]; + /* + * Reset last entry to NULL now that it has been shifted down, + * this will allow new handles to be added here. + */ + crash_shutdown_handles[i] = NULL; + rc = 0; + } + + spin_unlock(&crash_handlers_lock); + return rc; +} +EXPORT_SYMBOL(crash_shutdown_unregister); + +void default_machine_crash_shutdown(struct pt_regs *regs) +{ + unsigned int i; + int (*old_handler)(struct pt_regs *regs); + + /* + * This function is only called after the system + * has panicked or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means stopping other cpus in + * an SMP system. + * The kernel is broken so disable interrupts. + */ + hard_irq_disable(); + + /* + * Make a note of crashing cpu. Will be used in machine_kexec + * such that another IPI will not be sent. + */ + crashing_cpu = smp_processor_id(); + + /* + * If we came in via system reset, wait a while for the secondary + * CPUs to enter. + */ + if (TRAP(regs) == 0x100) + mdelay(PRIMARY_TIMEOUT); + + crash_kexec_prepare_cpus(crashing_cpu); + + crash_save_cpu(regs, crashing_cpu); + + time_to_dump = 1; + + crash_kexec_wait_realmode(crashing_cpu); + + machine_kexec_mask_interrupts(); + + /* + * Call registered shutdown routines safely. Swap out + * __debugger_fault_handler, and replace on exit. + */ + old_handler = __debugger_fault_handler; + __debugger_fault_handler = handle_fault; + crash_shutdown_cpu = smp_processor_id(); + for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) { + if (setjmp(crash_shutdown_buf) == 0) { + /* + * Insert syncs and delay to ensure + * instructions in the dangerous region don't + * leak away from this protected region. + */ + asm volatile("sync; isync"); + /* dangerous region */ + crash_shutdown_handles[i](); + asm volatile("sync; isync"); + } + } + crash_shutdown_cpu = -1; + __debugger_fault_handler = old_handler; + + if (ppc_md.kexec_cpu_down) + ppc_md.kexec_cpu_down(1, 0); +} diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c new file mode 100644 index 0000000000000..3072fd6dbe94a --- /dev/null +++ b/arch/powerpc/kexec/elf_64.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Load ELF vmlinux file for the kexec_file_load syscall. + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2016 IBM Corporation + * + * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c. + * Heavily modified for the kernel by + * Thiago Jung Bauermann . + */ + +#define pr_fmt(fmt) "kexec_elf: " fmt + +#include +#include +#include +#include +#include +#include +#include + +static void *elf64_load(struct kimage *image, char *kernel_buf, + unsigned long kernel_len, char *initrd, + unsigned long initrd_len, char *cmdline, + unsigned long cmdline_len) +{ + int ret; + unsigned int fdt_size; + unsigned long kernel_load_addr; + unsigned long initrd_load_addr = 0, fdt_load_addr; + void *fdt; + const void *slave_code; + struct elfhdr ehdr; + struct kexec_elf_info elf_info; + struct kexec_buf kbuf = { .image = image, .buf_min = 0, + .buf_max = ppc64_rma_size }; + struct kexec_buf pbuf = { .image = image, .buf_min = 0, + .buf_max = ppc64_rma_size, .top_down = true, + .mem = KEXEC_BUF_MEM_UNKNOWN }; + + ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info); + if (ret) + goto out; + + ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr); + if (ret) + goto out; + + pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr); + + ret = kexec_load_purgatory(image, &pbuf); + if (ret) { + pr_err("Loading purgatory failed.\n"); + goto out; + } + + pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem); + + if (initrd != NULL) { + kbuf.buffer = initrd; + kbuf.bufsz = kbuf.memsz = initrd_len; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = false; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + initrd_load_addr = kbuf.mem; + + pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr); + } + + fdt_size = fdt_totalsize(initial_boot_params) * 2; + fdt = kmalloc(fdt_size, GFP_KERNEL); + if (!fdt) { + pr_err("Not enough memory for the device tree.\n"); + ret = -ENOMEM; + goto out; + } + ret = fdt_open_into(initial_boot_params, fdt, fdt_size); + if (ret < 0) { + pr_err("Error setting up the new device tree.\n"); + ret = -EINVAL; + goto out; + } + + ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline); + if (ret) + goto out; + + fdt_pack(fdt); + + kbuf.buffer = fdt; + kbuf.bufsz = kbuf.memsz = fdt_size; + kbuf.buf_align = PAGE_SIZE; + kbuf.top_down = true; + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; + ret = kexec_add_buffer(&kbuf); + if (ret) + goto out; + fdt_load_addr = kbuf.mem; + + pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr); + + slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset; + ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr, + fdt_load_addr); + if (ret) + pr_err("Error setting up the purgatory.\n"); + +out: + kexec_free_elf_info(&elf_info); + + /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */ + return ret ? ERR_PTR(ret) : fdt; +} + +const struct kexec_file_ops kexec_elf64_ops = { + .probe = kexec_elf_probe, + .load = elf64_load, +}; diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c new file mode 100644 index 0000000000000..143c91724617e --- /dev/null +++ b/arch/powerpc/kexec/file_load.c @@ -0,0 +1,254 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * ppc64 code to implement the kexec_file_load syscall + * + * Copyright (C) 2004 Adam Litke (agl@us.ibm.com) + * Copyright (C) 2004 IBM Corp. + * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation + * Copyright (C) 2005 R Sharada (sharada@in.ibm.com) + * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com) + * Copyright (C) 2016 IBM Corporation + * + * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c. + * Heavily modified for the kernel by + * Thiago Jung Bauermann . + */ + +#include +#include +#include +#include +#include + +#define SLAVE_CODE_SIZE 256 + +const struct kexec_file_ops * const kexec_file_loaders[] = { + &kexec_elf64_ops, + NULL +}; + +int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, + unsigned long buf_len) +{ + /* We don't support crash kernels yet. */ + if (image->type == KEXEC_TYPE_CRASH) + return -EOPNOTSUPP; + + return kexec_image_probe_default(image, buf, buf_len); +} + +/** + * setup_purgatory - initialize the purgatory's global variables + * @image: kexec image. + * @slave_code: Slave code for the purgatory. + * @fdt: Flattened device tree for the next kernel. + * @kernel_load_addr: Address where the kernel is loaded. + * @fdt_load_addr: Address where the flattened device tree is loaded. + * + * Return: 0 on success, or negative errno on error. + */ +int setup_purgatory(struct kimage *image, const void *slave_code, + const void *fdt, unsigned long kernel_load_addr, + unsigned long fdt_load_addr) +{ + unsigned int *slave_code_buf, master_entry; + int ret; + + slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL); + if (!slave_code_buf) + return -ENOMEM; + + /* Get the slave code from the new kernel and put it in purgatory. */ + ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", + slave_code_buf, SLAVE_CODE_SIZE, + true); + if (ret) { + kfree(slave_code_buf); + return ret; + } + + master_entry = slave_code_buf[0]; + memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE); + slave_code_buf[0] = master_entry; + ret = kexec_purgatory_get_set_symbol(image, "purgatory_start", + slave_code_buf, SLAVE_CODE_SIZE, + false); + kfree(slave_code_buf); + + ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr, + sizeof(kernel_load_addr), false); + if (ret) + return ret; + ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr, + sizeof(fdt_load_addr), false); + if (ret) + return ret; + + return 0; +} + +/** + * delete_fdt_mem_rsv - delete memory reservation with given address and size + * + * Return: 0 on success, or negative errno on error. + */ +int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size) +{ + int i, ret, num_rsvs = fdt_num_mem_rsv(fdt); + + for (i = 0; i < num_rsvs; i++) { + uint64_t rsv_start, rsv_size; + + ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size); + if (ret) { + pr_err("Malformed device tree.\n"); + return -EINVAL; + } + + if (rsv_start == start && rsv_size == size) { + ret = fdt_del_mem_rsv(fdt, i); + if (ret) { + pr_err("Error deleting device tree reservation.\n"); + return -EINVAL; + } + + return 0; + } + } + + return -ENOENT; +} + +/* + * setup_new_fdt - modify /chosen and memory reservation for the next kernel + * @image: kexec image being loaded. + * @fdt: Flattened device tree for the next kernel. + * @initrd_load_addr: Address where the next initrd will be loaded. + * @initrd_len: Size of the next initrd, or 0 if there will be none. + * @cmdline: Command line for the next kernel, or NULL if there will + * be none. + * + * Return: 0 on success, or negative errno on error. + */ +int setup_new_fdt(const struct kimage *image, void *fdt, + unsigned long initrd_load_addr, unsigned long initrd_len, + const char *cmdline) +{ + int ret, chosen_node; + const void *prop; + + /* Remove memory reservation for the current device tree. */ + ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params), + fdt_totalsize(initial_boot_params)); + if (ret == 0) + pr_debug("Removed old device tree reservation.\n"); + else if (ret != -ENOENT) + return ret; + + chosen_node = fdt_path_offset(fdt, "/chosen"); + if (chosen_node == -FDT_ERR_NOTFOUND) { + chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), + "chosen"); + if (chosen_node < 0) { + pr_err("Error creating /chosen.\n"); + return -EINVAL; + } + } else if (chosen_node < 0) { + pr_err("Malformed device tree: error reading /chosen.\n"); + return -EINVAL; + } + + /* Did we boot using an initrd? */ + prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL); + if (prop) { + uint64_t tmp_start, tmp_end, tmp_size; + + tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop)); + + prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL); + if (!prop) { + pr_err("Malformed device tree.\n"); + return -EINVAL; + } + tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop)); + + /* + * kexec reserves exact initrd size, while firmware may + * reserve a multiple of PAGE_SIZE, so check for both. + */ + tmp_size = tmp_end - tmp_start; + ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size); + if (ret == -ENOENT) + ret = delete_fdt_mem_rsv(fdt, tmp_start, + round_up(tmp_size, PAGE_SIZE)); + if (ret == 0) + pr_debug("Removed old initrd reservation.\n"); + else if (ret != -ENOENT) + return ret; + + /* If there's no new initrd, delete the old initrd's info. */ + if (initrd_len == 0) { + ret = fdt_delprop(fdt, chosen_node, + "linux,initrd-start"); + if (ret) { + pr_err("Error deleting linux,initrd-start.\n"); + return -EINVAL; + } + + ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end"); + if (ret) { + pr_err("Error deleting linux,initrd-end.\n"); + return -EINVAL; + } + } + } + + if (initrd_len) { + ret = fdt_setprop_u64(fdt, chosen_node, + "linux,initrd-start", + initrd_load_addr); + if (ret < 0) + goto err; + + /* initrd-end is the first address after the initrd image. */ + ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end", + initrd_load_addr + initrd_len); + if (ret < 0) + goto err; + + ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len); + if (ret) { + pr_err("Error reserving initrd memory: %s\n", + fdt_strerror(ret)); + return -EINVAL; + } + } + + if (cmdline != NULL) { + ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline); + if (ret < 0) + goto err; + } else { + ret = fdt_delprop(fdt, chosen_node, "bootargs"); + if (ret && ret != -FDT_ERR_NOTFOUND) { + pr_err("Error deleting bootargs.\n"); + return -EINVAL; + } + } + + ret = setup_ima_buffer(image, fdt, chosen_node); + if (ret) { + pr_err("Error setting up the new device tree.\n"); + return ret; + } + + ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0); + if (ret) + goto err; + + return 0; + +err: + pr_err("Error setting up the new device tree.\n"); + return -EINVAL; +} diff --git a/arch/powerpc/kexec/ima.c b/arch/powerpc/kexec/ima.c new file mode 100644 index 0000000000000..720e50e490b61 --- /dev/null +++ b/arch/powerpc/kexec/ima.c @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2016 IBM Corporation + * + * Authors: + * Thiago Jung Bauermann + */ + +#include +#include +#include +#include +#include + +static int get_addr_size_cells(int *addr_cells, int *size_cells) +{ + struct device_node *root; + + root = of_find_node_by_path("/"); + if (!root) + return -EINVAL; + + *addr_cells = of_n_addr_cells(root); + *size_cells = of_n_size_cells(root); + + of_node_put(root); + + return 0; +} + +static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr, + size_t *size) +{ + int ret, addr_cells, size_cells; + + ret = get_addr_size_cells(&addr_cells, &size_cells); + if (ret) + return ret; + + if (len < 4 * (addr_cells + size_cells)) + return -ENOENT; + + *addr = of_read_number(prop, addr_cells); + *size = of_read_number(prop + 4 * addr_cells, size_cells); + + return 0; +} + +/** + * ima_get_kexec_buffer - get IMA buffer from the previous kernel + * @addr: On successful return, set to point to the buffer contents. + * @size: On successful return, set to the buffer size. + * + * Return: 0 on success, negative errno on error. + */ +int ima_get_kexec_buffer(void **addr, size_t *size) +{ + int ret, len; + unsigned long tmp_addr; + size_t tmp_size; + const void *prop; + + prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len); + if (!prop) + return -ENOENT; + + ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size); + if (ret) + return ret; + + *addr = __va(tmp_addr); + *size = tmp_size; + + return 0; +} + +/** + * ima_free_kexec_buffer - free memory used by the IMA buffer + */ +int ima_free_kexec_buffer(void) +{ + int ret; + unsigned long addr; + size_t size; + struct property *prop; + + prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL); + if (!prop) + return -ENOENT; + + ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size); + if (ret) + return ret; + + ret = of_remove_property(of_chosen, prop); + if (ret) + return ret; + + return memblock_free(addr, size); + +} + +/** + * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt + * + * The IMA measurement buffer is of no use to a subsequent kernel, so we always + * remove it from the device tree. + */ +void remove_ima_buffer(void *fdt, int chosen_node) +{ + int ret, len; + unsigned long addr; + size_t size; + const void *prop; + + prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len); + if (!prop) + return; + + ret = do_get_kexec_buffer(prop, len, &addr, &size); + fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer"); + if (ret) + return; + + ret = delete_fdt_mem_rsv(fdt, addr, size); + if (!ret) + pr_debug("Removed old IMA buffer reservation.\n"); +} + +#ifdef CONFIG_IMA_KEXEC +/** + * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer + * + * Architectures should use this function to pass on the IMA buffer + * information to the next kernel. + * + * Return: 0 on success, negative errno on error. + */ +int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr, + size_t size) +{ + image->arch.ima_buffer_addr = load_addr; + image->arch.ima_buffer_size = size; + + return 0; +} + +static int write_number(void *p, u64 value, int cells) +{ + if (cells == 1) { + u32 tmp; + + if (value > U32_MAX) + return -EINVAL; + + tmp = cpu_to_be32(value); + memcpy(p, &tmp, sizeof(tmp)); + } else if (cells == 2) { + u64 tmp; + + tmp = cpu_to_be64(value); + memcpy(p, &tmp, sizeof(tmp)); + } else + return -EINVAL; + + return 0; +} + +/** + * setup_ima_buffer - add IMA buffer information to the fdt + * @image: kexec image being loaded. + * @fdt: Flattened device tree for the next kernel. + * @chosen_node: Offset to the chosen node. + * + * Return: 0 on success, or negative errno on error. + */ +int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node) +{ + int ret, addr_cells, size_cells, entry_size; + u8 value[16]; + + remove_ima_buffer(fdt, chosen_node); + if (!image->arch.ima_buffer_size) + return 0; + + ret = get_addr_size_cells(&addr_cells, &size_cells); + if (ret) + return ret; + + entry_size = 4 * (addr_cells + size_cells); + + if (entry_size > sizeof(value)) + return -EINVAL; + + ret = write_number(value, image->arch.ima_buffer_addr, addr_cells); + if (ret) + return ret; + + ret = write_number(value + 4 * addr_cells, image->arch.ima_buffer_size, + size_cells); + if (ret) + return ret; + + ret = fdt_setprop(fdt, chosen_node, "linux,ima-kexec-buffer", value, + entry_size); + if (ret < 0) + return -EINVAL; + + ret = fdt_add_mem_rsv(fdt, image->arch.ima_buffer_addr, + image->arch.ima_buffer_size); + if (ret) + return -EINVAL; + + pr_debug("IMA buffer at 0x%llx, size = 0x%zx\n", + image->arch.ima_buffer_addr, image->arch.ima_buffer_size); + + return 0; +} +#endif /* CONFIG_IMA_KEXEC */ diff --git a/arch/powerpc/kexec/relocate_32.S b/arch/powerpc/kexec/relocate_32.S new file mode 100644 index 0000000000000..61946c19e07ca --- /dev/null +++ b/arch/powerpc/kexec/relocate_32.S @@ -0,0 +1,500 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * This file contains kexec low-level functions. + * + * Copyright (C) 2002-2003 Eric Biederman + * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz + * PPC44x port. Copyright (C) 2011, IBM Corporation + * Author: Suzuki Poulose + */ + +#include +#include +#include +#include +#include + + .text + + /* + * Must be relocatable PIC code callable as a C function. + */ + .globl relocate_new_kernel +relocate_new_kernel: + /* r3 = page_list */ + /* r4 = reboot_code_buffer */ + /* r5 = start_address */ + +#ifdef CONFIG_FSL_BOOKE + + mr r29, r3 + mr r30, r4 + mr r31, r5 + +#define ENTRY_MAPPING_KEXEC_SETUP +#include +#undef ENTRY_MAPPING_KEXEC_SETUP + + mr r3, r29 + mr r4, r30 + mr r5, r31 + + li r0, 0 +#elif defined(CONFIG_44x) + + /* Save our parameters */ + mr r29, r3 + mr r30, r4 + mr r31, r5 + +#ifdef CONFIG_PPC_47x + /* Check for 47x cores */ + mfspr r3,SPRN_PVR + srwi r3,r3,16 + cmplwi cr0,r3,PVR_476FPE@h + beq setup_map_47x + cmplwi cr0,r3,PVR_476@h + beq setup_map_47x + cmplwi cr0,r3,PVR_476_ISS@h + beq setup_map_47x +#endif /* CONFIG_PPC_47x */ + +/* + * Code for setting up 1:1 mapping for PPC440x for KEXEC + * + * We cannot switch off the MMU on PPC44x. + * So we: + * 1) Invalidate all the mappings except the one we are running from. + * 2) Create a tmp mapping for our code in the other address space(TS) and + * jump to it. Invalidate the entry we started in. + * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS. + * 4) Jump to the 1:1 mapping in original TS. + * 5) Invalidate the tmp mapping. + * + * - Based on the kexec support code for FSL BookE + * + */ + + /* + * Load the PID with kernel PID (0). + * Also load our MSR_IS and TID to MMUCR for TLB search. + */ + li r3, 0 + mtspr SPRN_PID, r3 + mfmsr r4 + andi. r4,r4,MSR_IS@l + beq wmmucr + oris r3,r3,PPC44x_MMUCR_STS@h +wmmucr: + mtspr SPRN_MMUCR,r3 + sync + + /* + * Invalidate all the TLB entries except the current entry + * where we are running from + */ + bl 0f /* Find our address */ +0: mflr r5 /* Make it accessible */ + tlbsx r23,0,r5 /* Find entry we are in */ + li r4,0 /* Start at TLB entry 0 */ + li r3,0 /* Set PAGEID inval value */ +1: cmpw r23,r4 /* Is this our entry? */ + beq skip /* If so, skip the inval */ + tlbwe r3,r4,PPC44x_TLB_PAGEID /* If not, inval the entry */ +skip: + addi r4,r4,1 /* Increment */ + cmpwi r4,64 /* Are we done? */ + bne 1b /* If not, repeat */ + isync + + /* Create a temp mapping and jump to it */ + andi. r6, r23, 1 /* Find the index to use */ + addi r24, r6, 1 /* r24 will contain 1 or 2 */ + + mfmsr r9 /* get the MSR */ + rlwinm r5, r9, 27, 31, 31 /* Extract the MSR[IS] */ + xori r7, r5, 1 /* Use the other address space */ + + /* Read the current mapping entries */ + tlbre r3, r23, PPC44x_TLB_PAGEID + tlbre r4, r23, PPC44x_TLB_XLAT + tlbre r5, r23, PPC44x_TLB_ATTRIB + + /* Save our current XLAT entry */ + mr r25, r4 + + /* Extract the TLB PageSize */ + li r10, 1 /* r10 will hold PageSize */ + rlwinm r11, r3, 0, 24, 27 /* bits 24-27 */ + + /* XXX: As of now we use 256M, 4K pages */ + cmpwi r11, PPC44x_TLB_256M + bne tlb_4k + rotlwi r10, r10, 28 /* r10 = 256M */ + b write_out +tlb_4k: + cmpwi r11, PPC44x_TLB_4K + bne default + rotlwi r10, r10, 12 /* r10 = 4K */ + b write_out +default: + rotlwi r10, r10, 10 /* r10 = 1K */ + +write_out: + /* + * Write out the tmp 1:1 mapping for this code in other address space + * Fixup EPN = RPN , TS=other address space + */ + insrwi r3, r7, 1, 23 /* Bit 23 is TS for PAGEID field */ + + /* Write out the tmp mapping entries */ + tlbwe r3, r24, PPC44x_TLB_PAGEID + tlbwe r4, r24, PPC44x_TLB_XLAT + tlbwe r5, r24, PPC44x_TLB_ATTRIB + + subi r11, r10, 1 /* PageOffset Mask = PageSize - 1 */ + not r10, r11 /* Mask for PageNum */ + + /* Switch to other address space in MSR */ + insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ + + bl 1f +1: mflr r8 + addi r8, r8, (2f-1b) /* Find the target offset */ + + /* Jump to the tmp mapping */ + mtspr SPRN_SRR0, r8 + mtspr SPRN_SRR1, r9 + rfi + +2: + /* Invalidate the entry we were executing from */ + li r3, 0 + tlbwe r3, r23, PPC44x_TLB_PAGEID + + /* attribute fields. rwx for SUPERVISOR mode */ + li r5, 0 + ori r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G) + + /* Create 1:1 mapping in 256M pages */ + xori r7, r7, 1 /* Revert back to Original TS */ + + li r8, 0 /* PageNumber */ + li r6, 3 /* TLB Index, start at 3 */ + +next_tlb: + rotlwi r3, r8, 28 /* Create EPN (bits 0-3) */ + mr r4, r3 /* RPN = EPN */ + ori r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */ + insrwi r3, r7, 1, 23 /* Set TS from r7 */ + + tlbwe r3, r6, PPC44x_TLB_PAGEID /* PageID field : EPN, V, SIZE */ + tlbwe r4, r6, PPC44x_TLB_XLAT /* Address translation : RPN */ + tlbwe r5, r6, PPC44x_TLB_ATTRIB /* Attributes */ + + addi r8, r8, 1 /* Increment PN */ + addi r6, r6, 1 /* Increment TLB Index */ + cmpwi r8, 8 /* Are we done ? */ + bne next_tlb + isync + + /* Jump to the new mapping 1:1 */ + li r9,0 + insrwi r9, r7, 1, 26 /* Set MSR[IS] = r7 */ + + bl 1f +1: mflr r8 + and r8, r8, r11 /* Get our offset within page */ + addi r8, r8, (2f-1b) + + and r5, r25, r10 /* Get our target PageNum */ + or r8, r8, r5 /* Target jump address */ + + mtspr SPRN_SRR0, r8 + mtspr SPRN_SRR1, r9 + rfi +2: + /* Invalidate the tmp entry we used */ + li r3, 0 + tlbwe r3, r24, PPC44x_TLB_PAGEID + sync + b ppc44x_map_done + +#ifdef CONFIG_PPC_47x + + /* 1:1 mapping for 47x */ + +setup_map_47x: + + /* + * Load the kernel pid (0) to PID and also to MMUCR[TID]. + * Also set the MSR IS->MMUCR STS + */ + li r3, 0 + mtspr SPRN_PID, r3 /* Set PID */ + mfmsr r4 /* Get MSR */ + andi. r4, r4, MSR_IS@l /* TS=1? */ + beq 1f /* If not, leave STS=0 */ + oris r3, r3, PPC47x_MMUCR_STS@h /* Set STS=1 */ +1: mtspr SPRN_MMUCR, r3 /* Put MMUCR */ + sync + + /* Find the entry we are running from */ + bl 2f +2: mflr r23 + tlbsx r23, 0, r23 + tlbre r24, r23, 0 /* TLB Word 0 */ + tlbre r25, r23, 1 /* TLB Word 1 */ + tlbre r26, r23, 2 /* TLB Word 2 */ + + + /* + * Invalidates all the tlb entries by writing to 256 RPNs(r4) + * of 4k page size in all 4 ways (0-3 in r3). + * This would invalidate the entire UTLB including the one we are + * running from. However the shadow TLB entries would help us + * to continue the execution, until we flush them (rfi/isync). + */ + addis r3, 0, 0x8000 /* specify the way */ + addi r4, 0, 0 /* TLB Word0 = (EPN=0, VALID = 0) */ + addi r5, 0, 0 + b clear_utlb_entry + + /* Align the loop to speed things up. from head_44x.S */ + .align 6 + +clear_utlb_entry: + + tlbwe r4, r3, 0 + tlbwe r5, r3, 1 + tlbwe r5, r3, 2 + addis r3, r3, 0x2000 /* Increment the way */ + cmpwi r3, 0 + bne clear_utlb_entry + addis r3, 0, 0x8000 + addis r4, r4, 0x100 /* Increment the EPN */ + cmpwi r4, 0 + bne clear_utlb_entry + + /* Create the entries in the other address space */ + mfmsr r5 + rlwinm r7, r5, 27, 31, 31 /* Get the TS (Bit 26) from MSR */ + xori r7, r7, 1 /* r7 = !TS */ + + insrwi r24, r7, 1, 21 /* Change the TS in the saved TLB word 0 */ + + /* + * write out the TLB entries for the tmp mapping + * Use way '0' so that we could easily invalidate it later. + */ + lis r3, 0x8000 /* Way '0' */ + + tlbwe r24, r3, 0 + tlbwe r25, r3, 1 + tlbwe r26, r3, 2 + + /* Update the msr to the new TS */ + insrwi r5, r7, 1, 26 + + bl 1f +1: mflr r6 + addi r6, r6, (2f-1b) + + mtspr SPRN_SRR0, r6 + mtspr SPRN_SRR1, r5 + rfi + + /* + * Now we are in the tmp address space. + * Create a 1:1 mapping for 0-2GiB in the original TS. + */ +2: + li r3, 0 + li r4, 0 /* TLB Word 0 */ + li r5, 0 /* TLB Word 1 */ + li r6, 0 + ori r6, r6, PPC47x_TLB2_S_RWX /* TLB word 2 */ + + li r8, 0 /* PageIndex */ + + xori r7, r7, 1 /* revert back to original TS */ + +write_utlb: + rotlwi r5, r8, 28 /* RPN = PageIndex * 256M */ + /* ERPN = 0 as we don't use memory above 2G */ + + mr r4, r5 /* EPN = RPN */ + ori r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M) + insrwi r4, r7, 1, 21 /* Insert the TS to Word 0 */ + + tlbwe r4, r3, 0 /* Write out the entries */ + tlbwe r5, r3, 1 + tlbwe r6, r3, 2 + addi r8, r8, 1 + cmpwi r8, 8 /* Have we completed ? */ + bne write_utlb + + /* make sure we complete the TLB write up */ + isync + + /* + * Prepare to jump to the 1:1 mapping. + * 1) Extract page size of the tmp mapping + * DSIZ = TLB_Word0[22:27] + * 2) Calculate the physical address of the address + * to jump to. + */ + rlwinm r10, r24, 0, 22, 27 + + cmpwi r10, PPC47x_TLB0_4K + bne 0f + li r10, 0x1000 /* r10 = 4k */ + bl 1f + +0: + /* Defaults to 256M */ + lis r10, 0x1000 + + bl 1f +1: mflr r4 + addi r4, r4, (2f-1b) /* virtual address of 2f */ + + subi r11, r10, 1 /* offsetmask = Pagesize - 1 */ + not r10, r11 /* Pagemask = ~(offsetmask) */ + + and r5, r25, r10 /* Physical page */ + and r6, r4, r11 /* offset within the current page */ + + or r5, r5, r6 /* Physical address for 2f */ + + /* Switch the TS in MSR to the original one */ + mfmsr r8 + insrwi r8, r7, 1, 26 + + mtspr SPRN_SRR1, r8 + mtspr SPRN_SRR0, r5 + rfi + +2: + /* Invalidate the tmp mapping */ + lis r3, 0x8000 /* Way '0' */ + + clrrwi r24, r24, 12 /* Clear the valid bit */ + tlbwe r24, r3, 0 + tlbwe r25, r3, 1 + tlbwe r26, r3, 2 + + /* Make sure we complete the TLB write and flush the shadow TLB */ + isync + +#endif + +ppc44x_map_done: + + + /* Restore the parameters */ + mr r3, r29 + mr r4, r30 + mr r5, r31 + + li r0, 0 +#else + li r0, 0 + + /* + * Set Machine Status Register to a known status, + * switch the MMU off and jump to 1: in a single step. + */ + + mr r8, r0 + ori r8, r8, MSR_RI|MSR_ME + mtspr SPRN_SRR1, r8 + addi r8, r4, 1f - relocate_new_kernel + mtspr SPRN_SRR0, r8 + sync + rfi + +1: +#endif + /* from this point address translation is turned off */ + /* and interrupts are disabled */ + + /* set a new stack at the bottom of our page... */ + /* (not really needed now) */ + addi r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */ + stw r0, 0(r1) + + /* Do the copies */ + li r6, 0 /* checksum */ + mr r0, r3 + b 1f + +0: /* top, read another word for the indirection page */ + lwzu r0, 4(r3) + +1: + /* is it a destination page? (r8) */ + rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */ + beq 2f + + rlwinm r8, r0, 0, 0, 19 /* clear kexec flags, page align */ + b 0b + +2: /* is it an indirection page? (r3) */ + rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */ + beq 2f + + rlwinm r3, r0, 0, 0, 19 /* clear kexec flags, page align */ + subi r3, r3, 4 + b 0b + +2: /* are we done? */ + rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */ + beq 2f + b 3f + +2: /* is it a source page? (r9) */ + rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */ + beq 0b + + rlwinm r9, r0, 0, 0, 19 /* clear kexec flags, page align */ + + li r7, PAGE_SIZE / 4 + mtctr r7 + subi r9, r9, 4 + subi r8, r8, 4 +9: + lwzu r0, 4(r9) /* do the copy */ + xor r6, r6, r0 + stwu r0, 4(r8) + dcbst 0, r8 + sync + icbi 0, r8 + bdnz 9b + + addi r9, r9, 4 + addi r8, r8, 4 + b 0b + +3: + + /* To be certain of avoiding problems with self-modifying code + * execute a serializing instruction here. + */ + isync + sync + + mfspr r3, SPRN_PIR /* current core we are running on */ + mr r4, r5 /* load physical address of chunk called */ + + /* jump to the entry point, usually the setup routine */ + mtlr r5 + blrl + +1: b 1b + +relocate_new_kernel_end: + + .globl relocate_new_kernel_size +relocate_new_kernel_size: + .long relocate_new_kernel_end - relocate_new_kernel