powerpc/kexec: Move kexec files into a dedicated subdir.
author: Christophe Leroy <christophe.leroy@c-s.fr>
Tue, 29 Oct 2019 12:13:58 +0000 (12:13 +0000)
committer: Michael Ellerman <mpe@ellerman.id.au>
Thu, 21 Nov 2019 04:41:34 +0000 (15:41 +1100)
arch/powerpc/kernel/ contains 8 files dedicated to kexec.

Move them into a dedicated subdirectory.

Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
[mpe: Move to a/p/kexec, drop the 'machine' naming and use 'core' instead]
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/afbef97ec6a978574a5cf91a4441000e0a9da42a.1572351221.git.christophe.leroy@c-s.fr
19 files changed:
arch/powerpc/Kbuild
arch/powerpc/kernel/Makefile
arch/powerpc/kernel/crash.c [deleted file]
arch/powerpc/kernel/ima_kexec.c [deleted file]
arch/powerpc/kernel/kexec_elf_64.c [deleted file]
arch/powerpc/kernel/kexec_relocate_32.S [deleted file]
arch/powerpc/kernel/machine_kexec.c [deleted file]
arch/powerpc/kernel/machine_kexec_32.c [deleted file]
arch/powerpc/kernel/machine_kexec_64.c [deleted file]
arch/powerpc/kernel/machine_kexec_file_64.c [deleted file]
arch/powerpc/kexec/Makefile [new file with mode: 0644]
arch/powerpc/kexec/core.c [new file with mode: 0644]
arch/powerpc/kexec/core_32.c [new file with mode: 0644]
arch/powerpc/kexec/core_64.c [new file with mode: 0644]
arch/powerpc/kexec/crash.c [new file with mode: 0644]
arch/powerpc/kexec/elf_64.c [new file with mode: 0644]
arch/powerpc/kexec/file_load.c [new file with mode: 0644]
arch/powerpc/kexec/ima.c [new file with mode: 0644]
arch/powerpc/kexec/relocate_32.S [new file with mode: 0644]

index 51e6908323ad53189676d28463ced0664825ce25..5e2f9eaa3ee7d573c1371985efc20c2a1cbb3194 100644 (file)
@@ -14,4 +14,5 @@ obj-$(CONFIG_XMON) += xmon/
 obj-$(CONFIG_KVM)  += kvm/
 
 obj-$(CONFIG_PERF_EVENTS) += perf/
+obj-$(CONFIG_KEXEC_CORE)  += kexec/
 obj-$(CONFIG_KEXEC_FILE)  += purgatory/
index fadbc1eb25861fd54578452dfdd96c896729fe9f..c1df4e518829807cba7cbf09d3283fd826b70160 100644 (file)
@@ -5,9 +5,6 @@
 
 CFLAGS_ptrace.o                += -DUTS_MACHINE='"$(UTS_MACHINE)"'
 
-# Disable clang warning for using setjmp without setjmp.h header
-CFLAGS_crash.o         += $(call cc-disable-warning, builtin-requires-header)
-
 ifdef CONFIG_PPC64
 CFLAGS_prom_init.o     += $(NO_MINIMAL_TOC)
 endif
@@ -82,7 +79,6 @@ obj-$(CONFIG_FA_DUMP)         += fadump.o
 obj-$(CONFIG_PRESERVE_FA_DUMP) += fadump.o
 ifdef CONFIG_PPC32
 obj-$(CONFIG_E500)             += idle_e500.o
-obj-$(CONFIG_KEXEC_CORE)       += kexec_relocate_32.o
 endif
 obj-$(CONFIG_PPC_BOOK3S_32)    += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o
 obj-$(CONFIG_TAU)              += tau_6xx.o
@@ -126,14 +122,6 @@ pci64-$(CONFIG_PPC64)              += pci_dn.o pci-hotplug.o isa-bridge.o
 obj-$(CONFIG_PCI)              += pci_$(BITS).o $(pci64-y) \
                                   pci-common.o pci_of_scan.o
 obj-$(CONFIG_PCI_MSI)          += msi.o
-obj-$(CONFIG_KEXEC_CORE)       += machine_kexec.o crash.o \
-                                  machine_kexec_$(BITS).o
-obj-$(CONFIG_KEXEC_FILE)       += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o
-ifdef CONFIG_HAVE_IMA_KEXEC
-ifdef CONFIG_IMA
-obj-y                          += ima_kexec.o
-endif
-endif
 
 obj-$(CONFIG_AUDIT)            += audit.o
 obj64-$(CONFIG_AUDIT)          += compat_audit.o
@@ -168,12 +156,6 @@ obj-$(CONFIG_PPC_SECVAR_SYSFS)     += secvar-sysfs.o
 GCOV_PROFILE_prom_init.o := n
 KCOV_INSTRUMENT_prom_init.o := n
 UBSAN_SANITIZE_prom_init.o := n
-GCOV_PROFILE_machine_kexec_64.o := n
-KCOV_INSTRUMENT_machine_kexec_64.o := n
-UBSAN_SANITIZE_machine_kexec_64.o := n
-GCOV_PROFILE_machine_kexec_32.o := n
-KCOV_INSTRUMENT_machine_kexec_32.o := n
-UBSAN_SANITIZE_machine_kexec_32.o := n
 GCOV_PROFILE_kprobes.o := n
 KCOV_INSTRUMENT_kprobes.o := n
 UBSAN_SANITIZE_kprobes.o := n
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
deleted file mode 100644 (file)
index d488311..0000000
+++ /dev/null
@@ -1,374 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Architecture specific (PPC64) functions for kexec based crash dumps.
- *
- * Copyright (C) 2005, IBM Corp.
- *
- * Created by: Haren Myneni
- */
-
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/reboot.h>
-#include <linux/kexec.h>
-#include <linux/export.h>
-#include <linux/crash_dump.h>
-#include <linux/delay.h>
-#include <linux/irq.h>
-#include <linux/types.h>
-
-#include <asm/processor.h>
-#include <asm/machdep.h>
-#include <asm/kexec.h>
-#include <asm/prom.h>
-#include <asm/smp.h>
-#include <asm/setjmp.h>
-#include <asm/debug.h>
-
-/*
- * The primary CPU waits a while for all secondary CPUs to enter. This is to
- * avoid sending an IPI if the secondary CPUs are entering
- * crash_kexec_secondary on their own (eg via a system reset).
- *
- * The secondary timeout has to be longer than the primary. Both timeouts are
- * in milliseconds.
- */
-#define PRIMARY_TIMEOUT                500
-#define SECONDARY_TIMEOUT      1000
-
-#define IPI_TIMEOUT            10000
-#define REAL_MODE_TIMEOUT      10000
-
-static int time_to_dump;
-/*
- * crash_wake_offline should be set to 1 by platforms that intend to wake
- * up offline cpus prior to jumping to a kdump kernel. Currently powernv
- * sets it to 1, since we want to avoid things from happening when an
- * offline CPU wakes up due to something like an HMI (malfunction error),
- * which propagates to all threads.
- */
-int crash_wake_offline;
-
-#define CRASH_HANDLER_MAX 3
-/* List of shutdown handles */
-static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX];
-static DEFINE_SPINLOCK(crash_handlers_lock);
-
-static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
-static int crash_shutdown_cpu = -1;
-
-static int handle_fault(struct pt_regs *regs)
-{
-       if (crash_shutdown_cpu == smp_processor_id())
-               longjmp(crash_shutdown_buf, 1);
-       return 0;
-}
-
-#ifdef CONFIG_SMP
-
-static atomic_t cpus_in_crash;
-void crash_ipi_callback(struct pt_regs *regs)
-{
-       static cpumask_t cpus_state_saved = CPU_MASK_NONE;
-
-       int cpu = smp_processor_id();
-
-       hard_irq_disable();
-       if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
-               crash_save_cpu(regs, cpu);
-               cpumask_set_cpu(cpu, &cpus_state_saved);
-       }
-
-       atomic_inc(&cpus_in_crash);
-       smp_mb__after_atomic();
-
-       /*
-        * Starting the kdump boot.
-        * This barrier is needed to make sure that all CPUs are stopped.
-        */
-       while (!time_to_dump)
-               cpu_relax();
-
-       if (ppc_md.kexec_cpu_down)
-               ppc_md.kexec_cpu_down(1, 1);
-
-#ifdef CONFIG_PPC64
-       kexec_smp_wait();
-#else
-       for (;;);       /* FIXME */
-#endif
-
-       /* NOTREACHED */
-}
-
-static void crash_kexec_prepare_cpus(int cpu)
-{
-       unsigned int msecs;
-       unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
-       int tries = 0;
-       int (*old_handler)(struct pt_regs *regs);
-
-       printk(KERN_EMERG "Sending IPI to other CPUs\n");
-
-       if (crash_wake_offline)
-               ncpus = num_present_cpus() - 1;
-
-       crash_send_ipi(crash_ipi_callback);
-       smp_wmb();
-
-again:
-       /*
-        * FIXME: Until we will have the way to stop other CPUs reliably,
-        * the crash CPU will send an IPI and wait for other CPUs to
-        * respond.
-        */
-       msecs = IPI_TIMEOUT;
-       while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
-               mdelay(1);
-
-       /* Would it be better to replace the trap vector here? */
-
-       if (atomic_read(&cpus_in_crash) >= ncpus) {
-               printk(KERN_EMERG "IPI complete\n");
-               return;
-       }
-
-       printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
-               ncpus - atomic_read(&cpus_in_crash));
-
-       /*
-        * If we have a panic timeout set then we can't wait indefinitely
-        * for someone to activate system reset. We also give up on the
-        * second time through if system reset fail to work.
-        */
-       if ((panic_timeout > 0) || (tries > 0))
-               return;
-
-       /*
-        * A system reset will cause all CPUs to take an 0x100 exception.
-        * The primary CPU returns here via setjmp, and the secondary
-        * CPUs reexecute the crash_kexec_secondary path.
-        */
-       old_handler = __debugger;
-       __debugger = handle_fault;
-       crash_shutdown_cpu = smp_processor_id();
-
-       if (setjmp(crash_shutdown_buf) == 0) {
-               printk(KERN_EMERG "Activate system reset (dumprestart) "
-                                 "to stop other cpu(s)\n");
-
-               /*
-                * A system reset will force all CPUs to execute the
-                * crash code again. We need to reset cpus_in_crash so we
-                * wait for everyone to do this.
-                */
-               atomic_set(&cpus_in_crash, 0);
-               smp_mb();
-
-               while (atomic_read(&cpus_in_crash) < ncpus)
-                       cpu_relax();
-       }
-
-       crash_shutdown_cpu = -1;
-       __debugger = old_handler;
-
-       tries++;
-       goto again;
-}
-
-/*
- * This function will be called by secondary cpus.
- */
-void crash_kexec_secondary(struct pt_regs *regs)
-{
-       unsigned long flags;
-       int msecs = SECONDARY_TIMEOUT;
-
-       local_irq_save(flags);
-
-       /* Wait for the primary crash CPU to signal its progress */
-       while (crashing_cpu < 0) {
-               if (--msecs < 0) {
-                       /* No response, kdump image may not have been loaded */
-                       local_irq_restore(flags);
-                       return;
-               }
-
-               mdelay(1);
-       }
-
-       crash_ipi_callback(regs);
-}
-
-#else  /* ! CONFIG_SMP */
-
-static void crash_kexec_prepare_cpus(int cpu)
-{
-       /*
-        * move the secondaries to us so that we can copy
-        * the new kernel 0-0x100 safely
-        *
-        * do this if kexec in setup.c ?
-        */
-#ifdef CONFIG_PPC64
-       smp_release_cpus();
-#else
-       /* FIXME */
-#endif
-}
-
-void crash_kexec_secondary(struct pt_regs *regs)
-{
-}
-#endif /* CONFIG_SMP */
-
-/* wait for all the CPUs to hit real mode but timeout if they don't come in */
-#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
-static void __maybe_unused crash_kexec_wait_realmode(int cpu)
-{
-       unsigned int msecs;
-       int i;
-
-       msecs = REAL_MODE_TIMEOUT;
-       for (i=0; i < nr_cpu_ids && msecs > 0; i++) {
-               if (i == cpu)
-                       continue;
-
-               while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
-                       barrier();
-                       if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
-                               break;
-                       msecs--;
-                       mdelay(1);
-               }
-       }
-       mb();
-}
-#else
-static inline void crash_kexec_wait_realmode(int cpu) {}
-#endif /* CONFIG_SMP && CONFIG_PPC64 */
-
-/*
- * Register a function to be called on shutdown.  Only use this if you
- * can't reset your device in the second kernel.
- */
-int crash_shutdown_register(crash_shutdown_t handler)
-{
-       unsigned int i, rc;
-
-       spin_lock(&crash_handlers_lock);
-       for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
-               if (!crash_shutdown_handles[i]) {
-                       /* Insert handle at first empty entry */
-                       crash_shutdown_handles[i] = handler;
-                       rc = 0;
-                       break;
-               }
-
-       if (i == CRASH_HANDLER_MAX) {
-               printk(KERN_ERR "Crash shutdown handles full, "
-                      "not registered.\n");
-               rc = 1;
-       }
-
-       spin_unlock(&crash_handlers_lock);
-       return rc;
-}
-EXPORT_SYMBOL(crash_shutdown_register);
-
-int crash_shutdown_unregister(crash_shutdown_t handler)
-{
-       unsigned int i, rc;
-
-       spin_lock(&crash_handlers_lock);
-       for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
-               if (crash_shutdown_handles[i] == handler)
-                       break;
-
-       if (i == CRASH_HANDLER_MAX) {
-               printk(KERN_ERR "Crash shutdown handle not found\n");
-               rc = 1;
-       } else {
-               /* Shift handles down */
-               for (; i < (CRASH_HANDLER_MAX - 1); i++)
-                       crash_shutdown_handles[i] =
-                               crash_shutdown_handles[i+1];
-               /*
-                * Reset last entry to NULL now that it has been shifted down,
-                * this will allow new handles to be added here.
-                */
-               crash_shutdown_handles[i] = NULL;
-               rc = 0;
-       }
-
-       spin_unlock(&crash_handlers_lock);
-       return rc;
-}
-EXPORT_SYMBOL(crash_shutdown_unregister);
-
-void default_machine_crash_shutdown(struct pt_regs *regs)
-{
-       unsigned int i;
-       int (*old_handler)(struct pt_regs *regs);
-
-       /*
-        * This function is only called after the system
-        * has panicked or is otherwise in a critical state.
-        * The minimum amount of code to allow a kexec'd kernel
-        * to run successfully needs to happen here.
-        *
-        * In practice this means stopping other cpus in
-        * an SMP system.
-        * The kernel is broken so disable interrupts.
-        */
-       hard_irq_disable();
-
-       /*
-        * Make a note of crashing cpu. Will be used in machine_kexec
-        * such that another IPI will not be sent.
-        */
-       crashing_cpu = smp_processor_id();
-
-       /*
-        * If we came in via system reset, wait a while for the secondary
-        * CPUs to enter.
-        */
-       if (TRAP(regs) == 0x100)
-               mdelay(PRIMARY_TIMEOUT);
-
-       crash_kexec_prepare_cpus(crashing_cpu);
-
-       crash_save_cpu(regs, crashing_cpu);
-
-       time_to_dump = 1;
-
-       crash_kexec_wait_realmode(crashing_cpu);
-
-       machine_kexec_mask_interrupts();
-
-       /*
-        * Call registered shutdown routines safely.  Swap out
-        * __debugger_fault_handler, and replace on exit.
-        */
-       old_handler = __debugger_fault_handler;
-       __debugger_fault_handler = handle_fault;
-       crash_shutdown_cpu = smp_processor_id();
-       for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) {
-               if (setjmp(crash_shutdown_buf) == 0) {
-                       /*
-                        * Insert syncs and delay to ensure
-                        * instructions in the dangerous region don't
-                        * leak away from this protected region.
-                        */
-                       asm volatile("sync; isync");
-                       /* dangerous region */
-                       crash_shutdown_handles[i]();
-                       asm volatile("sync; isync");
-               }
-       }
-       crash_shutdown_cpu = -1;
-       __debugger_fault_handler = old_handler;
-
-       if (ppc_md.kexec_cpu_down)
-               ppc_md.kexec_cpu_down(1, 0);
-}
diff --git a/arch/powerpc/kernel/ima_kexec.c b/arch/powerpc/kernel/ima_kexec.c
deleted file mode 100644 (file)
index 720e50e..0000000
+++ /dev/null
@@ -1,219 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2016 IBM Corporation
- *
- * Authors:
- * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
- */
-
-#include <linux/slab.h>
-#include <linux/kexec.h>
-#include <linux/of.h>
-#include <linux/memblock.h>
-#include <linux/libfdt.h>
-
-static int get_addr_size_cells(int *addr_cells, int *size_cells)
-{
-       struct device_node *root;
-
-       root = of_find_node_by_path("/");
-       if (!root)
-               return -EINVAL;
-
-       *addr_cells = of_n_addr_cells(root);
-       *size_cells = of_n_size_cells(root);
-
-       of_node_put(root);
-
-       return 0;
-}
-
-static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr,
-                              size_t *size)
-{
-       int ret, addr_cells, size_cells;
-
-       ret = get_addr_size_cells(&addr_cells, &size_cells);
-       if (ret)
-               return ret;
-
-       if (len < 4 * (addr_cells + size_cells))
-               return -ENOENT;
-
-       *addr = of_read_number(prop, addr_cells);
-       *size = of_read_number(prop + 4 * addr_cells, size_cells);
-
-       return 0;
-}
-
-/**
- * ima_get_kexec_buffer - get IMA buffer from the previous kernel
- * @addr:      On successful return, set to point to the buffer contents.
- * @size:      On successful return, set to the buffer size.
- *
- * Return: 0 on success, negative errno on error.
- */
-int ima_get_kexec_buffer(void **addr, size_t *size)
-{
-       int ret, len;
-       unsigned long tmp_addr;
-       size_t tmp_size;
-       const void *prop;
-
-       prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len);
-       if (!prop)
-               return -ENOENT;
-
-       ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size);
-       if (ret)
-               return ret;
-
-       *addr = __va(tmp_addr);
-       *size = tmp_size;
-
-       return 0;
-}
-
-/**
- * ima_free_kexec_buffer - free memory used by the IMA buffer
- */
-int ima_free_kexec_buffer(void)
-{
-       int ret;
-       unsigned long addr;
-       size_t size;
-       struct property *prop;
-
-       prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL);
-       if (!prop)
-               return -ENOENT;
-
-       ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size);
-       if (ret)
-               return ret;
-
-       ret = of_remove_property(of_chosen, prop);
-       if (ret)
-               return ret;
-
-       return memblock_free(addr, size);
-
-}
-
-/**
- * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt
- *
- * The IMA measurement buffer is of no use to a subsequent kernel, so we always
- * remove it from the device tree.
- */
-void remove_ima_buffer(void *fdt, int chosen_node)
-{
-       int ret, len;
-       unsigned long addr;
-       size_t size;
-       const void *prop;
-
-       prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len);
-       if (!prop)
-               return;
-
-       ret = do_get_kexec_buffer(prop, len, &addr, &size);
-       fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer");
-       if (ret)
-               return;
-
-       ret = delete_fdt_mem_rsv(fdt, addr, size);
-       if (!ret)
-               pr_debug("Removed old IMA buffer reservation.\n");
-}
-
-#ifdef CONFIG_IMA_KEXEC
-/**
- * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer
- *
- * Architectures should use this function to pass on the IMA buffer
- * information to the next kernel.
- *
- * Return: 0 on success, negative errno on error.
- */
-int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
-                             size_t size)
-{
-       image->arch.ima_buffer_addr = load_addr;
-       image->arch.ima_buffer_size = size;
-
-       return 0;
-}
-
-static int write_number(void *p, u64 value, int cells)
-{
-       if (cells == 1) {
-               u32 tmp;
-
-               if (value > U32_MAX)
-                       return -EINVAL;
-
-               tmp = cpu_to_be32(value);
-               memcpy(p, &tmp, sizeof(tmp));
-       } else if (cells == 2) {
-               u64 tmp;
-
-               tmp = cpu_to_be64(value);
-               memcpy(p, &tmp, sizeof(tmp));
-       } else
-               return -EINVAL;
-
-       return 0;
-}
-
-/**
- * setup_ima_buffer - add IMA buffer information to the fdt
- * @image:             kexec image being loaded.
- * @fdt:               Flattened device tree for the next kernel.
- * @chosen_node:       Offset to the chosen node.
- *
- * Return: 0 on success, or negative errno on error.
- */
-int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node)
-{
-       int ret, addr_cells, size_cells, entry_size;
-       u8 value[16];
-
-       remove_ima_buffer(fdt, chosen_node);
-       if (!image->arch.ima_buffer_size)
-               return 0;
-
-       ret = get_addr_size_cells(&addr_cells, &size_cells);
-       if (ret)
-               return ret;
-
-       entry_size = 4 * (addr_cells + size_cells);
-
-       if (entry_size > sizeof(value))
-               return -EINVAL;
-
-       ret = write_number(value, image->arch.ima_buffer_addr, addr_cells);
-       if (ret)
-               return ret;
-
-       ret = write_number(value + 4 * addr_cells, image->arch.ima_buffer_size,
-                          size_cells);
-       if (ret)
-               return ret;
-
-       ret = fdt_setprop(fdt, chosen_node, "linux,ima-kexec-buffer", value,
-                         entry_size);
-       if (ret < 0)
-               return -EINVAL;
-
-       ret = fdt_add_mem_rsv(fdt, image->arch.ima_buffer_addr,
-                             image->arch.ima_buffer_size);
-       if (ret)
-               return -EINVAL;
-
-       pr_debug("IMA buffer at 0x%llx, size = 0x%zx\n",
-                image->arch.ima_buffer_addr, image->arch.ima_buffer_size);
-
-       return 0;
-}
-#endif /* CONFIG_IMA_KEXEC */
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c
deleted file mode 100644 (file)
index 3072fd6..0000000
+++ /dev/null
@@ -1,125 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Load ELF vmlinux file for the kexec_file_load syscall.
- *
- * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
- * Copyright (C) 2004  IBM Corp.
- * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
- * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
- * Copyright (C) 2016  IBM Corporation
- *
- * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c.
- * Heavily modified for the kernel by
- * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
- */
-
-#define pr_fmt(fmt)    "kexec_elf: " fmt
-
-#include <linux/elf.h>
-#include <linux/kexec.h>
-#include <linux/libfdt.h>
-#include <linux/module.h>
-#include <linux/of_fdt.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-
-static void *elf64_load(struct kimage *image, char *kernel_buf,
-                       unsigned long kernel_len, char *initrd,
-                       unsigned long initrd_len, char *cmdline,
-                       unsigned long cmdline_len)
-{
-       int ret;
-       unsigned int fdt_size;
-       unsigned long kernel_load_addr;
-       unsigned long initrd_load_addr = 0, fdt_load_addr;
-       void *fdt;
-       const void *slave_code;
-       struct elfhdr ehdr;
-       struct kexec_elf_info elf_info;
-       struct kexec_buf kbuf = { .image = image, .buf_min = 0,
-                                 .buf_max = ppc64_rma_size };
-       struct kexec_buf pbuf = { .image = image, .buf_min = 0,
-                                 .buf_max = ppc64_rma_size, .top_down = true,
-                                 .mem = KEXEC_BUF_MEM_UNKNOWN };
-
-       ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
-       if (ret)
-               goto out;
-
-       ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
-       if (ret)
-               goto out;
-
-       pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr);
-
-       ret = kexec_load_purgatory(image, &pbuf);
-       if (ret) {
-               pr_err("Loading purgatory failed.\n");
-               goto out;
-       }
-
-       pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
-
-       if (initrd != NULL) {
-               kbuf.buffer = initrd;
-               kbuf.bufsz = kbuf.memsz = initrd_len;
-               kbuf.buf_align = PAGE_SIZE;
-               kbuf.top_down = false;
-               kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
-               ret = kexec_add_buffer(&kbuf);
-               if (ret)
-                       goto out;
-               initrd_load_addr = kbuf.mem;
-
-               pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr);
-       }
-
-       fdt_size = fdt_totalsize(initial_boot_params) * 2;
-       fdt = kmalloc(fdt_size, GFP_KERNEL);
-       if (!fdt) {
-               pr_err("Not enough memory for the device tree.\n");
-               ret = -ENOMEM;
-               goto out;
-       }
-       ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
-       if (ret < 0) {
-               pr_err("Error setting up the new device tree.\n");
-               ret = -EINVAL;
-               goto out;
-       }
-
-       ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
-       if (ret)
-               goto out;
-
-       fdt_pack(fdt);
-
-       kbuf.buffer = fdt;
-       kbuf.bufsz = kbuf.memsz = fdt_size;
-       kbuf.buf_align = PAGE_SIZE;
-       kbuf.top_down = true;
-       kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
-       ret = kexec_add_buffer(&kbuf);
-       if (ret)
-               goto out;
-       fdt_load_addr = kbuf.mem;
-
-       pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
-
-       slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
-       ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
-                             fdt_load_addr);
-       if (ret)
-               pr_err("Error setting up the purgatory.\n");
-
-out:
-       kexec_free_elf_info(&elf_info);
-
-       /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
-       return ret ? ERR_PTR(ret) : fdt;
-}
-
-const struct kexec_file_ops kexec_elf64_ops = {
-       .probe = kexec_elf_probe,
-       .load = elf64_load,
-};
diff --git a/arch/powerpc/kernel/kexec_relocate_32.S b/arch/powerpc/kernel/kexec_relocate_32.S
deleted file mode 100644 (file)
index 8a8b488..0000000
+++ /dev/null
@@ -1,500 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * This file contains kexec low-level functions.
- *
- * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- * PPC44x port. Copyright (C) 2011,  IBM Corporation
- *             Author: Suzuki Poulose <suzuki@in.ibm.com>
- */
-
-#include <asm/reg.h>
-#include <asm/page.h>
-#include <asm/mmu.h>
-#include <asm/ppc_asm.h>
-#include <asm/kexec.h>
-
-       .text
-
-       /*
-        * Must be relocatable PIC code callable as a C function.
-        *
-        * Three build-time variants follow (CONFIG_FSL_BOOKE, CONFIG_44x
-        * and a generic fallback).  Each one leaves the three arguments
-        * in r3-r5 and r0 = 0 before the common page-copy loop runs.
-        */
-       .globl relocate_new_kernel
-relocate_new_kernel:
-       /* r3 = page_list   */
-       /* r4 = reboot_code_buffer */
-       /* r5 = start_address      */
-
-#ifdef CONFIG_FSL_BOOKE
-
-       mr      r29, r3                 /* keep the arguments in r29-r31 ... */
-       mr      r30, r4                 /* ... while the included entry- */
-       mr      r31, r5                 /* ... mapping code runs */
-
-#define ENTRY_MAPPING_KEXEC_SETUP
-#include "fsl_booke_entry_mapping.S"
-#undef ENTRY_MAPPING_KEXEC_SETUP
-
-       mr      r3, r29                 /* arguments back in r3-r5 */
-       mr      r4, r30
-       mr      r5, r31
-
-       li      r0, 0                   /* r0 = 0 for the common code below */
-#elif defined(CONFIG_44x)
-
-       /* Save our parameters (restored at ppc44x_map_done) */
-       mr      r29, r3
-       mr      r30, r4
-       mr      r31, r5
-
-#ifdef CONFIG_PPC_47x
-       /* Check for 47x cores: they take the separate setup_map_47x path */
-       mfspr   r3,SPRN_PVR
-       srwi    r3,r3,16                /* compare on the upper halfword of the PVR */
-       cmplwi  cr0,r3,PVR_476FPE@h
-       beq     setup_map_47x
-       cmplwi  cr0,r3,PVR_476@h
-       beq     setup_map_47x
-       cmplwi  cr0,r3,PVR_476_ISS@h
-       beq     setup_map_47x
-#endif /* CONFIG_PPC_47x */
-
-/*
- * Code for setting up 1:1 mapping for PPC440x for KEXEC
- *
- * We cannot switch off the MMU on PPC44x.
- * So we:
- * 1) Invalidate all the mappings except the one we are running from.
- * 2) Create a tmp mapping for our code in the other address space(TS) and
- *    jump to it. Invalidate the entry we started in.
- * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS.
- * 4) Jump to the 1:1 mapping in original TS.
- * 5) Invalidate the tmp mapping.
- *
- * - Based on the kexec support code for FSL BookE
- *
- */
-
-       /*
-        * Load the PID with kernel PID (0).
-        * Also load our MSR_IS and TID to MMUCR for TLB search.
-        */
-       li      r3, 0
-       mtspr   SPRN_PID, r3            /* PID = 0 */
-       mfmsr   r4
-       andi.   r4,r4,MSR_IS@l          /* is MSR[IS] set? */
-       beq     wmmucr                  /* no: write MMUCR as 0 */
-       oris    r3,r3,PPC44x_MMUCR_STS@h        /* yes: set STS in the MMUCR value */
-wmmucr:
-       mtspr   SPRN_MMUCR,r3
-       sync
-
-       /*
-        * Invalidate all the TLB entries except the current entry
-        * where we are running from
-        */
-       bl      0f                              /* Find our address */
-0:     mflr    r5                              /* Make it accessible */
-       tlbsx   r23,0,r5                        /* Find entry we are in */
-       li      r4,0                            /* Start at TLB entry 0 */
-       li      r3,0                            /* Set PAGEID inval value */
-1:     cmpw    r23,r4                          /* Is this our entry? */
-       beq     skip                            /* If so, skip the inval */
-       tlbwe   r3,r4,PPC44x_TLB_PAGEID         /* If not, inval the entry */
-skip:
-       addi    r4,r4,1                         /* Increment */
-       cmpwi   r4,64                           /* Are we done? */
-       bne     1b                              /* If not, repeat */
-       isync
-
-       /* Create a temp mapping and jump to it */
-       andi.   r6, r23, 1              /* Find the index to use */
-       addi    r24, r6, 1              /* r24 will contain 1 or 2 */
-
-       mfmsr   r9                      /* get the MSR */
-       rlwinm  r5, r9, 27, 31, 31      /* Extract the MSR[IS] */
-       xori    r7, r5, 1               /* Use the other address space */
-
-       /* Read the current mapping entries */
-       tlbre   r3, r23, PPC44x_TLB_PAGEID
-       tlbre   r4, r23, PPC44x_TLB_XLAT
-       tlbre   r5, r23, PPC44x_TLB_ATTRIB
-
-       /* Save our current XLAT entry (used later to build the 1:1 jump target) */
-       mr      r25, r4
-
-       /* Extract the TLB PageSize */
-       li      r10, 1                  /* r10 will hold PageSize */
-       rlwinm  r11, r3, 0, 24, 27      /* bits 24-27 */
-
-       /* XXX: As of now we use 256M, 4K pages */
-       cmpwi   r11, PPC44x_TLB_256M
-       bne     tlb_4k
-       rotlwi  r10, r10, 28            /* r10 = 256M */
-       b       write_out
-tlb_4k:
-       cmpwi   r11, PPC44x_TLB_4K
-       bne     default
-       rotlwi  r10, r10, 12            /* r10 = 4K */
-       b       write_out
-default:
-       rotlwi  r10, r10, 10            /* r10 = 1K */
-
-write_out:
-       /*
-        * Write out the tmp 1:1 mapping for this code in other address space
-        * Fixup  EPN = RPN , TS=other address space
-        */
-       insrwi  r3, r7, 1, 23           /* Bit 23 is TS for PAGEID field */
-
-       /* Write out the tmp mapping entries */
-       tlbwe   r3, r24, PPC44x_TLB_PAGEID
-       tlbwe   r4, r24, PPC44x_TLB_XLAT
-       tlbwe   r5, r24, PPC44x_TLB_ATTRIB
-
-       subi    r11, r10, 1             /* PageOffset Mask = PageSize - 1 */
-       not     r10, r11                /* Mask for PageNum */
-
-       /* Switch to other address space in MSR */
-       insrwi  r9, r7, 1, 26           /* Set MSR[IS] = r7 */
-
-       bl      1f                      /* bl is used only to get our address into LR */
-1:     mflr    r8                      /* r8 = address of 1b */
-       addi    r8, r8, (2f-1b)         /* Find the target offset */
-
-       /* Jump to the tmp mapping */
-       mtspr   SPRN_SRR0, r8           /* rfi target address */
-       mtspr   SPRN_SRR1, r9           /* MSR value installed by rfi */
-       rfi
-
-2:
-       /* Invalidate the entry we were executing from */
-       li      r3, 0
-       tlbwe   r3, r23, PPC44x_TLB_PAGEID
-
-       /* attribute fields. rwx for SUPERVISOR mode */
-       li      r5, 0
-       ori     r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
-
-       /* Create 1:1 mapping in 256M pages */
-       xori    r7, r7, 1                       /* Revert back to Original TS */
-
-       li      r8, 0                           /* PageNumber */
-       li      r6, 3                           /* TLB Index, start at 3  */
-
-next_tlb:
-       rotlwi  r3, r8, 28                      /* Create EPN (bits 0-3) */
-       mr      r4, r3                          /* RPN = EPN  */
-       ori     r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */
-       insrwi  r3, r7, 1, 23                   /* Set TS from r7 */
-
-       tlbwe   r3, r6, PPC44x_TLB_PAGEID       /* PageID field : EPN, V, SIZE */
-       tlbwe   r4, r6, PPC44x_TLB_XLAT         /* Address translation : RPN   */
-       tlbwe   r5, r6, PPC44x_TLB_ATTRIB       /* Attributes */
-
-       addi    r8, r8, 1                       /* Increment PN */
-       addi    r6, r6, 1                       /* Increment TLB Index */
-       cmpwi   r8, 8                           /* Are we done ? (8 x 256M pages written) */
-       bne     next_tlb
-       isync
-
-       /* Jump to the new mapping 1:1 */
-       li      r9,0                            /* new MSR value starts out as 0 */
-       insrwi  r9, r7, 1, 26                   /* Set MSR[IS] = r7 */
-
-       bl      1f                              /* get our address into LR again */
-1:     mflr    r8
-       and     r8, r8, r11                     /* Get our offset within page */
-       addi    r8, r8, (2f-1b)
-
-       and     r5, r25, r10                    /* Get our target PageNum */
-       or      r8, r8, r5                      /* Target jump address */
-
-       mtspr   SPRN_SRR0, r8
-       mtspr   SPRN_SRR1, r9
-       rfi
-2:
-       /* Invalidate the tmp entry we used */
-       li      r3, 0
-       tlbwe   r3, r24, PPC44x_TLB_PAGEID
-       sync
-       b       ppc44x_map_done
-
-#ifdef CONFIG_PPC_47x
-
-       /* 1:1 mapping for 47x */
-
-setup_map_47x:
-
-       /*
-        * Load the kernel pid (0) to PID and also to MMUCR[TID].
-        * Also set the MSR IS->MMUCR STS
-        */
-       li      r3, 0
-       mtspr   SPRN_PID, r3                    /* Set PID */
-       mfmsr   r4                              /* Get MSR */
-       andi.   r4, r4, MSR_IS@l                /* TS=1? */
-       beq     1f                              /* If not, leave STS=0 */
-       oris    r3, r3, PPC47x_MMUCR_STS@h      /* Set STS=1 */
-1:     mtspr   SPRN_MMUCR, r3                  /* Put MMUCR */
-       sync
-
-       /* Find the entry we are running from; keep its three words in r24-r26 */
-       bl      2f
-2:     mflr    r23
-       tlbsx   r23, 0, r23
-       tlbre   r24, r23, 0                     /* TLB Word 0 */
-       tlbre   r25, r23, 1                     /* TLB Word 1 */
-       tlbre   r26, r23, 2                     /* TLB Word 2 */
-
-
-       /*
-        * Invalidates all the tlb entries by writing to 256 RPNs(r4)
-        * of 4k page size in all  4 ways (0-3 in r3).
-        * This would invalidate the entire UTLB including the one we are
-        * running from. However the shadow TLB entries would help us
-        * to continue the execution, until we flush them (rfi/isync).
-        */
-       addis   r3, 0, 0x8000                   /* specify the way */
-       addi    r4, 0, 0                        /* TLB Word0 = (EPN=0, VALID = 0) */
-       addi    r5, 0, 0                        /* r5 = 0, written to TLB words 1 and 2 */
-       b       clear_utlb_entry
-
-       /* Align the loop to speed things up. from head_44x.S */
-       .align  6
-
-clear_utlb_entry:
-
-       tlbwe   r4, r3, 0
-       tlbwe   r5, r3, 1
-       tlbwe   r5, r3, 2
-       addis   r3, r3, 0x2000                  /* Increment the way */
-       cmpwi   r3, 0                           /* r3 wraps to 0 after the fourth way */
-       bne     clear_utlb_entry
-       addis   r3, 0, 0x8000                   /* back to the first way for the next EPN */
-       addis   r4, r4, 0x100                   /* Increment the EPN */
-       cmpwi   r4, 0                           /* r4 wraps to 0 after 256 increments */
-       bne     clear_utlb_entry
-
-       /* Create the entries in the other address space */
-       mfmsr   r5
-       rlwinm  r7, r5, 27, 31, 31              /* Get the TS (Bit 26) from MSR */
-       xori    r7, r7, 1                       /* r7 = !TS */
-
-       insrwi  r24, r7, 1, 21                  /* Change the TS in the saved TLB word 0 */
-
-       /*
-        * write out the TLB entries for the tmp mapping
-        * Use way '0' so that we could easily invalidate it later.
-        */
-       lis     r3, 0x8000                      /* Way '0' */
-
-       tlbwe   r24, r3, 0
-       tlbwe   r25, r3, 1
-       tlbwe   r26, r3, 2
-
-       /* Update the msr to the new TS */
-       insrwi  r5, r7, 1, 26
-
-       bl      1f                              /* bl is used only to get our address into LR */
-1:     mflr    r6
-       addi    r6, r6, (2f-1b)                 /* r6 = address of 2f */
-
-       mtspr   SPRN_SRR0, r6                   /* rfi target address */
-       mtspr   SPRN_SRR1, r5                   /* MSR value installed by rfi */
-       rfi
-
-       /*
-        * Now we are in the tmp address space.
-        * Create a 1:1 mapping for 0-2GiB in the original TS.
-        */
-2:
-       li      r3, 0
-       li      r4, 0                           /* TLB Word 0 */
-       li      r5, 0                           /* TLB Word 1 */
-       li      r6, 0
-       ori     r6, r6, PPC47x_TLB2_S_RWX       /* TLB word 2 */
-
-       li      r8, 0                           /* PageIndex */
-
-       xori    r7, r7, 1                       /* revert back to original TS */
-
-write_utlb:
-       rotlwi  r5, r8, 28                      /* RPN = PageIndex * 256M */
-                                               /* ERPN = 0 as we don't use memory above 2G */
-
-       mr      r4, r5                          /* EPN = RPN */
-       ori     r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M)
-       insrwi  r4, r7, 1, 21                   /* Insert the TS to Word 0 */
-
-       tlbwe   r4, r3, 0                       /* Write out the entries */
-       tlbwe   r5, r3, 1
-       tlbwe   r6, r3, 2
-       addi    r8, r8, 1
-       cmpwi   r8, 8                           /* Have we completed ? */
-       bne     write_utlb
-
-       /* make sure we complete the TLB write up */
-       isync
-
-       /*
-        * Prepare to jump to the 1:1 mapping.
-        * 1) Extract page size of the tmp mapping
-        *    DSIZ = TLB_Word0[22:27]
-        * 2) Calculate the physical address of the address
-        *    to jump to.
-        */
-       rlwinm  r10, r24, 0, 22, 27
-
-       cmpwi   r10, PPC47x_TLB0_4K
-       bne     0f
-       li      r10, 0x1000                     /* r10 = 4k */
-       b       3f                              /* skip the 256M default */
-
-0:
-       /* Defaults to 256M */
-       lis     r10, 0x1000
-
-3:
-       /*
-        * Both page-size paths must go through this one bl: it is the
-        * instruction directly before 1:, so LR (and therefore r4) is the
-        * address of 1b, which the (2f-1b) offset below relies on.
-        * Doing the bl from the 4k path itself would leave LR pointing
-        * at label 0 instead, 8 bytes short.
-        */
-       bl      1f
-1:     mflr    r4
-       addi    r4, r4, (2f-1b)                 /* virtual address  of 2f */
-
-       subi    r11, r10, 1                     /* offsetmask = Pagesize - 1 */
-       not     r10, r11                        /* Pagemask = ~(offsetmask) */
-
-       and     r5, r25, r10                    /* Physical page */
-       and     r6, r4, r11                     /* offset within the current page */
-
-       or      r5, r5, r6                      /* Physical address for 2f */
-
-       /* Switch the TS in MSR to the original one */
-       mfmsr   r8
-       insrwi  r8, r7, 1, 26
-
-       mtspr   SPRN_SRR1, r8                   /* MSR value installed by rfi */
-       mtspr   SPRN_SRR0, r5                   /* rfi target: 2f through the 1:1 mapping */
-       rfi
-
-2:
-       /* Invalidate the tmp mapping */
-       lis     r3, 0x8000                      /* Way '0' */
-
-       clrrwi  r24, r24, 12                    /* Clear the valid bit */
-       tlbwe   r24, r3, 0
-       tlbwe   r25, r3, 1
-       tlbwe   r26, r3, 2
-
-       /* Make sure we complete the TLB write and flush the shadow TLB */
-       isync
-
-#endif
-
-ppc44x_map_done:
-
-
-       /* Restore the parameters */
-       mr      r3, r29
-       mr      r4, r30
-       mr      r5, r31
-
-       li      r0, 0
-#else
-       li      r0, 0
-
-       /*
-        * Set Machine Status Register to a known status,
-        * switch the MMU off and jump to 1: in a single step.
-        */
-
-       mr      r8, r0
-       ori     r8, r8, MSR_RI|MSR_ME           /* new MSR: only RI and ME set */
-       mtspr   SPRN_SRR1, r8
-       addi    r8, r4, 1f - relocate_new_kernel        /* address of 1: inside the copy at r4 */
-       mtspr   SPRN_SRR0, r8
-       sync
-       rfi
-
-1:
-#endif
-       /* from this point address translation is turned off */
-       /* and interrupts are disabled */
-
-       /* set a new stack at the bottom of our page... */
-       /* (not really needed now) */
-       addi    r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */
-       stw     r0, 0(r1)                       /* r0 is 0 here: zero the word at 0(r1) */
-
-       /* Do the copies */
-       li      r6, 0 /* checksum */
-       mr      r0, r3                          /* treat page_list itself as the first entry */
-       b       1f
-
-0:     /* top, read another word for the indirection page */
-       lwzu    r0, 4(r3)
-
-1:
-       /* is it a destination page? (r8) */
-       rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
-       beq     2f
-
-       rlwinm  r8, r0, 0, 0, 19 /* clear kexec flags, page align */
-       b       0b
-
-2:     /* is it an indirection page? (r3) */
-       rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
-       beq     2f
-
-       rlwinm  r3, r0, 0, 0, 19 /* clear kexec flags, page align */
-       subi    r3, r3, 4                       /* the lwzu at 0: pre-increments by 4 */
-       b       0b
-
-2:     /* are we done? */
-       rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
-       beq     2f
-       b       3f
-
-2:     /* is it a source page? (r9) */
-       rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
-       beq     0b
-
-       rlwinm  r9, r0, 0, 0, 19 /* clear kexec flags, page align */
-
-       li      r7, PAGE_SIZE / 4               /* number of 32-bit words in a page */
-       mtctr   r7
-       subi    r9, r9, 4                       /* bias both pointers for the */
-       subi    r8, r8, 4                       /* pre-incrementing lwzu/stwu below */
-9:
-       lwzu    r0, 4(r9)  /* do the copy */
-       xor     r6, r6, r0                      /* fold the word into the r6 checksum */
-       stwu    r0, 4(r8)
-       dcbst   0, r8                           /* write the stored word back from the data cache */
-       sync
-       icbi    0, r8                           /* invalidate the matching instruction cache block */
-       bdnz    9b
-
-       addi    r9, r9, 4                       /* undo the -4 bias */
-       addi    r8, r8, 4                       /* r8 now points just past the page copied */
-       b       0b
-
-3:
-
-       /* To be certain of avoiding problems with self-modifying code
-        * execute a serializing instruction here.
-        */
-       isync
-       sync
-
-       mfspr   r3, SPRN_PIR /* current core we are running on */
-       mr      r4, r5 /* load physical address of chunk called */
-
-       /* jump to the entry point, usually the setup routine */
-       mtlr    r5
-       blrl
-
-1:     b       1b                              /* spin here if the entry point ever returns */
-
-relocate_new_kernel_end:
-
-       .globl relocate_new_kernel_size
-relocate_new_kernel_size:
-       .long relocate_new_kernel_end - relocate_new_kernel     /* size in bytes of the routine above */
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
deleted file mode 100644 (file)
index 078fe3d..0000000
+++ /dev/null
@@ -1,280 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Code to handle transition of Linux booting another kernel.
- *
- * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- * Copyright (C) 2005 IBM Corporation.
- */
-
-#include <linux/kexec.h>
-#include <linux/reboot.h>
-#include <linux/threads.h>
-#include <linux/memblock.h>
-#include <linux/of.h>
-#include <linux/irq.h>
-#include <linux/ftrace.h>
-
-#include <asm/kdump.h>
-#include <asm/machdep.h>
-#include <asm/pgalloc.h>
-#include <asm/prom.h>
-#include <asm/sections.h>
-
-/*
- * Walk every interrupt descriptor and quiesce its line before the new
- * kernel takes over: send an EOI if the interrupt is still in progress,
- * then mask it, then disable it if it is not already disabled.  Each
- * step is skipped when the irq chip does not implement the callback,
- * and descriptors without a chip are ignored.
- */
-void machine_kexec_mask_interrupts(void)
-{
-       struct irq_desc *desc;
-       unsigned int irq;
-
-       for_each_irq_desc(irq, desc) {
-               struct irq_data *data = &desc->irq_data;
-               struct irq_chip *chip = irq_desc_get_chip(desc);
-
-               if (chip == NULL)
-                       continue;
-
-               if (chip->irq_eoi && irqd_irq_inprogress(data))
-                       chip->irq_eoi(data);
-
-               if (chip->irq_mask)
-                       chip->irq_mask(data);
-
-               if (chip->irq_disable && !irqd_irq_disabled(data))
-                       chip->irq_disable(data);
-       }
-}
-
-void machine_crash_shutdown(struct pt_regs *regs)
-{
-       default_machine_crash_shutdown(regs);   /* no platform hook here: always the default path */
-}
-
-/*
- * Do whatever setup is needed on the image and the reboot code buffer
- * up front, so that allocations can be avoided later.
- *
- * A platform may provide its own hook in ppc_md.machine_kexec_prepare;
- * when it does not, default_machine_kexec_prepare() is used.  The return
- * value is that of whichever of the two was called.
- */
-int machine_kexec_prepare(struct kimage *image)
-{
-       if (!ppc_md.machine_kexec_prepare)
-               return default_machine_kexec_prepare(image);
-
-       return ppc_md.machine_kexec_prepare(image);
-}
-
-void machine_kexec_cleanup(struct kimage *image)
-{      /* empty body: no cleanup is performed for image */
-}
-
-/*
- * Add the architecture-specific entries to vmcoreinfo: the memory node
- * symbol (node_data with its length, or contig_page_data), the vmemmap
- * symbols, sizes and offsets on PPC64 SPARSEMEM_VMEMMAP builds, and
- * finally the KERNELOFFSET line taken from kaslr_offset().
- */
-void arch_crash_save_vmcoreinfo(void)
-{
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-       VMCOREINFO_SYMBOL(node_data);
-       VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
-#else
-       VMCOREINFO_SYMBOL(contig_page_data);
-#endif
-
-#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
-       /* vmemmap bookkeeping symbols */
-       VMCOREINFO_SYMBOL(vmemmap_list);
-       VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
-       VMCOREINFO_SYMBOL(mmu_psize_defs);
-
-       /* layout of struct vmemmap_backing */
-       VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
-       VMCOREINFO_OFFSET(vmemmap_backing, list);
-       VMCOREINFO_OFFSET(vmemmap_backing, phys);
-       VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
-
-       /* layout of struct mmu_psize_def */
-       VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
-       VMCOREINFO_OFFSET(mmu_psize_def, shift);
-#endif
-
-       vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
-}
-
-/*
- * Past the point of no return: we are committed to rebooting, so nothing
- * in machine_kexec() may allocate memory or fail in any way.
- *
- * ftrace is disabled on this CPU around the hand-over, which goes to the
- * platform hook in ppc_md.machine_kexec when one is set and to
- * default_machine_kexec() otherwise.  If that call comes back, the saved
- * ftrace state is restored and a normal restart is attempted instead.
- */
-void machine_kexec(struct kimage *image)
-{
-       int saved_ftrace = __ftrace_enabled_save();
-
-       this_cpu_disable_ftrace();
-
-       if (!ppc_md.machine_kexec)
-               default_machine_kexec(image);
-       else
-               ppc_md.machine_kexec(image);
-
-       this_cpu_enable_ftrace();
-       __ftrace_enabled_restore(saved_ftrace);
-
-       /* Fall back to normal restart if we're still alive. */
-       machine_restart(NULL);
-       for (;;)
-               ;
-}
-
-/*
- * Work out where the crash kernel region lives and reserve it in
- * memblock.
- *
- * The region comes from parse_crashkernel() on the boot command line, or
- * from whatever crashk_res already holds.  Its base is forced to
- * KDUMP_KERNELBASE on builds without CONFIG_NONSTATIC_KERNEL; otherwise
- * a missing base is defaulted and the base is page aligned.  The size is
- * page aligned as well.  memory_limit is raised if it would cut into the
- * region.
- *
- * On any failure (no region requested, overlap with the running kernel,
- * or the memblock reservation failing) crashk_res is reset to 0/0.
- */
-void __init reserve_crashkernel(void)
-{
-       unsigned long long size, base;
-       int rc;
-
-       /* use common parsing */
-       rc = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
-                              &size, &base);
-       if (!rc && size != 0) {
-               crashk_res.start = base;
-               crashk_res.end = base + size - 1;
-       }
-
-       /* Nothing was requested: leave the resource empty. */
-       if (crashk_res.start == crashk_res.end)
-               goto no_crashkernel;
-
-       /*
-        * We might have got these values via the command line or the
-        * device tree, either way sanitise them now.
-        */
-       size = resource_size(&crashk_res);
-
-#ifdef CONFIG_NONSTATIC_KERNEL
-       if (crashk_res.start == 0) {
-#ifdef CONFIG_PPC64
-               /*
-                * On 64bit we split the RMO in half but cap it at half of
-                * a small SLB (128MB) since the crash kernel needs to place
-                * itself and some stacks to be in the first segment.
-                */
-               crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
-#else
-               crashk_res.start = KDUMP_KERNELBASE;
-#endif
-       }
-
-       base = PAGE_ALIGN(crashk_res.start);
-       if (base != crashk_res.start) {
-               printk("Crash kernel base must be aligned to 0x%lx\n",
-                               PAGE_SIZE);
-               crashk_res.start = base;
-       }
-#else
-       /* Static kernel: only one base address is possible. */
-       if (crashk_res.start != KDUMP_KERNELBASE)
-               printk("Crash kernel location must be 0x%x\n",
-                               KDUMP_KERNELBASE);
-
-       crashk_res.start = KDUMP_KERNELBASE;
-#endif
-
-       size = PAGE_ALIGN(size);
-       crashk_res.end = crashk_res.start + size - 1;
-
-       /* The crash region must not overlap the current kernel */
-       if (overlaps_crashkernel(__pa(_stext), _end - _stext)) {
-               printk(KERN_WARNING
-                       "Crash kernel can not overlap current kernel\n");
-               goto no_crashkernel;
-       }
-
-       /* Crash kernel trumps memory limit */
-       if (memory_limit && memory_limit <= crashk_res.end) {
-               memory_limit = crashk_res.end + 1;
-               printk("Adjusted memory limit for crashkernel, now 0x%llx\n",
-                      memory_limit);
-       }
-
-       printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
-                       "for crashkernel (System RAM: %ldMB)\n",
-                       (unsigned long)(size >> 20),
-                       (unsigned long)(crashk_res.start >> 20),
-                       (unsigned long)(memblock_phys_mem_size() >> 20));
-
-       /* Done once the region is real memory and has been reserved. */
-       if (memblock_is_region_memory(crashk_res.start, size) &&
-           !memblock_reserve(crashk_res.start, size))
-               return;
-
-       pr_err("Failed to reserve memory for crashkernel!\n");
-
-no_crashkernel:
-       crashk_res.start = crashk_res.end = 0;
-}
-
-/*
- * Return non-zero when the range [start, start + size) intersects the
- * crash kernel resource crashk_res, zero otherwise.
- */
-int overlaps_crashkernel(unsigned long start, unsigned long size)
-{
-       /* Begins after the crash region ends: no overlap possible. */
-       if (start > crashk_res.end)
-               return 0;
-
-       return (start + size) > crashk_res.start;
-}
-
-/*
- * Values we need to export to the second kernel via the device tree.
- * Each variable backs the .value of one struct property below and is
- * filled in through cpu_to_be_ulong() before the property is published
- * (kernel_end in kexec_setup(), the others in export_crashk_values()).
- */
-static phys_addr_t kernel_end;
-static phys_addr_t crashk_base;
-static phys_addr_t crashk_size;
-static unsigned long long mem_limit;
-
-static struct property kernel_end_prop = {
-       .name = "linux,kernel-end",
-       .length = sizeof(phys_addr_t),
-       .value = &kernel_end,
-};
-
-static struct property crashk_base_prop = {
-       .name = "linux,crashkernel-base",
-       .length = sizeof(phys_addr_t),
-       .value = &crashk_base
-};
-
-static struct property crashk_size_prop = {
-       .name = "linux,crashkernel-size",
-       .length = sizeof(phys_addr_t),
-       .value = &crashk_size,
-};
-
-static struct property memory_limit_prop = {
-       .name = "linux,memory-limit",
-       .length = sizeof(unsigned long long),
-       .value = &mem_limit,
-};
-
-#define cpu_to_be_ulong        __PASTE(cpu_to_be, BITS_PER_LONG) /* pastes to cpu_to_be<BITS_PER_LONG> */
-
-/*
- * Publish the crash kernel region and the memory limit as properties of
- * @node.
- *
- * Existing linux,crashkernel-base and linux,crashkernel-size properties
- * are removed first.  New ones are added only when a crash region is set
- * (crashk_res.start != 0).  linux,memory-limit is always updated.  All
- * values are converted with cpu_to_be_ulong() before being stored.
- */
-static void __init export_crashk_values(struct device_node *node)
-{
-       /* There might be existing crash kernel properties, but we can't
-        * be sure what's in them, so remove them. */
-       of_remove_property(node, of_find_property(node,
-                               "linux,crashkernel-base", NULL));
-       of_remove_property(node, of_find_property(node,
-                               "linux,crashkernel-size", NULL));
-
-       if (crashk_res.start != 0) {
-               /* Fill in each backing variable before adding its property. */
-               crashk_base = cpu_to_be_ulong(crashk_res.start);
-               of_add_property(node, &crashk_base_prop);
-               crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));
-               of_add_property(node, &crashk_size_prop);
-       }
-
-       /*
-        * memory_limit is required by the kexec-tools to limit the
-        * crash regions to the actual memory used.
-        */
-       mem_limit = cpu_to_be_ulong(memory_limit);
-       of_update_property(node, &memory_limit_prop);
-}
-
-/*
- * Late initcall that publishes kexec information under /chosen: the
- * linux,kernel-end property (physical address of _end) and the crash
- * kernel values handled by export_crashk_values().
- *
- * Returns 0 on success, or -ENOENT when /chosen cannot be found.
- */
-static int __init kexec_setup(void)
-{
-       struct device_node *chosen = of_find_node_by_path("/chosen");
-       struct property *stale;
-
-       if (chosen == NULL)
-               return -ENOENT;
-
-       /* remove any stale properties so ours can be found */
-       stale = of_find_property(chosen, kernel_end_prop.name, NULL);
-       of_remove_property(chosen, stale);
-
-       /* information needed by userspace when using default_machine_kexec */
-       kernel_end = cpu_to_be_ulong(__pa(_end));
-       of_add_property(chosen, &kernel_end_prop);
-
-       export_crashk_values(chosen);
-
-       of_node_put(chosen);
-       return 0;
-}
-late_initcall(kexec_setup);
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c
deleted file mode 100644 (file)
index bf9f1f9..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * PPC32 code to handle Linux booting another kernel.
- *
- * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- * Copyright (C) 2005 IBM Corporation.
- */
-
-#include <linux/kexec.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <asm/cacheflush.h>
-#include <asm/hw_irq.h>
-#include <asm/io.h>
-
-typedef void (*relocate_new_kernel_t)( /* pointer type used to call the copied relocation code */
-                               unsigned long indirection_page,        /* caller passes image->head */
-                               unsigned long reboot_code_buffer,      /* caller passes the control page's physical address */
-                               unsigned long start_address) __noreturn;       /* caller passes image->start */
-
-/*
- * Generic machine_kexec implementation, suitable at least for
- * non-OpenFirmware embedded platforms; a platform specific function may
- * simply call this one.
- *
- * With interrupts off and every interrupt line masked, it copies the
- * image relocation code into the control page, flushes the instruction
- * cache over that page and jumps to the relocation code.
- */
-void default_machine_kexec(struct kimage *image)
-{
-       extern const unsigned int relocate_new_kernel_size;
-       unsigned long head, ctl_virt, ctl_phys;
-       relocate_new_kernel_t entry;
-
-       /* Interrupts aren't acceptable while we reboot */
-       local_irq_disable();
-
-       /*
-        * mask each interrupt so we are in a more sane state for the
-        * kexec kernel
-        */
-       machine_kexec_mask_interrupts();
-
-       head = image->head;
-
-       /* we need both effective and real address here */
-       ctl_virt = (unsigned long)page_address(image->control_code_page);
-       ctl_phys = virt_to_phys((void *)ctl_virt);
-
-       /* copy our kernel relocation code to the control code page */
-       memcpy((void *)ctl_virt, relocate_new_kernel,
-              relocate_new_kernel_size);
-
-       flush_icache_range(ctl_virt, ctl_virt + KEXEC_CONTROL_PAGE_SIZE);
-       printk(KERN_INFO "Bye!\n");
-
-       /*
-        * Outside FSL BookE and 44x the routine is entered at its linked
-        * address; there it switches the MMU off and continues inside
-        * the copy at ctl_phys.
-        */
-       if (!(IS_ENABLED(CONFIG_FSL_BOOKE) || IS_ENABLED(CONFIG_44x)))
-               relocate_new_kernel(head, ctl_phys, image->start);
-
-       /* now call it */
-       entry = (relocate_new_kernel_t)ctl_virt;
-       entry(head, ctl_phys, image->start);
-}
-
-int default_machine_kexec_prepare(struct kimage *image)
-{
-       return 0;       /* no checks are made on image here: always reports success */
-}
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
deleted file mode 100644 (file)
index 04a7cba..0000000
+++ /dev/null
@@ -1,417 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * PPC64 code to handle Linux booting another kernel.
- *
- * Copyright (C) 2004-2005, IBM Corp.
- *
- * Created by: Milton D Miller II
- */
-
-
-#include <linux/kexec.h>
-#include <linux/smp.h>
-#include <linux/thread_info.h>
-#include <linux/init_task.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/cpu.h>
-#include <linux/hardirq.h>
-
-#include <asm/page.h>
-#include <asm/current.h>
-#include <asm/machdep.h>
-#include <asm/cacheflush.h>
-#include <asm/firmware.h>
-#include <asm/paca.h>
-#include <asm/mmu.h>
-#include <asm/sections.h>      /* _end */
-#include <asm/prom.h>
-#include <asm/smp.h>
-#include <asm/hw_breakpoint.h>
-#include <asm/asm-prototypes.h>
-#include <asm/svm.h>
-#include <asm/ultravisor.h>
-
-int default_machine_kexec_prepare(struct kimage *image)
-{
-       int i;
-       unsigned long begin, end;       /* limits of segment */
-       unsigned long low, high;        /* limits of blocked memory range */
-       struct device_node *node;
-       const unsigned long *basep;
-       const unsigned int *sizep;
-
-       /*
-        * Since we use the kernel fault handlers and paging code to
-        * handle the virtual mode, we must make sure no destination
-        * overlaps kernel static data or bss.
-        */
-       for (i = 0; i < image->nr_segments; i++)
-               if (image->segment[i].mem < __pa(_end))
-                       return -ETXTBSY;
-
-       /* We also should not overwrite the tce tables */
-       for_each_node_by_type(node, "pci") {
-               basep = of_get_property(node, "linux,tce-base", NULL);
-               sizep = of_get_property(node, "linux,tce-size", NULL);
-               if (basep == NULL || sizep == NULL)
-                       continue;
-
-               low = *basep;
-               high = low + (*sizep);
-
-               for (i = 0; i < image->nr_segments; i++) {
-                       begin = image->segment[i].mem;
-                       end = begin + image->segment[i].memsz;
-
-                       if ((begin < high) && (end > low))
-                               return -ETXTBSY;
-               }
-       }
-
-       return 0;
-}
-
-static void copy_segments(unsigned long ind)
-{
-       unsigned long entry;
-       unsigned long *ptr;
-       void *dest;
-       void *addr;
-
-       /*
-        * We rely on kexec_load to create a lists that properly
-        * initializes these pointers before they are used.
-        * We will still crash if the list is wrong, but at least
-        * the compiler will be quiet.
-        */
-       ptr = NULL;
-       dest = NULL;
-
-       for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
-               addr = __va(entry & PAGE_MASK);
-
-               switch (entry & IND_FLAGS) {
-               case IND_DESTINATION:
-                       dest = addr;
-                       break;
-               case IND_INDIRECTION:
-                       ptr = addr;
-                       break;
-               case IND_SOURCE:
-                       copy_page(dest, addr);
-                       dest += PAGE_SIZE;
-               }
-       }
-}
-
-void kexec_copy_flush(struct kimage *image)
-{
-       long i, nr_segments = image->nr_segments;
-       struct  kexec_segment ranges[KEXEC_SEGMENT_MAX];
-
-       /* save the ranges on the stack to efficiently flush the icache */
-       memcpy(ranges, image->segment, sizeof(ranges));
-
-       /*
-        * After this call we may not use anything allocated in dynamic
-        * memory, including *image.
-        *
-        * Only globals and the stack are allowed.
-        */
-       copy_segments(image->head);
-
-       /*
-        * we need to clear the icache for all dest pages sometime,
-        * including ones that were in place on the original copy
-        */
-       for (i = 0; i < nr_segments; i++)
-               flush_icache_range((unsigned long)__va(ranges[i].mem),
-                       (unsigned long)__va(ranges[i].mem + ranges[i].memsz));
-}
-
-#ifdef CONFIG_SMP
-
-static int kexec_all_irq_disabled = 0;
-
-static void kexec_smp_down(void *arg)
-{
-       local_irq_disable();
-       hard_irq_disable();
-
-       mb(); /* make sure our irqs are disabled before we say they are */
-       get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
-       while(kexec_all_irq_disabled == 0)
-               cpu_relax();
-       mb(); /* make sure all irqs are disabled before this */
-       hw_breakpoint_disable();
-       /*
-        * Now every CPU has IRQs off, we can clear out any pending
-        * IPIs and be sure that no more will come in after this.
-        */
-       if (ppc_md.kexec_cpu_down)
-               ppc_md.kexec_cpu_down(0, 1);
-
-       kexec_smp_wait();
-       /* NOTREACHED */
-}
-
-static void kexec_prepare_cpus_wait(int wait_state)
-{
-       int my_cpu, i, notified=-1;
-
-       hw_breakpoint_disable();
-       my_cpu = get_cpu();
-       /* Make sure each CPU has at least made it to the state we need.
-        *
-        * FIXME: There is a (slim) chance of a problem if not all of the CPUs
-        * are correctly onlined.  If somehow we start a CPU on boot with RTAS
-        * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
-        * time, the boot CPU will timeout.  If it does eventually execute
-        * stuff, the secondary will start up (paca_ptrs[]->cpu_start was
-        * written) and get into a peculiar state.
-        * If the platform supports smp_ops->take_timebase(), the secondary CPU
-        * will probably be spinning in there.  If not (i.e. pseries), the
-        * secondary will continue on and try to online itself/idle/etc. If it
-        * survives that, we need to find these
-        * possible-but-not-online-but-should-be CPUs and chaperone them into
-        * kexec_smp_wait().
-        */
-       for_each_online_cpu(i) {
-               if (i == my_cpu)
-                       continue;
-
-               while (paca_ptrs[i]->kexec_state < wait_state) {
-                       barrier();
-                       if (i != notified) {
-                               printk(KERN_INFO "kexec: waiting for cpu %d "
-                                      "(physical %d) to enter %i state\n",
-                                      i, paca_ptrs[i]->hw_cpu_id, wait_state);
-                               notified = i;
-                       }
-               }
-       }
-       mb();
-}
-
-/*
- * We need to make sure each present CPU is online.  The next kernel will scan
- * the device tree and assume primary threads are online and query secondary
- * threads via RTAS to online them if required.  If we don't online primary
- * threads, they will be stuck.  However, we also online secondary threads as we
- * may be using 'cede offline'.  In this case RTAS doesn't see the secondary
- * threads as offline -- and again, these CPUs will be stuck.
- *
- * So, we online all CPUs that should be running, including secondary threads.
- */
-static void wake_offline_cpus(void)
-{
-       int cpu = 0;
-
-       for_each_present_cpu(cpu) {
-               if (!cpu_online(cpu)) {
-                       printk(KERN_INFO "kexec: Waking offline cpu %d.\n",
-                              cpu);
-                       WARN_ON(cpu_up(cpu));
-               }
-       }
-}
-
-static void kexec_prepare_cpus(void)
-{
-       wake_offline_cpus();
-       smp_call_function(kexec_smp_down, NULL, /* wait */0);
-       local_irq_disable();
-       hard_irq_disable();
-
-       mb(); /* make sure IRQs are disabled before we say they are */
-       get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
-
-       kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF);
-       /* we are sure every CPU has IRQs off at this point */
-       kexec_all_irq_disabled = 1;
-
-       /*
-        * Before removing MMU mappings make sure all CPUs have entered real
-        * mode:
-        */
-       kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
-
-       /* after we tell the others to go down */
-       if (ppc_md.kexec_cpu_down)
-               ppc_md.kexec_cpu_down(0, 0);
-
-       put_cpu();
-}
-
-#else /* ! SMP */
-
-static void kexec_prepare_cpus(void)
-{
-       /*
-        * move the secondarys to us so that we can copy
-        * the new kernel 0-0x100 safely
-        *
-        * do this if kexec in setup.c ?
-        *
-        * We need to release the cpus if we are ever going from an
-        * UP to an SMP kernel.
-        */
-       smp_release_cpus();
-       if (ppc_md.kexec_cpu_down)
-               ppc_md.kexec_cpu_down(0, 0);
-       local_irq_disable();
-       hard_irq_disable();
-}
-
-#endif /* SMP */
-
-/*
- * kexec thread structure and stack.
- *
- * We need to make sure that this is 16384-byte aligned due to the
- * way process stacks are handled.  It also must be statically allocated
- * or allocated as part of the kimage, because everything else may be
- * overwritten when we copy the kexec image.  We piggyback on the
- * "init_task" linker section here to statically allocate a stack.
- *
- * We could use a smaller stack if we don't care about anything using
- * current, but that audit has not been performed.
- */
-static union thread_union kexec_stack __init_task_data =
-       { };
-
-/*
- * For similar reasons to the stack above, the kexecing CPU needs to be on a
- * static PACA; we switch to kexec_paca.
- */
-struct paca_struct kexec_paca;
-
-/* Our assembly helper, in misc_64.S */
-extern void kexec_sequence(void *newstack, unsigned long start,
-                          void *image, void *control,
-                          void (*clear_all)(void),
-                          bool copy_with_mmu_off) __noreturn;
-
-/* too late to fail here */
-void default_machine_kexec(struct kimage *image)
-{
-       bool copy_with_mmu_off;
-
-       /* prepare control code if any */
-
-       /*
-        * If the kexec boot is the normal one, need to shutdown other cpus
-        * into our wait loop and quiesce interrupts.
-        * Otherwise, in the case of crashed mode (crashing_cpu >= 0),
-        * stopping other CPUs and collecting their pt_regs is done before
-        * using debugger IPI.
-        */
-
-       if (!kdump_in_progress())
-               kexec_prepare_cpus();
-
-       printk("kexec: Starting switchover sequence.\n");
-
-       /* switch to a staticly allocated stack.  Based on irq stack code.
-        * We setup preempt_count to avoid using VMX in memcpy.
-        * XXX: the task struct will likely be invalid once we do the copy!
-        */
-       current_thread_info()->flags = 0;
-       current_thread_info()->preempt_count = HARDIRQ_OFFSET;
-
-       /* We need a static PACA, too; copy this CPU's PACA over and switch to
-        * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
-        * non-static data.
-        */
-       memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
-       kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
-#ifdef CONFIG_PPC_PSERIES
-       kexec_paca.lppaca_ptr = NULL;
-#endif
-
-       if (is_secure_guest() && !(image->preserve_context ||
-                                  image->type == KEXEC_TYPE_CRASH)) {
-               uv_unshare_all_pages();
-               printk("kexec: Unshared all shared pages.\n");
-       }
-
-       paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
-
-       setup_paca(&kexec_paca);
-
-       /*
-        * The lppaca should be unregistered at this point so the HV won't
-        * touch it. In the case of a crash, none of the lppacas are
-        * unregistered so there is not much we can do about it here.
-        */
-
-       /*
-        * On Book3S, the copy must happen with the MMU off if we are either
-        * using Radix page tables or we are not in an LPAR since we can
-        * overwrite the page tables while copying.
-        *
-        * In an LPAR, we keep the MMU on otherwise we can't access beyond
-        * the RMA. On BookE there is no real MMU off mode, so we have to
-        * keep it enabled as well (but then we have bolted TLB entries).
-        */
-#ifdef CONFIG_PPC_BOOK3E
-       copy_with_mmu_off = false;
-#else
-       copy_with_mmu_off = radix_enabled() ||
-               !(firmware_has_feature(FW_FEATURE_LPAR) ||
-                 firmware_has_feature(FW_FEATURE_PS3_LV1));
-#endif
-
-       /* Some things are best done in assembly.  Finding globals with
-        * a toc is easier in C, so pass in what we can.
-        */
-       kexec_sequence(&kexec_stack, image->start, image,
-                      page_address(image->control_code_page),
-                      mmu_cleanup_all, copy_with_mmu_off);
-       /* NOTREACHED */
-}
-
-#ifdef CONFIG_PPC_BOOK3S_64
-/* Values we need to export to the second kernel via the device tree. */
-static unsigned long htab_base;
-static unsigned long htab_size;
-
-static struct property htab_base_prop = {
-       .name = "linux,htab-base",
-       .length = sizeof(unsigned long),
-       .value = &htab_base,
-};
-
-static struct property htab_size_prop = {
-       .name = "linux,htab-size",
-       .length = sizeof(unsigned long),
-       .value = &htab_size,
-};
-
-static int __init export_htab_values(void)
-{
-       struct device_node *node;
-
-       /* On machines with no htab htab_address is NULL */
-       if (!htab_address)
-               return -ENODEV;
-
-       node = of_find_node_by_path("/chosen");
-       if (!node)
-               return -ENODEV;
-
-       /* remove any stale propertys so ours can be found */
-       of_remove_property(node, of_find_property(node, htab_base_prop.name, NULL));
-       of_remove_property(node, of_find_property(node, htab_size_prop.name, NULL));
-
-       htab_base = cpu_to_be64(__pa(htab_address));
-       of_add_property(node, &htab_base_prop);
-       htab_size = cpu_to_be64(htab_size_bytes);
-       of_add_property(node, &htab_size_prop);
-
-       of_node_put(node);
-       return 0;
-}
-late_initcall(export_htab_values);
-#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
deleted file mode 100644 (file)
index 143c917..0000000
+++ /dev/null
@@ -1,254 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ppc64 code to implement the kexec_file_load syscall
- *
- * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
- * Copyright (C) 2004  IBM Corp.
- * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
- * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
- * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
- * Copyright (C) 2016  IBM Corporation
- *
- * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c.
- * Heavily modified for the kernel by
- * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
- */
-
-#include <linux/slab.h>
-#include <linux/kexec.h>
-#include <linux/of_fdt.h>
-#include <linux/libfdt.h>
-#include <asm/ima.h>
-
-#define SLAVE_CODE_SIZE                256
-
-const struct kexec_file_ops * const kexec_file_loaders[] = {
-       &kexec_elf64_ops,
-       NULL
-};
-
-int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
-                                 unsigned long buf_len)
-{
-       /* We don't support crash kernels yet. */
-       if (image->type == KEXEC_TYPE_CRASH)
-               return -EOPNOTSUPP;
-
-       return kexec_image_probe_default(image, buf, buf_len);
-}
-
-/**
- * setup_purgatory - initialize the purgatory's global variables
- * @image:             kexec image.
- * @slave_code:                Slave code for the purgatory.
- * @fdt:               Flattened device tree for the next kernel.
- * @kernel_load_addr:  Address where the kernel is loaded.
- * @fdt_load_addr:     Address where the flattened device tree is loaded.
- *
- * Return: 0 on success, or negative errno on error.
- */
-int setup_purgatory(struct kimage *image, const void *slave_code,
-                   const void *fdt, unsigned long kernel_load_addr,
-                   unsigned long fdt_load_addr)
-{
-       unsigned int *slave_code_buf, master_entry;
-       int ret;
-
-       slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL);
-       if (!slave_code_buf)
-               return -ENOMEM;
-
-       /* Get the slave code from the new kernel and put it in purgatory. */
-       ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
-                                            slave_code_buf, SLAVE_CODE_SIZE,
-                                            true);
-       if (ret) {
-               kfree(slave_code_buf);
-               return ret;
-       }
-
-       master_entry = slave_code_buf[0];
-       memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE);
-       slave_code_buf[0] = master_entry;
-       ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
-                                            slave_code_buf, SLAVE_CODE_SIZE,
-                                            false);
-       kfree(slave_code_buf);
-
-       ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr,
-                                            sizeof(kernel_load_addr), false);
-       if (ret)
-               return ret;
-       ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr,
-                                            sizeof(fdt_load_addr), false);
-       if (ret)
-               return ret;
-
-       return 0;
-}
-
-/**
- * delete_fdt_mem_rsv - delete memory reservation with given address and size
- *
- * Return: 0 on success, or negative errno on error.
- */
-int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size)
-{
-       int i, ret, num_rsvs = fdt_num_mem_rsv(fdt);
-
-       for (i = 0; i < num_rsvs; i++) {
-               uint64_t rsv_start, rsv_size;
-
-               ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size);
-               if (ret) {
-                       pr_err("Malformed device tree.\n");
-                       return -EINVAL;
-               }
-
-               if (rsv_start == start && rsv_size == size) {
-                       ret = fdt_del_mem_rsv(fdt, i);
-                       if (ret) {
-                               pr_err("Error deleting device tree reservation.\n");
-                               return -EINVAL;
-                       }
-
-                       return 0;
-               }
-       }
-
-       return -ENOENT;
-}
-
-/*
- * setup_new_fdt - modify /chosen and memory reservation for the next kernel
- * @image:             kexec image being loaded.
- * @fdt:               Flattened device tree for the next kernel.
- * @initrd_load_addr:  Address where the next initrd will be loaded.
- * @initrd_len:                Size of the next initrd, or 0 if there will be none.
- * @cmdline:           Command line for the next kernel, or NULL if there will
- *                     be none.
- *
- * Return: 0 on success, or negative errno on error.
- */
-int setup_new_fdt(const struct kimage *image, void *fdt,
-                 unsigned long initrd_load_addr, unsigned long initrd_len,
-                 const char *cmdline)
-{
-       int ret, chosen_node;
-       const void *prop;
-
-       /* Remove memory reservation for the current device tree. */
-       ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params),
-                                fdt_totalsize(initial_boot_params));
-       if (ret == 0)
-               pr_debug("Removed old device tree reservation.\n");
-       else if (ret != -ENOENT)
-               return ret;
-
-       chosen_node = fdt_path_offset(fdt, "/chosen");
-       if (chosen_node == -FDT_ERR_NOTFOUND) {
-               chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
-                                             "chosen");
-               if (chosen_node < 0) {
-                       pr_err("Error creating /chosen.\n");
-                       return -EINVAL;
-               }
-       } else if (chosen_node < 0) {
-               pr_err("Malformed device tree: error reading /chosen.\n");
-               return -EINVAL;
-       }
-
-       /* Did we boot using an initrd? */
-       prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL);
-       if (prop) {
-               uint64_t tmp_start, tmp_end, tmp_size;
-
-               tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop));
-
-               prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL);
-               if (!prop) {
-                       pr_err("Malformed device tree.\n");
-                       return -EINVAL;
-               }
-               tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop));
-
-               /*
-                * kexec reserves exact initrd size, while firmware may
-                * reserve a multiple of PAGE_SIZE, so check for both.
-                */
-               tmp_size = tmp_end - tmp_start;
-               ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size);
-               if (ret == -ENOENT)
-                       ret = delete_fdt_mem_rsv(fdt, tmp_start,
-                                                round_up(tmp_size, PAGE_SIZE));
-               if (ret == 0)
-                       pr_debug("Removed old initrd reservation.\n");
-               else if (ret != -ENOENT)
-                       return ret;
-
-               /* If there's no new initrd, delete the old initrd's info. */
-               if (initrd_len == 0) {
-                       ret = fdt_delprop(fdt, chosen_node,
-                                         "linux,initrd-start");
-                       if (ret) {
-                               pr_err("Error deleting linux,initrd-start.\n");
-                               return -EINVAL;
-                       }
-
-                       ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end");
-                       if (ret) {
-                               pr_err("Error deleting linux,initrd-end.\n");
-                               return -EINVAL;
-                       }
-               }
-       }
-
-       if (initrd_len) {
-               ret = fdt_setprop_u64(fdt, chosen_node,
-                                     "linux,initrd-start",
-                                     initrd_load_addr);
-               if (ret < 0)
-                       goto err;
-
-               /* initrd-end is the first address after the initrd image. */
-               ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end",
-                                     initrd_load_addr + initrd_len);
-               if (ret < 0)
-                       goto err;
-
-               ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len);
-               if (ret) {
-                       pr_err("Error reserving initrd memory: %s\n",
-                              fdt_strerror(ret));
-                       return -EINVAL;
-               }
-       }
-
-       if (cmdline != NULL) {
-               ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline);
-               if (ret < 0)
-                       goto err;
-       } else {
-               ret = fdt_delprop(fdt, chosen_node, "bootargs");
-               if (ret && ret != -FDT_ERR_NOTFOUND) {
-                       pr_err("Error deleting bootargs.\n");
-                       return -EINVAL;
-               }
-       }
-
-       ret = setup_ima_buffer(image, fdt, chosen_node);
-       if (ret) {
-               pr_err("Error setting up the new device tree.\n");
-               return ret;
-       }
-
-       ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0);
-       if (ret)
-               goto err;
-
-       return 0;
-
-err:
-       pr_err("Error setting up the new device tree.\n");
-       return -EINVAL;
-}
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
new file mode 100644 (file)
index 0000000..16c1c5a
--- /dev/null
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux kernel.
+#
+
+# Disable clang warning for using setjmp without setjmp.h header
+CFLAGS_crash.o         += $(call cc-disable-warning, builtin-requires-header)
+
+obj-y                          += core.o crash.o core_$(BITS).o
+
+obj-$(CONFIG_PPC32)            += relocate_32.o
+
+obj-$(CONFIG_KEXEC_FILE)       += file_load.o elf_$(BITS).o
+
+ifdef CONFIG_HAVE_IMA_KEXEC
+ifdef CONFIG_IMA
+obj-y                          += ima.o
+endif
+endif
+
+
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
+GCOV_PROFILE_core_$(BITS).o := n
+KCOV_INSTRUMENT_core_$(BITS).o := n
+UBSAN_SANITIZE_core_$(BITS).o := n
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
new file mode 100644 (file)
index 0000000..078fe3d
--- /dev/null
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Code to handle transition of Linux booting another kernel.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * Copyright (C) 2005 IBM Corporation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/reboot.h>
+#include <linux/threads.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/irq.h>
+#include <linux/ftrace.h>
+
+#include <asm/kdump.h>
+#include <asm/machdep.h>
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/sections.h>
+
+/* Quiesce every IRQ: EOI it if in progress, then mask and disable it. */
+void machine_kexec_mask_interrupts(void)
+{
+       struct irq_desc *desc;
+       unsigned int irq;
+
+       for_each_irq_desc(irq, desc) {
+               struct irq_chip *chip = irq_desc_get_chip(desc);
+               struct irq_data *data = &desc->irq_data;
+
+               if (!chip)
+                       continue;
+
+               if (chip->irq_eoi && irqd_irq_inprogress(data))
+                       chip->irq_eoi(data);
+               if (chip->irq_mask)
+                       chip->irq_mask(data);
+               if (chip->irq_disable && !irqd_irq_disabled(data))
+                       chip->irq_disable(data);
+       }
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+       default_machine_crash_shutdown(regs);
+}
+
+/*
+ * Do whatever setup is needed on image and the reboot code buffer to
+ * allow us to avoid allocations later.  A platform hook in ppc_md, if
+ * set, takes precedence over the default implementation.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+       if (ppc_md.machine_kexec_prepare)
+               return ppc_md.machine_kexec_prepare(image);
+       else
+               return default_machine_kexec_prepare(image);
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+/* Record arch symbols, struct layouts and KERNELOFFSET in vmcoreinfo. */
+void arch_crash_save_vmcoreinfo(void)
+{
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+       VMCOREINFO_SYMBOL(node_data);
+       VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#else
+       VMCOREINFO_SYMBOL(contig_page_data);
+#endif
+
+#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+       VMCOREINFO_SYMBOL(vmemmap_list);
+       VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
+       VMCOREINFO_SYMBOL(mmu_psize_defs);
+       VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
+       VMCOREINFO_OFFSET(vmemmap_backing, list);
+       VMCOREINFO_OFFSET(vmemmap_backing, phys);
+       VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
+       VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
+       VMCOREINFO_OFFSET(mmu_psize_def, shift);
+#endif
+       vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+}
+
+/*
+ * Hand control to the new kernel.  We are past the point of no return here,
+ * so nothing in this path may allocate memory or otherwise fail.
+ */
+void machine_kexec(struct kimage *image)
+{
+       int ftrace_was_enabled = __ftrace_enabled_save();
+
+       this_cpu_disable_ftrace();
+
+       if (ppc_md.machine_kexec)
+               ppc_md.machine_kexec(image);
+       else
+               default_machine_kexec(image);
+
+       this_cpu_enable_ftrace();
+       __ftrace_enabled_restore(ftrace_was_enabled);
+
+       /* Fall back to normal restart if we're still alive. */
+       machine_restart(NULL);
+       for (;;)
+               ;
+}
+
+void __init reserve_crashkernel(void)
+{
+       unsigned long long crash_size, crash_base;
+       int ret;
+
+       /* use common parsing */
+       ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+                       &crash_size, &crash_base);
+       if (ret == 0 && crash_size > 0) {
+               crashk_res.start = crash_base;
+               crashk_res.end = crash_base + crash_size - 1;
+       }
+
+       if (crashk_res.end == crashk_res.start) {
+               crashk_res.start = crashk_res.end = 0;
+               return;
+       }
+
+       /* We might have got these values via the command line or the
+        * device tree, either way sanitise them now. */
+
+       crash_size = resource_size(&crashk_res);
+
+#ifndef CONFIG_NONSTATIC_KERNEL
+       if (crashk_res.start != KDUMP_KERNELBASE)
+               printk("Crash kernel location must be 0x%x\n",
+                               KDUMP_KERNELBASE);
+
+       crashk_res.start = KDUMP_KERNELBASE;
+#else
+       if (!crashk_res.start) {
+#ifdef CONFIG_PPC64
+               /*
+                * On 64bit we split the RMO in half but cap it at half of
+                * a small SLB (128MB) since the crash kernel needs to place
+                * itself and some stacks to be in the first segment.
+                */
+               crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
+#else
+               crashk_res.start = KDUMP_KERNELBASE;
+#endif
+       }
+
+       crash_base = PAGE_ALIGN(crashk_res.start);
+       if (crash_base != crashk_res.start) {
+               printk("Crash kernel base must be aligned to 0x%lx\n",
+                               PAGE_SIZE);
+               crashk_res.start = crash_base;
+       }
+
+#endif
+       crash_size = PAGE_ALIGN(crash_size);
+       crashk_res.end = crashk_res.start + crash_size - 1;
+
+       /* The crash region must not overlap the current kernel */
+       if (overlaps_crashkernel(__pa(_stext), _end - _stext)) {
+               printk(KERN_WARNING
+                       "Crash kernel can not overlap current kernel\n");
+               crashk_res.start = crashk_res.end = 0;
+               return;
+       }
+
+       /* Crash kernel trumps memory limit */
+       if (memory_limit && memory_limit <= crashk_res.end) {
+               memory_limit = crashk_res.end + 1;
+               printk("Adjusted memory limit for crashkernel, now 0x%llx\n",
+                      memory_limit);
+       }
+
+       printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+                       "for crashkernel (System RAM: %ldMB)\n",
+                       (unsigned long)(crash_size >> 20),
+                       (unsigned long)(crashk_res.start >> 20),
+                       (unsigned long)(memblock_phys_mem_size() >> 20));
+
+       if (!memblock_is_region_memory(crashk_res.start, crash_size) ||
+           memblock_reserve(crashk_res.start, crash_size)) {
+               pr_err("Failed to reserve memory for crashkernel!\n");
+               crashk_res.start = crashk_res.end = 0;
+               return;
+       }
+}
+
+int overlaps_crashkernel(unsigned long start, unsigned long size)
+{
+       return (start + size) > crashk_res.start && start <= crashk_res.end;
+}
+
+/* Values we need to export to the second kernel via the device tree. */
+static phys_addr_t kernel_end;
+static phys_addr_t crashk_base;
+static phys_addr_t crashk_size;
+static unsigned long long mem_limit;
+
+static struct property kernel_end_prop = {
+       .name = "linux,kernel-end",
+       .length = sizeof(phys_addr_t),
+       .value = &kernel_end,
+};
+
+static struct property crashk_base_prop = {
+       .name = "linux,crashkernel-base",
+       .length = sizeof(phys_addr_t),
+       .value = &crashk_base
+};
+
+static struct property crashk_size_prop = {
+       .name = "linux,crashkernel-size",
+       .length = sizeof(phys_addr_t),
+       .value = &crashk_size,
+};
+
+static struct property memory_limit_prop = {
+       .name = "linux,memory-limit",
+       .length = sizeof(unsigned long long),
+       .value = &mem_limit,
+};
+
+#define cpu_to_be_ulong        __PASTE(cpu_to_be, BITS_PER_LONG)
+
+/* Export the crash kernel region and the memory limit as properties of
+ * @node for the second kernel and kexec-tools to read. */
+static void __init export_crashk_values(struct device_node *node)
+{
+       /* There might be existing crash kernel properties, but we can't
+        * be sure what's in them, so remove them. */
+       of_remove_property(node, of_find_property(node, crashk_base_prop.name, NULL));
+       of_remove_property(node, of_find_property(node, crashk_size_prop.name, NULL));
+
+       if (crashk_res.start != 0) {
+               crashk_base = cpu_to_be_ulong(crashk_res.start);
+               of_add_property(node, &crashk_base_prop);
+               crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));
+               of_add_property(node, &crashk_size_prop);
+       }
+
+       /*
+        * memory_limit is required by the kexec-tools to limit the crash
+        * regions to the actual memory used; the property is always 64-bit.
+        */
+       mem_limit = cpu_to_be64(memory_limit);
+       of_update_property(node, &memory_limit_prop);
+}
+
+static int __init kexec_setup(void)
+{
+       struct device_node *node;
+
+       node = of_find_node_by_path("/chosen");
+       if (!node)
+               return -ENOENT;
+
+       /* remove any stale properties so ours can be found */
+       of_remove_property(node, of_find_property(node, kernel_end_prop.name, NULL));
+
+       /* information needed by userspace when using default_machine_kexec */
+       kernel_end = cpu_to_be_ulong(__pa(_end));
+       of_add_property(node, &kernel_end_prop);
+
+       export_crashk_values(node);
+
+       of_node_put(node);
+       return 0;
+}
+late_initcall(kexec_setup);
diff --git a/arch/powerpc/kexec/core_32.c b/arch/powerpc/kexec/core_32.c
new file mode 100644 (file)
index 0000000..bf9f1f9
--- /dev/null
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PPC32 code to handle Linux booting another kernel.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * Copyright (C) 2005 IBM Corporation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <asm/cacheflush.h>
+#include <asm/hw_irq.h>
+#include <asm/io.h>
+
+/*
+ * Signature used to call the copy of the relocation code that
+ * default_machine_kexec() places in the control code page. Declared
+ * __noreturn.
+ */
+typedef void (*relocate_new_kernel_t)(
+                               unsigned long indirection_page,
+                               unsigned long reboot_code_buffer,
+                               unsigned long start_address) __noreturn;
+
+/*
+ * This is a generic machine_kexec function suitable at least for
+ * non-OpenFirmware embedded platforms.
+ * It merely copies the image relocation code to the control page and
+ * jumps to it.
+ * A platform specific function may just call this one.
+ */
+void default_machine_kexec(struct kimage *image)
+{
+       /* size of the relocation code; defined outside this file */
+       extern const unsigned int relocate_new_kernel_size;
+       unsigned long page_list;
+       unsigned long reboot_code_buffer, reboot_code_buffer_phys;
+       relocate_new_kernel_t rnk;
+
+       /* Interrupts aren't acceptable while we reboot */
+       local_irq_disable();
+
+       /* mask each interrupt so we are in a more sane state for the
+        * kexec kernel */
+       machine_kexec_mask_interrupts();
+
+       /* head of the image's indirection page list */
+       page_list = image->head;
+
+       /* we need both effective and real address here */
+       reboot_code_buffer =
+                       (unsigned long)page_address(image->control_code_page);
+       reboot_code_buffer_phys = virt_to_phys((void *)reboot_code_buffer);
+
+       /* copy our kernel relocation code to the control code page */
+       memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+                                               relocate_new_kernel_size);
+
+       /* make the freshly copied code visible to instruction fetch */
+       flush_icache_range(reboot_code_buffer,
+                               reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
+       printk(KERN_INFO "Bye!\n");
+
+       /*
+        * Unless built for FSL_BOOKE or 44x, call relocate_new_kernel in
+        * place instead of going through the copy in the control page.
+        * NOTE(review): presumably this call never returns, making the
+        * code below reachable only on FSL_BOOKE/44x - confirm.
+        */
+       if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x))
+               relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start);
+
+       /* now call it */
+       rnk = (relocate_new_kernel_t) reboot_code_buffer;
+       (*rnk)(page_list, reboot_code_buffer_phys, image->start);
+}
+
+/* PPC32 performs no checks on @image here; always returns 0. */
+int default_machine_kexec_prepare(struct kimage *image)
+{
+       return 0;
+}
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
new file mode 100644 (file)
index 0000000..04a7cba
--- /dev/null
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PPC64 code to handle Linux booting another kernel.
+ *
+ * Copyright (C) 2004-2005, IBM Corp.
+ *
+ * Created by: Milton D Miller II
+ */
+
+
+#include <linux/kexec.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/init_task.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/hardirq.h>
+
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
+#include <asm/firmware.h>
+#include <asm/paca.h>
+#include <asm/mmu.h>
+#include <asm/sections.h>      /* _end */
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/asm-prototypes.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
+
+/*
+ * Validate the destination ranges of @image before it is accepted.
+ *
+ * Returns -ETXTBSY if any segment starts below the physical address of
+ * _end, or overlaps a TCE table advertised by a "pci" device-tree node
+ * through linux,tce-base / linux,tce-size. Returns 0 otherwise.
+ */
+int default_machine_kexec_prepare(struct kimage *image)
+{
+       int i;
+       unsigned long begin, end;       /* limits of segment */
+       unsigned long low, high;        /* limits of blocked memory range */
+       struct device_node *node;
+       const unsigned long *basep;
+       const unsigned int *sizep;
+
+       /*
+        * Since we use the kernel fault handlers and paging code to
+        * handle the virtual mode, we must make sure no destination
+        * overlaps kernel static data or bss.
+        */
+       for (i = 0; i < image->nr_segments; i++)
+               if (image->segment[i].mem < __pa(_end))
+                       return -ETXTBSY;
+
+       /* We also should not overwrite the tce tables */
+       for_each_node_by_type(node, "pci") {
+               basep = of_get_property(node, "linux,tce-base", NULL);
+               sizep = of_get_property(node, "linux,tce-size", NULL);
+               if (basep == NULL || sizep == NULL)
+                       continue;
+
+               low = *basep;
+               high = low + (*sizep);
+
+               for (i = 0; i < image->nr_segments; i++) {
+                       begin = image->segment[i].mem;
+                       end = begin + image->segment[i].memsz;
+
+                       if ((begin < high) && (end > low)) {
+                               /*
+                                * Leaving the iterator early: drop the
+                                * reference it holds on the current node.
+                                */
+                               of_node_put(node);
+                               return -ETXTBSY;
+                       }
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * Walk the indirection list that starts with entry @ind and copy every
+ * source page to the current destination, until an entry with IND_DONE
+ * set is reached.
+ */
+static void copy_segments(unsigned long ind)
+{
+       unsigned long entry;
+       unsigned long *ptr;
+       void *dest;
+       void *addr;
+
+       /*
+        * We rely on kexec_load to create a list that properly
+        * initializes these pointers before they are used.
+        * We will still crash if the list is wrong, but at least
+        * the compiler will be quiet.
+        */
+       ptr = NULL;
+       dest = NULL;
+
+       for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
+               /* page-aligned part of the entry, as a kernel virtual address */
+               addr = __va(entry & PAGE_MASK);
+
+               switch (entry & IND_FLAGS) {
+               case IND_DESTINATION:
+                       /* following source pages are copied starting here */
+                       dest = addr;
+                       break;
+               case IND_INDIRECTION:
+                       /* continue reading entries from this page */
+                       ptr = addr;
+                       break;
+               case IND_SOURCE:
+                       /* copy one page and advance the destination */
+                       copy_page(dest, addr);
+                       dest += PAGE_SIZE;
+               }
+       }
+}
+
+/*
+ * Copy the segments of @image to their destinations, then flush the
+ * instruction cache over every destination range.
+ */
+void kexec_copy_flush(struct kimage *image)
+{
+       /* read before the copy: *image may not be used afterwards */
+       long i, nr_segments = image->nr_segments;
+       struct  kexec_segment ranges[KEXEC_SEGMENT_MAX];
+
+       /* save the ranges on the stack to efficiently flush the icache */
+       memcpy(ranges, image->segment, sizeof(ranges));
+
+       /*
+        * After this call we may not use anything allocated in dynamic
+        * memory, including *image.
+        *
+        * Only globals and the stack are allowed.
+        */
+       copy_segments(image->head);
+
+       /*
+        * we need to clear the icache for all dest pages sometime,
+        * including ones that were in place on the original copy
+        */
+       for (i = 0; i < nr_segments; i++)
+               flush_icache_range((unsigned long)__va(ranges[i].mem),
+                       (unsigned long)__va(ranges[i].mem + ranges[i].memsz));
+}
+
+#ifdef CONFIG_SMP
+
+/*
+ * Set to 1 by kexec_prepare_cpus() once every online CPU has reported
+ * KEXEC_STATE_IRQS_OFF; kexec_smp_down() spins until then.
+ */
+static int kexec_all_irq_disabled = 0;
+
+/*
+ * Runs on every other CPU via smp_call_function() from
+ * kexec_prepare_cpus(). Disables interrupts, reports that through the
+ * paca, waits for all CPUs to do the same and then enters
+ * kexec_smp_wait(). @arg is unused.
+ */
+static void kexec_smp_down(void *arg)
+{
+       local_irq_disable();
+       hard_irq_disable();
+
+       mb(); /* make sure our irqs are disabled before we say they are */
+       get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
+       while(kexec_all_irq_disabled == 0)
+               cpu_relax();
+       mb(); /* make sure all irqs are disabled before this */
+       hw_breakpoint_disable();
+       /*
+        * Now every CPU has IRQs off, we can clear out any pending
+        * IPIs and be sure that no more will come in after this.
+        */
+       if (ppc_md.kexec_cpu_down)
+               ppc_md.kexec_cpu_down(0, 1);
+
+       kexec_smp_wait();
+       /* NOTREACHED */
+}
+
+/*
+ * Busy-wait until every online CPU other than the caller has a paca
+ * kexec_state of at least @wait_state. There is no timeout. One
+ * informational message is printed per CPU that has to be waited for.
+ *
+ * Calls get_cpu() without a matching put_cpu(); kexec_prepare_cpus()
+ * issues the put_cpu() after its last wait.
+ */
+static void kexec_prepare_cpus_wait(int wait_state)
+{
+       int my_cpu, i, notified=-1;
+
+       hw_breakpoint_disable();
+       my_cpu = get_cpu();
+       /* Make sure each CPU has at least made it to the state we need.
+        *
+        * FIXME: There is a (slim) chance of a problem if not all of the CPUs
+        * are correctly onlined.  If somehow we start a CPU on boot with RTAS
+        * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
+        * time, the boot CPU will timeout.  If it does eventually execute
+        * stuff, the secondary will start up (paca_ptrs[]->cpu_start was
+        * written) and get into a peculiar state.
+        * If the platform supports smp_ops->take_timebase(), the secondary CPU
+        * will probably be spinning in there.  If not (i.e. pseries), the
+        * secondary will continue on and try to online itself/idle/etc. If it
+        * survives that, we need to find these
+        * possible-but-not-online-but-should-be CPUs and chaperone them into
+        * kexec_smp_wait().
+        */
+       for_each_online_cpu(i) {
+               if (i == my_cpu)
+                       continue;
+
+               while (paca_ptrs[i]->kexec_state < wait_state) {
+                       /* compiler barrier: re-read kexec_state each pass */
+                       barrier();
+                       /* report each slow CPU only once */
+                       if (i != notified) {
+                               printk(KERN_INFO "kexec: waiting for cpu %d "
+                                      "(physical %d) to enter %i state\n",
+                                      i, paca_ptrs[i]->hw_cpu_id, wait_state);
+                               notified = i;
+                       }
+               }
+       }
+       mb();
+}
+
+/*
+ * We need to make sure each present CPU is online.  The next kernel will scan
+ * the device tree and assume primary threads are online and query secondary
+ * threads via RTAS to online them if required.  If we don't online primary
+ * threads, they will be stuck.  However, we also online secondary threads as we
+ * may be using 'cede offline'.  In this case RTAS doesn't see the secondary
+ * threads as offline -- and again, these CPUs will be stuck.
+ *
+ * So, we online all CPUs that should be running, including secondary threads.
+ */
+static void wake_offline_cpus(void)
+{
+       int n = 0;
+
+       /* Bring up every present CPU that is not online yet. */
+       for_each_present_cpu(n) {
+               if (cpu_online(n))
+                       continue;
+
+               printk(KERN_INFO "kexec: Waking offline cpu %d.\n", n);
+               /* a failed cpu_up() is reported but not fatal */
+               WARN_ON(cpu_up(n));
+       }
+}
+
+/*
+ * SMP variant: online all present CPUs, send every other CPU into
+ * kexec_smp_down(), and wait until they have all disabled interrupts and
+ * then reached KEXEC_STATE_REAL_MODE.
+ */
+static void kexec_prepare_cpus(void)
+{
+       wake_offline_cpus();
+       smp_call_function(kexec_smp_down, NULL, /* wait */0);
+       local_irq_disable();
+       hard_irq_disable();
+
+       mb(); /* make sure IRQs are disabled before we say they are */
+       get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
+
+       kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF);
+       /* we are sure every CPU has IRQs off at this point */
+       kexec_all_irq_disabled = 1;
+
+       /*
+        * Before removing MMU mappings make sure all CPUs have entered real
+        * mode:
+        */
+       kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
+
+       /* after we tell the others to go down */
+       if (ppc_md.kexec_cpu_down)
+               ppc_md.kexec_cpu_down(0, 0);
+
+       /* pairs with the get_cpu() done in kexec_prepare_cpus_wait() */
+       put_cpu();
+}
+
+#else /* ! SMP */
+
+/*
+ * Non-SMP variant: release the secondaries, run the platform
+ * kexec_cpu_down hook if there is one, then disable interrupts.
+ */
+static void kexec_prepare_cpus(void)
+{
+       /*
+        * move the secondaries to us so that we can copy
+        * the new kernel 0-0x100 safely
+        *
+        * do this if kexec in setup.c ?
+        *
+        * We need to release the cpus if we are ever going from an
+        * UP to an SMP kernel.
+        */
+       smp_release_cpus();
+       if (ppc_md.kexec_cpu_down)
+               ppc_md.kexec_cpu_down(0, 0);
+       local_irq_disable();
+       hard_irq_disable();
+}
+
+#endif /* SMP */
+
+/*
+ * kexec thread structure and stack.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled.  It also must be statically allocated
+ * or allocated as part of the kimage, because everything else may be
+ * overwritten when we copy the kexec image.  We piggyback on the
+ * "init_task" linker section here to statically allocate a stack.
+ *
+ * We could use a smaller stack if we don't care about anything using
+ * current, but that audit has not been performed.
+ */
+/* Passed to kexec_sequence() as the new stack by default_machine_kexec(). */
+static union thread_union kexec_stack __init_task_data =
+       { };
+
+/*
+ * For similar reasons to the stack above, the kexecing CPU needs to be on a
+ * static PACA; we switch to kexec_paca.
+ */
+/* Filled by default_machine_kexec() with a copy of the running CPU's PACA. */
+struct paca_struct kexec_paca;
+
+/* Our assembly helper, in misc_64.S */
+/*
+ * Never returns. default_machine_kexec() passes: the static kexec stack,
+ * image->start, the image itself, the address of the control code page,
+ * mmu_cleanup_all as @clear_all, and whether the copy has to run with the
+ * MMU off.
+ */
+extern void kexec_sequence(void *newstack, unsigned long start,
+                          void *image, void *control,
+                          void (*clear_all)(void),
+                          bool copy_with_mmu_off) __noreturn;
+
+/* too late to fail here */
+/*
+ * PPC64 switchover: quiesce the other CPUs (unless a kdump is in
+ * progress), move this CPU onto the static kexec PACA and hand over to
+ * kexec_sequence() with the static kexec stack.
+ */
+void default_machine_kexec(struct kimage *image)
+{
+       bool copy_with_mmu_off;
+
+       /* prepare control code if any */
+
+       /*
+        * If the kexec boot is the normal one, need to shutdown other cpus
+        * into our wait loop and quiesce interrupts.
+        * Otherwise, in the case of crashed mode (crashing_cpu >= 0),
+        * stopping other CPUs and collecting their pt_regs is done before
+        * using debugger IPI.
+        */
+
+       if (!kdump_in_progress())
+               kexec_prepare_cpus();
+
+       printk("kexec: Starting switchover sequence.\n");
+
+       /* switch to a statically allocated stack.  Based on irq stack code.
+        * We setup preempt_count to avoid using VMX in memcpy.
+        * XXX: the task struct will likely be invalid once we do the copy!
+        */
+       current_thread_info()->flags = 0;
+       current_thread_info()->preempt_count = HARDIRQ_OFFSET;
+
+       /* We need a static PACA, too; copy this CPU's PACA over and switch to
+        * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
+        * non-static data.
+        */
+       memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
+       kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
+#ifdef CONFIG_PPC_PSERIES
+       kexec_paca.lppaca_ptr = NULL;
+#endif
+
+       /*
+        * Secure guest only: unshare all shared pages, except for
+        * preserve_context and crash images.
+        */
+       if (is_secure_guest() && !(image->preserve_context ||
+                                  image->type == KEXEC_TYPE_CRASH)) {
+               uv_unshare_all_pages();
+               printk("kexec: Unshared all shared pages.\n");
+       }
+
+       /* make this CPU's paca_ptrs[] slot refer to the static copy */
+       paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
+
+       setup_paca(&kexec_paca);
+
+       /*
+        * The lppaca should be unregistered at this point so the HV won't
+        * touch it. In the case of a crash, none of the lppacas are
+        * unregistered so there is not much we can do about it here.
+        */
+
+       /*
+        * On Book3S, the copy must happen with the MMU off if we are either
+        * using Radix page tables or we are not in an LPAR since we can
+        * overwrite the page tables while copying.
+        *
+        * In an LPAR, we keep the MMU on otherwise we can't access beyond
+        * the RMA. On BookE there is no real MMU off mode, so we have to
+        * keep it enabled as well (but then we have bolted TLB entries).
+        */
+#ifdef CONFIG_PPC_BOOK3E
+       copy_with_mmu_off = false;
+#else
+       copy_with_mmu_off = radix_enabled() ||
+               !(firmware_has_feature(FW_FEATURE_LPAR) ||
+                 firmware_has_feature(FW_FEATURE_PS3_LV1));
+#endif
+
+       /* Some things are best done in assembly.  Finding globals with
+        * a toc is easier in C, so pass in what we can.
+        */
+       kexec_sequence(&kexec_stack, image->start, image,
+                      page_address(image->control_code_page),
+                      mmu_cleanup_all, copy_with_mmu_off);
+       /* NOTREACHED */
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Values we need to export to the second kernel via the device tree. */
+/* Backing storage for the two properties below; set by export_htab_values(). */
+static unsigned long htab_base;
+static unsigned long htab_size;
+
+/* "linux,htab-base": value points at htab_base. */
+static struct property htab_base_prop = {
+       .name = "linux,htab-base",
+       .length = sizeof(unsigned long),
+       .value = &htab_base,
+};
+
+/* "linux,htab-size": value points at htab_size. */
+static struct property htab_size_prop = {
+       .name = "linux,htab-size",
+       .length = sizeof(unsigned long),
+       .value = &htab_size,
+};
+
+/*
+ * Late initcall: publish the hash table's physical address and size
+ * (big-endian, 64-bit) as linux,htab-base / linux,htab-size under /chosen.
+ *
+ * Returns -ENODEV when htab_address is NULL or /chosen is missing,
+ * 0 otherwise.
+ */
+static int __init export_htab_values(void)
+{
+       struct device_node *node;
+
+       /* On machines with no htab htab_address is NULL */
+       if (!htab_address)
+               return -ENODEV;
+
+       node = of_find_node_by_path("/chosen");
+       if (!node)
+               return -ENODEV;
+
+       /* remove any stale properties so ours can be found */
+       of_remove_property(node, of_find_property(node, htab_base_prop.name, NULL));
+       of_remove_property(node, of_find_property(node, htab_size_prop.name, NULL));
+
+       htab_base = cpu_to_be64(__pa(htab_address));
+       of_add_property(node, &htab_base_prop);
+       htab_size = cpu_to_be64(htab_size_bytes);
+       of_add_property(node, &htab_size_prop);
+
+       /* done with the /chosen node */
+       of_node_put(node);
+       return 0;
+}
+late_initcall(export_htab_values);
+#endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
new file mode 100644 (file)
index 0000000..d488311
--- /dev/null
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Architecture specific (PPC64) functions for kexec based crash dumps.
+ *
+ * Copyright (C) 2005, IBM Corp.
+ *
+ * Created by: Haren Myneni
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/export.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/types.h>
+
+#include <asm/processor.h>
+#include <asm/machdep.h>
+#include <asm/kexec.h>
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/setjmp.h>
+#include <asm/debug.h>
+
+/*
+ * The primary CPU waits a while for all secondary CPUs to enter. This is to
+ * avoid sending an IPI if the secondary CPUs are entering
+ * crash_kexec_secondary on their own (eg via a system reset).
+ *
+ * The secondary timeout has to be longer than the primary. Both timeouts are
+ * in milliseconds.
+ */
+#define PRIMARY_TIMEOUT                500
+#define SECONDARY_TIMEOUT      1000
+
+/* Poll budget (1 ms steps) while waiting for CPUs to answer the crash IPI. */
+#define IPI_TIMEOUT            10000
+/* Poll budget (1 ms steps) shared across all CPUs in crash_kexec_wait_realmode(). */
+#define REAL_MODE_TIMEOUT      10000
+
+/*
+ * Set to 1 by default_machine_crash_shutdown(); CPUs parked in
+ * crash_ipi_callback() spin until it becomes non-zero.
+ */
+static int time_to_dump;
+/*
+ * crash_wake_offline should be set to 1 by platforms that intend to wake
+ * up offline cpus prior to jumping to a kdump kernel. Currently powernv
+ * sets it to 1, since we want to avoid things from happening when an
+ * offline CPU wakes up due to something like an HMI (malfunction error),
+ * which propagates to all threads.
+ */
+int crash_wake_offline;
+
+/* Capacity of the crash shutdown handler table. */
+#define CRASH_HANDLER_MAX 3
+/* List of shutdown handles */
+static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX];
+/* Taken by crash_shutdown_register()/crash_shutdown_unregister(). */
+static DEFINE_SPINLOCK(crash_handlers_lock);
+
+/* setjmp context that handle_fault() jumps back to. */
+static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
+/* CPU allowed to longjmp from handle_fault(); -1 when none is armed. */
+static int crash_shutdown_cpu = -1;
+
+/*
+ * Debugger hook installed while the crash code runs protected sections.
+ * On the CPU recorded in crash_shutdown_cpu it longjmps back to the
+ * setjmp() point saved in crash_shutdown_buf; on any other CPU it
+ * returns 0. @regs is unused.
+ */
+static int handle_fault(struct pt_regs *regs)
+{
+       if (crash_shutdown_cpu == smp_processor_id())
+               longjmp(crash_shutdown_buf, 1);
+       return 0;
+}
+
+#ifdef CONFIG_SMP
+
+/* Number of CPUs that have entered crash_ipi_callback(). */
+static atomic_t cpus_in_crash;
+/*
+ * Run by each non-crashing CPU: save its register state once, count
+ * itself in cpus_in_crash, wait for time_to_dump and then park.
+ */
+void crash_ipi_callback(struct pt_regs *regs)
+{
+       /* CPUs whose state has already been saved */
+       static cpumask_t cpus_state_saved = CPU_MASK_NONE;
+
+       int cpu = smp_processor_id();
+
+       hard_irq_disable();
+       /* save only on the first entry for this CPU */
+       if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
+               crash_save_cpu(regs, cpu);
+               cpumask_set_cpu(cpu, &cpus_state_saved);
+       }
+
+       atomic_inc(&cpus_in_crash);
+       smp_mb__after_atomic();
+
+       /*
+        * Starting the kdump boot.
+        * This barrier is needed to make sure that all CPUs are stopped.
+        */
+       while (!time_to_dump)
+               cpu_relax();
+
+       if (ppc_md.kexec_cpu_down)
+               ppc_md.kexec_cpu_down(1, 1);
+
+#ifdef CONFIG_PPC64
+       kexec_smp_wait();
+#else
+       for (;;);       /* FIXME */
+#endif
+
+       /* NOTREACHED */
+}
+
+/*
+ * SMP variant: send the crash IPI and wait up to IPI_TIMEOUT ms for the
+ * other CPUs to check in through cpus_in_crash. If some do not respond
+ * and no panic timeout is set, wait once for a system reset to bring
+ * them in, then retry the timed wait a single time. @cpu is unused.
+ */
+static void crash_kexec_prepare_cpus(int cpu)
+{
+       unsigned int msecs;
+       unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
+       int tries = 0;
+       int (*old_handler)(struct pt_regs *regs);
+
+       printk(KERN_EMERG "Sending IPI to other CPUs\n");
+
+       /* offline CPUs are expected to check in as well */
+       if (crash_wake_offline)
+               ncpus = num_present_cpus() - 1;
+
+       crash_send_ipi(crash_ipi_callback);
+       smp_wmb();
+
+again:
+       /*
+        * FIXME: Until we will have the way to stop other CPUs reliably,
+        * the crash CPU will send an IPI and wait for other CPUs to
+        * respond.
+        */
+       msecs = IPI_TIMEOUT;
+       while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
+               mdelay(1);
+
+       /* Would it be better to replace the trap vector here? */
+
+       if (atomic_read(&cpus_in_crash) >= ncpus) {
+               printk(KERN_EMERG "IPI complete\n");
+               return;
+       }
+
+       printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
+               ncpus - atomic_read(&cpus_in_crash));
+
+       /*
+        * If we have a panic timeout set then we can't wait indefinitely
+        * for someone to activate system reset. We also give up on the
+        * second time through if system reset fail to work.
+        */
+       if ((panic_timeout > 0) || (tries > 0))
+               return;
+
+       /*
+        * A system reset will cause all CPUs to take an 0x100 exception.
+        * The primary CPU returns here via setjmp, and the secondary
+        * CPUs reexecute the crash_kexec_secondary path.
+        */
+       old_handler = __debugger;
+       __debugger = handle_fault;
+       crash_shutdown_cpu = smp_processor_id();
+
+       /* direct return is 0; handle_fault()'s longjmp makes it return 1 */
+       if (setjmp(crash_shutdown_buf) == 0) {
+               printk(KERN_EMERG "Activate system reset (dumprestart) "
+                                 "to stop other cpu(s)\n");
+
+               /*
+                * A system reset will force all CPUs to execute the
+                * crash code again. We need to reset cpus_in_crash so we
+                * wait for everyone to do this.
+                */
+               atomic_set(&cpus_in_crash, 0);
+               smp_mb();
+
+               /* no timeout here: spins until every CPU has checked in */
+               while (atomic_read(&cpus_in_crash) < ncpus)
+                       cpu_relax();
+       }
+
+       /* disarm handle_fault() and restore the previous hook */
+       crash_shutdown_cpu = -1;
+       __debugger = old_handler;
+
+       tries++;
+       goto again;
+}
+
+/*
+ * This function will be called by secondary cpus.
+ */
+/*
+ * Wait up to SECONDARY_TIMEOUT ms for crashing_cpu to be set, then join
+ * the crash through crash_ipi_callback(). If the timeout expires first,
+ * interrupts are restored and the function returns.
+ */
+void crash_kexec_secondary(struct pt_regs *regs)
+{
+       unsigned long flags;
+       int msecs = SECONDARY_TIMEOUT;
+
+       local_irq_save(flags);
+
+       /* Wait for the primary crash CPU to signal its progress */
+       while (crashing_cpu < 0) {
+               if (--msecs < 0) {
+                       /* No response, kdump image may not have been loaded */
+                       local_irq_restore(flags);
+                       return;
+               }
+
+               mdelay(1);
+       }
+
+       crash_ipi_callback(regs);
+}
+
+#else  /* ! CONFIG_SMP */
+
+/*
+ * Non-SMP variant: on PPC64 release the secondaries; nothing is done on
+ * other configurations. @cpu is unused.
+ */
+static void crash_kexec_prepare_cpus(int cpu)
+{
+       /*
+        * move the secondaries to us so that we can copy
+        * the new kernel 0-0x100 safely
+        *
+        * do this if kexec in setup.c ?
+        */
+#ifdef CONFIG_PPC64
+       smp_release_cpus();
+#else
+       /* FIXME */
+#endif
+}
+
+/* Non-SMP stub: does nothing. */
+void crash_kexec_secondary(struct pt_regs *regs)
+{
+}
+#endif /* CONFIG_SMP */
+
+/* wait for all the CPUs to hit real mode but timeout if they don't come in */
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
+/*
+ * Poll each CPU other than @cpu until its paca kexec_state reaches
+ * KEXEC_STATE_REAL_MODE. REAL_MODE_TIMEOUT is a single budget of 1 ms
+ * polls shared by all CPUs; CPUs that are not possible or not online are
+ * not waited for.
+ */
+static void __maybe_unused crash_kexec_wait_realmode(int cpu)
+{
+       unsigned int msecs;
+       int i;
+
+       msecs = REAL_MODE_TIMEOUT;
+       for (i=0; i < nr_cpu_ids && msecs > 0; i++) {
+               if (i == cpu)
+                       continue;
+
+               while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
+                       /* compiler barrier: re-read kexec_state each pass */
+                       barrier();
+                       if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
+                               break;
+                       msecs--;
+                       mdelay(1);
+               }
+       }
+       mb();
+}
+#else
+/* Stub for builds without both CONFIG_SMP and CONFIG_PPC64. */
+static inline void crash_kexec_wait_realmode(int cpu) {}
+#endif /* CONFIG_SMP && CONFIG_PPC64 */
+
+/*
+ * Register a function to be called on shutdown.  Only use this if you
+ * can't reset your device in the second kernel.
+ */
+int crash_shutdown_register(crash_shutdown_t handler)
+{
+       unsigned int slot, rc;
+
+       spin_lock(&crash_handlers_lock);
+
+       /* Locate the first unused entry of the table. */
+       slot = 0;
+       while (slot < CRASH_HANDLER_MAX && crash_shutdown_handles[slot])
+               slot++;
+
+       if (slot < CRASH_HANDLER_MAX) {
+               /* Free entry found: store the handler there. */
+               crash_shutdown_handles[slot] = handler;
+               rc = 0;
+       } else {
+               /* Table is full: report it and return 1. */
+               printk(KERN_ERR "Crash shutdown handles full, "
+                      "not registered.\n");
+               rc = 1;
+       }
+
+       spin_unlock(&crash_handlers_lock);
+       return rc;
+}
+EXPORT_SYMBOL(crash_shutdown_register);
+
+/*
+ * Remove @handler from the crash shutdown table.
+ * Returns 0 on success, 1 if the handler was not registered.
+ */
+int crash_shutdown_unregister(crash_shutdown_t handler)
+{
+       unsigned int pos, rc;
+
+       spin_lock(&crash_handlers_lock);
+
+       /* Search the table for the handler. */
+       pos = 0;
+       while (pos < CRASH_HANDLER_MAX &&
+              crash_shutdown_handles[pos] != handler)
+               pos++;
+
+       if (pos < CRASH_HANDLER_MAX) {
+               /* Close the gap by moving the later entries down one slot. */
+               while (pos + 1 < CRASH_HANDLER_MAX) {
+                       crash_shutdown_handles[pos] =
+                               crash_shutdown_handles[pos + 1];
+                       pos++;
+               }
+               /*
+                * The last slot has been shifted down; clear it so that a
+                * new handler can be registered there.
+                */
+               crash_shutdown_handles[pos] = NULL;
+               rc = 0;
+       } else {
+               printk(KERN_ERR "Crash shutdown handle not found\n");
+               rc = 1;
+       }
+
+       spin_unlock(&crash_handlers_lock);
+       return rc;
+}
+EXPORT_SYMBOL(crash_shutdown_unregister);
+
+/*
+ * Crash-time shutdown on the crashing CPU: stop the other CPUs, save this
+ * CPU's state, release the parked CPUs, mask interrupts and run the
+ * registered crash shutdown handlers under setjmp protection.
+ */
+void default_machine_crash_shutdown(struct pt_regs *regs)
+{
+       unsigned int i;
+       int (*old_handler)(struct pt_regs *regs);
+
+       /*
+        * This function is only called after the system
+        * has panicked or is otherwise in a critical state.
+        * The minimum amount of code to allow a kexec'd kernel
+        * to run successfully needs to happen here.
+        *
+        * In practice this means stopping other cpus in
+        * an SMP system.
+        * The kernel is broken so disable interrupts.
+        */
+       hard_irq_disable();
+
+       /*
+        * Make a note of crashing cpu. Will be used in machine_kexec
+        * such that another IPI will not be sent.
+        */
+       crashing_cpu = smp_processor_id();
+
+       /*
+        * If we came in via system reset, wait a while for the secondary
+        * CPUs to enter.
+        */
+       if (TRAP(regs) == 0x100)
+               mdelay(PRIMARY_TIMEOUT);
+
+       crash_kexec_prepare_cpus(crashing_cpu);
+
+       crash_save_cpu(regs, crashing_cpu);
+
+       /* lets the CPUs spinning in crash_ipi_callback() proceed */
+       time_to_dump = 1;
+
+       crash_kexec_wait_realmode(crashing_cpu);
+
+       machine_kexec_mask_interrupts();
+
+       /*
+        * Call registered shutdown routines safely.  Swap out
+        * __debugger_fault_handler, and replace on exit.
+        */
+       old_handler = __debugger_fault_handler;
+       __debugger_fault_handler = handle_fault;
+       crash_shutdown_cpu = smp_processor_id();
+       /* the table is packed: stop at the first empty entry */
+       for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) {
+               /* a longjmp from handle_fault() skips to the next handler */
+               if (setjmp(crash_shutdown_buf) == 0) {
+                       /*
+                        * Insert syncs and delay to ensure
+                        * instructions in the dangerous region don't
+                        * leak away from this protected region.
+                        */
+                       asm volatile("sync; isync");
+                       /* dangerous region */
+                       crash_shutdown_handles[i]();
+                       asm volatile("sync; isync");
+               }
+       }
+       crash_shutdown_cpu = -1;
+       __debugger_fault_handler = old_handler;
+
+       if (ppc_md.kexec_cpu_down)
+               ppc_md.kexec_cpu_down(1, 0);
+}
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
new file mode 100644 (file)
index 0000000..3072fd6
--- /dev/null
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Load ELF vmlinux file for the kexec_file_load syscall.
+ *
+ * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004  IBM Corp.
+ * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016  IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ */
+
+#define pr_fmt(fmt)    "kexec_elf: " fmt
+
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <linux/libfdt.h>
+#include <linux/module.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+/*
+ * Load an ELF kernel for kexec_file_load: place the kernel, the purgatory,
+ * the optional initrd and a freshly built device tree into @image, then
+ * set up the purgatory.
+ *
+ * Returns the kmalloc'ed device tree buffer on success (ownership passes
+ * to the caller) or an ERR_PTR() on failure, in which case the buffer is
+ * freed here.
+ */
+static void *elf64_load(struct kimage *image, char *kernel_buf,
+                       unsigned long kernel_len, char *initrd,
+                       unsigned long initrd_len, char *cmdline,
+                       unsigned long cmdline_len)
+{
+       int ret;
+       unsigned int fdt_size;
+       unsigned long kernel_load_addr;
+       unsigned long initrd_load_addr = 0, fdt_load_addr;
+       void *fdt = NULL;       /* stays NULL until allocated, for cleanup */
+       const void *slave_code;
+       struct elfhdr ehdr;
+       struct kexec_elf_info elf_info;
+       struct kexec_buf kbuf = { .image = image, .buf_min = 0,
+                                 .buf_max = ppc64_rma_size };
+       struct kexec_buf pbuf = { .image = image, .buf_min = 0,
+                                 .buf_max = ppc64_rma_size, .top_down = true,
+                                 .mem = KEXEC_BUF_MEM_UNKNOWN };
+
+       ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+       if (ret)
+               goto out;
+
+       ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
+       if (ret)
+               goto out;
+
+       pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr);
+
+       ret = kexec_load_purgatory(image, &pbuf);
+       if (ret) {
+               pr_err("Loading purgatory failed.\n");
+               goto out;
+       }
+
+       pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
+
+       if (initrd != NULL) {
+               kbuf.buffer = initrd;
+               kbuf.bufsz = kbuf.memsz = initrd_len;
+               kbuf.buf_align = PAGE_SIZE;
+               kbuf.top_down = false;
+               kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+               ret = kexec_add_buffer(&kbuf);
+               if (ret)
+                       goto out;
+               initrd_load_addr = kbuf.mem;
+
+               pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr);
+       }
+
+       /* twice the current tree's size leaves room for the additions */
+       fdt_size = fdt_totalsize(initial_boot_params) * 2;
+       fdt = kmalloc(fdt_size, GFP_KERNEL);
+       if (!fdt) {
+               pr_err("Not enough memory for the device tree.\n");
+               ret = -ENOMEM;
+               goto out;
+       }
+       ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
+       if (ret < 0) {
+               pr_err("Error setting up the new device tree.\n");
+               ret = -EINVAL;
+               goto out;
+       }
+
+       ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
+       if (ret)
+               goto out;
+
+       fdt_pack(fdt);
+
+       kbuf.buffer = fdt;
+       kbuf.bufsz = kbuf.memsz = fdt_size;
+       kbuf.buf_align = PAGE_SIZE;
+       kbuf.top_down = true;
+       kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+       ret = kexec_add_buffer(&kbuf);
+       if (ret)
+               goto out;
+       fdt_load_addr = kbuf.mem;
+
+       pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
+
+       slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
+       ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
+                             fdt_load_addr);
+       if (ret)
+               pr_err("Error setting up the purgatory.\n");
+
+out:
+       kexec_free_elf_info(&elf_info);
+
+       /*
+        * On failure nobody else owns the fdt buffer, so release it here;
+        * kfree(NULL) is a no-op for the paths that fail before allocation.
+        */
+       if (ret) {
+               kfree(fdt);
+               return ERR_PTR(ret);
+       }
+
+       /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
+       return fdt;
+}
+
+/* kexec_file operations for ELF64 images: generic ELF probe, elf64_load() loader. */
+const struct kexec_file_ops kexec_elf64_ops = {
+       .probe = kexec_elf_probe,
+       .load = elf64_load,
+};
diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c
new file mode 100644 (file)
index 0000000..143c917
--- /dev/null
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ppc64 code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004  IBM Corp.
+ * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016  IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ */
+
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <asm/ima.h>
+
+#define SLAVE_CODE_SIZE                256
+
+/*
+ * NULL-terminated table of image loaders; the ELF64 loader is the only
+ * entry.
+ */
+const struct kexec_file_ops * const kexec_file_loaders[] = {
+       &kexec_elf64_ops,
+       NULL
+};
+
+/*
+ * Probe hook for kexec_file_load images: crash-type images are refused with
+ * -EOPNOTSUPP, every other image type is handed to the default probe.
+ */
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+                                 unsigned long buf_len)
+{
+       if (image->type != KEXEC_TYPE_CRASH)
+               return kexec_image_probe_default(image, buf, buf_len);
+
+       /* We don't support crash kernels yet. */
+       return -EOPNOTSUPP;
+}
+
+/**
+ * setup_purgatory - initialize the purgatory's global variables
+ * @image:             kexec image.
+ * @slave_code:                Slave code for the purgatory.
+ * @fdt:               Flattened device tree for the next kernel (not used
+ *                     by this function).
+ * @kernel_load_addr:  Address where the kernel is loaded.
+ * @fdt_load_addr:     Address where the flattened device tree is loaded.
+ *
+ * Overwrites the first SLAVE_CODE_SIZE bytes at the purgatory's
+ * "purgatory_start" symbol with @slave_code while preserving the
+ * purgatory's own first word, then stores the kernel and device tree load
+ * addresses in the purgatory's "kernel" and "dt_offset" symbols.
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int setup_purgatory(struct kimage *image, const void *slave_code,
+                   const void *fdt, unsigned long kernel_load_addr,
+                   unsigned long fdt_load_addr)
+{
+       unsigned int *slave_code_buf, master_entry;
+       int ret;
+
+       slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL);
+       if (!slave_code_buf)
+               return -ENOMEM;
+
+       /* Get the slave code from the new kernel and put it in purgatory. */
+       ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
+                                            slave_code_buf, SLAVE_CODE_SIZE,
+                                            true);
+       if (ret) {
+               kfree(slave_code_buf);
+               return ret;
+       }
+
+       /* Keep the purgatory's first word; replace everything after it. */
+       master_entry = slave_code_buf[0];
+       memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE);
+       slave_code_buf[0] = master_entry;
+       ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
+                                            slave_code_buf, SLAVE_CODE_SIZE,
+                                            false);
+       kfree(slave_code_buf);
+       /* A failed write-back must not be hidden by the calls below. */
+       if (ret)
+               return ret;
+
+       ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr,
+                                            sizeof(kernel_load_addr), false);
+       if (ret)
+               return ret;
+
+       return kexec_purgatory_get_set_symbol(image, "dt_offset",
+                                             &fdt_load_addr,
+                                             sizeof(fdt_load_addr), false);
+}
+
+/**
+ * delete_fdt_mem_rsv - delete memory reservation with given address and size
+ * @fdt:       Flattened device tree to modify.
+ * @start:     Start address of the reservation to remove.
+ * @size:      Size of the reservation to remove.
+ *
+ * Only an entry whose start and size both match exactly is removed, and the
+ * search stops at the first such entry.
+ *
+ * Return: 0 on success, -ENOENT if no entry matches, or -EINVAL if an entry
+ * cannot be read or deleted.
+ */
+int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size)
+{
+       int idx, count = fdt_num_mem_rsv(fdt);
+
+       for (idx = 0; idx < count; idx++) {
+               uint64_t base, len;
+
+               if (fdt_get_mem_rsv(fdt, idx, &base, &len)) {
+                       pr_err("Malformed device tree.\n");
+                       return -EINVAL;
+               }
+
+               if (base != start || len != size)
+                       continue;
+
+               if (fdt_del_mem_rsv(fdt, idx)) {
+                       pr_err("Error deleting device tree reservation.\n");
+                       return -EINVAL;
+               }
+
+               return 0;
+       }
+
+       return -ENOENT;
+}
+
+/*
+ * read_initrd_addr - decode a linux,initrd-start/-end property value
+ * @prop:      Property data as returned by fdt_getprop().
+ * @len:       Length of @prop in bytes.
+ * @val:       Where to store the decoded address.
+ *
+ * The value may be encoded in one or two 32-bit cells, so decode according
+ * to the property's real length instead of always reading 8 bytes.
+ *
+ * Return: 0 on success, -EINVAL if the length is neither 4 nor 8 bytes.
+ */
+static int read_initrd_addr(const void *prop, int len, uint64_t *val)
+{
+       if (len == sizeof(fdt64_t))
+               *val = fdt64_to_cpu(*((const fdt64_t *) prop));
+       else if (len == sizeof(fdt32_t))
+               *val = fdt32_to_cpu(*((const fdt32_t *) prop));
+       else
+               return -EINVAL;
+
+       return 0;
+}
+
+/**
+ * setup_new_fdt - modify /chosen and memory reservation for the next kernel
+ * @image:             kexec image being loaded.
+ * @fdt:               Flattened device tree for the next kernel.
+ * @initrd_load_addr:  Address where the next initrd will be loaded.
+ * @initrd_len:                Size of the next initrd, or 0 if there will be none.
+ * @cmdline:           Command line for the next kernel, or NULL if there will
+ *                     be none.
+ *
+ * Drops the reservations of the current device tree and of the initrd the
+ * running kernel was booted with, then records the new initrd, command line
+ * and IMA buffer in /chosen (creating the node if needed) and finally tags
+ * the tree with "linux,booted-from-kexec".
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int setup_new_fdt(const struct kimage *image, void *fdt,
+                 unsigned long initrd_load_addr, unsigned long initrd_len,
+                 const char *cmdline)
+{
+       int ret, chosen_node, len;
+       const void *prop;
+
+       /* Remove memory reservation for the current device tree. */
+       ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params),
+                                fdt_totalsize(initial_boot_params));
+       if (ret == 0)
+               pr_debug("Removed old device tree reservation.\n");
+       else if (ret != -ENOENT)
+               return ret;
+
+       chosen_node = fdt_path_offset(fdt, "/chosen");
+       if (chosen_node == -FDT_ERR_NOTFOUND) {
+               chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
+                                             "chosen");
+               if (chosen_node < 0) {
+                       pr_err("Error creating /chosen.\n");
+                       return -EINVAL;
+               }
+       } else if (chosen_node < 0) {
+               pr_err("Malformed device tree: error reading /chosen.\n");
+               return -EINVAL;
+       }
+
+       /* Did we boot using an initrd? */
+       prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", &len);
+       if (prop) {
+               uint64_t tmp_start, tmp_end, tmp_size;
+
+               if (read_initrd_addr(prop, len, &tmp_start)) {
+                       pr_err("Malformed device tree.\n");
+                       return -EINVAL;
+               }
+
+               prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", &len);
+               if (!prop || read_initrd_addr(prop, len, &tmp_end)) {
+                       pr_err("Malformed device tree.\n");
+                       return -EINVAL;
+               }
+
+               /*
+                * kexec reserves exact initrd size, while firmware may
+                * reserve a multiple of PAGE_SIZE, so check for both.
+                */
+               tmp_size = tmp_end - tmp_start;
+               ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size);
+               if (ret == -ENOENT)
+                       ret = delete_fdt_mem_rsv(fdt, tmp_start,
+                                                round_up(tmp_size, PAGE_SIZE));
+               if (ret == 0)
+                       pr_debug("Removed old initrd reservation.\n");
+               else if (ret != -ENOENT)
+                       return ret;
+
+               /* If there's no new initrd, delete the old initrd's info. */
+               if (initrd_len == 0) {
+                       ret = fdt_delprop(fdt, chosen_node,
+                                         "linux,initrd-start");
+                       if (ret) {
+                               pr_err("Error deleting linux,initrd-start.\n");
+                               return -EINVAL;
+                       }
+
+                       ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end");
+                       if (ret) {
+                               pr_err("Error deleting linux,initrd-end.\n");
+                               return -EINVAL;
+                       }
+               }
+       }
+
+       if (initrd_len) {
+               ret = fdt_setprop_u64(fdt, chosen_node,
+                                     "linux,initrd-start",
+                                     initrd_load_addr);
+               if (ret < 0)
+                       goto err;
+
+               /* initrd-end is the first address after the initrd image. */
+               ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end",
+                                     initrd_load_addr + initrd_len);
+               if (ret < 0)
+                       goto err;
+
+               ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len);
+               if (ret) {
+                       pr_err("Error reserving initrd memory: %s\n",
+                              fdt_strerror(ret));
+                       return -EINVAL;
+               }
+       }
+
+       if (cmdline != NULL) {
+               ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline);
+               if (ret < 0)
+                       goto err;
+       } else {
+               /* A missing bootargs property is fine; anything else is not. */
+               ret = fdt_delprop(fdt, chosen_node, "bootargs");
+               if (ret && ret != -FDT_ERR_NOTFOUND) {
+                       pr_err("Error deleting bootargs.\n");
+                       return -EINVAL;
+               }
+       }
+
+       ret = setup_ima_buffer(image, fdt, chosen_node);
+       if (ret) {
+               pr_err("Error setting up the new device tree.\n");
+               return ret;
+       }
+
+       /* Empty property: only its presence is recorded. */
+       ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0);
+       if (ret)
+               goto err;
+
+       return 0;
+
+err:
+       pr_err("Error setting up the new device tree.\n");
+       return -EINVAL;
+}
diff --git a/arch/powerpc/kexec/ima.c b/arch/powerpc/kexec/ima.c
new file mode 100644 (file)
index 0000000..720e50e
--- /dev/null
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 IBM Corporation
+ *
+ * Authors:
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
+ */
+
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/of.h>
+#include <linux/memblock.h>
+#include <linux/libfdt.h>
+
+/*
+ * Look up the address and size cell counts of the live device tree's root
+ * node. Returns 0 on success or -EINVAL when the root node cannot be found.
+ */
+static int get_addr_size_cells(int *addr_cells, int *size_cells)
+{
+       struct device_node *root = of_find_node_by_path("/");
+
+       if (!root)
+               return -EINVAL;
+
+       *addr_cells = of_n_addr_cells(root);
+       *size_cells = of_n_size_cells(root);
+
+       /* Drop the reference taken by the lookup. */
+       of_node_put(root);
+
+       return 0;
+}
+
+/*
+ * Decode an <address size> pair from @prop (@len bytes long) using the root
+ * node's cell counts. Returns 0 on success, -ENOENT when the property is too
+ * short for one pair, or the error from get_addr_size_cells().
+ */
+static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr,
+                              size_t *size)
+{
+       int n_addr, n_size;
+       int err = get_addr_size_cells(&n_addr, &n_size);
+
+       if (err)
+               return err;
+
+       /* Each cell is 4 bytes wide. */
+       if (len < 4 * (n_addr + n_size))
+               return -ENOENT;
+
+       *addr = of_read_number(prop, n_addr);
+       *size = of_read_number(prop + 4 * n_addr, n_size);
+
+       return 0;
+}
+
+/**
+ * ima_get_kexec_buffer - get IMA buffer from the previous kernel
+ * @addr:      On successful return, set to point to the buffer contents.
+ * @size:      On successful return, set to the buffer size.
+ *
+ * The location is taken from the "linux,ima-kexec-buffer" property of
+ * /chosen; @addr and @size are left untouched on failure.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int ima_get_kexec_buffer(void **addr, size_t *size)
+{
+       const void *prop;
+       unsigned long phys;
+       size_t bytes;
+       int len, err;
+
+       prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len);
+       if (!prop)
+               return -ENOENT;
+
+       err = do_get_kexec_buffer(prop, len, &phys, &bytes);
+       if (!err) {
+               *addr = __va(phys);
+               *size = bytes;
+       }
+
+       return err;
+}
+
+/**
+ * ima_free_kexec_buffer - free memory used by the IMA buffer
+ *
+ * Removes the "linux,ima-kexec-buffer" property from /chosen and then hands
+ * the range it described to memblock_free().
+ *
+ * Return: -ENOENT if the property does not exist, the error from decoding
+ * or removing the property, otherwise the result of memblock_free().
+ */
+int ima_free_kexec_buffer(void)
+{
+       struct property *prop;
+       unsigned long addr;
+       size_t size;
+       int ret;
+
+       prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL);
+       if (!prop)
+               return -ENOENT;
+
+       /* Decode the range before the property is removed. */
+       ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size);
+       if (!ret)
+               ret = of_remove_property(of_chosen, prop);
+       if (ret)
+               return ret;
+
+       return memblock_free(addr, size);
+}
+
+/**
+ * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt
+ * @fdt:               Flattened device tree to modify.
+ * @chosen_node:       Offset to the chosen node.
+ *
+ * The IMA measurement buffer is of no use to a subsequent kernel, so we always
+ * remove it from the device tree. The property is deleted even when its
+ * contents cannot be decoded; in that case no reservation is touched.
+ */
+void remove_ima_buffer(void *fdt, int chosen_node)
+{
+       const void *prop;
+       unsigned long addr;
+       size_t size;
+       int len, parse_err;
+
+       prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len);
+       if (!prop)
+               return;
+
+       /* Read the property contents before deleting the property. */
+       parse_err = do_get_kexec_buffer(prop, len, &addr, &size);
+       fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer");
+       if (parse_err)
+               return;
+
+       if (delete_fdt_mem_rsv(fdt, addr, size) == 0)
+               pr_debug("Removed old IMA buffer reservation.\n");
+}
+
+#ifdef CONFIG_IMA_KEXEC
+/**
+ * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer
+ * @image:     kexec image being loaded.
+ * @load_addr: Address where the IMA buffer is loaded.
+ * @size:      Size of the IMA buffer.
+ *
+ * Architectures should use this function to pass on the IMA buffer
+ * information to the next kernel. Here the location is only recorded in
+ * @image->arch.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
+                             size_t size)
+{
+       struct kimage_arch *arch = &image->arch;
+
+       arch->ima_buffer_addr = load_addr;
+       arch->ima_buffer_size = size;
+
+       return 0;
+}
+
+/*
+ * Store @value big-endian at @p using @cells 32-bit cells. Only one or two
+ * cells are supported; returns -EINVAL for any other count or when @value
+ * does not fit in a single cell, 0 otherwise.
+ */
+static int write_number(void *p, u64 value, int cells)
+{
+       u32 be32;
+       u64 be64;
+
+       switch (cells) {
+       case 1:
+               if (value > U32_MAX)
+                       return -EINVAL;
+
+               be32 = cpu_to_be32(value);
+               memcpy(p, &be32, sizeof(be32));
+               return 0;
+       case 2:
+               be64 = cpu_to_be64(value);
+               memcpy(p, &be64, sizeof(be64));
+               return 0;
+       default:
+               return -EINVAL;
+       }
+}
+
+/**
+ * setup_ima_buffer - add IMA buffer information to the fdt
+ * @image:             kexec image being loaded.
+ * @fdt:               Flattened device tree for the next kernel.
+ * @chosen_node:       Offset to the chosen node.
+ *
+ * Any IMA buffer information already present in @fdt is removed first. When
+ * @image carries no IMA buffer (size 0) nothing else is done.
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node)
+{
+       u8 cells_buf[16];
+       int n_addr, n_size, prop_len, err;
+
+       remove_ima_buffer(fdt, chosen_node);
+       if (image->arch.ima_buffer_size == 0)
+               return 0;
+
+       err = get_addr_size_cells(&n_addr, &n_size);
+       if (err)
+               return err;
+
+       /* One <address size> pair, 4 bytes per cell. */
+       prop_len = 4 * (n_addr + n_size);
+       if (prop_len > sizeof(cells_buf))
+               return -EINVAL;
+
+       err = write_number(cells_buf, image->arch.ima_buffer_addr, n_addr);
+       if (!err)
+               err = write_number(cells_buf + 4 * n_addr,
+                                  image->arch.ima_buffer_size, n_size);
+       if (err)
+               return err;
+
+       if (fdt_setprop(fdt, chosen_node, "linux,ima-kexec-buffer", cells_buf,
+                       prop_len) < 0)
+               return -EINVAL;
+
+       if (fdt_add_mem_rsv(fdt, image->arch.ima_buffer_addr,
+                           image->arch.ima_buffer_size))
+               return -EINVAL;
+
+       pr_debug("IMA buffer at 0x%llx, size = 0x%zx\n",
+                image->arch.ima_buffer_addr, image->arch.ima_buffer_size);
+
+       return 0;
+}
+#endif /* CONFIG_IMA_KEXEC */
diff --git a/arch/powerpc/kexec/relocate_32.S b/arch/powerpc/kexec/relocate_32.S
new file mode 100644 (file)
index 0000000..61946c1
--- /dev/null
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains kexec low-level functions.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * PPC44x port. Copyright (C) 2011,  IBM Corporation
+ *             Author: Suzuki Poulose <suzuki@in.ibm.com>
+ */
+
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/kexec.h>
+
+       .text
+
+       /*
+        * Must be relocatable PIC code callable as a C function.
+        *
+        * One of three prologues is selected at build time:
+        *  - CONFIG_FSL_BOOKE: the included entry-mapping code is used,
+        *  - CONFIG_44x: TLB entries for a 1:1 mapping are built by hand,
+        *  - otherwise: the MMU is switched off with an rfi.
+        * Every prologue leaves r0 = 0 and the arguments in r3-r5.
+        */
+       .globl relocate_new_kernel
+relocate_new_kernel:
+       /* r3 = page_list   */
+       /* r4 = reboot_code_buffer */
+       /* r5 = start_address      */
+
+#ifdef CONFIG_FSL_BOOKE
+
+       /*
+        * Park the arguments in r29-r31 around the included code
+        * (presumably it uses r3-r5; it is not visible from here).
+        */
+       mr      r29, r3
+       mr      r30, r4
+       mr      r31, r5
+
+#define ENTRY_MAPPING_KEXEC_SETUP
+#include <kernel/fsl_booke_entry_mapping.S>
+#undef ENTRY_MAPPING_KEXEC_SETUP
+
+       mr      r3, r29
+       mr      r4, r30
+       mr      r5, r31
+
+       li      r0, 0
+#elif defined(CONFIG_44x)
+
+       /* Save our parameters */
+       mr      r29, r3
+       mr      r30, r4
+       mr      r31, r5
+
+#ifdef CONFIG_PPC_47x
+       /* Check for 47x cores */
+       mfspr   r3,SPRN_PVR
+       srwi    r3,r3,16                /* compare on the upper halfword only */
+       cmplwi  cr0,r3,PVR_476FPE@h
+       beq     setup_map_47x
+       cmplwi  cr0,r3,PVR_476@h
+       beq     setup_map_47x
+       cmplwi  cr0,r3,PVR_476_ISS@h
+       beq     setup_map_47x
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * Code for setting up 1:1 mapping for PPC440x for KEXEC
+ *
+ * We cannot switch off the MMU on PPC44x.
+ * So we:
+ * 1) Invalidate all the mappings except the one we are running from.
+ * 2) Create a tmp mapping for our code in the other address space(TS) and
+ *    jump to it. Invalidate the entry we started in.
+ * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS.
+ * 4) Jump to the 1:1 mapping in original TS.
+ * 5) Invalidate the tmp mapping.
+ *
+ * - Based on the kexec support code for FSL BookE
+ *
+ * Registers that stay live across the steps:
+ *   r23     = TLB index of the entry we started in
+ *   r24     = TLB index of the tmp mapping (1 or 2)
+ *   r25     = XLAT word of the entry we started in
+ *   r7      = address space (TS) the next entries are written for
+ *   r10/r11 = page-number mask / page-offset mask of the current page
+ *
+ */
+
+       /*
+        * Load the PID with kernel PID (0).
+        * Also load our MSR_IS and TID to MMUCR for TLB search.
+        */
+       li      r3, 0
+       mtspr   SPRN_PID, r3
+       mfmsr   r4
+       andi.   r4,r4,MSR_IS@l
+       beq     wmmucr
+       oris    r3,r3,PPC44x_MMUCR_STS@h
+wmmucr:
+       mtspr   SPRN_MMUCR,r3
+       sync
+
+       /*
+        * Invalidate all the TLB entries except the current entry
+        * where we are running from
+        */
+       bl      0f                              /* Find our address */
+0:     mflr    r5                              /* Make it accessible */
+       tlbsx   r23,0,r5                        /* Find entry we are in */
+       li      r4,0                            /* Start at TLB entry 0 */
+       li      r3,0                            /* Set PAGEID inval value */
+1:     cmpw    r23,r4                          /* Is this our entry? */
+       beq     skip                            /* If so, skip the inval */
+       tlbwe   r3,r4,PPC44x_TLB_PAGEID         /* If not, inval the entry */
+skip:
+       addi    r4,r4,1                         /* Increment */
+       cmpwi   r4,64                           /* Are we done? */
+       bne     1b                              /* If not, repeat */
+       isync
+
+       /* Create a temp mapping and jump to it */
+       /*
+        * r24 = (r23 & 1) + 1, so its parity always differs from r23's
+        * and the tmp entry can never be the entry we are running from.
+        */
+       andi.   r6, r23, 1              /* Find the index to use */
+       addi    r24, r6, 1              /* r24 will contain 1 or 2 */
+
+       mfmsr   r9                      /* get the MSR */
+       rlwinm  r5, r9, 27, 31, 31      /* Extract the MSR[IS] */
+       xori    r7, r5, 1               /* Use the other address space */
+
+       /* Read the current mapping entries */
+       tlbre   r3, r23, PPC44x_TLB_PAGEID
+       tlbre   r4, r23, PPC44x_TLB_XLAT
+       tlbre   r5, r23, PPC44x_TLB_ATTRIB
+
+       /* Save our current XLAT entry */
+       mr      r25, r4
+
+       /* Extract the TLB PageSize */
+       li      r10, 1                  /* r10 will hold PageSize */
+       rlwinm  r11, r3, 0, 24, 27      /* bits 24-27 */
+
+       /* XXX: As of now we use 256M, 4K pages */
+       cmpwi   r11, PPC44x_TLB_256M
+       bne     tlb_4k
+       rotlwi  r10, r10, 28            /* r10 = 256M */
+       b       write_out
+tlb_4k:
+       cmpwi   r11, PPC44x_TLB_4K
+       bne     default
+       rotlwi  r10, r10, 12            /* r10 = 4K */
+       b       write_out
+default:
+       /* Any size other than 256M or 4K is treated as 1K. */
+       rotlwi  r10, r10, 10            /* r10 = 1K */
+
+write_out:
+       /*
+        * Write out the tmp 1:1 mapping for this code in other address space
+        * Fixup  EPN = RPN , TS=other address space
+        */
+       insrwi  r3, r7, 1, 23           /* Bit 23 is TS for PAGEID field */
+
+       /* Write out the tmp mapping entries */
+       tlbwe   r3, r24, PPC44x_TLB_PAGEID
+       tlbwe   r4, r24, PPC44x_TLB_XLAT
+       tlbwe   r5, r24, PPC44x_TLB_ATTRIB
+
+       subi    r11, r10, 1             /* PageOffset Mask = PageSize - 1 */
+       not     r10, r11                /* Mask for PageNum */
+
+       /* Switch to other address space in MSR */
+       insrwi  r9, r7, 1, 26           /* Set MSR[IS] = r7 */
+
+       bl      1f
+1:     mflr    r8
+       addi    r8, r8, (2f-1b)         /* Find the target offset */
+
+       /* Jump to the tmp mapping */
+       mtspr   SPRN_SRR0, r8
+       mtspr   SPRN_SRR1, r9
+       rfi
+
+2:
+       /* Invalidate the entry we were executing from */
+       li      r3, 0
+       tlbwe   r3, r23, PPC44x_TLB_PAGEID
+
+       /* attribute fields. rwx for SUPERVISOR mode */
+       li      r5, 0
+       ori     r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+
+       /* Create 1:1 mapping in 256M pages */
+       xori    r7, r7, 1                       /* Revert back to Original TS */
+
+       li      r8, 0                           /* PageNumber */
+       li      r6, 3                           /* TLB Index, start at 3  */
+
+       /* Eight iterations: TLB entries 3..10 map pages 0..7 (8 x 256M). */
+next_tlb:
+       rotlwi  r3, r8, 28                      /* Create EPN (bits 0-3) */
+       mr      r4, r3                          /* RPN = EPN  */
+       ori     r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */
+       insrwi  r3, r7, 1, 23                   /* Set TS from r7 */
+
+       tlbwe   r3, r6, PPC44x_TLB_PAGEID       /* PageID field : EPN, V, SIZE */
+       tlbwe   r4, r6, PPC44x_TLB_XLAT         /* Address translation : RPN   */
+       tlbwe   r5, r6, PPC44x_TLB_ATTRIB       /* Attributes */
+
+       addi    r8, r8, 1                       /* Increment PN */
+       addi    r6, r6, 1                       /* Increment TLB Index */
+       cmpwi   r8, 8                           /* Are we done ? */
+       bne     next_tlb
+       isync
+
+       /* Jump to the new mapping 1:1 */
+       li      r9,0
+       insrwi  r9, r7, 1, 26                   /* Set MSR[IS] = r7 */
+
+       bl      1f
+1:     mflr    r8
+       and     r8, r8, r11                     /* Get our offset within page */
+       addi    r8, r8, (2f-1b)
+
+       and     r5, r25, r10                    /* Get our target PageNum */
+       or      r8, r8, r5                      /* Target jump address */
+
+       mtspr   SPRN_SRR0, r8
+       mtspr   SPRN_SRR1, r9
+       rfi
+2:
+       /* Invalidate the tmp entry we used */
+       li      r3, 0
+       tlbwe   r3, r24, PPC44x_TLB_PAGEID
+       sync
+       b       ppc44x_map_done
+
+#ifdef CONFIG_PPC_47x
+
+       /* 1:1 mapping for 47x */
+
+       /*
+        * Registers that stay live across this path:
+        *   r23     = result of the tlbsx lookup for our own address
+        *   r24-r26 = TLB words 0-2 of the entry we started in
+        *   r7      = address space (TS) the next entries are written for
+        */
+
+setup_map_47x:
+
+       /*
+        * Load the kernel pid (0) to PID and also to MMUCR[TID].
+        * Also set the MSR IS->MMUCR STS
+        */
+       li      r3, 0
+       mtspr   SPRN_PID, r3                    /* Set PID */
+       mfmsr   r4                              /* Get MSR */
+       andi.   r4, r4, MSR_IS@l                /* TS=1? */
+       beq     1f                              /* If not, leave STS=0 */
+       oris    r3, r3, PPC47x_MMUCR_STS@h      /* Set STS=1 */
+1:     mtspr   SPRN_MMUCR, r3                  /* Put MMUCR */
+       sync
+
+       /* Find the entry we are running from */
+       bl      2f
+2:     mflr    r23
+       tlbsx   r23, 0, r23
+       tlbre   r24, r23, 0                     /* TLB Word 0 */
+       tlbre   r25, r23, 1                     /* TLB Word 1 */
+       tlbre   r26, r23, 2                     /* TLB Word 2 */
+
+
+       /*
+        * Invalidates all the tlb entries by writing to 256 RPNs(r4)
+        * of 4k page size in all  4 ways (0-3 in r3).
+        * This would invalidate the entire UTLB including the one we are
+        * running from. However the shadow TLB entries would help us
+        * to continue the execution, until we flush them (rfi/isync).
+        */
+       addis   r3, 0, 0x8000                   /* specify the way */
+       addi    r4, 0, 0                        /* TLB Word0 = (EPN=0, VALID = 0) */
+       addi    r5, 0, 0
+       b       clear_utlb_entry
+
+       /* Align the loop to speed things up. from head_44x.S */
+       .align  6
+
+       /*
+        * Inner loop: r3 steps by 0x20000000 from 0x80000000 until it wraps
+        * to 0 (four ways). Outer loop: r4 steps by 0x01000000 until it
+        * wraps to 0 (256 values).
+        */
+clear_utlb_entry:
+
+       tlbwe   r4, r3, 0
+       tlbwe   r5, r3, 1
+       tlbwe   r5, r3, 2
+       addis   r3, r3, 0x2000                  /* Increment the way */
+       cmpwi   r3, 0
+       bne     clear_utlb_entry
+       addis   r3, 0, 0x8000
+       addis   r4, r4, 0x100                   /* Increment the EPN */
+       cmpwi   r4, 0
+       bne     clear_utlb_entry
+
+       /* Create the entries in the other address space */
+       mfmsr   r5
+       rlwinm  r7, r5, 27, 31, 31              /* Get the TS (Bit 26) from MSR */
+       xori    r7, r7, 1                       /* r7 = !TS */
+
+       insrwi  r24, r7, 1, 21                  /* Change the TS in the saved TLB word 0 */
+
+       /*
+        * write out the TLB entries for the tmp mapping
+        * Use way '0' so that we could easily invalidate it later.
+        */
+       lis     r3, 0x8000                      /* Way '0' */
+
+       tlbwe   r24, r3, 0
+       tlbwe   r25, r3, 1
+       tlbwe   r26, r3, 2
+
+       /* Update the msr to the new TS */
+       insrwi  r5, r7, 1, 26
+
+       bl      1f
+1:     mflr    r6
+       addi    r6, r6, (2f-1b)
+
+       mtspr   SPRN_SRR0, r6
+       mtspr   SPRN_SRR1, r5
+       rfi
+
+       /*
+        * Now we are in the tmp address space.
+        * Create a 1:1 mapping for 0-2GiB in the original TS.
+        */
+2:
+       li      r3, 0
+       li      r4, 0                           /* TLB Word 0 */
+       li      r5, 0                           /* TLB Word 1 */
+       li      r6, 0
+       ori     r6, r6, PPC47x_TLB2_S_RWX       /* TLB word 2 */
+
+       li      r8, 0                           /* PageIndex */
+
+       xori    r7, r7, 1                       /* revert back to original TS */
+
+       /* Eight iterations, one 256M page each (PageIndex 0..7). */
+write_utlb:
+       rotlwi  r5, r8, 28                      /* RPN = PageIndex * 256M */
+                                               /* ERPN = 0 as we don't use memory above 2G */
+
+       mr      r4, r5                          /* EPN = RPN */
+       ori     r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M)
+       insrwi  r4, r7, 1, 21                   /* Insert the TS to Word 0 */
+
+       tlbwe   r4, r3, 0                       /* Write out the entries */
+       tlbwe   r5, r3, 1
+       tlbwe   r6, r3, 2
+       addi    r8, r8, 1
+       cmpwi   r8, 8                           /* Have we completed ? */
+       bne     write_utlb
+
+       /* make sure we complete the TLB write up */
+       isync
+
+       /*
+        * Prepare to jump to the 1:1 mapping.
+        * 1) Extract page size of the tmp mapping
+        *    DSIZ = TLB_Word0[22:27]
+        * 2) Calculate the physical address of the address
+        *    to jump to.
+        */
+       rlwinm  r10, r24, 0, 22, 27
+
+       cmpwi   r10, PPC47x_TLB0_4K
+       bne     0f
+       li      r10, 0x1000                     /* r10 = 4k */
+       /*
+        * Use a plain branch here: a 'bl' would set LR to the address of
+        * 0: instead of 1:, so the (2f-1b) offset added below would yield
+        * an address two instructions short of 2:.
+        */
+       b       4f
+
+0:
+       /* Defaults to 256M */
+       lis     r10, 0x1000
+
+4:
+       /* Both page-size cases read LR from this single bl. */
+       bl      1f
+1:     mflr    r4
+       addi    r4, r4, (2f-1b)                 /* virtual address  of 2f */
+
+       subi    r11, r10, 1                     /* offsetmask = Pagesize - 1 */
+       not     r10, r11                        /* Pagemask = ~(offsetmask) */
+
+       and     r5, r25, r10                    /* Physical page */
+       and     r6, r4, r11                     /* offset within the current page */
+
+       or      r5, r5, r6                      /* Physical address for 2f */
+
+       /* Switch the TS in MSR to the original one */
+       mfmsr   r8
+       insrwi  r8, r7, 1, 26
+
+       mtspr   SPRN_SRR1, r8
+       mtspr   SPRN_SRR0, r5
+       rfi
+
+2:
+       /* Invalidate the tmp mapping */
+       lis     r3, 0x8000                      /* Way '0' */
+
+       clrrwi  r24, r24, 12                    /* Clear the valid bit */
+       tlbwe   r24, r3, 0
+       tlbwe   r25, r3, 1
+       tlbwe   r26, r3, 2
+
+       /* Make sure we complete the TLB write and flush the shadow TLB */
+       isync
+
+#endif /* CONFIG_PPC_47x */
+
+       /* Common exit of the 44x and 47x mapping paths. */
+ppc44x_map_done:
+
+
+       /* Restore the parameters */
+       mr      r3, r29
+       mr      r4, r30
+       mr      r5, r31
+
+       li      r0, 0
+#else /* neither CONFIG_FSL_BOOKE nor CONFIG_44x */
+       li      r0, 0
+
+       /*
+        * Set Machine Status Register to a known status,
+        * switch the MMU off and jump to 1: in a single step.
+        */
+
+       mr      r8, r0
+       ori     r8, r8, MSR_RI|MSR_ME
+       mtspr   SPRN_SRR1, r8
+       /*
+        * Target = r4 (reboot_code_buffer) plus the offset of 1: within
+        * this routine. Assumes r4 is an address that is valid with
+        * translation off -- TODO confirm against the caller.
+        */
+       addi    r8, r4, 1f - relocate_new_kernel
+       mtspr   SPRN_SRR0, r8
+       sync
+       rfi
+
+1:
+#endif
+       /* from this point address translation is turned off */
+       /* and interrupts are disabled */
+
+       /* set a new stack at the bottom of our page... */
+       /* (not really needed now) */
+       addi    r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */
+       stw     r0, 0(r1)
+
+       /*
+        * Walk the kexec entry list. Each entry word is a page address with
+        * flag bits in the low bits:
+        *   r0 = current entry word
+        *   r3 = pointer to the current entry
+        *   r8 = current destination, r9 = current source
+        *   r6 = XOR of every word copied (not read again in this routine)
+        */
+
+       /* Do the copies */
+       li      r6, 0 /* checksum */
+       mr      r0, r3
+       b       1f
+
+0:     /* top, read another word for the indirection page */
+       lwzu    r0, 4(r3)
+
+1:
+       /* is it a destination page? (r8) */
+       rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
+       beq     2f
+
+       rlwinm  r8, r0, 0, 0, 19 /* clear kexec flags, page align */
+       b       0b
+
+2:     /* is it an indirection page? (r3) */
+       rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
+       beq     2f
+
+       rlwinm  r3, r0, 0, 0, 19 /* clear kexec flags, page align */
+       subi    r3, r3, 4       /* the lwzu at 0: pre-increments by 4 */
+       b       0b
+
+2:     /* are we done? */
+       rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
+       beq     2f
+       b       3f
+
+2:     /* is it a source page? (r9) */
+       rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
+       beq     0b
+
+       rlwinm  r9, r0, 0, 0, 19 /* clear kexec flags, page align */
+
+       /* Copy one page, a word at a time; CTR counts the words. */
+       li      r7, PAGE_SIZE / 4
+       mtctr   r7
+       subi    r9, r9, 4       /* bias for the pre-incrementing lwzu/stwu */
+       subi    r8, r8, 4
+9:
+       lwzu    r0, 4(r9)  /* do the copy */
+       xor     r6, r6, r0
+       stwu    r0, 4(r8)
+       dcbst   0, r8
+       sync
+       icbi    0, r8
+       bdnz    9b
+
+       /* Undo the bias; r8 now points at the next destination page. */
+       addi    r9, r9, 4
+       addi    r8, r8, 4
+       b       0b
+
+3:
+
+       /* To be certain of avoiding problems with self-modifying code
+        * execute a serializing instruction here.
+        */
+       isync
+       sync
+
+       mfspr   r3, SPRN_PIR /* current core we are running on */
+       mr      r4, r5 /* load physical address of chunk called */
+
+       /* jump to the entry point, usually the setup routine */
+       mtlr    r5
+       blrl
+
+       /* Spin forever should the entry point ever return. */
+1:     b       1b
+
+relocate_new_kernel_end:
+
+       /*
+        * Exported word holding the size in bytes of the code between
+        * relocate_new_kernel and relocate_new_kernel_end.
+        */
+       .globl relocate_new_kernel_size
+relocate_new_kernel_size:
+       .long relocate_new_kernel_end - relocate_new_kernel