#include "opal-fadump.h"
 
 static const struct opal_fadump_mem_struct *opal_fdm_active;
+static const struct opal_mpipl_fadump *opal_cpu_metadata;
 static struct opal_fadump_mem_struct *opal_fdm;
 
 static int opal_fadump_unregister(struct fw_dump *fadump_conf);
                pr_warn("Could not reset (%llu) kernel metadata tag!\n", ret);
 }
 
+static inline void opal_fadump_set_regval_regnum(struct pt_regs *regs,
+                                                u32 reg_type, u32 reg_num,
+                                                u64 reg_val)
+{
+       if (reg_type == HDAT_FADUMP_REG_TYPE_GPR) {
+               if (reg_num < 32)
+                       regs->gpr[reg_num] = reg_val;
+               return;
+       }
+
+       switch (reg_num) {
+       case SPRN_CTR:
+               regs->ctr = reg_val;
+               break;
+       case SPRN_LR:
+               regs->link = reg_val;
+               break;
+       case SPRN_XER:
+               regs->xer = reg_val;
+               break;
+       case SPRN_DAR:
+               regs->dar = reg_val;
+               break;
+       case SPRN_DSISR:
+               regs->dsisr = reg_val;
+               break;
+       case HDAT_FADUMP_REG_ID_NIP:
+               regs->nip = reg_val;
+               break;
+       case HDAT_FADUMP_REG_ID_MSR:
+               regs->msr = reg_val;
+               break;
+       case HDAT_FADUMP_REG_ID_CCR:
+               regs->ccr = reg_val;
+               break;
+       }
+}
+
+static inline void opal_fadump_read_regs(char *bufp, unsigned int regs_cnt,
+                                        unsigned int reg_entry_size,
+                                        struct pt_regs *regs)
+{
+       struct hdat_fadump_reg_entry *reg_entry;
+       int i;
+
+       memset(regs, 0, sizeof(struct pt_regs));
+
+       for (i = 0; i < regs_cnt; i++, bufp += reg_entry_size) {
+               reg_entry = (struct hdat_fadump_reg_entry *)bufp;
+               opal_fadump_set_regval_regnum(regs,
+                                             be32_to_cpu(reg_entry->reg_type),
+                                             be32_to_cpu(reg_entry->reg_num),
+                                             be64_to_cpu(reg_entry->reg_val));
+       }
+}
+
+/*
+ * Verify if CPU state data is available. If available, do a bit of sanity
+ * checking before processing this data.
+ */
+static bool __init is_opal_fadump_cpu_data_valid(struct fw_dump *fadump_conf)
+{
+       if (!opal_cpu_metadata)
+               return false;
+
+       fadump_conf->cpu_state_data_version =
+               be32_to_cpu(opal_cpu_metadata->cpu_data_version);
+       fadump_conf->cpu_state_entry_size =
+               be32_to_cpu(opal_cpu_metadata->cpu_data_size);
+       fadump_conf->cpu_state_dest_vaddr =
+               (u64)__va(be64_to_cpu(opal_cpu_metadata->region[0].dest));
+       fadump_conf->cpu_state_data_size =
+               be64_to_cpu(opal_cpu_metadata->region[0].size);
+
+       if (fadump_conf->cpu_state_data_version != HDAT_FADUMP_CPU_DATA_VER) {
+               pr_warn("Supported CPU state data version: %u, found: %d!\n",
+                       HDAT_FADUMP_CPU_DATA_VER,
+                       fadump_conf->cpu_state_data_version);
+               pr_warn("WARNING: F/W using newer CPU state data format!!\n");
+       }
+
+       if ((fadump_conf->cpu_state_dest_vaddr == 0) ||
+           (fadump_conf->cpu_state_entry_size == 0) ||
+           (fadump_conf->cpu_state_entry_size >
+            fadump_conf->cpu_state_data_size)) {
+               pr_err("CPU state data is invalid. Ignoring!\n");
+               return false;
+       }
+
+       return true;
+}
+
 /*
  * Convert CPU state data saved at the time of crash into ELF notes.
  *
- * Append crashing CPU's register data saved by the kernel in the PT_NOTE.
+ * While the crashing CPU's register data is saved by the kernel, CPU state
+ * data for all CPUs is saved by f/w. In CPU state data provided by f/w,
+ * each register entry is of 16 bytes, a numerical identifier along with
+ * a GPR/SPR flag in the first 8 bytes and the register value in the next
+ * 8 bytes. For more details refer to F/W documentation. If this data is
+ * missing or in unsupported format, append crashing CPU's register data
+ * saved by the kernel in the PT_NOTE, to have something to work with in
+ * the vmcore file.
  */
 static int __init
 opal_fadump_build_cpu_notes(struct fw_dump *fadump_conf,
                            struct fadump_crash_info_header *fdh)
 {
+       u32 thread_pir, size_per_thread, regs_offset, regs_cnt, reg_esize;
+       struct hdat_fadump_thread_hdr *thdr;
+       bool is_cpu_data_valid = false;
        u32 num_cpus = 1, *note_buf;
-       int rc;
-
-       if (fdh->crashing_cpu == FADUMP_CPU_UNKNOWN)
-               return -ENODEV;
+       struct pt_regs regs;
+       char *bufp;
+       int rc, i;
+
+       if (is_opal_fadump_cpu_data_valid(fadump_conf)) {
+               size_per_thread = fadump_conf->cpu_state_entry_size;
+               num_cpus = (fadump_conf->cpu_state_data_size / size_per_thread);
+               bufp = __va(fadump_conf->cpu_state_dest_vaddr);
+               is_cpu_data_valid = true;
+       }
 
-       /* Allocate CPU notes buffer to hold crashing cpu notes. */
        rc = fadump_setup_cpu_notes_buf(num_cpus);
        if (rc != 0)
                return rc;
 
        note_buf = (u32 *)fadump_conf->cpu_notes_buf_vaddr;
-       note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
+       if (!is_cpu_data_valid)
+               goto out;
+
+       /*
+        * Offset for register entries, entry size and registers count is
+        * duplicated in every thread header in keeping with HDAT format.
+        * Use these values from the first thread header.
+        */
+       thdr = (struct hdat_fadump_thread_hdr *)bufp;
+       regs_offset = (offsetof(struct hdat_fadump_thread_hdr, offset) +
+                      be32_to_cpu(thdr->offset));
+       reg_esize = be32_to_cpu(thdr->esize);
+       regs_cnt  = be32_to_cpu(thdr->ecnt);
+
+       pr_debug("--------CPU State Data------------\n");
+       pr_debug("NumCpus     : %u\n", num_cpus);
+       pr_debug("\tOffset: %u, Entry size: %u, Cnt: %u\n",
+                regs_offset, reg_esize, regs_cnt);
+
+       for (i = 0; i < num_cpus; i++, bufp += size_per_thread) {
+               thdr = (struct hdat_fadump_thread_hdr *)bufp;
+
+               thread_pir = be32_to_cpu(thdr->pir);
+               pr_debug("[%04d] PIR: 0x%x, core state: 0x%02x\n",
+                        i, thread_pir, thdr->core_state);
+
+               /*
+                * If this is kernel initiated crash, crashing_cpu would be set
+                * appropriately and register data of the crashing CPU saved by
+                * crashing kernel. Add this saved register data of crashing CPU
+                * to elf notes and populate the pt_regs for the remaining CPUs
+                * from register state data provided by firmware.
+                */
+               if (fdh->crashing_cpu == thread_pir) {
+                       note_buf = fadump_regs_to_elf_notes(note_buf,
+                                                           &fdh->regs);
+                       pr_debug("Crashing CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
+                                fdh->crashing_cpu, fdh->regs.gpr[1],
+                                fdh->regs.nip);
+                       continue;
+               }
+
+               /*
+                * Register state data of MAX cores is provided by firmware,
+                * but some of this cores may not be active. So, while
+                * processing register state data, check core state and
+                * skip threads that belong to inactive cores.
+                */
+               if (thdr->core_state == HDAT_FADUMP_CORE_INACTIVE)
+                       continue;
+
+               opal_fadump_read_regs((bufp + regs_offset), regs_cnt,
+                                     reg_esize, ®s);
+               note_buf = fadump_regs_to_elf_notes(note_buf, ®s);
+               pr_debug("CPU PIR: 0x%x - R1 : 0x%lx, NIP : 0x%lx\n",
+                        thread_pir, regs.gpr[1], regs.nip);
+       }
+
+out:
+       /*
+        * CPU state data is invalid/unsupported. Try appending crashing CPU's
+        * register data, if it is saved by the kernel.
+        */
+       if (fadump_conf->cpu_notes_buf_vaddr == (u64)note_buf) {
+               if (fdh->crashing_cpu == FADUMP_CPU_UNKNOWN) {
+                       fadump_free_cpu_notes_buf();
+                       return -ENODEV;
+               }
+
+               pr_warn("WARNING: appending only crashing CPU's register data\n");
+               note_buf = fadump_regs_to_elf_notes(note_buf, &(fdh->regs));
+       }
+
        final_note(note_buf);
 
        pr_debug("Updating elfcore header (%llx) with cpu notes\n",
 {
        int rc;
 
+       /*
+        * Unlike on pSeries platform, logical CPU number is not provided
+        * with architected register state data. So, store the crashing
+        * CPU's PIR instead to plug the appropriate register data for
+        * crashing CPU in the vmcore file.
+        */
+       fdh->crashing_cpu = (u32)mfspr(SPRN_PIR);
+
        rc = opal_cec_reboot2(OPAL_REBOOT_MPIPL, msg);
        if (rc == OPAL_UNSUPPORTED) {
                pr_emerg("Reboot type %d not supported.\n",
                return;
        }
 
+       ret = opal_mpipl_query_tag(OPAL_MPIPL_TAG_CPU, &addr);
+       if (addr) {
+               addr = be64_to_cpu(addr);
+               pr_debug("CPU metadata addr: %llx\n", addr);
+               opal_cpu_metadata = __va(addr);
+       }
+
        pr_info("Firmware-assisted dump is active.\n");
        fadump_conf->dump_active = 1;
        opal_fadump_get_config(fadump_conf, opal_fdm_active);