/* Base PID to allocate from */
 extern unsigned int mmu_base_pid;
 
-/*
- * memory block size used with radix translation.
- */
-extern unsigned long __ro_after_init radix_mem_block_size;
+extern unsigned long __ro_after_init memory_block_size;
 
 #define PRTB_SIZE_SHIFT        (mmu_pid_bits + 4)
 #define PRTB_ENTRIES   (1ul << mmu_pid_bits)
 
 #include <mm/mmu_decl.h>
 
 unsigned int mmu_base_pid;
-unsigned long radix_mem_block_size __ro_after_init;
 
 static __ref void *early_alloc_pgtable(unsigned long size, int nid,
                        unsigned long region_start, unsigned long region_end)
        bool prev_exec, exec = false;
        pgprot_t prot;
        int psize;
-       unsigned long max_mapping_size = radix_mem_block_size;
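+       /*
+        * Cap the linear mapping page size at the memory block size so a
+        * memory block can be hot-unplugged without having to split a
+        * larger mapping.
+        */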
+       unsigned long max_mapping_size = memory_block_size;
 
        if (debug_pagealloc_enabled_or_kfence())
                max_mapping_size = PAGE_SIZE;
        return 1;
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-static int __init probe_memory_block_size(unsigned long node, const char *uname, int
-                                         depth, void *data)
-{
-       unsigned long *mem_block_size = (unsigned long *)data;
-       const __be32 *prop;
-       int len;
-
-       if (depth != 1)
-               return 0;
-
-       if (strcmp(uname, "ibm,dynamic-reconfiguration-memory"))
-               return 0;
-
-       prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
-
-       if (!prop || len < dt_root_size_cells * sizeof(__be32))
-               /*
-                * Nothing in the device tree
-                */
-               *mem_block_size = MIN_MEMORY_BLOCK_SIZE;
-       else
-               *mem_block_size = of_read_number(prop, dt_root_size_cells);
-       return 1;
-}
-
-static unsigned long __init radix_memory_block_size(void)
-{
-       unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;
-
-       /*
-        * OPAL firmware feature is set by now. Hence we are ok
-        * to test OPAL feature.
-        */
-       if (firmware_has_feature(FW_FEATURE_OPAL))
-               mem_block_size = 1UL * 1024 * 1024 * 1024;
-       else
-               of_scan_flat_dt(probe_memory_block_size, &mem_block_size);
-
-       return mem_block_size;
-}
-
-#else   /* CONFIG_MEMORY_HOTPLUG */
-
-static unsigned long __init radix_memory_block_size(void)
-{
-       return 1UL * 1024 * 1024 * 1024;
-}
-
-#endif /* CONFIG_MEMORY_HOTPLUG */
-
-
 void __init radix__early_init_devtree(void)
 {
        int rc;
                mmu_psize_defs[MMU_PAGE_64K].h_rpt_pgsize =
                        psize_to_rpti_pgsize(MMU_PAGE_64K);
        }
-
-       /*
-        * Max mapping size used when mapping pages. We don't use
-        * ppc_md.memory_block_size() here because this get called
-        * early and we don't have machine probe called yet. Also
-        * the pseries implementation only check for ibm,lmb-size.
-        * All hypervisor supporting radix do expose that device
-        * tree node.
-        */
-       radix_mem_block_size = radix_memory_block_size();
        return;
 }
 
 
 #include <linux/of_fdt.h>
 #include <linux/libfdt.h>
 #include <linux/memremap.h>
+#include <linux/memory.h>
 
 #include <asm/pgalloc.h>
 #include <asm/page.h>
        return 1;
 }
 
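+/*
+ * Shrink *block_size until it evenly divides mem_size, stepping down by a
+ * factor of 4 each time but never going below the sparsemem section size.
+ * For example, a 128M region reduces a 1G starting value to 64M.
+ */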
+static void update_memory_block_size(unsigned long *block_size, unsigned long mem_size)
+{
+       unsigned long section_size = 1UL << SECTION_SIZE_BITS;
+
+       for (; *block_size > section_size; *block_size >>= 2) {
+
+               if ((mem_size & (*block_size - 1)) == 0)
+                       break;
+       }
+}
+
+static int __init probe_memory_block_size(unsigned long node, const char *uname, int
+                                         depth, void *data)
+{
+       const char *type;
+       unsigned long *block_size = (unsigned long *)data;
+       const __be32 *reg, *endp;
+       int l;
+
+       if (depth != 1)
+               return 0;
+       /*
+        * If we have an ibm,dynamic-reconfiguration-memory node, use
+        * its ibm,lmb-size value.
+        */
+       if (strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
+
+               const __be32 *prop;
+
+               prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &l);
+
+               if (!prop || l < dt_root_size_cells * sizeof(__be32))
+                       /*
+                        * Nothing in the device tree
+                        */
+                       *block_size = MIN_MEMORY_BLOCK_SIZE;
+               else
+                       *block_size = of_read_number(prop, dt_root_size_cells);
+               /*
+                * We have found the final value. Don't probe further.
+                */
+               return 1;
+       }
+       /*
+        * Find all device tree nodes of memory type and make sure
+        * the area can be mapped using the memory block size value
+        * we end up using. We start with a 1G value and keep reducing
+        * it such that the entire area can be mapped using
+        * memory_block_size. This is used on powernv and on older
+        * pseries that don't have the ibm,lmb-size property.
+        * For example, on P5 we can end up with
+        * memory@0 -> 128MB
+        * memory@128M -> 64M
+        * which results in a 64MB memory block size value.
+        */
+       type = of_get_flat_dt_prop(node, "device_type", NULL);
+       if (type == NULL || strcmp(type, "memory") != 0)
+               return 0;
+
+       reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l);
+       if (!reg)
+               reg = of_get_flat_dt_prop(node, "reg", &l);
+       if (!reg)
+               return 0;
+
+       endp = reg + (l / sizeof(__be32));
+       while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) {
+               const char *compatible;
+               u64 size;
+
+               dt_mem_next_cell(dt_root_addr_cells, &reg);
+               size = dt_mem_next_cell(dt_root_size_cells, &reg);
+
+               if (size) {
+                       update_memory_block_size(block_size, size);
+                       continue;
+               }
+               /*
+                * ibm,coherent-device-memory with linux,usable-memory = 0:
+                * force a 256MiB block size as a workaround for GPUs on P9
+                * PowerNV. linux,usable-memory == 0 implies driver managed
+                * memory, where we can't use a large memory block size due
+                * to hotplug/unplug limitations.
+                */
+               compatible = of_get_flat_dt_prop(node, "compatible", NULL);
+               if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) {
+                       *block_size = SZ_256M;
+                       return 1;
+               }
+       }
+       /* continue looking for other memory device types */
+       return 0;
+}
+
+/*
+ * Start with a 1G memory block size. Early init will
+ * fix this up with the correct value.
+ */
+unsigned long memory_block_size __ro_after_init = 1UL << 30;
+
+static void __init early_init_memory_block_size(void)
+{
+       /*
+        * We need to probe memory_block_size early so that
+        * radix__early_init_mmu() can use it as the limit for
+        * the mapping page size.
+        */
+       of_scan_flat_dt(probe_memory_block_size, &memory_block_size);
+}
+
 void __init mmu_early_init_devtree(void)
 {
        bool hvmode = !!(mfmsr() & MSR_HV);
        if (!hvmode)
                early_check_vec5();
 
+       early_init_memory_block_size();
+
        if (early_radix_enabled()) {
                radix__early_init_devtree();
 
 
 #ifdef CONFIG_MEMORY_HOTPLUG
 static unsigned long pnv_memory_block_size(void)
 {
-       /*
-        * We map the kernel linear region with 1GB large pages on radix. For
-        * memory hot unplug to work our memory block size must be at least
-        * this size.
-        */
-       if (radix_enabled())
-               return radix_mem_block_size;
-       else
-               return 256UL * 1024 * 1024;
+       return memory_block_size;
 }
 #endif
 
 
 #include <asm/drmem.h>
 #include "pseries.h"
 
-unsigned long pseries_memory_block_size(void)
-{
-       struct device_node *np;
-       u64 memblock_size = MIN_MEMORY_BLOCK_SIZE;
-       struct resource r;
-
-       np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
-       if (np) {
-               int len;
-               int size_cells;
-               const __be32 *prop;
-
-               size_cells = of_n_size_cells(np);
-
-               prop = of_get_property(np, "ibm,lmb-size", &len);
-               if (prop && len >= size_cells * sizeof(__be32))
-                       memblock_size = of_read_number(prop, size_cells);
-               of_node_put(np);
-
-       } else  if (machine_is(pseries)) {
-               /* This fallback really only applies to pseries */
-               unsigned int memzero_size = 0;
-
-               np = of_find_node_by_path("/memory@0");
-               if (np) {
-                       if (!of_address_to_resource(np, 0, &r))
-                               memzero_size = resource_size(&r);
-                       of_node_put(np);
-               }
-
-               if (memzero_size) {
-                       /* We now know the size of memory@0, use this to find
-                        * the first memoryblock and get its size.
-                        */
-                       char buf[64];
-
-                       sprintf(buf, "/memory@%x", memzero_size);
-                       np = of_find_node_by_path(buf);
-                       if (np) {
-                               if (!of_address_to_resource(np, 0, &r))
-                                       memblock_size = resource_size(&r);
-                               of_node_put(np);
-                       }
-               }
-       }
-       return memblock_size;
-}
-
 static void dlpar_free_property(struct property *prop)
 {
        kfree(prop->name);
 
 static int pseries_remove_memblock(unsigned long base, unsigned long memblock_size)
 {
-       unsigned long block_sz, start_pfn;
+       unsigned long start_pfn;
        int sections_per_block;
        int i;
 
        if (!pfn_valid(start_pfn))
                goto out;
 
-       block_sz = pseries_memory_block_size();
-       sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
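+       /* memory_block_size is expected to be a multiple of the section size */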
+       sections_per_block = memory_block_size / MIN_MEMORY_BLOCK_SIZE;
 
        for (i = 0; i < sections_per_block; i++) {
                __remove_memory(base, MIN_MEMORY_BLOCK_SIZE);
 static int dlpar_remove_lmb(struct drmem_lmb *lmb)
 {
        struct memory_block *mem_block;
-       unsigned long block_sz;
        int rc;
 
        if (!lmb_is_removable(lmb))
                return rc;
        }
 
-       block_sz = pseries_memory_block_size();
-
-       __remove_memory(lmb->base_addr, block_sz);
+       __remove_memory(lmb->base_addr, memory_block_size);
        put_device(&mem_block->dev);
 
        /* Update memory regions for memory remove */
-       memblock_remove(lmb->base_addr, block_sz);
+       memblock_remove(lmb->base_addr, memory_block_size);
 
        invalidate_lmb_associativity_index(lmb);
        lmb->flags &= ~DRCONF_MEM_ASSIGNED;
 
 int pseries_msi_allocate_domains(struct pci_controller *phb);
 void pseries_msi_free_domains(struct pci_controller *phb);
 
-unsigned long pseries_memory_block_size(void);
-
 extern int CMO_PrPSP;
 extern int CMO_SecPSP;
 extern unsigned long CMO_PageSize;
 
        return PCI_PROBE_NORMAL;
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+static unsigned long pseries_memory_block_size(void)
+{
+       return memory_block_size;
+}
+#endif
+
 struct pci_controller_ops pseries_pci_controller_ops = {
        .probe_mode             = pSeries_pci_probe_mode,
 #ifdef CONFIG_SPAPR_TCE_IOMMU