        smp_store_release(ctx->ptep, new);
 }
 
-static void stage2_put_pte(const struct kvm_pgtable_visit_ctx *ctx, struct kvm_s2_mmu *mmu,
-                          struct kvm_pgtable_mm_ops *mm_ops)
+static bool stage2_unmap_defer_tlb_flush(struct kvm_pgtable *pgt)
 {
        /*
-        * Clear the existing PTE, and perform break-before-make with
-        * TLB maintenance if it was valid.
+        * If FEAT_TLBIRANGE is implemented, defer the individual
+        * TLB invalidations until the entire walk is finished, and
+        * then use the range-based TLBI instructions to do the
+        * invalidations. Condition deferred TLB invalidation on the
+        * system supporting FWB as the optimization is entirely
+        * pointless when the unmap walker needs to perform CMOs.
+        */
+       return system_supports_tlb_range() && stage2_has_fwb(pgt);
+}
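
As a plain-C illustration of the gating logic in stage2_unmap_defer_tlb_flush() above, the decision reads as "batch only when a batched flush exists and nothing else forces a per-entry visit". This is a minimal standalone sketch; the struct and helper names below are hypothetical stand-ins, not kernel APIs:

```c
#include <stdbool.h>

/* Hypothetical stand-ins for the two hardware features involved. */
struct s2_features {
	bool tlbi_range;	/* FEAT_TLBIRANGE: one operation covers a whole IPA range */
	bool fwb;		/* FEAT_FWB: no cache maintenance (CMOs) needed on unmap  */
};

/*
 * Deferring the per-entry TLB invalidations only pays off when both hold:
 * without range TLBI there is no cheaper batched flush to defer into, and
 * without FWB the walker has to visit every entry for CMOs anyway, so (as
 * the comment above puts it) the optimization would be pointless.
 */
static bool defer_tlb_flush(const struct s2_features *f)
{
	return f->tlbi_range && f->fwb;
}
```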
+
+static void stage2_unmap_put_pte(const struct kvm_pgtable_visit_ctx *ctx,
+                               struct kvm_s2_mmu *mmu,
+                               struct kvm_pgtable_mm_ops *mm_ops)
+{
+       struct kvm_pgtable *pgt = ctx->arg;
+
+       /*
+        * Clear the existing PTE, and perform break-before-make if it was
+        * valid. When the system supports it, defer the TLB maintenance
+        * until the entire unmap walk is completed.
+        */
        if (kvm_pte_valid(ctx->old)) {
                kvm_clear_pte(ctx->ptep);
-               kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu, ctx->addr, ctx->level);
+
+               if (!stage2_unmap_defer_tlb_flush(pgt))
+                       kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, mmu,
+                                       ctx->addr, ctx->level);
        }
 
        mm_ops->put_page(ctx->ptep);
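
The "break-before-make" the comment refers to is the architectural sequence for replacing a live translation. A hypothetical, standalone sketch of the ordering (the barrier and TLBI helpers below are stubs, not the kernel's helpers) might look like this:

```c
#include <stdint.h>

typedef uint64_t pte_t;			/* hypothetical stand-in for kvm_pte_t */
#define PTE_VALID	(1ULL << 0)

static void dsb_ish(void) { /* barrier: order the update against the TLBI */ }
static void tlbi_ipa(uint64_t ipa, int level) { (void)ipa; (void)level; }

/*
 * Break-before-make: a valid entry must never be changed directly into a
 * different valid entry.  It is first "broken" (cleared and invalidated
 * from the TLBs) and only then is the new entry "made".
 */
static void replace_pte(pte_t *ptep, pte_t new_pte, uint64_t ipa, int level)
{
	if (*ptep & PTE_VALID) {
		*ptep = 0;			/* break */
		dsb_ish();
		tlbi_ipa(ipa, level);		/* drop the stale translation */
		dsb_ish();
	}
	*ptep = new_pte;			/* make */
}
```

In the unmap path above only the "break" half happens: the entry is cleared, the invalidation is either issued immediately or deferred to a single range-based flush, and the "make" is left to a later fault.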
        /*
         * This is similar to the map() path in that we unmap the entire
         * block entry and rely on the remaining portions being faulted
         * back lazily.
         */
-       stage2_put_pte(ctx, mmu, mm_ops);
+       stage2_unmap_put_pte(ctx, mmu, mm_ops);
 
        if (need_flush && mm_ops->dcache_clean_inval_poc)
                mm_ops->dcache_clean_inval_poc(kvm_pte_follow(ctx->old, mm_ops),
                                               kvm_granule_size(ctx->level));
 
 int kvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
 {
+       int ret;
        struct kvm_pgtable_walker walker = {
                .cb     = stage2_unmap_walker,
                .arg    = pgt,
                .flags  = KVM_PGTABLE_WALK_LEAF | KVM_PGTABLE_WALK_TABLE_POST,
        };
 
-       return kvm_pgtable_walk(pgt, addr, size, &walker);
+       ret = kvm_pgtable_walk(pgt, addr, size, &walker);
+       if (stage2_unmap_defer_tlb_flush(pgt))
+               /* Perform the deferred TLB invalidations */
+               kvm_tlb_flush_vmid_range(pgt->mmu, addr, size);
+
+       return ret;
 }
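
Putting the pieces together, the shape of the optimization is: walk and clear every entry, skipping the per-entry TLBI when deferral is possible, then issue one range-based invalidation for the whole span. Below is a minimal standalone sketch of that pattern over a flat, single-level toy table; all names and types are hypothetical, not the kvm_pgtable API:

```c
#include <stdbool.h>
#include <stdint.h>

typedef uint64_t pte_t;			/* hypothetical stand-in for kvm_pte_t */
#define PTE_VALID	(1ULL << 0)
#define PAGE_SIZE	4096ULL

static void tlbi_page(uint64_t ipa) { (void)ipa; /* per-entry invalidation */ }
static void tlbi_range(uint64_t ipa, uint64_t size) { (void)ipa; (void)size; /* ranged invalidation */ }

/*
 * Unmap [addr, addr + size): clear every valid entry, and either
 * invalidate as we go (the old behaviour) or issue one range-based
 * invalidation at the end (the new behaviour).
 */
static void unmap_range(pte_t *table, uint64_t addr, uint64_t size, bool defer)
{
	for (uint64_t off = 0; off < size; off += PAGE_SIZE) {
		pte_t *ptep = &table[(addr + off) / PAGE_SIZE];

		if (*ptep & PTE_VALID) {
			*ptep = 0;
			if (!defer)
				tlbi_page(addr + off);
		}
	}

	if (defer)
		tlbi_range(addr, size);	/* one flush replaces N per-entry ones */
}
```

The trade is N individually synchronised invalidations against a single ranged one; as in the patch, the per-entry path remains as a fallback for systems without FEAT_TLBIRANGE or FWB.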
 
 struct stage2_attr_data {