KVM: arm64: Add kvm_pgtable_stage2_split()
authorRicardo Koller <ricarkol@google.com>
Wed, 26 Apr 2023 17:23:24 +0000 (17:23 +0000)
committerOliver Upton <oliver.upton@linux.dev>
Tue, 16 May 2023 17:39:18 +0000 (17:39 +0000)
Add a new stage2 function, kvm_pgtable_stage2_split(), for splitting a
range of huge pages. This will be used for eager-splitting huge pages
into PAGE_SIZE pages. The goal is to avoid having to split huge pages
on write-protection faults, and instead use this function to do it
ahead of time for large ranges (e.g., all guest memory in 1G chunks at
a time).

Signed-off-by: Ricardo Koller <ricarkol@google.com>
Reviewed-by: Shaoqin Huang <shahuang@redhat.com>
Reviewed-by: Gavin Shan <gshan@redhat.com>
Link: https://lore.kernel.org/r/20230426172330.1439644-7-ricarkol@google.com
Signed-off-by: Oliver Upton <oliver.upton@linux.dev>
arch/arm64/include/asm/kvm_pgtable.h
arch/arm64/kvm/hyp/pgtable.c

index cbc6971e2cb4cff7f686075d7a05a2a513c9f339..850d65f705fa8054ed46fcf3e53985426500b7d9 100644 (file)
@@ -671,6 +671,25 @@ bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
  */
 int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
 
+/**
+ * kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
+ *                             to PAGE_SIZE guest pages.
+ * @pgt:        Page-table structure initialised by kvm_pgtable_stage2_init().
+ * @addr:       Intermediate physical address from which to split.
+ * @size:       Size of the range.
+ * @mc:                 Cache of pre-allocated and zeroed memory from which to allocate
+ *              page-table pages.
+ *
+ * The function tries to split any level 1 or 2 entry that overlaps
+ * with the input range (given by @addr and @size).
+ *
+ * Return: 0 on success, negative error code on failure. Note that
+ * kvm_pgtable_stage2_split() is best effort: it tries to break as many
+ * blocks in the input range as allowed by @mc_capacity.
+ */
+int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+                            struct kvm_mmu_memory_cache *mc);
+
 /**
  * kvm_pgtable_walk() - Walk a page-table.
  * @pgt:       Page-table structure initialised by kvm_pgtable_*_init().
index 56edffc02bc64f31888d8909d31095e044abcc7f..8b03cd6fed12fd490538ecaff178b9d43d1451a7 100644 (file)
@@ -1276,6 +1276,109 @@ kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
        return pgtable;
 }
 
+/*
+ * Get the number of page-tables needed to replace a block with a
+ * fully populated tree up to the PTE entries. Note that @level is
+ * interpreted as in "level @level entry".
+ */
+static int stage2_block_get_nr_page_tables(u32 level)
+{
+       switch (level) {
+       case 1:
+               return PTRS_PER_PTE + 1;
+       case 2:
+               return 1;
+       case 3:
+               return 0;
+       default:
+               WARN_ON_ONCE(level < KVM_PGTABLE_MIN_BLOCK_LEVEL ||
+                            level >= KVM_PGTABLE_MAX_LEVELS);
+               return -EINVAL;
+       };
+}
+
+static int stage2_split_walker(const struct kvm_pgtable_visit_ctx *ctx,
+                              enum kvm_pgtable_walk_flags visit)
+{
+       struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
+       struct kvm_mmu_memory_cache *mc = ctx->arg;
+       struct kvm_s2_mmu *mmu;
+       kvm_pte_t pte = ctx->old, new, *childp;
+       enum kvm_pgtable_prot prot;
+       u32 level = ctx->level;
+       bool force_pte;
+       int nr_pages;
+       u64 phys;
+
+       /* No huge-pages exist at the last level */
+       if (level == KVM_PGTABLE_MAX_LEVELS - 1)
+               return 0;
+
+       /* We only split valid block mappings */
+       if (!kvm_pte_valid(pte))
+               return 0;
+
+       nr_pages = stage2_block_get_nr_page_tables(level);
+       if (nr_pages < 0)
+               return nr_pages;
+
+       if (mc->nobjs >= nr_pages) {
+               /* Build a tree mapped down to the PTE granularity. */
+               force_pte = true;
+       } else {
+               /*
+                * Don't force PTEs, so create_unlinked() below does
+                * not populate the tree up to the PTE level. The
+                * consequence is that the call will require a single
+                * page of level 2 entries at level 1, or a single
+                * page of PTEs at level 2. If we are at level 1, the
+                * PTEs will be created recursively.
+                */
+               force_pte = false;
+               nr_pages = 1;
+       }
+
+       if (mc->nobjs < nr_pages)
+               return -ENOMEM;
+
+       mmu = container_of(mc, struct kvm_s2_mmu, split_page_cache);
+       phys = kvm_pte_to_phys(pte);
+       prot = kvm_pgtable_stage2_pte_prot(pte);
+
+       childp = kvm_pgtable_stage2_create_unlinked(mmu->pgt, phys,
+                                                   level, prot, mc, force_pte);
+       if (IS_ERR(childp))
+               return PTR_ERR(childp);
+
+       if (!stage2_try_break_pte(ctx, mmu)) {
+               kvm_pgtable_stage2_free_unlinked(mm_ops, childp, level);
+               mm_ops->put_page(childp);
+               return -EAGAIN;
+       }
+
+       /*
+        * Note, the contents of the page table are guaranteed to be made
+        * visible before the new PTE is assigned because stage2_make_pte()
+        * writes the PTE using smp_store_release().
+        */
+       new = kvm_init_table_pte(childp, mm_ops);
+       stage2_make_pte(ctx, new);
+       dsb(ishst);
+       return 0;
+}
+
+int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+                            struct kvm_mmu_memory_cache *mc)
+{
+       struct kvm_pgtable_walker walker = {
+               .cb     = stage2_split_walker,
+               .flags  = KVM_PGTABLE_WALK_LEAF,
+               .arg    = mc,
+       };
+
+       return kvm_pgtable_walk(pgt, addr, size, &walker);
+}
+
 int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
                              struct kvm_pgtable_mm_ops *mm_ops,
                              enum kvm_pgtable_stage2_flags flags,