In some functions the TDP iter risks not making forward progress if two
threads livelock yielding to one another. This is possible if two threads
are trying to execute wrprot_gfn_range. Each could write protect an entry
and then yield. This would reset the tdp_iter's walk over the paging
structure and the loop would end up repeating the same entry over and
over, preventing either thread from making forward progress.
Fix this issue by only yielding if the loop has made forward progress
since the last yield.
Fixes: a6a0b05da9f3 ("kvm: x86/mmu: Support dirty logging for the TDP MMU")
Reviewed-by: Peter Feiner <pfeiner@google.com>
Signed-off-by: Ben Gardon <bgardon@google.com>
Message-Id: <
20210202185734.
1680553-14-bgardon@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
        WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
 
        iter->next_last_level_gfn = next_last_level_gfn;
+       iter->yielded_gfn = iter->next_last_level_gfn;
        iter->root_level = root_level;
        iter->min_level = min_level;
        iter->level = root_level;
        iter->valid = false;
 }
 
-/*
- * Restart the walk over the paging structure from the root, starting from the
- * highest gfn the iterator had previously reached. Assumes that the entire
- * paging structure, except the root page, may have been completely torn down
- * and rebuilt.
- */
-void tdp_iter_refresh_walk(struct tdp_iter *iter)
-{
-       gfn_t next_last_level_gfn = iter->next_last_level_gfn;
-
-       if (iter->gfn > next_last_level_gfn)
-               next_last_level_gfn = iter->gfn;
-
-       tdp_iter_start(iter, iter->pt_path[iter->root_level - 1],
-                      iter->root_level, iter->min_level, next_last_level_gfn);
-}
-
 u64 *tdp_iter_root_pt(struct tdp_iter *iter)
 {
        return iter->pt_path[iter->root_level - 1];
 
         * for this GFN.
         */
        gfn_t next_last_level_gfn;
+       /*
+        * The next_last_level_gfn at the time when the thread last
+        * yielded. Only yielding when the next_last_level_gfn !=
+        * yielded_gfn helps ensure forward progress.
+        */
+       gfn_t yielded_gfn;
        /* Pointers to the page tables traversed to reach the current SPTE */
        u64 *pt_path[PT64_ROOT_MAX_LEVEL];
        /* A pointer to the current SPTE */
 void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
                    int min_level, gfn_t next_last_level_gfn);
 void tdp_iter_next(struct tdp_iter *iter);
-void tdp_iter_refresh_walk(struct tdp_iter *iter);
 u64 *tdp_iter_root_pt(struct tdp_iter *iter);
 
 #endif /* __KVM_X86_MMU_TDP_ITER_H */
 
  * TLB flush before yielding.
  *
  * If this function yields, it will also reset the tdp_iter's walk over the
- * paging structure and the calling function should allow the iterator to
- * continue its traversal from the paging structure root.
+ * paging structure and the calling function should skip to the next
+ * iteration to allow the iterator to continue its traversal from the
+ * paging structure root.
  *
  * Return true if this function yielded and the iterator's traversal was reset.
  * Return false if a yield was not needed.
 static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
                                             struct tdp_iter *iter, bool flush)
 {
+       /* Ensure forward progress has been made before yielding. */
+       if (iter->next_last_level_gfn == iter->yielded_gfn)
+               return false;
+
        if (need_resched() || spin_needbreak(&kvm->mmu_lock)) {
                if (flush)
                        kvm_flush_remote_tlbs(kvm);
 
                cond_resched_lock(&kvm->mmu_lock);
-               tdp_iter_refresh_walk(iter);
+
+               WARN_ON(iter->gfn > iter->next_last_level_gfn);
+
+               tdp_iter_start(iter, iter->pt_path[iter->root_level - 1],
+                              iter->root_level, iter->min_level,
+                              iter->next_last_level_gfn);
+
                return true;
        }
 
 
                tdp_mmu_set_spte(kvm, &iter, 0);
 
-               flush_needed = !can_yield ||
-                              !tdp_mmu_iter_cond_resched(kvm, &iter, true);
+               flush_needed = !(can_yield &&
+                                tdp_mmu_iter_cond_resched(kvm, &iter, true));
        }
        return flush_needed;
 }