From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 29 Jul 2022 13:46:01 +0000 (-0400)
Subject: Merge remote-tracking branch 'kvm/next' into kvm-next-5.20
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=63f4b210414b65aa3103c54369cacbd0b1bdf02f;p=linux.git

Merge remote-tracking branch 'kvm/next' into kvm-next-5.20

KVM/s390, KVM/x86 and common infrastructure changes for 5.20

x86:

* Permit guests to ignore single-bit ECC errors

* Fix races in gfn->pfn cache refresh; do not pin pages tracked by the cache

* Intel IPI virtualization

* Allow getting/setting pending triple fault with KVM_GET/SET_VCPU_EVENTS

* PEBS virtualization

* Simplify PMU emulation by just using PERF_TYPE_RAW events

* More accurate event reinjection on SVM (avoid retrying instructions)

* Allow getting/setting the state of the speaker port data bit

* Refuse to load the kvm-intel module if the VM-Entry/VM-Exit controls are inconsistent

* "Notify" VM exit (detect microarchitectural hangs) for Intel

* Cleanups for MCE MSR emulation

s390:

* Add an interface to provide a hypervisor dump for secure guests

* Improve selftests to use the TAP interface

* Enable interpretive execution of zPCI instructions (for PCI passthrough)

* First part of deferred teardown

* CPU topology

* PV attestation

* Minor fixes

Generic:

* New selftests API using struct kvm_vcpu instead of a (vm, id) tuple

x86:

* Use try_cmpxchg64 instead of cmpxchg64

* Bugfixes

* Ignore benign host accesses to PMU MSRs when PMU is disabled

* Allow disabling KVM's "MONITOR/MWAIT are NOPs!" behavior

* x86/MMU: Allow NX huge pages to be disabled on a per-vm basis

* Port eager page splitting to the shadow MMU as well

* Enable CMCI capability by default and handle injected UCNA errors

* Expose the PID of vCPU threads in debugfs

* x2AVIC support for AMD

* Clean up PIO emulation

* Fixes for LLDT/LTR emulation

* Don't require refcounted "struct page" to create huge SPTEs

x86 cleanups:

* Use separate namespaces for guest PTEs and shadow PTEs bitmasks

* PIO emulation

* Reorganize rmap API, mostly around rmap destruction

* Do not work around very old KVM bugs for L0 that runs with nesting enabled

* New selftests API for CPUID
---

63f4b210414b65aa3103c54369cacbd0b1bdf02f
diff --cc arch/riscv/kvm/mmu.c
index bc545aef60344,081f8d2b9cf3d..3a35b2d95697c
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@@ -351,11 -350,7 +351,10 @@@ int kvm_riscv_gstage_ioremap(struct kv
  	int ret = 0;
  	unsigned long pfn;
  	phys_addr_t addr, end;
- 	struct kvm_mmu_memory_cache pcache;
- 
- 	memset(&pcache, 0, sizeof(pcache));
- 	pcache.gfp_custom = (in_atomic) ? GFP_ATOMIC | __GFP_ACCOUNT : 0;
- 	pcache.gfp_zero = __GFP_ZERO;
 -	struct kvm_mmu_memory_cache pcache = { .gfp_zero = __GFP_ZERO };
++	struct kvm_mmu_memory_cache pcache = {
++		.gfp_custom = (in_atomic) ? GFP_ATOMIC | __GFP_ACCOUNT : 0,
++		.gfp_zero = __GFP_ZERO,
++	};
  
  	end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK;
  	pfn = __phys_to_pfn(hpa);
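
The merged initializer keeps the gfp_custom override from one parent and the designated-initializer style from the other. The override is consumed on the generic side when the cache is topped up (see the closing virt/kvm/kvm_main.c hunk). A condensed, non-authoritative sketch of that path, with capacity handling omitted:

	/* Sketch only: a non-zero gfp_custom (GFP_ATOMIC | __GFP_ACCOUNT
	 * from the atomic ioremap case above) takes precedence over the
	 * default GFP_KERNEL_ACCOUNT; 'topup_sketch' is a hypothetical
	 * name, not the in-tree function. */
	static int topup_sketch(struct kvm_mmu_memory_cache *mc, int min)
	{
		gfp_t gfp = mc->gfp_custom ? mc->gfp_custom : GFP_KERNEL_ACCOUNT;

		while (mc->nobjs < min) {
			void *obj = mmu_memory_cache_alloc_obj(mc, gfp);

			if (!obj)
				return -ENOMEM;
			mc->objects[mc->nobjs++] = obj;
		}
		return 0;
	}
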
diff --cc arch/x86/kvm/vmx/capabilities.h
index c0e24826a86f7,069d8d298e1de..c5e5dfef69c7f
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@@ -4,8 -4,10 +4,10 @@@
  
  #include <asm/vmx.h>
  
 -#include "lapic.h"
 -#include "x86.h"
 -#include "pmu.h"
 -#include "cpuid.h"
 +#include "../lapic.h"
 +#include "../x86.h"
++#include "../pmu.h"
++#include "../cpuid.h"
  
  extern bool __read_mostly enable_vpid;
  extern bool __read_mostly flexpriority_enabled;
diff --cc arch/x86/kvm/x86.c
index e5fa335a4ea79,5366f884e9a70..33560bfa0cac6
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@@ -298,7 -286,8 +286,8 @@@ const struct _kvm_stats_desc kvm_vcpu_s
  	STATS_DESC_COUNTER(VCPU, directed_yield_successful),
  	STATS_DESC_COUNTER(VCPU, preemption_reported),
  	STATS_DESC_COUNTER(VCPU, preemption_other),
- 	STATS_DESC_IBOOLEAN(VCPU, guest_mode)
 -	STATS_DESC_ICOUNTER(VCPU, guest_mode),
++	STATS_DESC_IBOOLEAN(VCPU, guest_mode),
+ 	STATS_DESC_COUNTER(VCPU, notify_window_exits),
  };
  
  const struct kvm_stats_header kvm_vcpu_stats_header = {
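
These descriptors feed KVM's self-describing binary stats interface, so the guest_mode retype (counter to instantaneous boolean) and the new notify_window_exits counter reach userspace without any uapi change. A hedged userspace fragment, assuming vcpu_fd is an already-created vCPU file descriptor:

	/* Fetch the per-vCPU binary stats fd and read its header; the
	 * descriptors at hdr.desc_offset name 'guest_mode' and
	 * 'notify_window_exits' along with their types. */
	int stats_fd = ioctl(vcpu_fd, KVM_GET_STATS_FD, NULL);
	struct kvm_stats_header hdr;

	if (stats_fd < 0 || pread(stats_fd, &hdr, sizeof(hdr), 0) != sizeof(hdr))
		err(1, "binary stats");
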
@@@ -6028,12 -6172,11 +6175,16 @@@ split_irqchip_unlock
  		kvm->arch.exception_payload_enabled = cap->args[0];
  		r = 0;
  		break;
+ 	case KVM_CAP_X86_TRIPLE_FAULT_EVENT:
+ 		kvm->arch.triple_fault_event = cap->args[0];
+ 		r = 0;
+ 		break;
  	case KVM_CAP_X86_USER_SPACE_MSR:
 +		r = -EINVAL;
 +		if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL |
 +				     KVM_MSR_EXIT_REASON_UNKNOWN |
 +				     KVM_MSR_EXIT_REASON_FILTER))
 +			break;
  		kvm->arch.user_space_msr_mask = cap->args[0];
  		r = 0;
  		break;
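
The triple-fault reporting added here is opt-in per VM. A minimal enablement sketch from userspace, where vm_fd is an assumed, already-created VM file descriptor:

	/* args[0] lands in kvm->arch.triple_fault_event as shown above;
	 * once enabled, a pending triple fault can be read and written
	 * through KVM_GET/SET_VCPU_EVENTS. */
	struct kvm_enable_cap cap = {
		.cap = KVM_CAP_X86_TRIPLE_FAULT_EVENT,
		.args = { 1 },
	};

	if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
		err(1, "KVM_ENABLE_CAP");
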
diff --cc include/linux/kvm_types.h
index 1dcfba68076a7,4d933518060fa..3ca3db020e0e3
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@@ -87,9 -92,9 +92,10 @@@ struct gfn_to_pfn_cache 
  struct kvm_mmu_memory_cache {
  	int nobjs;
  	gfp_t gfp_zero;
 +	gfp_t gfp_custom;
  	struct kmem_cache *kmem_cache;
- 	void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE];
+ 	int capacity;
+ 	void **objects;
  };
  #endif
  
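With a capacity field and an out-of-line objects array, callers can now size caches explicitly via __kvm_mmu_topup_memory_cache() (see the virt/kvm/kvm_main.c hunk at the end of this diff). A plausible sketch of how the historical fixed-size entry point can be kept as a wrapper, assuming the per-arch default capacity is retained:

	/* Sketch, not necessarily the in-tree wrapper: preserve the old
	 * behavior by topping up with the per-architecture default. */
	int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
	{
		return __kvm_mmu_topup_memory_cache(mc, KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE, min);
	}
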
diff --cc include/uapi/linux/kvm.h
index 0c1f42a40fd35,7e06194129e3f..c823a136f9236
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@@ -270,7 -270,7 +270,8 @@@ struct kvm_xen_exit 
  #define KVM_EXIT_X86_BUS_LOCK     33
  #define KVM_EXIT_XEN              34
  #define KVM_EXIT_RISCV_SBI        35
 -#define KVM_EXIT_NOTIFY           36
 +#define KVM_EXIT_RISCV_CSR        36
++#define KVM_EXIT_NOTIFY           37
  
  /* For KVM_EXIT_INTERNAL_ERROR */
  /* Emulate instruction failed. */
@@@ -497,13 -497,11 +498,18 @@@ struct kvm_run 
  			unsigned long args[6];
  			unsigned long ret[2];
  		} riscv_sbi;
 +		/* KVM_EXIT_RISCV_CSR */
 +		struct {
 +			unsigned long csr_num;
 +			unsigned long new_value;
 +			unsigned long write_mask;
 +			unsigned long ret_value;
 +		} riscv_csr;
+ 		/* KVM_EXIT_NOTIFY */
+ 		struct {
+ #define KVM_NOTIFY_CONTEXT_INVALID	(1 << 0)
+ 			__u32 flags;
+ 		} notify;
  		/* Fix the size of the union. */
  		char padding[256];
  	};
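
VMM run loops need to learn the two new exit reasons. A hedged fragment, where run is the mmap'ed struct kvm_run for the vCPU and handle_notify_gone()/emulate_csr() are hypothetical helpers:

	switch (run->exit_reason) {
	case KVM_EXIT_NOTIFY:
		/* Notify window expired; if the context was corrupted the
		 * vCPU state can no longer be trusted. */
		if (run->notify.flags & KVM_NOTIFY_CONTEXT_INVALID)
			handle_notify_gone();
		break;
	case KVM_EXIT_RISCV_CSR:
		/* Emulate the CSR access and hand the result back. */
		run->riscv_csr.ret_value = emulate_csr(run->riscv_csr.csr_num,
						       run->riscv_csr.new_value,
						       run->riscv_csr.write_mask);
		break;
	}
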
diff --cc tools/testing/selftests/kvm/lib/aarch64/ucall.c
index be1d9728c4cea,0b949ee06b5e7..ed237b7446907
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@@ -77,20 -78,19 +76,20 @@@ void ucall(uint64_t cmd, int nargs, ...
  	va_list va;
  	int i;
  
 +	WRITE_ONCE(uc.cmd, cmd);
- 	nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+ 	nargs = min(nargs, UCALL_MAX_ARGS);
  
  	va_start(va, nargs);
  	for (i = 0; i < nargs; ++i)
 -		uc.args[i] = va_arg(va, uint64_t);
 +		WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
  	va_end(va);
  
 -	*ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
 +	WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc);
  }
  
- uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+ uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
  {
- 	struct kvm_run *run = vcpu_state(vm, vcpu_id);
+ 	struct kvm_run *run = vcpu->run;
  	struct ucall ucall = {};
  
  	if (uc)
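
After this signature change, selftest consumers follow roughly the same pattern; a sketch mirroring the rseq_test conversion below:

	struct ucall uc;

	vcpu_run(vcpu);
	switch (get_ucall(vcpu, &uc)) {
	case UCALL_SYNC:
		break;
	case UCALL_ABORT:
		TEST_FAIL("guest assert: %s", (const char *)uc.args[0]);
	default:
		TEST_FAIL("unexpected ucall");
	}
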
diff --cc tools/testing/selftests/kvm/rseq_test.c
index 2237d1aac8014,aba7be178dab6..a54d4d05a0584
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@@ -229,15 -224,14 +225,15 @@@ int main(int argc, char *argv[]
  	 * GUEST_SYNC, while concurrently migrating the process by setting its
  	 * CPU affinity.
  	 */
- 	vm = vm_create_default(VCPU_ID, 0, guest_code);
+ 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
  	ucall_init(vm, NULL);
  
 -	pthread_create(&migration_thread, NULL, migration_worker, 0);
 +	pthread_create(&migration_thread, NULL, migration_worker,
 +		       (void *)(unsigned long)gettid());
  
  	for (i = 0; !done; i++) {
- 		vcpu_run(vm, VCPU_ID);
- 		TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
+ 		vcpu_run(vcpu);
+ 		TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
  			    "Guest failed?");
  
  		/*
diff --cc virt/kvm/kvm_main.c
index e3a6f76474745,da263c370d00d..32896c845ffe2
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@@ -379,8 -396,9 +396,9 @@@ static inline void *mmu_memory_cache_al
  		return (void *)__get_free_page(gfp_flags);
  }
  
- int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
+ int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
  {
 -	gfp_t gfp = GFP_KERNEL_ACCOUNT;
++	gfp_t gfp = mc->gfp_custom ? mc->gfp_custom : GFP_KERNEL_ACCOUNT;
  	void *obj;
  
  	if (mc->nobjs >= min)