From: Paolo Bonzini <pbonzini@redhat.com>
Date: Fri, 29 Jul 2022 13:46:01 +0000 (-0400)
Subject: Merge remote-tracking branch 'kvm/next' into kvm-next-5.20
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=63f4b210414b65aa3103c54369cacbd0b1bdf02f;p=linux.git

Merge remote-tracking branch 'kvm/next' into kvm-next-5.20

KVM/s390, KVM/x86 and common infrastructure changes for 5.20

x86:

* Permit guests to ignore single-bit ECC errors

* Fix races in gfn->pfn cache refresh; do not pin pages tracked by the cache

* Intel IPI virtualization

* Allow getting/setting a pending triple fault with KVM_GET/SET_VCPU_EVENTS

* PEBS virtualization

* Simplify PMU emulation by just using PERF_TYPE_RAW events

* More accurate event reinjection on SVM (avoid retrying instructions)

* Allow getting/setting the state of the speaker port data bit

* Refuse to load the kvm-intel module if VM-Entry/VM-Exit controls are inconsistent

* "Notify" VM exit (detect microarchitectural hangs) for Intel

* Cleanups for MCE MSR emulation

s390:

* Add an interface to provide a hypervisor dump for secure guests

* Improve selftests to use the TAP interface

* Enable interpretive execution of zPCI instructions (for PCI passthrough)

* First part of deferred teardown

* CPU topology

* PV attestation

* Minor fixes

Generic:

* New selftests API using struct kvm_vcpu instead of a (vm, id) tuple (a short sketch follows the diff below)

x86:

* Use try_cmpxchg64 instead of cmpxchg64

* Bugfixes

* Ignore benign host accesses to PMU MSRs when PMU is disabled

* Allow disabling KVM's "MONITOR/MWAIT are NOPs!" behavior

* x86/MMU: Allow NX huge pages to be disabled on a per-VM basis

* Port eager page splitting to the shadow MMU as well

* Enable CMCI capability by default and handle injected UCNA errors

* Expose the pid of vcpu threads in debugfs

* x2AVIC support for AMD

* Clean up PIO emulation

* Fixes for LLDT/LTR emulation

* Don't require a refcounted "struct page" to create huge SPTEs

x86 cleanups:

* Use separate namespaces for guest PTE and shadow PTE bitmasks

* PIO emulation

* Reorganize rmap API, mostly around rmap destruction

* Do not work around very old KVM bugs for L0 that runs with nesting enabled

* New selftests API for CPUID

---

63f4b210414b65aa3103c54369cacbd0b1bdf02f
diff --cc arch/riscv/kvm/mmu.c
index bc545aef60344,081f8d2b9cf3d..3a35b2d95697c
--- a/arch/riscv/kvm/mmu.c
+++ b/arch/riscv/kvm/mmu.c
@@@ -351,11 -350,7 +351,10 @@@ int kvm_riscv_gstage_ioremap(struct kv
  	int ret = 0;
  	unsigned long pfn;
  	phys_addr_t addr, end;
- 	struct kvm_mmu_memory_cache pcache;
- 
- 	memset(&pcache, 0, sizeof(pcache));
- 	pcache.gfp_custom = (in_atomic) ? GFP_ATOMIC | __GFP_ACCOUNT : 0;
- 	pcache.gfp_zero = __GFP_ZERO;
 -	struct kvm_mmu_memory_cache pcache = { .gfp_zero = __GFP_ZERO };
++	struct kvm_mmu_memory_cache pcache = {
++		.gfp_custom = (in_atomic) ? GFP_ATOMIC | __GFP_ACCOUNT : 0,
++		.gfp_zero = __GFP_ZERO,
++	};
  
  	end = (gpa + size + PAGE_SIZE - 1) & PAGE_MASK;
  	pfn = __phys_to_pfn(hpa);
diff --cc arch/x86/kvm/vmx/capabilities.h
index c0e24826a86f7,069d8d298e1de..c5e5dfef69c7f
--- a/arch/x86/kvm/vmx/capabilities.h
+++ b/arch/x86/kvm/vmx/capabilities.h
@@@ -4,8 -4,10 +4,10 @@@
  
  #include <asm/vmx.h>
  
 -#include "lapic.h"
 -#include "x86.h"
 -#include "pmu.h"
 -#include "cpuid.h"
 +#include "../lapic.h"
 +#include "../x86.h"
++#include "../pmu.h"
++#include "../cpuid.h"
  
  extern bool __read_mostly enable_vpid;
  extern bool __read_mostly flexpriority_enabled;
diff --cc arch/x86/kvm/x86.c
index e5fa335a4ea79,5366f884e9a70..33560bfa0cac6
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@@ -298,7 -286,8 +286,8 @@@ const struct _kvm_stats_desc kvm_vcpu_s
  	STATS_DESC_COUNTER(VCPU, directed_yield_successful),
  	STATS_DESC_COUNTER(VCPU, preemption_reported),
  	STATS_DESC_COUNTER(VCPU, preemption_other),
- 	STATS_DESC_IBOOLEAN(VCPU, guest_mode)
 -	STATS_DESC_ICOUNTER(VCPU, guest_mode),
++	STATS_DESC_IBOOLEAN(VCPU, guest_mode),
+ 	STATS_DESC_COUNTER(VCPU, notify_window_exits),
  };
  
  const struct kvm_stats_header kvm_vcpu_stats_header = {
@@@ -6028,12 -6172,11 +6175,16 @@@ split_irqchip_unlock
  		kvm->arch.exception_payload_enabled = cap->args[0];
  		r = 0;
  		break;
+ 	case KVM_CAP_X86_TRIPLE_FAULT_EVENT:
+ 		kvm->arch.triple_fault_event = cap->args[0];
+ 		r = 0;
+ 		break;
  	case KVM_CAP_X86_USER_SPACE_MSR:
 +		r = -EINVAL;
 +		if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL |
 +				     KVM_MSR_EXIT_REASON_UNKNOWN |
 +				     KVM_MSR_EXIT_REASON_FILTER))
 +			break;
  		kvm->arch.user_space_msr_mask = cap->args[0];
  		r = 0;
  		break;
diff --cc include/linux/kvm_types.h
index 1dcfba68076a7,4d933518060fa..3ca3db020e0e3
--- a/include/linux/kvm_types.h
+++ b/include/linux/kvm_types.h
@@@ -87,9 -92,9 +92,10 @@@ struct gfn_to_pfn_cache
  struct kvm_mmu_memory_cache {
  	int nobjs;
  	gfp_t gfp_zero;
 +	gfp_t gfp_custom;
  	struct kmem_cache *kmem_cache;
- 	void *objects[KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE];
+ 	int capacity;
+ 	void **objects;
  };
  
  #endif
diff --cc include/uapi/linux/kvm.h
index 0c1f42a40fd35,7e06194129e3f..c823a136f9236
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@@ -270,7 -270,7 +270,8 @@@ struct kvm_xen_exit
  #define KVM_EXIT_X86_BUS_LOCK     33
  #define KVM_EXIT_XEN              34
  #define KVM_EXIT_RISCV_SBI        35
 -#define KVM_EXIT_NOTIFY           36
 +#define KVM_EXIT_RISCV_CSR        36
++#define KVM_EXIT_NOTIFY           37
  
  /* For KVM_EXIT_INTERNAL_ERROR */
  /* Emulate instruction failed. */
@@@ -497,13 -497,11 +498,18 @@@ struct kvm_run
  			unsigned long args[6];
  			unsigned long ret[2];
  		} riscv_sbi;
 +		/* KVM_EXIT_RISCV_CSR */
 +		struct {
 +			unsigned long csr_num;
 +			unsigned long new_value;
 +			unsigned long write_mask;
 +			unsigned long ret_value;
 +		} riscv_csr;
+ 		/* KVM_EXIT_NOTIFY */
+ 		struct {
+ #define KVM_NOTIFY_CONTEXT_INVALID	(1 << 0)
+ 			__u32 flags;
+ 		} notify;
  		/* Fix the size of the union. */
  		char padding[256];
diff --cc tools/testing/selftests/kvm/lib/aarch64/ucall.c
index be1d9728c4cea,0b949ee06b5e7..ed237b7446907
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@@ -77,20 -78,19 +76,20 @@@ void ucall(uint64_t cmd, int nargs, ...
  	va_list va;
  	int i;
  
 +	WRITE_ONCE(uc.cmd, cmd);
- 	nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
+ 	nargs = min(nargs, UCALL_MAX_ARGS);
  
  	va_start(va, nargs);
  	for (i = 0; i < nargs; ++i)
 -		uc.args[i] = va_arg(va, uint64_t);
 +		WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
  	va_end(va);
  
 -	*ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
 +	WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc);
  }
  
- uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
+ uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
  {
- 	struct kvm_run *run = vcpu_state(vm, vcpu_id);
+ 	struct kvm_run *run = vcpu->run;
  	struct ucall ucall = {};
  
  	if (uc)
diff --cc tools/testing/selftests/kvm/rseq_test.c
index 2237d1aac8014,aba7be178dab6..a54d4d05a0584
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@@ -229,15 -224,14 +225,15 @@@ int main(int argc, char *argv[]
  	 * GUEST_SYNC, while concurrently migrating the process by setting its
  	 * CPU affinity.
  	 */
- 	vm = vm_create_default(VCPU_ID, 0, guest_code);
+ 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
  	ucall_init(vm, NULL);
  
 -	pthread_create(&migration_thread, NULL, migration_worker, 0);
 +	pthread_create(&migration_thread, NULL, migration_worker,
 +		       (void *)(unsigned long)gettid());
  
  	for (i = 0; !done; i++) {
- 		vcpu_run(vm, VCPU_ID);
- 		TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
+ 		vcpu_run(vcpu);
+ 		TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
  			    "Guest failed?");
  
  		/*
diff --cc virt/kvm/kvm_main.c
index e3a6f76474745,da263c370d00d..32896c845ffe2
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@@ -379,8 -396,9 +396,9 @@@ static inline void *mmu_memory_cache_al
  	return (void *)__get_free_page(gfp_flags);
  }
  
- int kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int min)
+ int __kvm_mmu_topup_memory_cache(struct kvm_mmu_memory_cache *mc, int capacity, int min)
  {
 -	gfp_t gfp = GFP_KERNEL_ACCOUNT;
++	gfp_t gfp = mc->gfp_custom ? mc->gfp_custom : GFP_KERNEL_ACCOUNT;
  	void *obj;
  
  	if (mc->nobjs >= min)
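
The ucall.c and rseq_test.c hunks above are conversions to the "new selftests API
using struct kvm_vcpu instead of a (vm, id) tuple" mentioned in the log. As a
minimal sketch of what a converted test looks like, assuming the usual selftests
harness header ("kvm_util.h") and a hypothetical guest_code body; the harness
calls themselves (vm_create_with_one_vcpu, ucall_init, vcpu_run, get_ucall) are
the ones visible in the hunks, but this snippet is illustrative and not part of
the merge:

	#include "kvm_util.h"

	/* Hypothetical guest body: one sync handshake, then done. */
	static void guest_code(void)
	{
		GUEST_SYNC(0);		/* handshake with the host via ucall */
		GUEST_DONE();
	}

	int main(void)
	{
		struct kvm_vcpu *vcpu;
		struct kvm_vm *vm;

		/* One call creates the VM and hands back a vcpu pointer... */
		vm = vm_create_with_one_vcpu(&vcpu, guest_code);
		ucall_init(vm, NULL);

		/* ...and that pointer replaces the old (vm, VCPU_ID) tuple. */
		vcpu_run(vcpu);
		TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC, "Guest failed?");

		kvm_vm_free(vm);
		return 0;
	}

The rseq_test.c conversion above applies exactly this pattern inside its
vcpu_run/get_ucall loop.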