KVM: x86/pmu: Update AMD PMC sample period to fix guest NMI-watchdog
authorLike Xu <likexu@tencent.com>
Sat, 9 Apr 2022 01:52:26 +0000 (09:52 +0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 27 Apr 2022 12:39:00 +0000 (14:39 +0200)
commit 75189d1de1b377e580ebd2d2c55914631eac9c64 upstream.

NMI-watchdog is one of the favorite features of kernel developers,
but it does not work in AMD guest even with vPMU enabled and worse,
the system misrepresents this capability via /proc.

This is a PMC emulation error. KVM does not pass the latest valid
value to perf_event in time when guest NMI-watchdog is running, thus
the perf_event corresponding to the watchdog counter will enter the
old state at some point after the first guest NMI injection, forcing
the hardware register PMC0 to be constantly written to 0x800000000001.

Meanwhile, the running counter should accurately reflect its new value
based on the latest coordinated pmc->counter (from vPMC's point of view)
rather than the value written directly by the guest.

Fixes: 168d918f2643 ("KVM: x86: Adjust counter sample period after a wrmsr")
Reported-by: Dongli Cao <caodongli@kingsoft.com>
Signed-off-by: Like Xu <likexu@tencent.com>
Reviewed-by: Yanan Wang <wangyanan55@huawei.com>
Tested-by: Yanan Wang <wangyanan55@huawei.com>
Reviewed-by: Jim Mattson <jmattson@google.com>
Message-Id: <20220409015226.38619-1-likexu@tencent.com>
Cc: stable@vger.kernel.org
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
arch/x86/kvm/pmu.h
arch/x86/kvm/svm/pmu.c
arch/x86/kvm/vmx/pmu_intel.c

index a06d95165ac7cf5687686b3e4dfcfac35d996d6d..c206decb39fab174757d756ac2376dc1a13668b9 100644 (file)
@@ -141,6 +141,15 @@ static inline u64 get_sample_period(struct kvm_pmc *pmc, u64 counter_value)
        return sample_period;
 }
 
+static inline void pmc_update_sample_period(struct kvm_pmc *pmc)
+{
+       if (!pmc->perf_event || pmc->is_paused)
+               return;
+
+       perf_event_period(pmc->perf_event,
+                         get_sample_period(pmc, pmc->counter));
+}
+
 void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel);
 void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int fixed_idx);
 void reprogram_counter(struct kvm_pmu *pmu, int pmc_idx);
index 3faf1d9c6c91c6ceace9a2a68a1c3d3d324601c9..f337ce7e898e3d8e2d2b8c130041c648ff959936 100644 (file)
@@ -256,6 +256,7 @@ static int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
        pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER);
        if (pmc) {
                pmc->counter += data - pmc_read_counter(pmc);
+               pmc_update_sample_period(pmc);
                return 0;
        }
        /* MSR_EVNTSELn */
index 7abe77c8b5d03f4a37bd8a7dca85a00155ebf3ce..e7275ce15a8b0160d3691c8faf6a2be1c89d440c 100644 (file)
@@ -439,15 +439,11 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                            !(msr & MSR_PMC_FULL_WIDTH_BIT))
                                data = (s64)(s32)data;
                        pmc->counter += data - pmc_read_counter(pmc);
-                       if (pmc->perf_event && !pmc->is_paused)
-                               perf_event_period(pmc->perf_event,
-                                                 get_sample_period(pmc, data));
+                       pmc_update_sample_period(pmc);
                        return 0;
                } else if ((pmc = get_fixed_pmc(pmu, msr))) {
                        pmc->counter += data - pmc_read_counter(pmc);
-                       if (pmc->perf_event && !pmc->is_paused)
-                               perf_event_period(pmc->perf_event,
-                                                 get_sample_period(pmc, data));
+                       pmc_update_sample_period(pmc);
                        return 0;
                } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) {
                        if (data == pmc->eventsel)