perf/x86/amd/lbr: Use freeze based on availability
author Sandipan Das <sandipan.das@amd.com>
Mon, 25 Mar 2024 07:31:45 +0000 (13:01 +0530)
committer Ingo Molnar <mingo@kernel.org>
Mon, 25 Mar 2024 10:16:55 +0000 (11:16 +0100)
Currently, the LBR code assumes that LBR Freeze is supported on all processors
when X86_FEATURE_AMD_LBR_V2 is available i.e. CPUID leaf 0x80000022[EAX]
bit 1 is set. This is incorrect as the availability of the feature is
additionally dependent on CPUID leaf 0x80000022[EAX] bit 2 being set,
which may not be set for all Zen 4 processors.

Define a new feature bit for LBR and PMC freeze and set the freeze enable bit
(FLBRI) in DebugCtl (MSR 0x1d9) conditionally.

It should still be possible to use LBR without freeze for profile-guided
optimization of user programs by using a user-only branch filter during
profiling. When the user-only filter is enabled, branches are no longer
recorded after the transition to CPL 0 upon PMI arrival. When branch
entries are read in the PMI handler, the branch stack does not change.

E.g.

  $ perf record -j any,u -e ex_ret_brn_tkn ./workload

Since the feature bit is visible under flags in /proc/cpuinfo, it can be
used to determine the feasibility of use-cases which require LBR Freeze
to be supported by the hardware such as profile-guided optimization of
kernels.

Fixes: ca5b7c0d9621 ("perf/x86/amd/lbr: Add LbrExtV2 branch record support")
Signed-off-by: Sandipan Das <sandipan.das@amd.com>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Link: https://lore.kernel.org/r/69a453c97cfd11c6f2584b19f937fe6df741510f.1711091584.git.sandipan.das@amd.com
arch/x86/events/amd/core.c
arch/x86/events/amd/lbr.c
arch/x86/include/asm/cpufeatures.h
arch/x86/kernel/cpu/scattered.c

index aec16e581f5b2aad520bcdfbab7bff61ca99c072..5692e827afef39a5352354e90543099a57e66757 100644 (file)
@@ -904,8 +904,8 @@ static int amd_pmu_v2_handle_irq(struct pt_regs *regs)
        if (!status)
                goto done;
 
-       /* Read branch records before unfreezing */
-       if (status & GLOBAL_STATUS_LBRS_FROZEN) {
+       /* Read branch records */
+       if (x86_pmu.lbr_nr) {
                amd_pmu_lbr_read();
                status &= ~GLOBAL_STATUS_LBRS_FROZEN;
        }
index 4a1e600314d5df124403636915a0f5fcf64ec475..5149830c7c4fa61207a3a30122e07a24964374d9 100644 (file)
@@ -402,10 +402,12 @@ void amd_pmu_lbr_enable_all(void)
                wrmsrl(MSR_AMD64_LBR_SELECT, lbr_select);
        }
 
-       rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-       rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
+       if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+               rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+               wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+       }
 
-       wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+       rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
        wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg | DBG_EXTN_CFG_LBRV2EN);
 }
 
@@ -418,10 +420,12 @@ void amd_pmu_lbr_disable_all(void)
                return;
 
        rdmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg);
-       rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
-
        wrmsrl(MSR_AMD_DBG_EXTN_CFG, dbg_extn_cfg & ~DBG_EXTN_CFG_LBRV2EN);
-       wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+
+       if (cpu_feature_enabled(X86_FEATURE_AMD_LBR_PMC_FREEZE)) {
+               rdmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl);
+               wrmsrl(MSR_IA32_DEBUGCTLMSR, dbg_ctl & ~DEBUGCTLMSR_FREEZE_LBRS_ON_PMI);
+       }
 }
 
 __init int amd_pmu_lbr_init(void)
index 4d850a780f7ed91092cba7845057a4c2c25400bb..a38f8f9ba65729125234814c08547498e4e3b8bc 100644 (file)
 #define X86_FEATURE_IBPB_BRTYPE                (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */
 #define X86_FEATURE_SRSO_NO            (20*32+29) /* "" CPU is not affected by SRSO */
 
+/*
+ * Extended auxiliary flags: Linux defined - for features scattered in various
+ * CPUID levels like 0x80000022, etc.
+ *
+ * Reuse free bits when adding new feature flags!
+ */
+#define X86_FEATURE_AMD_LBR_PMC_FREEZE (21*32+ 0) /* AMD LBR and PMC Freeze */
+
 /*
  * BUG word(s)
  */
index 0dad49a09b7a9e243c647a294e18e13326d140df..a515328d9d7d88b802f588bf678d098e0ba53b86 100644 (file)
@@ -49,6 +49,7 @@ static const struct cpuid_bit cpuid_bits[] = {
        { X86_FEATURE_BMEC,             CPUID_EBX,  3, 0x80000020, 0 },
        { X86_FEATURE_PERFMON_V2,       CPUID_EAX,  0, 0x80000022, 0 },
        { X86_FEATURE_AMD_LBR_V2,       CPUID_EAX,  1, 0x80000022, 0 },
+       { X86_FEATURE_AMD_LBR_PMC_FREEZE,       CPUID_EAX,  2, 0x80000022, 0 },
        { 0, 0, 0, 0, 0 }
 };