perf/x86/amd/lbr: Use fusion-aware branch classifier
author Sandipan Das <sandipan.das@amd.com>
Thu, 11 Aug 2022 12:29:59 +0000 (17:59 +0530)
committer Peter Zijlstra <peterz@infradead.org>
Fri, 26 Aug 2022 22:05:45 +0000 (00:05 +0200)
AMD Last Branch Record Extension Version 2 (LbrExtV2) can report a branch
from address that points to an instruction preceding the actual branch by
several bytes due to branch fusion and further optimizations in Zen4
processors.

In such cases, software should move forward sequentially in the instruction
stream from the reported address and the address of the first branch
encountered should be used instead. Hence, use the fusion-aware branch
classifier to determine the correct branch type and get the offset for
adjusting the branch from address.

Signed-off-by: Sandipan Das <sandipan.das@amd.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/c324d2d0a9c3976da30b9563d09e50bfee0f264d.1660211399.git.sandipan.das@amd.com
arch/x86/events/amd/lbr.c

index 1a8d27e0c1451ba78e6dab3766ccd45bba59f542..eb84f196b2ca0047251561c85d1a338e9cf59788 100644 (file)
@@ -97,7 +97,7 @@ static __always_inline u64 sign_ext_branch_ip(u64 ip)
 static void amd_pmu_lbr_filter(void)
 {
        struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
-       int br_sel = cpuc->br_sel, type, i, j;
+       int br_sel = cpuc->br_sel, offset, type, i, j;
        bool compress = false;
        u64 from, to;
 
@@ -109,7 +109,15 @@ static void amd_pmu_lbr_filter(void)
        for (i = 0; i < cpuc->lbr_stack.nr; i++) {
                from = cpuc->lbr_entries[i].from;
                to = cpuc->lbr_entries[i].to;
-               type = branch_type(from, to, 0);
+               type = branch_type_fused(from, to, 0, &offset);
+
+               /*
+                * Adjust the branch from address in case of instruction
+                * fusion where it points to an instruction preceding the
+                * actual branch
+                */
+               if (offset)
+                       cpuc->lbr_entries[i].from += offset;
 
                /* If type does not correspond, then discard */
                if (type == X86_BR_NONE || (br_sel & type) != type) {