perf tools: Add branch counter knob
author    Kan Liang <kan.liang@linux.intel.com>
          Wed, 25 Oct 2023 20:16:26 +0000 (13:16 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
          Thu, 9 Nov 2023 16:47:50 +0000 (13:47 -0300)
Add a new branch filter, "counter", for the branch counter option. It is
used to mark the events whose occurrences should be logged in the branch
stack. If it is applied with the -j option, the counters of all the events
are logged in the branch stack. If a legacy kernel doesn't support the new
branch sample type, the branch counter filter is switched off.

The counter values stored with each branch are displayed right after the
regular branch stack information in the 'perf report -D' output.
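
For reference, a minimal sketch of where the counters sit in the sample
record, inferred from the evsel__parse_sample() change below (the hw_idx
word is only present with PERF_SAMPLE_BRANCH_HW_INDEX):

  /*
   * Sketch of the PERF_SAMPLE_BRANCH_STACK payload when
   * PERF_SAMPLE_BRANCH_COUNTERS is set:
   *
   *   u64                 nr;
   *   u64                 hw_idx;        // if PERF_SAMPLE_BRANCH_HW_INDEX
   *   struct branch_entry entries[nr];
   *   u64                 counters[nr];  // one counter word per branch entry
   */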

Usage examples:

  # perf record -e "{branch-instructions,branch-misses}:S" -j any,counter

Only the first event, branch-instructions, collects the LBR. Both
branch-instructions and branch-misses are marked as logged events. Their
occurrence counts can be found in the branch stack extension space of each
branch.

  # perf record -e "{cpu/branch-instructions,branch_type=any/,cpu/branch-misses,branch_type=counter/}"

Only the first event, branch-instructions, collects the LBR. Only the
branch-misses event is marked as a logged event.
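
A hypothetical consumer sketch (not part of this patch) of how a tool in
perf could walk the new counters next to the branch entries, using the
branch_stack_cntr field added to struct perf_sample below; the helper name
dump_branch_counters() is made up for illustration:

  static void dump_branch_counters(struct perf_sample *sample)
  {
          struct branch_entry *entries = perf_sample__branch_entries(sample);
          u64 *cntr = sample->branch_stack_cntr;  /* unset without PERF_SAMPLE_BRANCH_COUNTERS */
          u64 i;

          if (!cntr)
                  return;

          /* One counter word per branch entry, in the same order. */
          for (i = 0; i < sample->branch_stack->nr; i++)
                  printf("%2" PRIu64 ": %016" PRIx64 " -> %016" PRIx64 " counters: %016" PRIx64 "\n",
                         i, entries[i].from, entries[i].to, cntr[i]);
  }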

Committer notes:

I noticed 'perf test "Sample parsing"' failing and reported it to the
list; Kan provided a patch that checks that the evsel has a leader and
that evsel->evlist is set. The comment in the source code explains it
further.

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Alexey Bayduraev <alexey.v.bayduraev@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Tinghao Zhang <tinghao.zhang@intel.com>
Link: https://lore.kernel.org/r/20231025201626.3000228-8-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-record.txt
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/parse-branch-options.c
tools/perf/util/perf_event_attr_fprintf.c
tools/perf/util/sample.h
tools/perf/util/session.c

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 1889f66addf2aa936bafea132aed55ab0908ff8d..6015fdd08fb63b679b56195b7734e7449a318851 100644
@@ -445,6 +445,10 @@ following filters are defined:
                     4th-Gen Xeon+ server), the save branch type is unconditionally enabled
                     when the taken branch stack sampling is enabled.
        - priv: save privilege state during sampling in case binary is not available later
+       - counter: save occurrences of the event since the last branch entry. Currently, the
+                  feature is only supported by newer CPUs, e.g., Intel Sierra Forest and
+                  later platforms. An error is expected if it is used on an unsupported
+                  kernel or CPU.
 
 +
 The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 72a5dfc38d3806c50ed3c0b933d9a94a56215945..a5da74e3a517b52ca90aa47e99b3999b80bdd4a2 100644
@@ -1832,6 +1832,8 @@ static int __evsel__prepare_open(struct evsel *evsel, struct perf_cpu_map *cpus,
 
 static void evsel__disable_missing_features(struct evsel *evsel)
 {
+       if (perf_missing_features.branch_counters)
+               evsel->core.attr.branch_sample_type &= ~PERF_SAMPLE_BRANCH_COUNTERS;
        if (perf_missing_features.read_lost)
                evsel->core.attr.read_format &= ~PERF_FORMAT_LOST;
        if (perf_missing_features.weight_struct) {
@@ -1885,7 +1887,12 @@ bool evsel__detect_missing_features(struct evsel *evsel)
         * Must probe features in the order they were added to the
         * perf_event_attr interface.
         */
-       if (!perf_missing_features.read_lost &&
+       if (!perf_missing_features.branch_counters &&
+           (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS)) {
+               perf_missing_features.branch_counters = true;
+               pr_debug2("switching off branch counters support\n");
+               return true;
+       } else if (!perf_missing_features.read_lost &&
            (evsel->core.attr.read_format & PERF_FORMAT_LOST)) {
                perf_missing_features.read_lost = true;
                pr_debug2("switching off PERF_FORMAT_LOST support\n");
@@ -2318,6 +2325,22 @@ u64 evsel__bitfield_swap_branch_flags(u64 value)
        return new_val;
 }
 
+static inline bool evsel__has_branch_counters(const struct evsel *evsel)
+{
+       struct evsel *cur, *leader = evsel__leader(evsel);
+
+       /* The branch counters feature only supports group */
+       if (!leader || !evsel->evlist)
+               return false;
+
+       evlist__for_each_entry(evsel->evlist, cur) {
+               if ((leader == evsel__leader(cur)) &&
+                   (cur->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS))
+                       return true;
+       }
+       return false;
+}
+
 int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
                        struct perf_sample *data)
 {
@@ -2551,6 +2574,16 @@ int evsel__parse_sample(struct evsel *evsel, union perf_event *event,
 
                OVERFLOW_CHECK(array, sz, max_size);
                array = (void *)array + sz;
+
+               if (evsel__has_branch_counters(evsel)) {
+                       OVERFLOW_CHECK_u64(array);
+
+                       data->branch_stack_cntr = (u64 *)array;
+                       sz = data->branch_stack->nr * sizeof(u64);
+
+                       OVERFLOW_CHECK(array, sz, max_size);
+                       array = (void *)array + sz;
+               }
        }
 
        if (type & PERF_SAMPLE_REGS_USER) {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index d791316a1792e5931ef5ebaf81215f21104636c8..f19ac9f027efc75db438b9f5f1ac9916b7b7f1aa 100644
@@ -191,6 +191,7 @@ struct perf_missing_features {
        bool code_page_size;
        bool weight_struct;
        bool read_lost;
+       bool branch_counters;
 };
 
 extern struct perf_missing_features perf_missing_features;
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
index fd67d204d720d9ba7859fa25e1b96a5b1ebf9c75..f7f7aff3d85a049000828a9fcb9ecc3ad9026389 100644
@@ -36,6 +36,7 @@ static const struct branch_mode branch_modes[] = {
        BRANCH_OPT("stack", PERF_SAMPLE_BRANCH_CALL_STACK),
        BRANCH_OPT("hw_index", PERF_SAMPLE_BRANCH_HW_INDEX),
        BRANCH_OPT("priv", PERF_SAMPLE_BRANCH_PRIV_SAVE),
+       BRANCH_OPT("counter", PERF_SAMPLE_BRANCH_COUNTERS),
        BRANCH_END
 };
 
diff --git a/tools/perf/util/perf_event_attr_fprintf.c b/tools/perf/util/perf_event_attr_fprintf.c
index 2247991451f3aa1ba0969b9ad4f1f22e595b2a21..8f04d3b7f3ec783bee9981fa096b145e80fabc91 100644
@@ -55,6 +55,7 @@ static void __p_branch_sample_type(char *buf, size_t size, u64 value)
                bit_name(COND), bit_name(CALL_STACK), bit_name(IND_JUMP),
                bit_name(CALL), bit_name(NO_FLAGS), bit_name(NO_CYCLES),
                bit_name(TYPE_SAVE), bit_name(HW_INDEX), bit_name(PRIV_SAVE),
+               bit_name(COUNTERS),
                { .name = NULL, }
        };
 #undef bit_name
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index c92ad0f51ecd97d5727474b0a6b73e24ef37c41e..70b2c3135555ec2689fb5e824293195103c41590 100644
@@ -113,6 +113,7 @@ struct perf_sample {
        void *raw_data;
        struct ip_callchain *callchain;
        struct branch_stack *branch_stack;
+       u64 *branch_stack_cntr;
        struct regs_dump  user_regs;
        struct regs_dump  intr_regs;
        struct stack_dump user_stack;
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1e9aa8ed15b6445eb906b76738f1c780cebb0713..4a094ab0362b41ec84d5d69ede04ef69425404b6 100644
@@ -1150,9 +1150,13 @@ static void callchain__printf(struct evsel *evsel,
                       i, callchain->ips[i]);
 }
 
-static void branch_stack__printf(struct perf_sample *sample, bool callstack)
+static void branch_stack__printf(struct perf_sample *sample,
+                                struct evsel *evsel)
 {
        struct branch_entry *entries = perf_sample__branch_entries(sample);
+       bool callstack = evsel__has_branch_callstack(evsel);
+       u64 *branch_stack_cntr = sample->branch_stack_cntr;
+       struct perf_env *env = evsel__env(evsel);
        uint64_t i;
 
        if (!callstack) {
@@ -1194,6 +1198,13 @@ static void branch_stack__printf(struct perf_sample *sample, bool callstack)
                        }
                }
        }
+
+       if (branch_stack_cntr) {
+               printf("... branch stack counters: nr:%" PRIu64 " (counter width: %u max counter nr:%u)\n",
+                       sample->branch_stack->nr, env->br_cntr_width, env->br_cntr_nr);
+               for (i = 0; i < sample->branch_stack->nr; i++)
+                       printf("..... %2"PRIu64": %016" PRIx64 "\n", i, branch_stack_cntr[i]);
+       }
 }
 
 static void regs_dump__printf(u64 mask, u64 *regs, const char *arch)
@@ -1355,7 +1366,7 @@ static void dump_sample(struct evsel *evsel, union perf_event *event,
                callchain__printf(evsel, sample);
 
        if (evsel__has_br_stack(evsel))
-               branch_stack__printf(sample, evsel__has_branch_callstack(evsel));
+               branch_stack__printf(sample, evsel);
 
        if (sample_type & PERF_SAMPLE_REGS_USER)
                regs_user__printf(sample, arch);