perf top: Add option to enable the LBR stitching approach
author: Kan Liang <kan.liang@linux.intel.com>
Thu, 19 Mar 2020 20:25:15 +0000 (13:25 -0700)
committer: Arnaldo Carvalho de Melo <acme@redhat.com>
Sat, 18 Apr 2020 12:05:01 +0000 (09:05 -0300)
With the LBR stitching approach, the reconstructed LBR call stack
can break the HW limitation. However, it may reconstruct invalid call
stacks in some cases, e.g. exception handling such as setjmp/longjmp.
Also, it may impact the processing time, especially when the number of
samples with stitched LBRs is huge.

Add an option to enable the approach.
The option must be used with --call-graph lbr.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexey Budankov <alexey.budankov@linux.intel.com>
Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Pavel Gerasimov <pavel.gerasimov@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@linux.ibm.com>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vitaly Slobodskoy <vitaly.slobodskoy@intel.com>
Link: http://lore.kernel.org/lkml/20200319202517.23423-16-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-top.txt
tools/perf/builtin-top.c
tools/perf/util/top.h

index 487737a725e97c56ad580f5edacf52f2c5a4c90c..20227dabc208400f44f864b5c40cdd3f3dfefb1f 100644 (file)
@@ -319,6 +319,15 @@ Default is to monitor all CPUS.
        go straight to the histogram browser, just like 'perf top' with no events
        explicitely specified does.
 
+--stitch-lbr::
+       Show callgraph with stitched LBRs, which may have more complete
+       callgraph. The option must be used with --call-graph lbr recording.
+       Disabled by default. In common cases with call stack overflows,
+       it can recreate better call stacks than the default lbr call stack
+       output. But this approach is not foolproof. There can be cases
+       where it creates incorrect call stacks from incorrect matches.
+       The known limitations include exception handling such as
+       setjmp/longjmp, where calls/returns will not match.
 
 INTERACTIVE PROMPTING KEYS
 --------------------------
index 289cf83e658a371235fafdb61899077d5bb976e7..6b067a5ba1d50620ad4788c6b63b0fd935c864a3 100644 (file)
@@ -33,6 +33,7 @@
 #include "util/map.h"
 #include "util/mmap.h"
 #include "util/session.h"
+#include "util/thread.h"
 #include "util/symbol.h"
 #include "util/synthetic-events.h"
 #include "util/top.h"
@@ -775,6 +776,9 @@ static void perf_event__process_sample(struct perf_tool *tool,
        if (machine__resolve(machine, &al, sample) < 0)
                return;
 
+       if (top->stitch_lbr)
+               al.thread->lbr_stitch_enable = true;
+
        if (!machine->kptr_restrict_warned &&
            symbol_conf.kptr_restrict &&
            al.cpumode == PERF_RECORD_MISC_KERNEL) {
@@ -1571,6 +1575,8 @@ int cmd_top(int argc, const char **argv)
                    "Sort the output by the event at the index n in group. "
                    "If n is invalid, sort by the first event. "
                    "WARNING: should be used on grouped events."),
+       OPT_BOOLEAN(0, "stitch-lbr", &top.stitch_lbr,
+                   "Enable LBR callgraph stitching approach"),
        OPTS_EVSWITCH(&top.evswitch),
        OPT_END()
        };
@@ -1640,6 +1646,11 @@ int cmd_top(int argc, const char **argv)
                }
        }
 
+       if (top.stitch_lbr && !(callchain_param.record_mode == CALLCHAIN_LBR)) {
+               pr_err("Error: --stitch-lbr must be used with --call-graph lbr\n");
+               goto out_delete_evlist;
+       }
+
        if (opts->branch_stack && callchain_param.enabled)
                symbol_conf.show_branchflag_count = true;
 
index f117d4f4821e0a26d441aec796c0ae6dcefcade4..45dc84ddff37412ad892d7125efaeca81f60f611 100644 (file)
@@ -36,6 +36,7 @@ struct perf_top {
        bool               use_tui, use_stdio;
        bool               vmlinux_warned;
        bool               dump_symtab;
+       bool               stitch_lbr;
        struct hist_entry  *sym_filter_entry;
        struct evsel       *sym_evsel;
        struct perf_session *session;