perf script: Support instruction latency
author Kan Liang <kan.liang@linux.intel.com>
Wed, 29 Sep 2021 15:38:14 +0000 (08:38 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Thu, 28 Oct 2021 12:28:03 +0000 (09:28 -0300)
The instruction latency information can be recorded on
some platforms, e.g., the Intel Sapphire Rapids server. With both memory
latency (weight) and the new instruction latency information, users can
easily locate the expensive load instructions, and also understand the time
spent in different stages. The users can optimize their applications in
different pipeline stages.

Add a new output field "ins_lat" to display the instruction latency
information, which is available with sample type PERF_SAMPLE_WEIGHT_STRUCT.

Signed-off-by: Kan Liang <kan.liang@linux.intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joe Mario <jmario@redhat.com>
Link: https://lore.kernel.org/r/1632929894-102778-2-git-send-email-kan.liang@linux.intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-script.txt
tools/perf/builtin-script.c

index c80515243560ce100a6f5c0ab69afff105282b48..b0070718784ddcfd343c22634c75c8132b3e9013 100644 (file)
@@ -130,7 +130,7 @@ OPTIONS
         comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
         srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output,
         brstackinsn, brstackoff, callindent, insn, insnlen, synth, phys_addr,
-        metric, misc, srccode, ipc, data_page_size, code_page_size.
+        metric, misc, srccode, ipc, data_page_size, code_page_size, ins_lat.
         Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace
index 6211d0b84b7a63edb843429ac1c2e6f8390f3887..a6258f6f816c253f27bdc0a70f8d15d0d25165b8 100644 (file)
@@ -122,6 +122,7 @@ enum perf_output_field {
        PERF_OUTPUT_TOD             = 1ULL << 32,
        PERF_OUTPUT_DATA_PAGE_SIZE  = 1ULL << 33,
        PERF_OUTPUT_CODE_PAGE_SIZE  = 1ULL << 34,
+       PERF_OUTPUT_INS_LAT         = 1ULL << 35,
 };
 
 struct perf_script {
@@ -188,6 +189,7 @@ struct output_option {
        {.str = "tod", .field = PERF_OUTPUT_TOD},
        {.str = "data_page_size", .field = PERF_OUTPUT_DATA_PAGE_SIZE},
        {.str = "code_page_size", .field = PERF_OUTPUT_CODE_PAGE_SIZE},
+       {.str = "ins_lat", .field = PERF_OUTPUT_INS_LAT},
 };
 
 enum {
@@ -262,7 +264,8 @@ static struct {
                              PERF_OUTPUT_DSO | PERF_OUTPUT_PERIOD |
                              PERF_OUTPUT_ADDR | PERF_OUTPUT_DATA_SRC |
                              PERF_OUTPUT_WEIGHT | PERF_OUTPUT_PHYS_ADDR |
-                             PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE,
+                             PERF_OUTPUT_DATA_PAGE_SIZE | PERF_OUTPUT_CODE_PAGE_SIZE |
+                             PERF_OUTPUT_INS_LAT,
 
                .invalid_fields = PERF_OUTPUT_TRACE | PERF_OUTPUT_BPF_OUTPUT,
        },
@@ -522,6 +525,10 @@ static int evsel__check_attr(struct evsel *evsel, struct perf_session *session)
            evsel__check_stype(evsel, PERF_SAMPLE_CODE_PAGE_SIZE, "CODE_PAGE_SIZE", PERF_OUTPUT_CODE_PAGE_SIZE))
                return -EINVAL;
 
+       if (PRINT_FIELD(INS_LAT) &&
+           evsel__check_stype(evsel, PERF_SAMPLE_WEIGHT_STRUCT, "WEIGHT_STRUCT", PERF_OUTPUT_INS_LAT))
+               return -EINVAL;
+
        return 0;
 }
 
@@ -2039,6 +2046,9 @@ static void process_event(struct perf_script *script,
        if (PRINT_FIELD(WEIGHT))
                fprintf(fp, "%16" PRIu64, sample->weight);
 
+       if (PRINT_FIELD(INS_LAT))
+               fprintf(fp, "%16" PRIu16, sample->ins_lat);
+
        if (PRINT_FIELD(IP)) {
                struct callchain_cursor *cursor = NULL;
 
@@ -3715,7 +3725,7 @@ int cmd_script(int argc, const char **argv)
                     "addr,symoff,srcline,period,iregs,uregs,brstack,"
                     "brstacksym,flags,bpf-output,brstackinsn,brstackoff,"
                     "callindent,insn,insnlen,synth,phys_addr,metric,misc,ipc,tod,"
-                    "data_page_size,code_page_size",
+                    "data_page_size,code_page_size,ins_lat",
                     parse_output_fields),
        OPT_BOOLEAN('a', "all-cpus", &system_wide,
                    "system-wide collection from all CPUs"),