perf stat: Add user_time and system_time events
authorFlorian Fischer <florian.fischer@muhq.space>
Wed, 20 Apr 2022 10:23:53 +0000 (12:23 +0200)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Wed, 20 Apr 2022 16:44:56 +0000 (13:44 -0300)
It bothered me that during benchmarking using 'perf stat' (to collect
for example CPU cache events) I could not simultaneously retrieve the
times spend in user or kernel mode in a machine readable format.

When running 'perf stat' the output for humans contains the times
reported by rusage and wait4.

  $ perf stat -e cache-misses:u -- true

   Performance counter stats for 'true':

             4,206      cache-misses:u

       0.001113619 seconds time elapsed

       0.001175000 seconds user
       0.000000000 seconds sys

But 'perf stat's machine-readable format does not provide this information.

  $ perf stat -x, -e cache-misses:u -- true
  4282,,cache-misses:u,492859,100.00,,

I found no way to retrieve this information using the available events
while using machine-readable output.

This patch adds two new tool internal events 'user_time' and
'system_time', similarly to the already present 'duration_time' event.

Both events use the already collected rusage information obtained by
wait4 and tracked in the global ru_stats.

Examples presenting cache-misses and rusage information in both human
and machine-readable form:

  $ perf stat -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time .

   Performance counter stats for 'grep -q -r duration_time .':

        67,422,542 ns   duration_time:u
        50,517,000 ns   user_time:u
        16,839,000 ns   system_time:u
            30,937      cache-misses:u

       0.067422542 seconds time elapsed

       0.050517000 seconds user
       0.016839000 seconds sys

  $ perf stat -x, -e duration_time,user_time,system_time,cache-misses -- grep -q -r duration_time .
  72134524,ns,duration_time:u,72134524,100.00,,
  65225000,ns,user_time:u,65225000,100.00,,
  6865000,ns,system_time:u,6865000,100.00,,
  38705,,cache-misses:u,71189328,100.00,,

Signed-off-by: Florian Fischer <florian.fischer@muhq.space>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Link: https://lore.kernel.org/r/20220420102354.468173-3-florian.fischer@muhq.space
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/builtin-stat.c
tools/perf/util/evsel.h
tools/perf/util/parse-events.c
tools/perf/util/parse-events.l

index 61faffb535f53ac0c628ca6db509b3a3e8d7abba..dea34c8990aea63db1701051c262d978652c618d 100644 (file)
@@ -342,15 +342,35 @@ static int evsel__write_stat_event(struct evsel *counter, int cpu_map_idx, u32 t
 static int read_single_counter(struct evsel *counter, int cpu_map_idx,
                               int thread, struct timespec *rs)
 {
-       if (counter->tool_event == PERF_TOOL_DURATION_TIME) {
-               u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
-               struct perf_counts_values *count =
-                       perf_counts(counter->counts, cpu_map_idx, thread);
-               count->ena = count->run = val;
-               count->val = val;
-               return 0;
+       switch(counter->tool_event) {
+               case PERF_TOOL_DURATION_TIME: {
+                       u64 val = rs->tv_nsec + rs->tv_sec*1000000000ULL;
+                       struct perf_counts_values *count =
+                               perf_counts(counter->counts, cpu_map_idx, thread);
+                       count->ena = count->run = val;
+                       count->val = val;
+                       return 0;
+               }
+               case PERF_TOOL_USER_TIME:
+               case PERF_TOOL_SYSTEM_TIME: {
+                       u64 val;
+                       struct perf_counts_values *count =
+                               perf_counts(counter->counts, cpu_map_idx, thread);
+                       if (counter->tool_event == PERF_TOOL_USER_TIME)
+                               val = ru_stats.ru_utime_usec_stat.mean;
+                       else
+                               val = ru_stats.ru_stime_usec_stat.mean;
+                       count->ena = count->run = val;
+                       count->val = val;
+                       return 0;
+               }
+               default:
+               case PERF_TOOL_NONE:
+                       return evsel__read_counter(counter, cpu_map_idx, thread);
+               case PERF_TOOL_MAX:
+                       /* This should never be reached */
+                       return 0;
        }
-       return evsel__read_counter(counter, cpu_map_idx, thread);
 }
 
 /*
index 041b42d33bf5acf2d143bc3b80cfa788b7fb23f7..7e2209b47b39e2887735e72790a6c588f29ab06f 100644 (file)
@@ -30,6 +30,10 @@ typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
 enum perf_tool_event {
        PERF_TOOL_NONE          = 0,
        PERF_TOOL_DURATION_TIME = 1,
+       PERF_TOOL_USER_TIME = 2,
+       PERF_TOOL_SYSTEM_TIME = 3,
+
+       PERF_TOOL_MAX,
 };
 
 /** struct evsel - event selector
index 24997925ae00d66a95c58b2afdac3d54894661f3..8778575f6bfa23c7a1a86c841a10bba1a19e6273 100644 (file)
@@ -402,7 +402,9 @@ static int add_event_tool(struct list_head *list, int *idx,
        if (!evsel)
                return -ENOMEM;
        evsel->tool_event = tool_event;
-       if (tool_event == PERF_TOOL_DURATION_TIME) {
+       if (tool_event == PERF_TOOL_DURATION_TIME
+           || tool_event == PERF_TOOL_USER_TIME
+           || tool_event == PERF_TOOL_SYSTEM_TIME) {
                free((char *)evsel->unit);
                evsel->unit = strdup("ns");
        }
index 5b6e4b5249cffe81bf6f274df1eef119fa14c739..3a9ce96c8bce13156e4ea432ebbe8acc25bc3815 100644 (file)
@@ -353,6 +353,8 @@ alignment-faults                            { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_AL
 emulation-faults                               { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS); }
 dummy                                          { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_DUMMY); }
 duration_time                                  { return tool(yyscanner, PERF_TOOL_DURATION_TIME); }
+user_time                                              { return tool(yyscanner, PERF_TOOL_USER_TIME); }
+system_time                                            { return tool(yyscanner, PERF_TOOL_SYSTEM_TIME); }
 bpf-output                                     { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_BPF_OUTPUT); }
 cgroup-switches                                        { return sym(yyscanner, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CGROUP_SWITCHES); }