perf kwork top: Add -C/--cpu -i/--input -n/--name -s/--sort --time options
author Yang Jihong <yangjihong1@huawei.com>
Sat, 12 Aug 2023 08:49:14 +0000 (08:49 +0000)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Tue, 12 Sep 2023 20:31:59 +0000 (17:31 -0300)
Provide the following options for perf kwork top:

1. -C, --cpu <cpu> list of cpus to profile
2. -i, --input <file> input file name
3. -n, --name <name> event name to profile
4. -s, --sort <key[,key2...]> sort by key(s): rate, runtime, tid
5. --time <str> Time span for analysis (start,stop)

Example usage:

  # perf kwork top -h

   Usage: perf kwork top [<options>]

      -C, --cpu <cpu>       list of cpus to profile
      -i, --input <file>    input file name
      -n, --name <name>     event name to profile
      -s, --sort <key[,key2...]>
                            sort by key(s): rate, runtime, tid
          --time <str>      Time span for analysis (start,stop)

  # perf kwork top -C 2,4,5

  Total  :  51226.940 ms, 3 cpus
  %Cpu(s):  92.59% id,   0.00% hi,   0.09% si
  %Cpu2   [|                                4.61%]
  %Cpu4   [                                 0.01%]
  %Cpu5   [|||||                           17.31%]

        PID    %CPU           RUNTIME  COMMMAND
    ----------------------------------------------------
          0   99.98      17073.515 ms  swapper/4
          0   95.17      16250.874 ms  swapper/2
          0   82.62      14108.577 ms  swapper/5
       4342   21.70       3708.358 ms  perf
         16    0.13         22.296 ms  rcu_preempt
         75    0.02          4.261 ms  kworker/2:1
         98    0.01          2.540 ms  jbd2/sda-8
         61    0.01          3.404 ms  kcompactd0
         87    0.00          0.145 ms  kworker/5:1H
         73    0.00          0.596 ms  kworker/5:1
         41    0.00          0.041 ms  ksoftirqd/5
         40    0.00          0.718 ms  migration/5
         64    0.00          0.115 ms  kworker/4:1
         35    0.00          0.556 ms  migration/4
        353    0.00          1.143 ms  sshd
         26    0.00          1.665 ms  ksoftirqd/2
         25    0.00          0.662 ms  migration/2

  # perf kwork top -i perf.data

  Total  : 136601.588 ms, 8 cpus
  %Cpu(s):  95.66% id,   0.04% hi,   0.05% si
  %Cpu0   [                                 0.02%]
  %Cpu1   [                                 0.01%]
  %Cpu2   [|                                4.61%]
  %Cpu3   [                                 0.04%]
  %Cpu4   [                                 0.01%]
  %Cpu5   [|||||                           17.31%]
  %Cpu6   [                                 0.51%]
  %Cpu7   [|||                             11.42%]

        PID    %CPU           RUNTIME  COMMMAND
    ----------------------------------------------------
          0   99.98      17073.515 ms  swapper/4
          0   99.98      17072.173 ms  swapper/1
          0   99.93      17064.229 ms  swapper/3
          0   99.62      17011.013 ms  swapper/0
          0   99.47      16985.180 ms  swapper/6
          0   95.17      16250.874 ms  swapper/2
          0   88.51      15111.684 ms  swapper/7
          0   82.62      14108.577 ms  swapper/5
       4342   33.00       5644.045 ms  perf
       4344    0.43         74.351 ms  perf
         16    0.13         22.296 ms  rcu_preempt
       4345    0.05         10.093 ms  perf
       4343    0.05          8.769 ms  perf
       4341    0.02          4.882 ms  perf
       4095    0.02          4.605 ms  kworker/7:1
         75    0.02          4.261 ms  kworker/2:1
        120    0.01          1.909 ms  systemd-journal
         98    0.01          2.540 ms  jbd2/sda-8
         61    0.01          3.404 ms  kcompactd0
        667    0.01          2.542 ms  kworker/u16:2
       4340    0.00          1.052 ms  kworker/7:2
         97    0.00          0.489 ms  kworker/7:1H
         51    0.00          0.209 ms  ksoftirqd/7
         50    0.00          0.646 ms  migration/7
         76    0.00          0.753 ms  kworker/6:1
         45    0.00          0.572 ms  migration/6
         87    0.00          0.145 ms  kworker/5:1H
         73    0.00          0.596 ms  kworker/5:1
         41    0.00          0.041 ms  ksoftirqd/5
         40    0.00          0.718 ms  migration/5
         64    0.00          0.115 ms  kworker/4:1
         35    0.00          0.556 ms  migration/4
        353    0.00          2.600 ms  sshd
         74    0.00          0.205 ms  kworker/3:1
         33    0.00          1.576 ms  kworker/3:0H
         30    0.00          0.996 ms  migration/3
         26    0.00          1.665 ms  ksoftirqd/2
         25    0.00          0.662 ms  migration/2
        397    0.00          0.057 ms  kworker/1:1
         20    0.00          1.005 ms  migration/1
       2909    0.00          1.053 ms  kworker/0:2
         17    0.00          0.720 ms  migration/0
         15    0.00          0.039 ms  ksoftirqd/0

  # perf kwork top -n perf

  Total  : 136601.588 ms, 8 cpus
  %Cpu(s):  95.66% id,   0.04% hi,   0.05% si
  %Cpu0   [                                 0.01%]
  %Cpu1   [                                 0.00%]
  %Cpu2   [|                                4.44%]
  %Cpu3   [                                 0.00%]
  %Cpu4   [                                 0.00%]
  %Cpu5   [                                 0.00%]
  %Cpu6   [                                 0.49%]
  %Cpu7   [|||                             11.38%]

        PID    %CPU           RUNTIME  COMMMAND
    ----------------------------------------------------
       4342   15.74       2695.516 ms  perf
       4344    0.43         74.351 ms  perf
       4345    0.05         10.093 ms  perf
       4343    0.05          8.769 ms  perf
       4341    0.02          4.882 ms  perf

  # perf kwork top -s tid

  Total  : 136601.588 ms, 8 cpus
  %Cpu(s):  95.66% id,   0.04% hi,   0.05% si
  %Cpu0   [                                 0.02%]
  %Cpu1   [                                 0.01%]
  %Cpu2   [|                                4.61%]
  %Cpu3   [                                 0.04%]
  %Cpu4   [                                 0.01%]
  %Cpu5   [|||||                           17.31%]
  %Cpu6   [                                 0.51%]
  %Cpu7   [|||                             11.42%]

        PID    %CPU           RUNTIME  COMMMAND
    ----------------------------------------------------
          0   99.62      17011.013 ms  swapper/0
          0   99.98      17072.173 ms  swapper/1
          0   95.17      16250.874 ms  swapper/2
          0   99.93      17064.229 ms  swapper/3
          0   99.98      17073.515 ms  swapper/4
          0   82.62      14108.577 ms  swapper/5
          0   99.47      16985.180 ms  swapper/6
          0   88.51      15111.684 ms  swapper/7
         15    0.00          0.039 ms  ksoftirqd/0
         16    0.13         22.296 ms  rcu_preempt
         17    0.00          0.720 ms  migration/0
         20    0.00          1.005 ms  migration/1
         25    0.00          0.662 ms  migration/2
         26    0.00          1.665 ms  ksoftirqd/2
         30    0.00          0.996 ms  migration/3
         33    0.00          1.576 ms  kworker/3:0H
         35    0.00          0.556 ms  migration/4
         40    0.00          0.718 ms  migration/5
         41    0.00          0.041 ms  ksoftirqd/5
         45    0.00          0.572 ms  migration/6
         50    0.00          0.646 ms  migration/7
         51    0.00          0.209 ms  ksoftirqd/7
         61    0.01          3.404 ms  kcompactd0
         64    0.00          0.115 ms  kworker/4:1
         73    0.00          0.596 ms  kworker/5:1
         74    0.00          0.205 ms  kworker/3:1
         75    0.02          4.261 ms  kworker/2:1
         76    0.00          0.753 ms  kworker/6:1
         87    0.00          0.145 ms  kworker/5:1H
         97    0.00          0.489 ms  kworker/7:1H
         98    0.01          2.540 ms  jbd2/sda-8
        120    0.01          1.909 ms  systemd-journal
        353    0.00          2.600 ms  sshd
        397    0.00          0.057 ms  kworker/1:1
        667    0.01          2.542 ms  kworker/u16:2
       2909    0.00          1.053 ms  kworker/0:2
       4095    0.02          4.605 ms  kworker/7:1
       4340    0.00          1.052 ms  kworker/7:2
       4341    0.02          4.882 ms  perf
       4342   33.00       5644.045 ms  perf
       4343    0.05          8.769 ms  perf
       4344    0.43         74.351 ms  perf
       4345    0.05         10.093 ms  perf

  # perf kwork top --time 128800,

  Total  :  53495.122 ms, 8 cpus
  %Cpu(s):  94.71% id,   0.09% hi,   0.09% si
  %Cpu0   [                                 0.07%]
  %Cpu1   [                                 0.04%]
  %Cpu2   [||                               8.49%]
  %Cpu3   [                                 0.09%]
  %Cpu4   [                                 0.02%]
  %Cpu5   [                                 0.06%]
  %Cpu6   [                                 0.12%]
  %Cpu7   [||||||                          21.24%]

        PID    %CPU           RUNTIME  COMMMAND
    ----------------------------------------------------
          0   99.96       3981.363 ms  swapper/4
          0   99.94       3978.955 ms  swapper/1
          0   99.91       9329.375 ms  swapper/5
          0   99.87       4906.829 ms  swapper/3
          0   99.86       9028.064 ms  swapper/6
          0   98.67       3928.161 ms  swapper/0
          0   91.17       8388.432 ms  swapper/2
          0   78.65       7125.602 ms  swapper/7
       4342   29.42       2675.198 ms  perf
         16    0.18         16.817 ms  rcu_preempt
       4345    0.09          8.183 ms  perf
       4344    0.04          4.290 ms  perf
       4343    0.03          2.844 ms  perf
        353    0.03          2.600 ms  sshd
       4095    0.02          2.702 ms  kworker/7:1
        120    0.02          1.909 ms  systemd-journal
         98    0.02          2.540 ms  jbd2/sda-8
         61    0.02          1.886 ms  kcompactd0
        667    0.02          1.011 ms  kworker/u16:2
         75    0.02          2.693 ms  kworker/2:1
       4341    0.01          1.838 ms  perf
         30    0.01          0.788 ms  migration/3
         26    0.01          1.665 ms  ksoftirqd/2
         20    0.01          0.752 ms  migration/1
       2909    0.01          0.604 ms  kworker/0:2
       4340    0.00          0.635 ms  kworker/7:2
         97    0.00          0.214 ms  kworker/7:1H
         51    0.00          0.209 ms  ksoftirqd/7
         50    0.00          0.646 ms  migration/7
         76    0.00          0.602 ms  kworker/6:1
         45    0.00          0.366 ms  migration/6
         87    0.00          0.145 ms  kworker/5:1H
         40    0.00          0.446 ms  migration/5
         35    0.00          0.318 ms  migration/4
         74    0.00          0.205 ms  kworker/3:1
         33    0.00          0.080 ms  kworker/3:0H
         25    0.00          0.448 ms  migration/2
        397    0.00          0.057 ms  kworker/1:1
         17    0.00          0.365 ms  migration/0

Reviewed-by: Ian Rogers <irogers@google.com>
Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Sandipan Das <sandipan.das@amd.com>
Link: https://lore.kernel.org/r/20230812084917.169338-14-yangjihong1@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-kwork.txt
tools/perf/builtin-kwork.c

index 0601fcb0feea1fd8c1136142b1544dd557bb8b53..34d6c285e5271035049317c70502f3bf1ede598a 100644 (file)
@@ -178,6 +178,32 @@ OPTIONS for 'perf kwork timehist'
        stop time is not given (i.e, time string is 'x.y,') then analysis goes
        to end of file.
 
+OPTIONS for 'perf kwork top'
+----------------------------
+
+-C::
+--cpu::
+       Only show events for the given CPU(s) (comma separated list).
+
+-i::
+--input::
+       Input file name. (default: perf.data unless stdin is a fifo)
+
+-n::
+--name::
+       Only show events for the given name.
+
+-s::
+--sort::
+       Sort by key(s): rate, runtime, tid
+
+--time::
+       Only analyze samples within given time window: <start>,<stop>. Times
+       have the format seconds.microseconds. If start is not given (i.e., time
+       string is ',x.y') then analysis starts at the beginning of the file. If
+       stop time is not given (i.e., time string is 'x.y,') then analysis goes
+       to end of file.
+
 SEE ALSO
 --------
 linkperf:perf-record[1]
index c741cc1a543f9714fb40d0412ed13d16e06fc243..d5949ff4bd15a7208cc60c03093b20adfcbd2541 100644 (file)
@@ -146,6 +146,24 @@ static int cpu_usage_cmp(struct kwork_work *l, struct kwork_work *r)
        return 0;
 }
 
+static int id_or_cpu_r_cmp(struct kwork_work *l, struct kwork_work *r)
+{
+       if (l->id < r->id)
+               return 1;
+       if (l->id > r->id)
+               return -1;
+
+       if (l->id != 0)
+               return 0;
+
+       if (l->cpu < r->cpu)
+               return 1;
+       if (l->cpu > r->cpu)
+               return -1;
+
+       return 0;
+}
+
 static int sort_dimension__add(struct perf_kwork *kwork __maybe_unused,
                               const char *tok, struct list_head *list)
 {
@@ -174,6 +192,10 @@ static int sort_dimension__add(struct perf_kwork *kwork __maybe_unused,
                .name = "rate",
                .cmp  = cpu_usage_cmp,
        };
+       static struct sort_dimension tid_sort_dimension = {
+               .name = "tid",
+               .cmp  = id_or_cpu_r_cmp,
+       };
        struct sort_dimension *available_sorts[] = {
                &id_sort_dimension,
                &max_sort_dimension,
@@ -181,6 +203,7 @@ static int sort_dimension__add(struct perf_kwork *kwork __maybe_unused,
                &runtime_sort_dimension,
                &avg_sort_dimension,
                &rate_sort_dimension,
+               &tid_sort_dimension,
        };
 
        if (kwork->report == KWORK_REPORT_LATENCY)
@@ -381,6 +404,17 @@ static void profile_update_timespan(struct perf_kwork *kwork,
                kwork->timeend = sample->time;
 }
 
+static bool profile_name_match(struct perf_kwork *kwork,
+                              struct kwork_work *work)
+{
+       if (kwork->profile_name && work->name &&
+           (strcmp(work->name, kwork->profile_name) != 0)) {
+               return false;
+       }
+
+       return true;
+}
+
 static bool profile_event_match(struct perf_kwork *kwork,
                                struct kwork_work *work,
                                struct perf_sample *sample)
@@ -396,10 +430,14 @@ static bool profile_event_match(struct perf_kwork *kwork,
            ((ptime->end != 0) && (ptime->end < time)))
                return false;
 
-       if ((kwork->profile_name != NULL) &&
-           (work->name != NULL) &&
-           (strcmp(work->name, kwork->profile_name) != 0))
+       /*
+        * report top needs to collect the runtime of all tasks to
+        * calculate the load of each core.
+        */
+       if ((kwork->report != KWORK_REPORT_TOP) &&
+           !profile_name_match(kwork, work)) {
                return false;
+       }
 
        profile_update_timespan(kwork, sample);
        return true;
@@ -2070,6 +2108,9 @@ static void top_merge_tasks(struct perf_kwork *kwork)
                rb_erase_cached(node, &class->work_root);
                data = rb_entry(node, struct kwork_work, node);
 
+               if (!profile_name_match(kwork, data))
+                       continue;
+
                cpu = data->cpu;
                merged_work = find_work_by_id(&merged_root, data->id,
                                              data->id == 0 ? cpu : -1);
@@ -2329,6 +2370,16 @@ int cmd_kwork(int argc, const char **argv)
        OPT_PARENT(kwork_options)
        };
        const struct option top_options[] = {
+       OPT_STRING('s', "sort", &kwork.sort_order, "key[,key2...]",
+                  "sort by key(s): rate, runtime, tid"),
+       OPT_STRING('C', "cpu", &kwork.cpu_list, "cpu",
+                  "list of cpus to profile"),
+       OPT_STRING('n', "name", &kwork.profile_name, "name",
+                  "event name to profile"),
+       OPT_STRING(0, "time", &kwork.time_str, "str",
+                  "Time span for analysis (start,stop)"),
+       OPT_STRING('i', "input", &input_name, "file",
+                  "input file name"),
        OPT_PARENT(kwork_options)
        };
        const char *kwork_usage[] = {