perf kwork: Implement BPF trace
authorYang Jihong <yangjihong1@huawei.com>
Sat, 9 Jul 2022 01:50:30 +0000 (09:50 +0800)
committerArnaldo Carvalho de Melo <acme@redhat.com>
Tue, 26 Jul 2022 19:31:54 +0000 (16:31 -0300)
'perf record' generates perf.data, which causes extra hard disk
interrupts, and the amount of data to be collected increases with time.

Using eBPF trace can process the data in kernel, which solves the
preceding two problems.

Add -b/--use-bpf option for latency and report to support
tracing kwork events using eBPF:

1. Create bpf prog and attach to tracepoints,
2. Start tracing after command is entered,
3. After the user hits "ctrl+c", stop tracing and report,
4. Support CPU and name filtering.

This commit implements the framework code and
does not add specific event support.

Test cases:

  # perf kwork rep -h

   Usage: perf kwork report [<options>]

      -b, --use-bpf         Use BPF to measure kwork runtime
      -C, --cpu <cpu>       list of cpus to profile
      -i, --input <file>    input file name
      -n, --name <name>     event name to profile
      -s, --sort <key[,key2...]>
                            sort by key(s): runtime, max, count
      -S, --with-summary    Show summary with statistics
          --time <str>      Time span for analysis (start,stop)

  # perf kwork lat -h

   Usage: perf kwork latency [<options>]

      -b, --use-bpf         Use BPF to measure kwork latency
      -C, --cpu <cpu>       list of cpus to profile
      -i, --input <file>    input file name
      -n, --name <name>     event name to profile
      -s, --sort <key[,key2...]>
                            sort by key(s): avg, max, count
          --time <str>      Time span for analysis (start,stop)

  # perf kwork lat -b
  Unsupported bpf trace class irq

  # perf kwork rep -b
  Unsupported bpf trace class irq

Signed-off-by: Yang Jihong <yangjihong1@huawei.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Clarke <pc@us.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lore.kernel.org/r/20220709015033.38326-15-yangjihong1@huawei.com
[ Simplify work_findnew() ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/Documentation/perf-kwork.txt
tools/perf/Makefile.perf
tools/perf/builtin-kwork.c
tools/perf/util/Build
tools/perf/util/bpf_kwork.c [new file with mode: 0644]
tools/perf/util/bpf_skel/kwork_trace.bpf.c [new file with mode: 0644]
tools/perf/util/kwork.h

index 51c1625bacaee80247f3b210044efbf4f813225f..3c36324712b6e0feeb5b9386d58b2f5f4ba45876 100644 (file)
@@ -26,7 +26,9 @@ There are several variants of 'perf kwork':
     Example usage:
         perf kwork record -- sleep 1
         perf kwork report
+        perf kwork report -b
         perf kwork latency
+        perf kwork latency -b
         perf kwork timehist
 
    By default it shows the individual work events such as irq, workqeueu,
@@ -73,6 +75,10 @@ OPTIONS
 OPTIONS for 'perf kwork report'
 ----------------------------
 
+-b::
+--use-bpf::
+       Use BPF to measure kwork runtime
+
 -C::
 --cpu::
        Only show events for the given CPU(s) (comma separated list).
@@ -103,6 +109,10 @@ OPTIONS for 'perf kwork report'
 OPTIONS for 'perf kwork latency'
 ----------------------------
 
+-b::
+--use-bpf::
+       Use BPF to measure kwork latency
+
 -C::
 --cpu::
        Only show events for the given CPU(s) (comma separated list).
index 8f0b1fb39984fb7b75ee11d18307ca4f93c1c90c..d2083a247f7394ab13ec69ead91b977744ab92b4 100644 (file)
@@ -1029,6 +1029,7 @@ SKELETONS := $(SKEL_OUT)/bpf_prog_profiler.skel.h
 SKELETONS += $(SKEL_OUT)/bperf_leader.skel.h $(SKEL_OUT)/bperf_follower.skel.h
 SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h
 SKELETONS += $(SKEL_OUT)/off_cpu.skel.h
+SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h
 
 $(SKEL_TMP_OUT) $(LIBBPF_OUTPUT):
        $(Q)$(MKDIR) -p $@
index f1d773947627b600811e05da15c8987c28be4399..fb8c63656ad897674b34ec895784b145b39f2b20 100644 (file)
@@ -329,17 +329,15 @@ static struct kwork_work *work_findnew(struct rb_root_cached *root,
                                       struct kwork_work *key,
                                       struct list_head *sort_list)
 {
-       struct kwork_work *work = NULL;
+       struct kwork_work *work = work_search(root, key, sort_list);
 
-       work = work_search(root, key, sort_list);
        if (work != NULL)
                return work;
 
        work = work_new(key);
-       if (work == NULL)
-               return NULL;
+       if (work)
+               work_insert(root, work, sort_list);
 
-       work_insert(root, work, sort_list);
        return work;
 }
 
@@ -1429,13 +1427,69 @@ static void process_skipped_events(struct perf_kwork *kwork,
        }
 }
 
+/*
+ * Create a new work item from @key with work_new() and insert it into
+ * @class's work tree, using kwork->cmp_id as the sort list.  No search for
+ * an existing entry is done first.
+ *
+ * Returns the new work item, or NULL if work_new() failed.
+ */
+struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork,
+                                      struct kwork_class *class,
+                                      struct kwork_work *key)
+{
+       struct kwork_work *work = work_new(key);
+
+       if (work)
+               work_insert(&class->work_root, work, &kwork->cmp_id);
+
+       return work;
+}
+
+/*
+ * Handler installed for SIGINT and SIGTERM by perf_kwork__report_bpf().
+ * It only logs the signal number at debug level.
+ */
+static void sig_handler(int sig)
+{
+       /*
+        * Simply capture termination signal so that
+        * the program can continue after pause returns
+        */
+       pr_debug("Capture signal %d\n", sig);
+}
+
+/*
+ * Trace kwork events with BPF until a signal interrupts pause(), then read
+ * the collected BPF data back into @kwork.
+ *
+ * Returns 0 on success, -1 if the BPF trace could not be prepared or the
+ * collected data could not be read back.
+ */
+static int perf_kwork__report_bpf(struct perf_kwork *kwork)
+{
+       int ret;
+
+       signal(SIGINT, sig_handler);
+       signal(SIGTERM, sig_handler);
+
+       ret = perf_kwork__trace_prepare_bpf(kwork);
+       if (ret)
+               return -1;
+
+       printf("Starting trace, Hit <Ctrl+C> to stop and report\n");
+
+       perf_kwork__trace_start();
+
+       /*
+        * a simple pause, wait here for stop signal
+        */
+       pause();
+
+       perf_kwork__trace_finish();
+
+       /*
+        * Do not present partially read data as a complete report:
+        * propagate a read failure, but always run the cleanup.
+        */
+       ret = perf_kwork__report_read_bpf(kwork);
+
+       perf_kwork__report_cleanup_bpf();
+
+       return ret ? -1 : 0;
+}
+
 static int perf_kwork__report(struct perf_kwork *kwork)
 {
        int ret;
        struct rb_node *next;
        struct kwork_work *work;
 
-       ret = perf_kwork__read_events(kwork);
+       if (kwork->use_bpf)
+               ret = perf_kwork__report_bpf(kwork);
+       else
+               ret = perf_kwork__read_events(kwork);
+
        if (ret != 0)
                return -1;
 
@@ -1668,6 +1722,10 @@ int cmd_kwork(int argc, const char **argv)
                   "input file name"),
        OPT_BOOLEAN('S', "with-summary", &kwork.summary,
                    "Show summary with statistics"),
+#ifdef HAVE_BPF_SKEL
+       OPT_BOOLEAN('b', "use-bpf", &kwork.use_bpf,
+                   "Use BPF to measure kwork runtime"),
+#endif
        OPT_PARENT(kwork_options)
        };
        const struct option latency_options[] = {
@@ -1681,6 +1739,10 @@ int cmd_kwork(int argc, const char **argv)
                   "Time span for analysis (start,stop)"),
        OPT_STRING('i', "input", &input_name, "file",
                   "input file name"),
+#ifdef HAVE_BPF_SKEL
+       OPT_BOOLEAN('b', "use-bpf", &kwork.use_bpf,
+                   "Use BPF to measure kwork latency"),
+#endif
        OPT_PARENT(kwork_options)
        };
        const struct option timehist_options[] = {
index a51267d88ca901ac1f85670c6252fbb75a58cca4..66ad30cf65ec34d135f5bb32025cb3d8c70f6579 100644 (file)
@@ -148,6 +148,7 @@ perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter.o
 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_counter_cgroup.o
 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_ftrace.o
 perf-$(CONFIG_PERF_BPF_SKEL) += bpf_off_cpu.o
+perf-$(CONFIG_PERF_BPF_SKEL) += bpf_kwork.o
 perf-$(CONFIG_BPF_PROLOGUE) += bpf-prologue.o
 perf-$(CONFIG_LIBELF) += symbol-elf.o
 perf-$(CONFIG_LIBELF) += probe-file.o
diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c
new file mode 100644 (file)
index 0000000..433bfad
--- /dev/null
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * bpf_kwork.c
+ *
+ * Copyright (c) 2022  Huawei Inc,  Yang Jihong <yangjihong1@huawei.com>
+ */
+
+#include <time.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <linux/time64.h>
+
+#include "util/debug.h"
+#include "util/kwork.h"
+
+#include <bpf/bpf.h>
+
+#include "util/bpf_skel/kwork_trace.skel.h"
+
+/*
+ * This should be in sync with "util/bpf_skel/kwork_trace.bpf.c"
+ */
+#define MAX_KWORKNAME 128
+
+/*
+ * Key used to iterate and look up the perf_kwork_report BPF map.
+ */
+struct work_key {
+       u32 type;       /* class type, indexes kwork_class_bpf_supported_list */
+       u32 cpu;        /* copied to kwork_work.cpu by add_work() */
+       u64 id;         /* copied to kwork_work.id by add_work() */
+};
+
+/*
+ * Value read from the perf_kwork_report BPF map for one work_key.
+ * add_work() stores the times as runtime or latency statistics depending
+ * on kwork->report.
+ */
+struct report_data {
+       u64 nr;                 /* stored as nr_atoms; entries with 0 are skipped */
+       u64 total_time;         /* stored as total_runtime / total_latency */
+       u64 max_time;           /* stored as max_runtime / max_latency */
+       u64 max_time_start;     /* stored as max_runtime_start / max_latency_start */
+       u64 max_time_end;       /* stored as max_runtime_end / max_latency_end */
+};
+
+/*
+ * BPF tracing hooks for one kwork class, looked up by class type in
+ * kwork_class_bpf_supported_list.
+ */
+struct kwork_class_bpf {
+       /* set by perf_kwork__trace_prepare_bpf() for each requested class */
+       struct kwork_class *class;
+
+       /* optional: called for a requested class before the skeleton is loaded */
+       void (*load_prepare)(struct perf_kwork *kwork);
+       /* optional: fills *ret_name for @key; non-zero return makes add_work() fail */
+       int  (*get_work_name)(struct work_key *key, char **ret_name);
+};
+
+static struct kwork_trace_bpf *skel;
+
+static struct timespec ts_start;
+static struct timespec ts_end;
+
+/*
+ * Record the trace start time (CLOCK_MONOTONIC) and set the BPF program's
+ * 'enabled' variable to 1.  'skel' is dereferenced unchecked, so
+ * perf_kwork__trace_prepare_bpf() must have succeeded first.
+ */
+void perf_kwork__trace_start(void)
+{
+       clock_gettime(CLOCK_MONOTONIC, &ts_start);
+       skel->bss->enabled = 1;
+}
+
+/*
+ * Record the trace end time (CLOCK_MONOTONIC) and set the BPF program's
+ * 'enabled' variable back to 0.  'skel' is dereferenced unchecked, so
+ * perf_kwork__trace_prepare_bpf() must have succeeded first.
+ */
+void perf_kwork__trace_finish(void)
+{
+       clock_gettime(CLOCK_MONOTONIC, &ts_end);
+       skel->bss->enabled = 0;
+}
+
+/*
+ * BPF tracing support indexed by kwork class type.  A NULL entry makes
+ * perf_kwork__trace_prepare_bpf() reject the class as unsupported; every
+ * entry is NULL here, so no class can be traced with BPF yet.
+ */
+static struct kwork_class_bpf *
+kwork_class_bpf_supported_list[KWORK_CLASS_MAX] = {
+       [KWORK_CLASS_IRQ]       = NULL,
+       [KWORK_CLASS_SOFTIRQ]   = NULL,
+       [KWORK_CLASS_WORKQUEUE] = NULL,
+};
+
+/* Return true when @type lies in the range [0, KWORK_CLASS_MAX). */
+static bool valid_kwork_class_type(enum kwork_class_type type)
+{
+       if (type < 0)
+               return false;
+
+       return type < KWORK_CLASS_MAX;
+}
+
+/*
+ * Push the user's CPU list and work-name filter into the BPF filter maps
+ * and set the matching has_*_filter variables in the BPF program.
+ *
+ * perf_kwork__trace_prepare_bpf() calls this after the skeleton has been
+ * loaded and before it is attached.
+ *
+ * Returns 0 on success (also when no filter was requested), -1 on error.
+ */
+static int setup_filters(struct perf_kwork *kwork)
+{
+       u8 val = 1;
+       int i, nr_cpus, key, fd;
+       struct perf_cpu_map *map;
+
+       if (kwork->cpu_list != NULL) {
+               fd = bpf_map__fd(skel->maps.perf_kwork_cpu_filter);
+               if (fd < 0) {
+                       pr_debug("Invalid cpu filter fd\n");
+                       return -1;
+               }
+
+               map = perf_cpu_map__new(kwork->cpu_list);
+               if (map == NULL) {
+                       pr_debug("Invalid cpu_list\n");
+                       return -1;
+               }
+
+               /* a negative value is an error code, not a cpu count */
+               nr_cpus = libbpf_num_possible_cpus();
+               if (nr_cpus < 0) {
+                       perf_cpu_map__put(map);
+                       pr_debug("Failed to get number of possible cpus\n");
+                       return -1;
+               }
+
+               for (i = 0; i < perf_cpu_map__nr(map); i++) {
+                       struct perf_cpu cpu = perf_cpu_map__cpu(map, i);
+
+                       if (cpu.cpu >= nr_cpus) {
+                               perf_cpu_map__put(map);
+                               pr_err("Requested cpu %d too large\n", cpu.cpu);
+                               return -1;
+                       }
+
+                       /* a silently dropped cpu would yield a wrong report */
+                       if (bpf_map_update_elem(fd, &cpu.cpu, &val, BPF_ANY)) {
+                               perf_cpu_map__put(map);
+                               pr_err("Failed to add cpu %d to cpu filter\n", cpu.cpu);
+                               return -1;
+                       }
+               }
+               perf_cpu_map__put(map);
+
+               skel->bss->has_cpu_filter = 1;
+       }
+
+       if (kwork->profile_name != NULL) {
+               /*
+                * The map value is MAX_KWORKNAME bytes and the whole value is
+                * copied from the buffer passed in, so hand over a zero-padded
+                * copy instead of the (possibly much shorter) string itself.
+                */
+               char name[MAX_KWORKNAME] = { 0 };
+
+               if (strlen(kwork->profile_name) >= MAX_KWORKNAME) {
+                       pr_err("Requested name filter %s too large, limit to %d\n",
+                              kwork->profile_name, MAX_KWORKNAME - 1);
+                       return -1;
+               }
+
+               snprintf(name, sizeof(name), "%s", kwork->profile_name);
+
+               fd = bpf_map__fd(skel->maps.perf_kwork_name_filter);
+               if (fd < 0) {
+                       pr_debug("Invalid name filter fd\n");
+                       return -1;
+               }
+
+               key = 0;
+               if (bpf_map_update_elem(fd, &key, name, BPF_ANY)) {
+                       pr_err("Failed to set name filter\n");
+                       return -1;
+               }
+
+               skel->bss->has_name_filter = 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Open, configure, load and attach the kwork trace BPF skeleton.
+ *
+ * Every class in kwork->class_list must have an entry in
+ * kwork_class_bpf_supported_list; otherwise the class is reported as
+ * unsupported and the function fails.
+ *
+ * Returns 0 on success with the global 'skel' ready for
+ * perf_kwork__trace_start(); returns -1 on failure, in which case the
+ * skeleton has been destroyed and 'skel' is NULL.
+ */
+int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork)
+{
+       struct bpf_program *prog;
+       struct kwork_class *class;
+       struct kwork_class_bpf *class_bpf;
+       enum kwork_class_type type;
+
+       skel = kwork_trace_bpf__open();
+       if (!skel) {
+               pr_debug("Failed to open kwork trace skeleton\n");
+               return -1;
+       }
+
+       /*
+        * set all progs to non-autoload,
+        * then set corresponding progs according to config
+        */
+       bpf_object__for_each_program(prog, skel->obj)
+               bpf_program__set_autoload(prog, false);
+
+       list_for_each_entry(class, &kwork->class_list, list) {
+               type = class->type;
+               if (!valid_kwork_class_type(type) ||
+                   (kwork_class_bpf_supported_list[type] == NULL)) {
+                       pr_err("Unsupported bpf trace class %s\n", class->name);
+                       goto out;
+               }
+
+               class_bpf = kwork_class_bpf_supported_list[type];
+               class_bpf->class = class;
+
+               if (class_bpf->load_prepare != NULL)
+                       class_bpf->load_prepare(kwork);
+       }
+
+       /*
+        * perf_kwork_cpu_filter is declared with max_entries 1 in
+        * kwork_trace.bpf.c, but setup_filters() inserts one key per
+        * requested cpu.  Size it for every possible cpu; this has to be
+        * done before the skeleton is loaded.
+        */
+       if (kwork->cpu_list != NULL) {
+               int nr_cpus = libbpf_num_possible_cpus();
+
+               if (nr_cpus < 0 ||
+                   bpf_map__set_max_entries(skel->maps.perf_kwork_cpu_filter,
+                                            nr_cpus)) {
+                       pr_debug("Failed to size cpu filter map\n");
+                       goto out;
+               }
+       }
+
+       if (kwork_trace_bpf__load(skel)) {
+               pr_debug("Failed to load kwork trace skeleton\n");
+               goto out;
+       }
+
+       if (setup_filters(kwork))
+               goto out;
+
+       if (kwork_trace_bpf__attach(skel)) {
+               pr_debug("Failed to attach kwork trace skeleton\n");
+               goto out;
+       }
+
+       return 0;
+
+out:
+       kwork_trace_bpf__destroy(skel);
+       /* do not leave a dangling pointer behind for later callers */
+       skel = NULL;
+       return -1;
+}
+
+/*
+ * Turn one entry of the BPF report map into a kwork_work attached to its
+ * class, and record the trace start/end times in @kwork.
+ *
+ * @key:  map key (class type, cpu, work id)
+ * @data: statistics read from the map for that key
+ *
+ * Returns 0 on success; -1 on an invalid or unprepared class type, when
+ * get_work_name() fails, when the work cannot be added, or when
+ * kwork->report is neither runtime nor latency.
+ */
+static int add_work(struct perf_kwork *kwork,
+                   struct work_key *key,
+                   struct report_data *data)
+{
+       struct kwork_work *work;
+       struct kwork_class_bpf *bpf_trace;
+       struct kwork_work tmp = {
+               .id = key->id,
+               .name = NULL,
+               .cpu = key->cpu,
+       };
+       enum kwork_class_type type = key->type;
+
+       if (!valid_kwork_class_type(type)) {
+               pr_debug("Invalid class type %d to add work\n", type);
+               return -1;
+       }
+
+       /*
+        * Entries are NULL for classes without BPF support, and ->class is
+        * only set by perf_kwork__trace_prepare_bpf() for requested classes;
+        * both would otherwise be dereferenced below.
+        */
+       bpf_trace = kwork_class_bpf_supported_list[type];
+       if (bpf_trace == NULL || bpf_trace->class == NULL) {
+               pr_debug("Unprepared class type %d to add work\n", type);
+               return -1;
+       }
+       tmp.class = bpf_trace->class;
+
+       if ((bpf_trace->get_work_name != NULL) &&
+           (bpf_trace->get_work_name(key, &tmp.name)))
+               return -1;
+
+       work = perf_kwork_add_work(kwork, tmp.class, &tmp);
+       if (work == NULL)
+               return -1;
+
+       if (kwork->report == KWORK_REPORT_RUNTIME) {
+               work->nr_atoms = data->nr;
+               work->total_runtime = data->total_time;
+               work->max_runtime = data->max_time;
+               work->max_runtime_start = data->max_time_start;
+               work->max_runtime_end = data->max_time_end;
+       } else if (kwork->report == KWORK_REPORT_LATENCY) {
+               work->nr_atoms = data->nr;
+               work->total_latency = data->total_time;
+               work->max_latency = data->max_time;
+               work->max_latency_start = data->max_time_start;
+               work->max_latency_end = data->max_time_end;
+       } else {
+               pr_debug("Invalid bpf report type %d\n", kwork->report);
+               return -1;
+       }
+
+       /* timestamps taken by perf_kwork__trace_start()/_finish(), in ns */
+       kwork->timestart = (u64)ts_start.tv_sec * NSEC_PER_SEC + ts_start.tv_nsec;
+       kwork->timeend = (u64)ts_end.tv_sec * NSEC_PER_SEC + ts_end.tv_nsec;
+
+       return 0;
+}
+
+/*
+ * Walk every entry of the perf_kwork_report BPF map and add a kwork_work
+ * for each entry whose count is non-zero.
+ *
+ * Returns 0 once the whole map has been walked, -1 if the map fd is
+ * invalid, an element lookup fails, or add_work() fails.
+ */
+int perf_kwork__report_read_bpf(struct perf_kwork *kwork)
+{
+       struct report_data data;
+       struct work_key key = {
+               .type = 0,
+               .cpu  = 0,
+               .id   = 0,
+       };
+       struct work_key prev = {
+               .type = 0,
+               .cpu  = 0,
+               .id   = 0,
+       };
+       /*
+        * Start from a NULL key to get the first element.  Starting from an
+        * all-zero key would skip that very element whenever it exists in
+        * the map (type 0, cpu 0, id 0 is a valid key).
+        */
+       struct work_key *prev_key = NULL;
+       int fd = bpf_map__fd(skel->maps.perf_kwork_report);
+
+       if (fd < 0) {
+               pr_debug("Invalid report fd\n");
+               return -1;
+       }
+
+       while (!bpf_map_get_next_key(fd, prev_key, &key)) {
+               if ((bpf_map_lookup_elem(fd, &key, &data)) != 0) {
+                       pr_debug("Failed to lookup report elem\n");
+                       return -1;
+               }
+
+               if ((data.nr != 0) && (add_work(kwork, &key, &data) != 0))
+                       return -1;
+
+               prev = key;
+               prev_key = &prev;
+       }
+       return 0;
+}
+
+/*
+ * Destroy the kwork trace skeleton and clear the global 'skel' pointer so
+ * that it is not left dangling.
+ */
+void perf_kwork__report_cleanup_bpf(void)
+{
+       kwork_trace_bpf__destroy(skel);
+       skel = NULL;
+}
diff --git a/tools/perf/util/bpf_skel/kwork_trace.bpf.c b/tools/perf/util/bpf_skel/kwork_trace.bpf.c
new file mode 100644 (file)
index 0000000..36112be
--- /dev/null
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (c) 2022, Huawei
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define KWORK_COUNT 100
+#define MAX_KWORKNAME 128
+
+/*
+ * This should be in sync with "util/kwork.h"
+ */
+enum kwork_class_type {
+       KWORK_CLASS_IRQ,
+       KWORK_CLASS_SOFTIRQ,
+       KWORK_CLASS_WORKQUEUE,
+       KWORK_CLASS_MAX,
+};
+
+/*
+ * Key type of the perf_kwork_names, perf_kwork_time and perf_kwork_report
+ * maps (their key_size is sizeof(struct work_key)).
+ */
+struct work_key {
+       __u32 type;
+       __u32 cpu;
+       __u64 id;
+};
+
+/*
+ * Value type of the perf_kwork_report map (its value_size is
+ * sizeof(struct report_data)).
+ */
+struct report_data {
+       __u64 nr;
+       __u64 total_time;
+       __u64 max_time;
+       __u64 max_time_start;
+       __u64 max_time_end;
+};
+
+/*
+ * Hash map from struct work_key to a MAX_KWORKNAME-byte value, at most
+ * KWORK_COUNT entries.  Presumably holds work names; nothing visible in
+ * this change reads or writes it -- TODO confirm.
+ */
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(key_size, sizeof(struct work_key));
+       __uint(value_size, MAX_KWORKNAME);
+       __uint(max_entries, KWORK_COUNT);
+} perf_kwork_names SEC(".maps");
+
+/*
+ * Hash map from struct work_key to a __u64 value, at most KWORK_COUNT
+ * entries.  Presumably holds a timestamp per work; nothing visible in
+ * this change reads or writes it -- TODO confirm.
+ */
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(key_size, sizeof(struct work_key));
+       __uint(value_size, sizeof(__u64));
+       __uint(max_entries, KWORK_COUNT);
+} perf_kwork_time SEC(".maps");
+
+/*
+ * Hash map from struct work_key to struct report_data, at most
+ * KWORK_COUNT entries.  User space walks it in
+ * perf_kwork__report_read_bpf() (util/bpf_kwork.c).
+ */
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(key_size, sizeof(struct work_key));
+       __uint(value_size, sizeof(struct report_data));
+       __uint(max_entries, KWORK_COUNT);
+} perf_kwork_report SEC(".maps");
+
+/*
+ * Hash map keyed by cpu number with a one-byte value.  User space
+ * (setup_filters() in util/bpf_kwork.c) stores the value 1 for each cpu
+ * requested with -C.
+ * NOTE(review): max_entries is 1 while user space inserts one key per
+ * requested cpu -- confirm the map is resized before load.
+ */
+struct {
+       __uint(type, BPF_MAP_TYPE_HASH);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, sizeof(__u8));
+       __uint(max_entries, 1);
+} perf_kwork_cpu_filter SEC(".maps");
+
+/*
+ * Single-element array map whose MAX_KWORKNAME-byte value at index 0 is
+ * written by user space (setup_filters() in util/bpf_kwork.c) with the
+ * work name requested with -n.
+ */
+struct {
+       __uint(type, BPF_MAP_TYPE_ARRAY);
+       __uint(key_size, sizeof(__u32));
+       __uint(value_size, MAX_KWORKNAME);
+       __uint(max_entries, 1);
+} perf_kwork_name_filter SEC(".maps");
+
+/*
+ * Variables written by user space through the skeleton (util/bpf_kwork.c):
+ * 'enabled' is set to 1 by perf_kwork__trace_start() and back to 0 by
+ * perf_kwork__trace_finish(); the has_*_filter variables are set to 1 by
+ * setup_filters() after the matching filter map has been filled.
+ */
+int enabled = 0;
+int has_cpu_filter = 0;
+int has_name_filter = 0;
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
index 6a06194304b83e8398aed7ddbef9915655c5c6e9..320c0a6d2e086c23eb870f36013ec9ff9089cb76 100644 (file)
@@ -203,6 +203,7 @@ struct perf_kwork {
        const char *sort_order;
        bool show_callchain;
        unsigned int max_stack;
+       bool use_bpf;
 
        /*
         * statistics
@@ -219,4 +220,38 @@ struct perf_kwork {
        u64 nr_skipped_events[KWORK_TRACE_MAX + 1];
 };
 
+/*
+ * Create a work item from @key and insert it into @class's work tree.
+ * Returns the new work item, or NULL on failure.  Defined in
+ * builtin-kwork.c and called from util/bpf_kwork.c.
+ */
+struct kwork_work *perf_kwork_add_work(struct perf_kwork *kwork,
+                                      struct kwork_class *class,
+                                      struct kwork_work *key);
+
+#ifdef HAVE_BPF_SKEL
+
+/*
+ * BPF tracing backend, implemented in util/bpf_kwork.c.  The int functions
+ * return 0 on success and -1 on failure.  perf_kwork__report_bpf() calls
+ * them in this order: prepare, start, finish, read, cleanup.
+ */
+int perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork);
+int perf_kwork__report_read_bpf(struct perf_kwork *kwork);
+void perf_kwork__report_cleanup_bpf(void);
+
+void perf_kwork__trace_start(void);
+void perf_kwork__trace_finish(void);
+
+#else  /* !HAVE_BPF_SKEL */
+
+/* Stub used when built without BPF skeleton support: always fails. */
+static inline int
+perf_kwork__trace_prepare_bpf(struct perf_kwork *kwork __maybe_unused)
+{
+       return -1;
+}
+
+/* Stub used when built without BPF skeleton support: always fails. */
+static inline int
+perf_kwork__report_read_bpf(struct perf_kwork *kwork __maybe_unused)
+{
+       return -1;
+}
+
+/* Stubs used when built without BPF skeleton support: do nothing. */
+static inline void perf_kwork__report_cleanup_bpf(void) {}
+
+static inline void perf_kwork__trace_start(void) {}
+static inline void perf_kwork__trace_finish(void) {}
+
+#endif  /* HAVE_BPF_SKEL */
+
 #endif  /* PERF_UTIL_KWORK_H */