perf record: Handle argument change in sched_switch
author Namhyung Kim <namhyung@kernel.org>
Wed, 18 May 2022 22:47:23 +0000 (15:47 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Thu, 26 May 2022 15:36:57 +0000 (12:36 -0300)
Recently sched_switch tracepoint added a new argument for prev_state,
but it's hard to handle the change in a BPF program.  Instead, we can
check the function prototype in BTF before loading the program.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Ian Rogers <irogers@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Blake Jones <blakejones@google.com>
Cc: Hao Luo <haoluo@google.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Milian Wolff <milian.wolff@kdab.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Song Liu <songliubraving@fb.com>
Cc: bpf@vger.kernel.org
Link: https://lore.kernel.org/r/20220518224725.742882-5-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
tools/perf/util/bpf_off_cpu.c
tools/perf/util/bpf_skel/off_cpu.bpf.c

index b5e2d038da50dd83911c7f0e1f1d5d960a019eac..874856c55101da78f947d7b62202bfac037c043b 100644 (file)
@@ -89,6 +89,33 @@ static void off_cpu_finish(void *arg __maybe_unused)
        off_cpu_bpf__destroy(skel);
 }
 
+/*
+ * Detect whether the running kernel passes prev_state to sched_switch.
+ *
+ * v5.18 kernel added prev_state arg, so it needs to check the signature:
+ * look up the tracepoint's BTF typedef, follow it to the function
+ * prototype it points to and count the parameters.  When the extra
+ * argument is present, has_prev_state is set in the skeleton's rodata.
+ * On any lookup failure the flag is left untouched.
+ */
+static void check_sched_switch_args(void)
+{
+       /*
+        * Use the kernel's own BTF: the typedef describes a kernel
+        * tracepoint, and the BTF embedded in the BPF object is not
+        * guaranteed to carry it.
+        */
+       struct btf *btf = btf__load_vmlinux_btf();
+       const struct btf_type *t1, *t2, *t3;
+       u32 type_id;
+
+       /* covers both the NULL and the error-pointer return conventions */
+       if (btf == NULL || libbpf_get_error(btf))
+               return;
+
+       /* tracepoint typedefs are named btf_trace_<event> */
+       type_id = btf__find_by_name_kind(btf, "btf_trace_sched_switch",
+                                        BTF_KIND_TYPEDEF);
+       if ((s32)type_id < 0)
+               goto cleanup;
+
+       /* typedef -> pointer -> function prototype */
+       t1 = btf__type_by_id(btf, type_id);
+       if (t1 == NULL)
+               goto cleanup;
+
+       t2 = btf__type_by_id(btf, t1->type);
+       if (t2 == NULL || !btf_is_ptr(t2))
+               goto cleanup;
+
+       t3 = btf__type_by_id(btf, t2->type);
+       /* the btf_trace func proto has one more argument for the context */
+       if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 5) {
+               /* new format: pass prev_state as 4th arg */
+               skel->rodata->has_prev_state = true;
+       }
+
+cleanup:
+       btf__free(btf);
+}
+
 int off_cpu_prepare(struct evlist *evlist, struct target *target)
 {
        int err, fd, i;
@@ -117,6 +144,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target)
        }
 
        set_max_rlimit();
+       check_sched_switch_args();
 
        err = off_cpu_bpf__load(skel);
        if (err) {
index 78cdcc8ff8635e10aa1c4d716f64221930b21060..986d7db6e75d22f7db9020c5c0f1ff7fa6041ed6 100644 (file)
@@ -72,6 +72,8 @@ int enabled = 0;
 int has_cpu = 0;
 int has_task = 0;
 
+const volatile bool has_prev_state = false; /* set via skel->rodata before load */
+
 /*
  * Old kernel used to call it task_struct->state and now it's '__state'.
  * Use BPF CO-RE "ignored suffix rule" to deal with it like below:
@@ -121,22 +123,13 @@ static inline int can_record(struct task_struct *t, int state)
        return 1;
 }
 
-SEC("tp_btf/sched_switch")
-int on_switch(u64 *ctx)
+static int off_cpu_stat(u64 *ctx, struct task_struct *prev,
+                       struct task_struct *next, int state)
 {
        __u64 ts;
-       int state;
        __u32 stack_id;
-       struct task_struct *prev, *next;
        struct tstamp_data *pelem;
 
-       if (!enabled)
-               return 0;
-
-       prev = (struct task_struct *)ctx[1];
-       next = (struct task_struct *)ctx[2];
-       state = get_task_state(prev);
-
        ts = bpf_ktime_get_ns();
 
        if (!can_record(prev, state))
@@ -180,4 +173,24 @@ next:
        return 0;
 }
 
+/*
+ * tp_btf handler for sched_switch: determines the previous task's state
+ * and hands both tasks to off_cpu_stat().  Returns 0 without doing any
+ * work while 'enabled' is not set.
+ */
+SEC("tp_btf/sched_switch")
+int on_switch(u64 *ctx)
+{
+       struct task_struct *prev_task, *next_task;
+       int state;
+
+       if (!enabled)
+               return 0;
+
+       /* ctx[1] and ctx[2] carry the previous and next tasks */
+       prev_task = (struct task_struct *)ctx[1];
+       next_task = (struct task_struct *)ctx[2];
+
+       /* without the prev_state argument, read the state from the task */
+       if (!has_prev_state)
+               state = get_task_state(prev_task);
+       else
+               state = (int)ctx[3];
+
+       return off_cpu_stat(ctx, prev_task, next_task, state);
+}
+
 char LICENSE[] SEC("license") = "Dual BSD/GPL";