bpftool: Add {i,d}tlb_misses support for bpftool profile
author: Yonghong Song <yhs@fb.com>
Thu, 19 Nov 2020 07:30:39 +0000 (23:30 -0800)
committer: Daniel Borkmann <daniel@iogearbox.net>
Fri, 20 Nov 2020 14:50:38 +0000 (15:50 +0100)
Commit 47c09d6a9f67("bpftool: Introduce "prog profile" command")
introduced "bpftool prog profile" command which can be used
to profile a bpf program with metrics like # of instructions,
cycles, l1d loads and llc misses.

This patch adds support for itlb_misses and dtlb_misses.
During an internal bpf program performance evaluation,
I found these two metrics are also very useful. The following
is an example output:

 $ bpftool prog profile id 324 duration 3 cycles itlb_misses

           1885029 run_cnt
        5134686073 cycles
            306893 itlb_misses

 $ bpftool prog profile id 324 duration 3 cycles dtlb_misses

           1827382 run_cnt
        4943593648 cycles
           5975636 dtlb_misses

 $ bpftool prog profile id 324 duration 3 cycles llc_misses

           1836527 run_cnt
        5019612972 cycles
           4161041 llc_misses

From the above, we can see quite a few dtlb misses: about 3 dtlb misses
per prog run. This might be worth further investigation.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20201119073039.4060095-1-yhs@fb.com
tools/bpf/bpftool/prog.c

index acdb2c245f0a4643e6deb3f77801aba5cfab6ce9..1fe3ba255badd1697a95015220eb4543bf6b27f1 100644 (file)
@@ -1717,6 +1717,34 @@ struct profile_metric {
                .ratio_desc = "LLC misses per million insns",
                .ratio_mul = 1e6,
        },
+       {
+               .name = "itlb_misses",
+               .attr = {
+                       .type = PERF_TYPE_HW_CACHE,
+                       .config =
+                               PERF_COUNT_HW_CACHE_ITLB |
+                               (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+                               (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
+                       .exclude_user = 1
+               },
+               .ratio_metric = 2,
+               .ratio_desc = "itlb misses per million insns",
+               .ratio_mul = 1e6,
+       },
+       {
+               .name = "dtlb_misses",
+               .attr = {
+                       .type = PERF_TYPE_HW_CACHE,
+                       .config =
+                               PERF_COUNT_HW_CACHE_DTLB |
+                               (PERF_COUNT_HW_CACHE_OP_READ << 8) |
+                               (PERF_COUNT_HW_CACHE_RESULT_MISS << 16),
+                       .exclude_user = 1
+               },
+               .ratio_metric = 2,
+               .ratio_desc = "dtlb misses per million insns",
+               .ratio_mul = 1e6,
+       },
 };
 
 static __u64 profile_total_count;
@@ -2109,7 +2137,7 @@ static int do_help(int argc, char **argv)
                "                 struct_ops | fentry | fexit | freplace | sk_lookup }\n"
                "       ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
                "                        flow_dissector }\n"
-               "       METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
+               "       METRIC := { cycles | instructions | l1d_loads | llc_misses | itlb_misses | dtlb_misses }\n"
                "       " HELP_SPEC_OPTIONS "\n"
                "",
                bin_name, argv[-2]);