perf pmus: Allow just core PMU scanning
author Ian Rogers <irogers@google.com>
Sat, 27 May 2023 07:22:05 +0000 (00:22 -0700)
committer Arnaldo Carvalho de Melo <acme@redhat.com>
Sat, 27 May 2023 12:42:00 +0000 (09:42 -0300)
Scanning all PMUs is expensive because every PMU's sysfs entries are
loaded; benchmarking shows more than 4x the cost of a core-only scan:

```
$ perf bench internals pmu-scan -i 1000
Computing performance of sysfs PMU event scan for 1000 times
  Average core PMU scanning took: 989.231 usec (+- 1.535 usec)
  Average PMU scanning took: 4309.425 usec (+- 74.322 usec)
```

Add new perf_pmus__scan_core routine that scans just core
PMUs. Replace perf_pmus__scan calls with perf_pmus__scan_core when
non-core PMUs are being ignored.

Reviewed-by: Kan Liang <kan.liang@linux.intel.com>
Signed-off-by: Ian Rogers <irogers@google.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Ali Saidi <alisaidi@amazon.com>
Cc: Athira Rajeev <atrajeev@linux.vnet.ibm.com>
Cc: Dmitrii Dolgov <9erthalion6@gmail.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: James Clark <james.clark@arm.com>
Cc: Jing Zhang <renyu.zj@linux.alibaba.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: John Garry <john.g.garry@oracle.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Kang Minchul <tegongkang@gmail.com>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Madhavan Srinivasan <maddy@linux.ibm.com>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Ming Wang <wangming01@loongson.cn>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: Rob Herring <robh@kernel.org>
Cc: Sandipan Das <sandipan.das@amd.com>
Cc: Sean Christopherson <seanjc@google.com>
Cc: Suzuki Poulouse <suzuki.poulose@arm.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Xing Zhengjun <zhengjun.xing@linux.intel.com>
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Link: https://lore.kernel.org/r/20230527072210.2900565-30-irogers@google.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
14 files changed:
tools/perf/arch/arm64/util/pmu.c
tools/perf/arch/x86/util/evlist.c
tools/perf/arch/x86/util/perf_regs.c
tools/perf/bench/pmu-scan.c
tools/perf/tests/pmu-events.c
tools/perf/util/cputopo.c
tools/perf/util/header.c
tools/perf/util/mem-events.c
tools/perf/util/parse-events.c
tools/perf/util/pmu.c
tools/perf/util/pmu.h
tools/perf/util/pmus.c
tools/perf/util/pmus.h
tools/perf/util/print-events.c

index 2504d43a39a7b963921723f4b7ee105fca6d2f25..561de0cb6b95880dfb0af23b9c769a27366846e1 100644 (file)
@@ -11,10 +11,7 @@ static struct perf_pmu *pmu__find_core_pmu(void)
 {
        struct perf_pmu *pmu = NULL;
 
-       while ((pmu = perf_pmus__scan(pmu))) {
-               if (!is_pmu_core(pmu->name))
-                       continue;
-
+       while ((pmu = perf_pmus__scan_core(pmu))) {
                /*
                 * The cpumap should cover all CPUs. Otherwise, some CPUs may
                 * not support some events or have different event IDs.
index 03240c640c7fa06653331d839a813194be393cc6..8a6a0b98b9763e3b9b74f44a2b4b388cce7bd999 100644 (file)
@@ -33,13 +33,10 @@ static int ___evlist__add_default_attrs(struct evlist *evlist,
                        continue;
                }
 
-               while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+               while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
                        struct perf_cpu_map *cpus;
                        struct evsel *evsel;
 
-                       if (!pmu->is_core)
-                               continue;
-
                        evsel = evsel__new(attrs + i);
                        if (evsel == NULL)
                                goto out_delete_partial_list;
index befa7f3659b9e4b56f782b07c9abee090592a576..116384f19baf1887778461cc39cd845aaf99dba8 100644 (file)
@@ -300,11 +300,9 @@ uint64_t arch__intr_reg_mask(void)
                 * The same register set is supported among different hybrid PMUs.
                 * Only check the first available one.
                 */
-               while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-                       if (pmu->is_core) {
-                               type = pmu->type;
-                               break;
-                       }
+               while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+                       type = pmu->type;
+                       break;
                }
                attr.config |= type << PERF_PMU_TYPE_SHIFT;
        }
index 51cae2d0335302d724c43a703820241660e90885..c7d207f8e13c2309587e00fd2e4e670df79f7159 100644 (file)
@@ -22,6 +22,7 @@ struct pmu_scan_result {
        int nr_aliases;
        int nr_formats;
        int nr_caps;
+       bool is_core;
 };
 
 static const struct option options[] = {
@@ -53,6 +54,7 @@ static int save_result(void)
                r = results + nr_pmus;
 
                r->name = strdup(pmu->name);
+               r->is_core = pmu->is_core;
                r->nr_caps = pmu->nr_caps;
 
                r->nr_aliases = 0;
@@ -72,7 +74,7 @@ static int save_result(void)
        return 0;
 }
 
-static int check_result(void)
+static int check_result(bool core_only)
 {
        struct pmu_scan_result *r;
        struct perf_pmu *pmu;
@@ -81,6 +83,9 @@ static int check_result(void)
 
        for (int i = 0; i < nr_pmus; i++) {
                r = &results[i];
+               if (core_only && !r->is_core)
+                       continue;
+
                pmu = perf_pmus__find(r->name);
                if (pmu == NULL) {
                        pr_err("Cannot find PMU %s\n", r->name);
@@ -130,7 +135,6 @@ static int run_pmu_scan(void)
        struct timeval start, end, diff;
        double time_average, time_stddev;
        u64 runtime_us;
-       unsigned int i;
        int ret;
 
        init_stats(&stats);
@@ -142,26 +146,30 @@ static int run_pmu_scan(void)
                return -1;
        }
 
-       for (i = 0; i < iterations; i++) {
-               gettimeofday(&start, NULL);
-               perf_pmus__scan(NULL);
-               gettimeofday(&end, NULL);
-
-               timersub(&end, &start, &diff);
-               runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
-               update_stats(&stats, runtime_us);
-
-               ret = check_result();
-               perf_pmus__destroy();
-               if (ret < 0)
-                       break;
+       for (int j = 0; j < 2; j++) {
+               bool core_only = (j == 0);
+
+               for (unsigned int i = 0; i < iterations; i++) {
+                       gettimeofday(&start, NULL);
+                       if (core_only)
+                               perf_pmus__scan_core(NULL);
+                       else
+                               perf_pmus__scan(NULL);
+                       gettimeofday(&end, NULL);
+                       timersub(&end, &start, &diff);
+                       runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
+                       update_stats(&stats, runtime_us);
+
+                       ret = check_result(core_only);
+                       perf_pmus__destroy();
+                       if (ret < 0)
+                               break;
+               }
+               time_average = avg_stats(&stats);
+               time_stddev = stddev_stats(&stats);
+               pr_info("  Average%s PMU scanning took: %.3f usec (+- %.3f usec)\n",
+                       core_only ? " core" : "", time_average, time_stddev);
        }
-
-       time_average = avg_stats(&stats);
-       time_stddev = stddev_stats(&stats);
-       pr_info("  Average PMU scanning took: %.3f usec (+- %.3f usec)\n",
-               time_average, time_stddev);
-
        delete_result();
        return 0;
 }
index 64ecb7845af484e4c7c3b366bfb264fe2268838d..64383fc34ef1b5b02eab5405593c4a52edc7cad6 100644 (file)
@@ -709,12 +709,9 @@ static int test__aliases(struct test_suite *test __maybe_unused,
        struct perf_pmu *pmu = NULL;
        unsigned long i;
 
-       while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+       while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
                int count = 0;
 
-               if (!is_pmu_core(pmu->name))
-                       continue;
-
                if (list_empty(&pmu->format)) {
                        pr_debug2("skipping testing core PMU %s\n", pmu->name);
                        continue;
index 4578c26747e1bf4de46e3926975a97103abd7224..729142ec9a9adca977499240efba5edf9eb5b3a6 100644 (file)
@@ -477,10 +477,9 @@ struct hybrid_topology *hybrid_topology__new(void)
        if (!perf_pmus__has_hybrid())
                return NULL;
 
-       while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-               if (pmu->is_core)
-                       nr++;
-       }
+       while ((pmu = perf_pmus__scan_core(pmu)) != NULL)
+               nr++;
+
        if (nr == 0)
                return NULL;
 
@@ -489,10 +488,7 @@ struct hybrid_topology *hybrid_topology__new(void)
                return NULL;
 
        tp->nr = nr;
-       while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-               if (!pmu->is_core)
-                       continue;
-
+       while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
                if (load_hybrid_node(&tp->nodes[i], pmu)) {
                        hybrid_topology__delete(tp);
                        return NULL;
index e6d8ecd7a08e6aae5f2529f3fa071fd2e083e8ea..2dde3ca20de53a72d8415e9ca42bbdb7c14963c1 100644 (file)
@@ -1607,10 +1607,7 @@ static int write_pmu_caps(struct feat_fd *ff,
         */
        if (perf_pmus__has_hybrid()) {
                pmu = NULL;
-               while ((pmu = perf_pmus__scan(pmu))) {
-                       if (!pmu->is_core)
-                               continue;
-
+               while ((pmu = perf_pmus__scan_core(pmu))) {
                        ret = __write_pmu_caps(ff, pmu, true);
                        if (ret < 0)
                                return ret;
index 08ac3ea2e366f8fcbaaa8c0730ebdcc55f43c6b3..c5596230a3082e8f8f88ec5caa996ef5e4aac70b 100644 (file)
@@ -136,10 +136,7 @@ int perf_mem_events__init(void)
                } else {
                        struct perf_pmu *pmu = NULL;
 
-                       while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-                               if (!pmu->is_core)
-                                       continue;
-
+                       while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
                                scnprintf(sysfs_name, sizeof(sysfs_name),
                                          e->sysfs_name, pmu->name);
                                e->supported |= perf_mem_event__supported(mnt, sysfs_name);
@@ -176,10 +173,7 @@ static void perf_mem_events__print_unsupport_hybrid(struct perf_mem_event *e,
        char sysfs_name[100];
        struct perf_pmu *pmu = NULL;
 
-       while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-               if (!pmu->is_core)
-                       continue;
-
+       while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
                scnprintf(sysfs_name, sizeof(sysfs_name), e->sysfs_name,
                          pmu->name);
                if (!perf_mem_event__supported(mnt, sysfs_name)) {
@@ -217,9 +211,7 @@ int perf_mem_events__record_args(const char **rec_argv, int *argv_nr,
                                return -1;
                        }
 
-                       while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-                               if (!pmu->is_core)
-                                       continue;
+                       while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
                                rec_argv[i++] = "-e";
                                s = perf_mem_events__name(j, pmu->name);
                                if (s) {
index be544f948be22671908d6cb7578ddc974a61ba93..e0c3f2037477379f8e5de103291639f81c368c09 100644 (file)
@@ -453,15 +453,12 @@ int parse_events_add_cache(struct list_head *list, int *idx, const char *name,
        const char *config_name = get_config_name(head_config);
        const char *metric_id = get_config_metric_id(head_config);
 
-       while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+       /* Legacy cache events are only supported by core PMUs. */
+       while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
                LIST_HEAD(config_terms);
                struct perf_event_attr attr;
                int ret;
 
-               /* Skip unsupported PMUs. */
-               if (!perf_pmu__supports_legacy_cache(pmu))
-                       continue;
-
                if (parse_events__filter_pmu(parse_state, pmu))
                        continue;
 
@@ -1481,12 +1478,10 @@ int parse_events_add_numeric(struct parse_events_state *parse_state,
                return __parse_events_add_numeric(parse_state, list, /*pmu=*/NULL,
                                                  type, config, head_config);
 
-       while ((pmu = perf_pmus__scan(pmu)) != NULL) {
+       /* Wildcards on numeric values are only supported by core PMUs. */
+       while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
                int ret;
 
-               if (!perf_pmu__supports_wildcard_numeric(pmu))
-                       continue;
-
                if (parse_events__filter_pmu(parse_state, pmu))
                        continue;
 
index 05056305fb58aef52f65846524e25490ac8df6d0..7102084dd3aad2992cdd76d3d8b2235c1e4d04e1 100644 (file)
@@ -1427,21 +1427,11 @@ bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu)
        return pmu->is_core;
 }
 
-bool perf_pmu__supports_wildcard_numeric(const struct perf_pmu *pmu)
-{
-       return pmu->is_core;
-}
-
 bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu)
 {
        return !is_pmu_hybrid(pmu->name);
 }
 
-bool perf_pmu__is_mem_pmu(const struct perf_pmu *pmu)
-{
-       return pmu->is_core;
-}
-
 bool perf_pmu__have_event(const struct perf_pmu *pmu, const char *name)
 {
        struct perf_pmu_alias *alias;
index f1f3e8a2e00ebb67167518bc93500d0dbb9aaf04..02fec0a7d4c82eb7b4a5949a54fb5d8347325a2a 100644 (file)
@@ -223,9 +223,7 @@ void perf_pmu__del_formats(struct list_head *formats);
 bool is_pmu_core(const char *name);
 bool is_pmu_hybrid(const char *name);
 bool perf_pmu__supports_legacy_cache(const struct perf_pmu *pmu);
-bool perf_pmu__supports_wildcard_numeric(const struct perf_pmu *pmu);
 bool perf_pmu__auto_merge_stats(const struct perf_pmu *pmu);
-bool perf_pmu__is_mem_pmu(const struct perf_pmu *pmu);
 bool perf_pmu__have_event(const struct perf_pmu *pmu, const char *name);
 
 FILE *perf_pmu__open_file(struct perf_pmu *pmu, const char *name);
index 4ef4fecd335fd03e7d1f89ede3348258a9ac7221..de7fc36519c99f10a299d1d3c1aea1e25a4f0bb9 100644 (file)
@@ -87,7 +87,7 @@ static struct perf_pmu *perf_pmu__find2(int dirfd, const char *name)
 }
 
 /* Add all pmus in sysfs to pmu list: */
-static void pmu_read_sysfs(void)
+static void pmu_read_sysfs(bool core_only)
 {
        int fd;
        DIR *dir;
@@ -104,6 +104,8 @@ static void pmu_read_sysfs(void)
        while ((dent = readdir(dir))) {
                if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, ".."))
                        continue;
+               if (core_only && !is_pmu_core(dent->d_name))
+                       continue;
                /* add to static LIST_HEAD(core_pmus) or LIST_HEAD(other_pmus): */
                perf_pmu__find2(fd, dent->d_name);
        }
@@ -135,7 +137,7 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu)
        bool use_core_pmus = !pmu || pmu->is_core;
 
        if (!pmu) {
-               pmu_read_sysfs();
+               pmu_read_sysfs(/*core_only=*/false);
                pmu = list_prepare_entry(pmu, &core_pmus, list);
        }
        if (use_core_pmus) {
@@ -150,6 +152,18 @@ struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu)
        return NULL;
 }
 
+struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu)
+{
+       if (!pmu) {
+               pmu_read_sysfs(/*core_only=*/true);
+               pmu = list_prepare_entry(pmu, &core_pmus, list);
+       }
+       list_for_each_entry_continue(pmu, &core_pmus, list)
+               return pmu;
+
+       return NULL;
+}
+
 const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str)
 {
        struct perf_pmu *pmu = NULL;
@@ -176,10 +190,10 @@ int perf_pmus__num_mem_pmus(void)
        struct perf_pmu *pmu = NULL;
        int count = 0;
 
-       while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-               if (perf_pmu__is_mem_pmu(pmu))
-                       count++;
-       }
+       /* All core PMUs are for mem events. */
+       while ((pmu = perf_pmus__scan_core(pmu)) != NULL)
+               count++;
+
        return count;
 }
 
@@ -421,8 +435,8 @@ bool perf_pmus__has_hybrid(void)
        if (!hybrid_scanned) {
                struct perf_pmu *pmu = NULL;
 
-               while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-                       if (pmu->is_core && is_pmu_hybrid(pmu->name)) {
+               while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
+                       if (is_pmu_hybrid(pmu->name)) {
                                has_hybrid = true;
                                break;
                        }
index 2a771d9f8da7a97be487ff3f567cacde69221902..9de0222ed52bc88650d9e8ae3c9c9be04b77f11f 100644 (file)
@@ -11,6 +11,7 @@ struct perf_pmu *perf_pmus__find(const char *name);
 struct perf_pmu *perf_pmus__find_by_type(unsigned int type);
 
 struct perf_pmu *perf_pmus__scan(struct perf_pmu *pmu);
+struct perf_pmu *perf_pmus__scan_core(struct perf_pmu *pmu);
 
 const struct perf_pmu *perf_pmus__pmu_for_pmu_filter(const char *str);
 
index 9cee7bb7a56177c9cee04ff28c5792cf45fdc462..7a5f873927200e499cc015914665d55498dd7960 100644 (file)
@@ -272,12 +272,11 @@ int print_hwcache_events(const struct print_callbacks *print_cb, void *print_sta
        struct perf_pmu *pmu = NULL;
        const char *event_type_descriptor = event_type_descriptors[PERF_TYPE_HW_CACHE];
 
-       while ((pmu = perf_pmus__scan(pmu)) != NULL) {
-               /*
-                * Skip uncore PMUs for performance. PERF_TYPE_HW_CACHE type
-                * attributes can accept software PMUs in the extended type, so
-                * also skip.
-                */
+       /*
+        * Only print core PMUs, skipping uncore for performance and
+        * PERF_TYPE_SOFTWARE that can succeed in opening legacy cache events.
+        */
+       while ((pmu = perf_pmus__scan_core(pmu)) != NULL) {
                if (pmu->is_uncore || pmu->type == PERF_TYPE_SOFTWARE)
                        continue;