perf pmu: Lazily compute default config
author Ian Rogers <irogers@google.com>
Thu, 12 Oct 2023 17:56:45 +0000 (10:56 -0700)
committer Namhyung Kim <namhyung@kernel.org>
Tue, 17 Oct 2023 19:40:50 +0000 (12:40 -0700)
The default config is computed during creation of the PMU and may do
things like scanning sysfs, even when the PMU is only being used as
part of scanning. Change default_config to
perf_event_attr_init_default, a callback that is used when a default
config needs initializing. This avoids holding onto the memory for a
perf_event_attr and copying.

On a tigerlake laptop running the pmu-scan benchmark:

Before:
Running 'internals/pmu-scan' benchmark:
Computing performance of sysfs PMU event scan for 100 times
  Average core PMU scanning took: 28.780 usec (+- 0.503 usec)
  Average PMU scanning took: 283.480 usec (+- 18.471 usec)
Number of openat syscalls: 30,227

After:
Running 'internals/pmu-scan' benchmark:
Computing performance of sysfs PMU event scan for 100 times
  Average core PMU scanning took: 27.880 usec (+- 0.169 usec)
  Average PMU scanning took: 245.260 usec (+- 15.758 usec)
Number of openat syscalls: 28,914

Over 3 runs it is a nearly 12% reduction in execution time and a 4.3%
reduction in openat calls.

Signed-off-by: Ian Rogers <irogers@google.com>
Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: James Clark <james.clark@arm.com>
Cc: Suzuki K Poulose <suzuki.poulose@arm.com>
Cc: Yang Jihong <yangjihong1@huawei.com>
Cc: Will Deacon <will@kernel.org>
Cc: Leo Yan <leo.yan@linaro.org>
Cc: Mike Leach <mike.leach@linaro.org>
Cc: Jing Zhang <renyu.zj@linux.alibaba.com>
Cc: Kajol Jain <kjain@linux.ibm.com>
Cc: Thomas Richter <tmricht@linux.ibm.com>
Cc: Kan Liang <kan.liang@linux.intel.com>
Cc: John Garry <john.g.garry@oracle.com>
Cc: linux-arm-kernel@lists.infradead.org
Cc: coresight@lists.linaro.org
Link: https://lore.kernel.org/r/20231012175645.1849503-8-irogers@google.com
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
tools/perf/arch/arm/util/cs-etm.c
tools/perf/arch/arm/util/pmu.c
tools/perf/arch/arm64/util/arm-spe.c
tools/perf/arch/x86/util/intel-pt.c
tools/perf/arch/x86/util/pmu.c
tools/perf/util/arm-spe.h
tools/perf/util/cs-etm.h
tools/perf/util/intel-pt.h
tools/perf/util/parse-events.c
tools/perf/util/pmu.c
tools/perf/util/pmu.h

index b8d6a953fd7423e161db97dec6b304d66fa17ee6..16bba74f048bfc3e0561471102b2d94d0608d66e 100644 (file)
@@ -917,16 +917,9 @@ out:
  * (CFG_CHG and evsel__set_config_if_unset()). If no default is set then user
  * changes aren't tracked.
  */
-struct perf_event_attr *
-cs_etm_get_default_config(struct perf_pmu *pmu __maybe_unused)
+void
+cs_etm_get_default_config(const struct perf_pmu *pmu __maybe_unused,
+                         struct perf_event_attr *attr)
 {
-       struct perf_event_attr *attr;
-
-       attr = zalloc(sizeof(struct perf_event_attr));
-       if (!attr)
-               return NULL;
-
        attr->sample_period = 1;
-
-       return attr;
 }
index f25f68f84a949aa1182c325ac25eab8bfc34b8ca..7f3af3b97f3bac2a35faddc2a0a0ad180198a618 100644 (file)
@@ -20,12 +20,12 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
        if (!strcmp(pmu->name, CORESIGHT_ETM_PMU_NAME)) {
                /* add ETM default config here */
                pmu->selectable = true;
-               pmu->default_config = cs_etm_get_default_config(pmu);
+               pmu->perf_event_attr_init_default = cs_etm_get_default_config;
 #if defined(__aarch64__)
        } else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) {
                pmu->selectable = true;
                pmu->is_uncore = false;
-               pmu->default_config = arm_spe_pmu_default_config(pmu);
+               pmu->perf_event_attr_init_default = arm_spe_pmu_default_config;
        } else if (strstarts(pmu->name, HISI_PTT_PMU_NAME)) {
                pmu->selectable = true;
 #endif
index 08a76734ccd2ddb37e1d77e74aca5c756a659335..e3acc739bd0027b214a4aa5296e81bfcac3afba7 100644 (file)
@@ -113,6 +113,25 @@ arm_spe_snapshot_resolve_auxtrace_defaults(struct record_opts *opts,
        }
 }
 
+static __u64 arm_spe_pmu__sample_period(const struct perf_pmu *arm_spe_pmu)
+{
+       static __u64 sample_period;
+
+       if (sample_period)
+               return sample_period;
+
+       /*
+        * If kernel driver doesn't advertise a minimum,
+        * use max allowable by PMSIDR_EL1.INTERVAL
+        */
+       if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
+                               &sample_period) != 1) {
+               pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
+               sample_period = 4096;
+       }
+       return sample_period;
+}
+
 static int arm_spe_recording_options(struct auxtrace_record *itr,
                                     struct evlist *evlist,
                                     struct record_opts *opts)
@@ -136,7 +155,7 @@ static int arm_spe_recording_options(struct auxtrace_record *itr,
                                return -EINVAL;
                        }
                        evsel->core.attr.freq = 0;
-                       evsel->core.attr.sample_period = arm_spe_pmu->default_config->sample_period;
+                       evsel->core.attr.sample_period = arm_spe_pmu__sample_period(arm_spe_pmu);
                        evsel->needs_auxtrace_mmap = true;
                        arm_spe_evsel = evsel;
                        opts->full_auxtrace = true;
@@ -495,26 +514,8 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
        return &sper->itr;
 }
 
-struct perf_event_attr
-*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu)
+void
+arm_spe_pmu_default_config(const struct perf_pmu *arm_spe_pmu, struct perf_event_attr *attr)
 {
-       struct perf_event_attr *attr;
-
-       attr = zalloc(sizeof(struct perf_event_attr));
-       if (!attr) {
-               pr_err("arm_spe default config cannot allocate a perf_event_attr\n");
-               return NULL;
-       }
-
-       /*
-        * If kernel driver doesn't advertise a minimum,
-        * use max allowable by PMSIDR_EL1.INTERVAL
-        */
-       if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu",
-                                 &attr->sample_period) != 1) {
-               pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n");
-               attr->sample_period = 4096;
-       }
-
-       return attr;
+       attr->sample_period = arm_spe_pmu__sample_period(arm_spe_pmu);
 }
index 6d6cd8f9133cb1709e5456dd22b31b574f56fbe3..fa0c718b9e7277f0374356bf5d46b603f19ed7ca 100644 (file)
@@ -60,7 +60,7 @@ struct intel_pt_recording {
        size_t                          priv_size;
 };
 
-static int intel_pt_parse_terms_with_default(struct perf_pmu *pmu,
+static int intel_pt_parse_terms_with_default(const struct perf_pmu *pmu,
                                             const char *str,
                                             u64 *config)
 {
@@ -84,7 +84,7 @@ out_free:
        return err;
 }
 
-static int intel_pt_parse_terms(struct perf_pmu *pmu, const char *str, u64 *config)
+static int intel_pt_parse_terms(const struct perf_pmu *pmu, const char *str, u64 *config)
 {
        *config = 0;
        return intel_pt_parse_terms_with_default(pmu, str, config);
@@ -177,7 +177,7 @@ static int intel_pt_pick_bit(int bits, int target)
        return pick;
 }
 
-static u64 intel_pt_default_config(struct perf_pmu *intel_pt_pmu)
+static u64 intel_pt_default_config(const struct perf_pmu *intel_pt_pmu)
 {
        char buf[256];
        int mtc, mtc_periods = 0, mtc_period;
@@ -256,18 +256,17 @@ static int intel_pt_parse_snapshot_options(struct auxtrace_record *itr,
        return 0;
 }
 
-struct perf_event_attr *
-intel_pt_pmu_default_config(struct perf_pmu *intel_pt_pmu)
+void intel_pt_pmu_default_config(const struct perf_pmu *intel_pt_pmu,
+                                struct perf_event_attr *attr)
 {
-       struct perf_event_attr *attr;
+       static u64 config;
+       static bool initialized;
 
-       attr = zalloc(sizeof(struct perf_event_attr));
-       if (!attr)
-               return NULL;
-
-       attr->config = intel_pt_default_config(intel_pt_pmu);
-
-       return attr;
+       if (!initialized) {
+               config = intel_pt_default_config(intel_pt_pmu);
+               initialized = true;
+       }
+       attr->config = config;
 }
 
 static const char *intel_pt_find_filter(struct evlist *evlist,
index 949b3e2c67bdc52b05d4655d01d875e1baaa0805..469555ae9b3c2d77328e11ba27e575948611eeaa 100644 (file)
@@ -23,7 +23,7 @@ void perf_pmu__arch_init(struct perf_pmu *pmu __maybe_unused)
        if (!strcmp(pmu->name, INTEL_PT_PMU_NAME)) {
                pmu->auxtrace = true;
                pmu->selectable = true;
-               pmu->default_config = intel_pt_pmu_default_config(pmu);
+               pmu->perf_event_attr_init_default = intel_pt_pmu_default_config;
        }
        if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME)) {
                pmu->auxtrace = true;
index 98d3235781c3c0ffbfb5714eae259297d19ecbcb..4f4900c18f3e232bd02feb1341d321744cd19284 100644 (file)
@@ -27,5 +27,7 @@ struct auxtrace_record *arm_spe_recording_init(int *err,
 int arm_spe_process_auxtrace_info(union perf_event *event,
                                  struct perf_session *session);
 
-struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu);
+void arm_spe_pmu_default_config(const struct perf_pmu *arm_spe_pmu,
+                               struct perf_event_attr *attr);
+
 #endif
index 7cca3788791760befb494ee0237cd09436576237..4696267a32f0c9caedc190c3b83443ab1ccb0907 100644 (file)
@@ -242,7 +242,7 @@ struct cs_etm_packet_queue {
 
 int cs_etm__process_auxtrace_info(union perf_event *event,
                                  struct perf_session *session);
-struct perf_event_attr *cs_etm_get_default_config(struct perf_pmu *pmu);
+void cs_etm_get_default_config(const struct perf_pmu *pmu, struct perf_event_attr *attr);
 
 enum cs_etm_pid_fmt {
        CS_ETM_PIDFMT_NONE,
index c7d6068e3a6bbd42dec3a7a807655f9aa57b2f8f..18fd0be52e6c367193a78a1b24d8ee52159dee84 100644 (file)
@@ -42,6 +42,7 @@ struct auxtrace_record *intel_pt_recording_init(int *err);
 int intel_pt_process_auxtrace_info(union perf_event *event,
                                   struct perf_session *session);
 
-struct perf_event_attr *intel_pt_pmu_default_config(struct perf_pmu *pmu);
+void intel_pt_pmu_default_config(const struct perf_pmu *intel_pt_pmu,
+                                struct perf_event_attr *attr);
 
 #endif
index 23c027cf20aefedf1cc22cc3dbd51960f4b71f1d..aa2f5c6fc7fc24f205b9c88012dfd8016fdf9b3e 100644 (file)
@@ -1418,11 +1418,10 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
        }
        fix_raw(&parsed_terms, pmu);
 
-       if (pmu->default_config) {
-               memcpy(&attr, pmu->default_config, sizeof(struct perf_event_attr));
-       } else {
-               memset(&attr, 0, sizeof(attr));
-       }
+       memset(&attr, 0, sizeof(attr));
+       if (pmu->perf_event_attr_init_default)
+               pmu->perf_event_attr_init_default(pmu, &attr);
+
        attr.type = pmu->type;
 
        if (list_empty(&parsed_terms.terms)) {
@@ -1466,7 +1465,8 @@ int parse_events_add_pmu(struct parse_events_state *parse_state,
         * When using default config, record which bits of attr->config were
         * changed by the user.
         */
-       if (pmu->default_config && get_config_chgs(pmu, &parsed_terms, &config_terms)) {
+       if (pmu->perf_event_attr_init_default &&
+           get_config_chgs(pmu, &parsed_terms, &config_terms)) {
                parse_events_terms__exit(&parsed_terms);
                return -ENOMEM;
        }
index 8ef675ea7bdd4030c9e32e6cda90ac59603de63d..a967d25e899b731d9404142189d81f1ef818b0ca 100644 (file)
@@ -1402,7 +1402,7 @@ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
                     struct parse_events_terms *head_terms,
                     struct parse_events_error *err)
 {
-       bool zero = !!pmu->default_config;
+       bool zero = !!pmu->perf_event_attr_init_default;
 
        return perf_pmu__config_terms(pmu, attr, head_terms, zero, err);
 }
@@ -2064,7 +2064,6 @@ void perf_pmu__delete(struct perf_pmu *pmu)
 
        perf_cpu_map__put(pmu->cpus);
 
-       zfree(&pmu->default_config);
        zfree(&pmu->name);
        zfree(&pmu->alias_name);
        zfree(&pmu->id);
index 5a05131aa4ce1876a58236c4eeb5141a0a643935..d2895d415f08fbf941bfd1bfa52f371307228e09 100644 (file)
@@ -92,10 +92,11 @@ struct perf_pmu {
         */
        int max_precise;
        /**
-        * @default_config: Optional default perf_event_attr determined in
-        * architecture specific code.
+        * @perf_event_attr_init_default: Optional function to default
+        * initialize PMU specific parts of the perf_event_attr.
         */
-       struct perf_event_attr *default_config;
+       void (*perf_event_attr_init_default)(const struct perf_pmu *pmu,
+                                            struct perf_event_attr *attr);
        /**
         * @cpus: Empty or the contents of either of:
         * <sysfs>/bus/event_source/devices/<name>/cpumask.