perf: arm_cspmu: ampere_cspmu: Add support for Ampere SoC PMU
author Ilkka Koskinen <ilkka@os.amperecomputing.com>
Wed, 13 Sep 2023 23:39:41 +0000 (16:39 -0700)
committer Will Deacon <will@kernel.org>
Tue, 10 Oct 2023 18:10:54 +0000 (19:10 +0100)
Ampere SoC PMU follows CoreSight PMU architecture. It uses implementation
specific registers to filter events rather than PMEVFILTnR registers.

Signed-off-by: Ilkka Koskinen <ilkka@os.amperecomputing.com>
Link: https://lore.kernel.org/r/20230913233941.9814-5-ilkka@os.amperecomputing.com
[will: Include linux/io.h in ampere_cspmu.c for writel()]
Signed-off-by: Will Deacon <will@kernel.org>
Documentation/admin-guide/perf/ampere_cspmu.rst [new file with mode: 0644]
drivers/perf/arm_cspmu/Kconfig
drivers/perf/arm_cspmu/Makefile
drivers/perf/arm_cspmu/ampere_cspmu.c [new file with mode: 0644]
drivers/perf/arm_cspmu/arm_cspmu.c
drivers/perf/arm_cspmu/arm_cspmu.h

diff --git a/Documentation/admin-guide/perf/ampere_cspmu.rst b/Documentation/admin-guide/perf/ampere_cspmu.rst
new file mode 100644 (file)
index 0000000..94f93f5
--- /dev/null
@@ -0,0 +1,29 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================
+Ampere SoC Performance Monitoring Unit (PMU)
+============================================
+
+Ampere SoC PMU is a generic PMU IP that follows Arm CoreSight PMU architecture.
+Therefore, the driver is implemented as a submodule of the arm_cspmu driver.
+In the first phase, it is used for counting MCU events on AmpereOne.
+
+
+MCU PMU events
+--------------
+
+The PMU driver supports setting filters for "rank", "bank", and "threshold".
+Note that the filters are per PMU instance rather than per event.
+
+
+Example for perf tool use::
+
+  / # perf list ampere
+
+    ampere_mcu_pmu_0/act_sent/                         [Kernel PMU event]
+    <...>
+    ampere_mcu_pmu_1/rd_sent/                          [Kernel PMU event]
+    <...>
+
+  / # perf stat -a -e ampere_mcu_pmu_0/act_sent,bank=5,rank=3,threshold=2/,ampere_mcu_pmu_1/rd_sent/ \
+        sleep 1
index d5f787d22234d45c247d38478828d0225ed4fdae..6f4e28fc84a2c8cf86b4238afa8c08f056334caf 100644 (file)
@@ -17,3 +17,13 @@ config NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU
        help
          Provides NVIDIA specific attributes for performance monitoring unit
          (PMU) devices based on ARM CoreSight PMU architecture.
+
+config AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+       tristate "Ampere Coresight Architecture PMU"
+       depends on  ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU
+       help
+         Provides Ampere specific attributes for performance monitoring unit
+         (PMU) devices based on ARM CoreSight PMU architecture.
+
+         In the first phase, the driver enables support on MCU PMU used in
+         AmpereOne SoC family.
index 0309d2ff264a1819258d3027f172f5591c9bd4d2..220a734efd54e68bf2bd1a458af3713c2d9dab31 100644 (file)
@@ -3,6 +3,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
 obj-$(CONFIG_ARM_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += arm_cspmu_module.o
+
 arm_cspmu_module-y := arm_cspmu.o
 
 obj-$(CONFIG_NVIDIA_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += nvidia_cspmu.o
+obj-$(CONFIG_AMPERE_CORESIGHT_PMU_ARCH_SYSTEM_PMU) += ampere_cspmu.o
diff --git a/drivers/perf/arm_cspmu/ampere_cspmu.c b/drivers/perf/arm_cspmu/ampere_cspmu.c
new file mode 100644 (file)
index 0000000..f146a45
--- /dev/null
@@ -0,0 +1,272 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ampere SoC PMU (Performance Monitor Unit)
+ *
+ * Copyright (c) 2023, Ampere Computing LLC
+ */
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/topology.h>
+
+#include "arm_cspmu.h"
+
+#define PMAUXR0                0xD80
+#define PMAUXR1                0xD84
+#define PMAUXR2                0xD88
+#define PMAUXR3                0xD8C
+
+/*
+ * Fetch the Ampere-specific context stored in the impl.ctx pointer of a
+ * struct arm_cspmu. The argument is parenthesized so that any pointer
+ * expression (not only a plain identifier) expands correctly.
+ */
+#define to_ampere_cspmu_ctx(cspmu)     ((struct ampere_cspmu_ctx *)((cspmu)->impl.ctx))
+
+struct ampere_cspmu_ctx {
+       const char *name;               /* PMU name returned by ampere_cspmu_get_name() */
+       struct attribute **event_attr;  /* table returned by ampere_cspmu_get_event_attrs() */
+       struct attribute **format_attr; /* table returned by ampere_cspmu_get_format_attrs() */
+};
+
+static DEFINE_IDA(mcu_pmu_ida);
+
+/*
+ * Generate a get_<_name>() helper that returns bits [_end:_start] of the
+ * event->attr.<_config> field of a perf event.
+ *
+ * The bit ranges used below must stay in sync with the "config1:..." strings
+ * in ampereone_mcu_format_attrs[].
+ */
+#define SOC_PMU_EVENT_ATTR_EXTRACTOR(_name, _config, _start, _end)        \
+       static inline u32 get_##_name(const struct perf_event *event)     \
+       {                                                                 \
+               return FIELD_GET(GENMASK_ULL(_end, _start),               \
+                                event->attr._config);                    \
+       }
+
+SOC_PMU_EVENT_ATTR_EXTRACTOR(event, config, 0, 8);
+SOC_PMU_EVENT_ATTR_EXTRACTOR(threshold, config1, 0, 7);
+SOC_PMU_EVENT_ATTR_EXTRACTOR(rank, config1, 8, 23);
+SOC_PMU_EVENT_ATTR_EXTRACTOR(bank, config1, 24, 55);
+
+static struct attribute *ampereone_mcu_pmu_event_attrs[] = {
+       ARM_CSPMU_EVENT_ATTR(cycle_count,               0x00),
+       ARM_CSPMU_EVENT_ATTR(act_sent,                  0x01),
+       ARM_CSPMU_EVENT_ATTR(pre_sent,                  0x02),
+       ARM_CSPMU_EVENT_ATTR(rd_sent,                   0x03),
+       ARM_CSPMU_EVENT_ATTR(rda_sent,                  0x04),
+       ARM_CSPMU_EVENT_ATTR(wr_sent,                   0x05),
+       ARM_CSPMU_EVENT_ATTR(wra_sent,                  0x06),
+       ARM_CSPMU_EVENT_ATTR(pd_entry_vld,              0x07),
+       ARM_CSPMU_EVENT_ATTR(sref_entry_vld,            0x08),
+       ARM_CSPMU_EVENT_ATTR(prea_sent,                 0x09),
+       ARM_CSPMU_EVENT_ATTR(pre_sb_sent,               0x0a),
+       ARM_CSPMU_EVENT_ATTR(ref_sent,                  0x0b),
+       ARM_CSPMU_EVENT_ATTR(rfm_sent,                  0x0c),
+       ARM_CSPMU_EVENT_ATTR(ref_sb_sent,               0x0d),
+       ARM_CSPMU_EVENT_ATTR(rfm_sb_sent,               0x0e),
+       ARM_CSPMU_EVENT_ATTR(rd_rda_sent,               0x0f),
+       ARM_CSPMU_EVENT_ATTR(wr_wra_sent,               0x10),
+       ARM_CSPMU_EVENT_ATTR(raw_hazard,                0x11),
+       ARM_CSPMU_EVENT_ATTR(war_hazard,                0x12),
+       ARM_CSPMU_EVENT_ATTR(waw_hazard,                0x13),
+       ARM_CSPMU_EVENT_ATTR(rar_hazard,                0x14),
+       ARM_CSPMU_EVENT_ATTR(raw_war_waw_hazard,        0x15),
+       ARM_CSPMU_EVENT_ATTR(hprd_lprd_wr_req_vld,      0x16),
+       ARM_CSPMU_EVENT_ATTR(lprd_req_vld,              0x17),
+       ARM_CSPMU_EVENT_ATTR(hprd_req_vld,              0x18),
+       ARM_CSPMU_EVENT_ATTR(hprd_lprd_req_vld,         0x19),
+       ARM_CSPMU_EVENT_ATTR(prefetch_tgt,              0x1a),
+       ARM_CSPMU_EVENT_ATTR(wr_req_vld,                0x1b),
+       ARM_CSPMU_EVENT_ATTR(partial_wr_req_vld,        0x1c),
+       ARM_CSPMU_EVENT_ATTR(rd_retry,                  0x1d),
+       ARM_CSPMU_EVENT_ATTR(wr_retry,                  0x1e),
+       ARM_CSPMU_EVENT_ATTR(retry_gnt,                 0x1f),
+       ARM_CSPMU_EVENT_ATTR(rank_change,               0x20),
+       ARM_CSPMU_EVENT_ATTR(dir_change,                0x21),
+       ARM_CSPMU_EVENT_ATTR(rank_dir_change,           0x22),
+       ARM_CSPMU_EVENT_ATTR(rank_active,               0x23),
+       ARM_CSPMU_EVENT_ATTR(rank_idle,                 0x24),
+       ARM_CSPMU_EVENT_ATTR(rank_pd,                   0x25),
+       ARM_CSPMU_EVENT_ATTR(rank_sref,                 0x26),
+       ARM_CSPMU_EVENT_ATTR(queue_fill_gt_thresh,      0x27),
+       ARM_CSPMU_EVENT_ATTR(queue_rds_gt_thresh,       0x28),
+       ARM_CSPMU_EVENT_ATTR(queue_wrs_gt_thresh,       0x29),
+       ARM_CSPMU_EVENT_ATTR(phy_updt_complt,           0x2a),
+       ARM_CSPMU_EVENT_ATTR(tz_fail,                   0x2b),
+       ARM_CSPMU_EVENT_ATTR(dram_errc,                 0x2c),
+       ARM_CSPMU_EVENT_ATTR(dram_errd,                 0x2d), /* IDs 0x2e-0x31 are not listed; next is 0x32 */
+       ARM_CSPMU_EVENT_ATTR(read_data_return,          0x32),
+       ARM_CSPMU_EVENT_ATTR(chi_wr_data_delta,         0x33),
+       ARM_CSPMU_EVENT_ATTR(zq_start,                  0x34),
+       ARM_CSPMU_EVENT_ATTR(zq_latch,                  0x35),
+       ARM_CSPMU_EVENT_ATTR(wr_fifo_full,              0x36),
+       ARM_CSPMU_EVENT_ATTR(info_fifo_full,            0x37),
+       ARM_CSPMU_EVENT_ATTR(cmd_fifo_full,             0x38),
+       ARM_CSPMU_EVENT_ATTR(dfi_nop,                   0x39),
+       ARM_CSPMU_EVENT_ATTR(dfi_cmd,                   0x3a),
+       ARM_CSPMU_EVENT_ATTR(rd_run_len,                0x3b),
+       ARM_CSPMU_EVENT_ATTR(wr_run_len,                0x3c),
+
+       ARM_CSPMU_EVENT_ATTR(cycles, ARM_CSPMU_EVT_CYCLES_DEFAULT), /* NOTE(review): presumably the generic cycle counter event - confirm in arm_cspmu.h */
+       NULL, /* sentinel terminating the attribute list */
+};
+
+static struct attribute *ampereone_mcu_format_attrs[] = {
+       ARM_CSPMU_FORMAT_EVENT_ATTR,
+       ARM_CSPMU_FORMAT_ATTR(threshold, "config1:0-7"),        /* same bits as get_threshold() */
+       ARM_CSPMU_FORMAT_ATTR(rank, "config1:8-23"),            /* same bits as get_rank() */
+       ARM_CSPMU_FORMAT_ATTR(bank, "config1:24-55"),           /* same bits as get_bank() */
+       NULL, /* sentinel terminating the attribute list */
+};
+
+/* Return the event attribute table stored in the Ampere context of @cspmu. */
+static struct attribute **
+ampere_cspmu_get_event_attrs(const struct arm_cspmu *cspmu)
+{
+       return to_ampere_cspmu_ctx(cspmu)->event_attr;
+}
+
+/* Return the format attribute table stored in the Ampere context of @cspmu. */
+static struct attribute **
+ampere_cspmu_get_format_attrs(const struct arm_cspmu *cspmu)
+{
+       return to_ampere_cspmu_ctx(cspmu)->format_attr;
+}
+
+/* Return the PMU name stored in the Ampere context of @cspmu. */
+static const char *
+ampere_cspmu_get_name(const struct arm_cspmu *cspmu)
+{
+       return to_ampere_cspmu_ctx(cspmu)->name;
+}
+
+/* Return the filter value for @event; always zero on Ampere SoC PMU. */
+static u32 ampere_cspmu_event_filter(const struct perf_event *event)
+{
+       /*
+        * Ampere SoC PMU does not use PMEVFILTR or PMCCFILTR; both are marked
+        * as RES0. Return zero so that PMCCFILTR is written as zero.
+        */
+       return 0;
+}
+
+/*
+ * Program the PMU-wide filter registers (PMAUXR0..2) with the threshold,
+ * rank and bank settings. @hwc and @filter are not used here: the values are
+ * taken from the config of the first event found in the used counters.
+ */
+static void ampere_cspmu_set_ev_filter(struct arm_cspmu *cspmu,
+                                      struct hw_perf_event *hwc,
+                                      u32 filter)
+{
+       const struct perf_event *first;
+       unsigned int first_idx;
+
+       /*
+        * All events share the same filter settings at this point, so the
+        * configuration of the first one in use is representative.
+        */
+       first_idx = find_first_bit(cspmu->hw_events.used_ctrs,
+                                  cspmu->cycle_counter_logical_idx);
+       first = cspmu->hw_events.events[first_idx];
+
+       writel(get_threshold(first), cspmu->base0 + PMAUXR0);
+       writel(get_rank(first), cspmu->base0 + PMAUXR1);
+       writel(get_bank(first), cspmu->base0 + PMAUXR2);
+}
+
+/*
+ * Compare the filter settings (threshold, rank and bank) of two events.
+ * Returns 0 when all of them match, -EINVAL otherwise.
+ */
+static int ampere_cspmu_validate_configs(struct perf_event *event,
+                                        struct perf_event *event2)
+{
+       bool same_filters = get_threshold(event) == get_threshold(event2) &&
+                           get_rank(event) == get_rank(event2) &&
+                           get_bank(event) == get_bank(event2);
+
+       return same_filters ? 0 : -EINVAL;
+}
+
+/*
+ * Check that the filter settings of @new agree with those of its group
+ * leader and with the first event already occupying a counter on @cspmu.
+ * Returns 0 if the event is acceptable, -EINVAL on a mismatch.
+ */
+static int ampere_cspmu_validate_event(struct arm_cspmu *cspmu,
+                                      struct perf_event *new)
+{
+       unsigned int first_used;
+       int err;
+
+       err = ampere_cspmu_validate_configs(new, new->group_leader);
+       if (err)
+               return err;
+
+       /* The filters are global, so compare against an existing event */
+       first_used = find_first_bit(cspmu->hw_events.used_ctrs,
+                                   cspmu->cycle_counter_logical_idx);
+
+       /* No event is in use yet, thus any configuration is fine */
+       if (first_used == cspmu->cycle_counter_logical_idx)
+               return 0;
+
+       return ampere_cspmu_validate_configs(cspmu->hw_events.events[first_used],
+                                            new);
+}
+
+/*
+ * Build a unique PMU name from @name_pattern, a printf-style pattern whose
+ * single "%d" is filled with an ID allocated from mcu_pmu_ida.
+ *
+ * Returns the device-managed name string, an ERR_PTR() if no ID could be
+ * allocated, or NULL if the name string could not be allocated.
+ */
+static char *ampere_cspmu_format_name(const struct arm_cspmu *cspmu,
+                                     const char *name_pattern)
+{
+       struct device *dev = cspmu->dev;
+       char *name;
+       int id;
+
+       id = ida_alloc(&mcu_pmu_ida, GFP_KERNEL);
+       if (id < 0)
+               return ERR_PTR(id);
+
+       name = devm_kasprintf(dev, GFP_KERNEL, name_pattern, id);
+       /* Do not leak the ID when the name cannot be created */
+       if (!name)
+               ida_free(&mcu_pmu_ida, id);
+
+       return name;
+}
+
+/*
+ * Set up the Ampere-specific context and callbacks of @cspmu.
+ *
+ * Allocates a device-managed context, fills it with the AmpereOne MCU
+ * attribute tables and a generated PMU name, and installs the implementation
+ * ops. Returns 0 on success or a negative errno.
+ */
+static int ampere_cspmu_init_ops(struct arm_cspmu *cspmu)
+{
+       struct arm_cspmu_impl_ops *ops = &cspmu->impl.ops;
+       struct ampere_cspmu_ctx *ctx;
+       char *pmu_name;
+
+       ctx = devm_kzalloc(cspmu->dev, sizeof(*ctx), GFP_KERNEL);
+       if (!ctx)
+               return -ENOMEM;
+
+       pmu_name = ampere_cspmu_format_name(cspmu, "ampere_mcu_pmu_%d");
+       if (!pmu_name)
+               return -ENOMEM;
+       if (IS_ERR(pmu_name))
+               return PTR_ERR(pmu_name);
+
+       ctx->name = pmu_name;
+       ctx->event_attr = ampereone_mcu_pmu_event_attrs;
+       ctx->format_attr = ampereone_mcu_format_attrs;
+
+       cspmu->impl.ctx = ctx;
+
+       ops->event_filter       = ampere_cspmu_event_filter;
+       ops->set_ev_filter      = ampere_cspmu_set_ev_filter;
+       ops->validate_event     = ampere_cspmu_validate_event;
+       ops->get_name           = ampere_cspmu_get_name;
+       ops->get_event_attrs    = ampere_cspmu_get_event_attrs;
+       ops->get_format_attrs   = ampere_cspmu_get_format_attrs;
+
+       return 0;
+}
+
+/* Match all Ampere CoreSight PMU devices */
+static const struct arm_cspmu_impl_match ampere_cspmu_param = {
+       .pmiidr_val     = ARM_CSPMU_IMPL_ID_AMPERE,
+       .module         = THIS_MODULE,
+       .impl_init_ops  = ampere_cspmu_init_ops,
+};
+
+/*
+ * Module entry point: register the Ampere backend with the arm_cspmu core.
+ * Returns the result of arm_cspmu_impl_register(), logging any failure.
+ */
+static int __init ampere_cspmu_init(void)
+{
+       int err = arm_cspmu_impl_register(&ampere_cspmu_param);
+
+       if (err)
+               pr_err("ampere_cspmu backend registration error: %d\n", err);
+
+       return err;
+}
+
+/* Module exit point: unregister the Ampere backend from the arm_cspmu core. */
+static void __exit ampere_cspmu_exit(void)
+{
+       arm_cspmu_impl_unregister(&ampere_cspmu_param);
+}
+
+module_init(ampere_cspmu_init);
+module_exit(ampere_cspmu_exit);
+
+MODULE_LICENSE("GPL");
index 1ba00d640352f7e1da406bdd64ca02dc2e71e338..0e3fe00d741d28678e019129068aae14d950ff69 100644 (file)
@@ -383,6 +383,14 @@ static struct arm_cspmu_impl_match impl_match[] = {
                .module         = NULL,
                .impl_init_ops  = NULL,
        },
+       {
+               .module_name    = "ampere_cspmu",
+               .pmiidr_val     = ARM_CSPMU_IMPL_ID_AMPERE,
+               .pmiidr_mask    = ARM_CSPMU_PMIIDR_IMPLEMENTER,
+               .module         = NULL,
+               .impl_init_ops  = NULL,
+       },
+
        {0}
 };
 
index a30c8372214c2403f1aa17609808bb5f58bb16d1..2fe723555a6b3a66cba30f1cd6aa94a4ced64511 100644 (file)
@@ -71,6 +71,7 @@
 
 /* JEDEC-assigned JEP106 identification code */
 #define ARM_CSPMU_IMPL_ID_NVIDIA       0x36B
+#define ARM_CSPMU_IMPL_ID_AMPERE       0xA16
 
 struct arm_cspmu;