coresight: etm4x: Save/restore state across CPU low power states
author Andrew Murray <andrew.murray@arm.com>
Mon, 4 Nov 2019 18:12:38 +0000 (11:12 -0700)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Mon, 4 Nov 2019 20:57:51 +0000 (21:57 +0100)
Some hardware will ignore bit TRCPDCR.PU which is used to signal
to hardware that power should not be removed from the trace unit.
Let's mitigate against this by conditionally saving and restoring
the trace unit state when the CPU enters low power states.

This patchset introduces a firmware property named
'arm,coresight-loses-context-with-cpu' - when this is present the
hardware state will be conditionally saved and restored.

A module parameter 'pm_save_enable' is also introduced which can
be configured to override the firmware property. This can be set
to never allow save/restore or to conditionally allow it (only for
self-hosted). The default value is determined by firmware.

We avoid saving the hardware state when self-hosted coresight isn't
in use to reduce PM latency - we can't determine this by reading the
claim tags (TRCCLAIMCLR) as these are 'trace' registers which need
power and clocking, something we can't easily provide in the PM
context. Therefore we rely on the existing drvdata->mode internal
state that is set when self-hosted coresight is used (and powered).

Signed-off-by: Andrew Murray <andrew.murray@arm.com>
Reviewed-by: Suzuki K Poulose <suzuki.poulose@arm.com>
Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Link: https://lore.kernel.org/r/20191104181251.26732-2-mathieu.poirier@linaro.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
drivers/hwtracing/coresight/coresight-etm4x.c
drivers/hwtracing/coresight/coresight-etm4x.h
drivers/hwtracing/coresight/coresight.c
include/linux/coresight.h

index a128b5063f46c9571b7076bd478c3f344f65a052..4cecabdd051b3f3264864480b373c8fb985799b5 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/stat.h>
 #include <linux/clk.h>
 #include <linux/cpu.h>
+#include <linux/cpu_pm.h>
 #include <linux/coresight.h>
 #include <linux/coresight-pmu.h>
 #include <linux/pm_wakeup.h>
@@ -26,6 +27,7 @@
 #include <linux/uaccess.h>
 #include <linux/perf_event.h>
 #include <linux/pm_runtime.h>
+#include <linux/property.h>
 #include <asm/sections.h>
 #include <asm/local.h>
 #include <asm/virt.h>
@@ -37,6 +39,15 @@ static int boot_enable;
 module_param(boot_enable, int, 0444);
 MODULE_PARM_DESC(boot_enable, "Enable tracing on boot");
 
+#define PARAM_PM_SAVE_FIRMWARE   0 /* decided by firmware, see etm4_probe() */
+#define PARAM_PM_SAVE_NEVER      1 /* never save any state */
+#define PARAM_PM_SAVE_SELF_HOSTED 2 /* save self-hosted state only */
+
+static int pm_save_enable = PARAM_PM_SAVE_FIRMWARE; /* default: ask firmware */
+module_param(pm_save_enable, int, 0444); /* 0444: read-only once loaded */
+MODULE_PARM_DESC(pm_save_enable,
+       "Save/restore state on power down: 1 = never, 2 = self-hosted");
+
 /* The number of ETMv4 currently registered */
 static int etm4_count;
 static struct etmv4_drvdata *etmdrvdata[NR_CPUS];
@@ -54,6 +65,14 @@ static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
        isb();
 }
 
+static void etm4_os_lock(struct etmv4_drvdata *drvdata)
+{
+       /*
+        * Writing 0x1 to TRCOSLAR locks the trace registers. The cached
+        * os_unlock flag is cleared to reflect the new state and an isb()
+        * is issued after the register write.
+        */
+       writel_relaxed(0x1, drvdata->base + TRCOSLAR);
+       drvdata->os_unlock = false;
+       isb();
+}
+
 static bool etm4_arch_supported(u8 arch)
 {
        /* Mask out the minor version number */
@@ -1085,6 +1104,288 @@ static void etm4_init_trace_id(struct etmv4_drvdata *drvdata)
        drvdata->trcid = coresight_get_trace_id(drvdata->cpu);
 }
 
+#ifdef CONFIG_CPU_PM
+/*
+ * etm4_cpu_save - snapshot the trace unit registers into drvdata->save_state
+ * @drvdata: ETMv4 instance of the CPU that is about to enter a low power state
+ *
+ * Takes the OS lock, waits for TRCSTATR.PMSTABLE, copies the programmable
+ * registers into the save area, waits for TRCSTATR.IDLE and finally clears
+ * TRCPDCR.PU. On success state_needs_restore is set and the OS lock is left
+ * held; on failure the OS lock is released again.
+ *
+ * Return: 0 on success, -EBUSY if either status poll times out.
+ */
+static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
+{
+       int i, ret = 0;
+       struct etmv4_save_state *state;
+       struct device *etm_dev = &drvdata->csdev->dev;
+
+       /*
+        * As recommended by 3.4.1 ("The procedure when powering down the PE")
+        * of ARM IHI 0064D
+        */
+       dsb(sy);
+       isb();
+
+       CS_UNLOCK(drvdata->base);
+
+       /* Lock the OS lock to disable trace and external debugger access */
+       etm4_os_lock(drvdata);
+
+       /* wait for TRCSTATR.PMSTABLE to go up */
+       if (coresight_timeout(drvdata->base, TRCSTATR,
+                             TRCSTATR_PMSTABLE_BIT, 1)) {
+               dev_err(etm_dev,
+                       "timeout while waiting for PM Stable Status\n");
+               etm4_os_unlock(drvdata);
+               ret = -EBUSY;
+               goto out;
+       }
+
+       state = drvdata->save_state;
+
+       state->trcprgctlr = readl(drvdata->base + TRCPRGCTLR);
+       state->trcprocselr = readl(drvdata->base + TRCPROCSELR);
+       state->trcconfigr = readl(drvdata->base + TRCCONFIGR);
+       state->trcauxctlr = readl(drvdata->base + TRCAUXCTLR);
+       state->trceventctl0r = readl(drvdata->base + TRCEVENTCTL0R);
+       state->trceventctl1r = readl(drvdata->base + TRCEVENTCTL1R);
+       state->trcstallctlr = readl(drvdata->base + TRCSTALLCTLR);
+       state->trctsctlr = readl(drvdata->base + TRCTSCTLR);
+       state->trcsyncpr = readl(drvdata->base + TRCSYNCPR);
+       state->trcccctlr = readl(drvdata->base + TRCCCCTLR);
+       state->trcbbctlr = readl(drvdata->base + TRCBBCTLR);
+       state->trctraceidr = readl(drvdata->base + TRCTRACEIDR);
+       state->trcqctlr = readl(drvdata->base + TRCQCTLR);
+
+       state->trcvictlr = readl(drvdata->base + TRCVICTLR);
+       state->trcviiectlr = readl(drvdata->base + TRCVIIECTLR);
+       state->trcvissctlr = readl(drvdata->base + TRCVISSCTLR);
+       state->trcvipcssctlr = readl(drvdata->base + TRCVIPCSSCTLR);
+       state->trcvdctlr = readl(drvdata->base + TRCVDCTLR);
+       state->trcvdsacctlr = readl(drvdata->base + TRCVDSACCTLR);
+       state->trcvdarcctlr = readl(drvdata->base + TRCVDARCCTLR);
+
+       for (i = 0; i < drvdata->nrseqstate; i++)
+               state->trcseqevr[i] = readl(drvdata->base + TRCSEQEVRn(i));
+
+       state->trcseqrstevr = readl(drvdata->base + TRCSEQRSTEVR);
+       state->trcseqstr = readl(drvdata->base + TRCSEQSTR);
+       state->trcextinselr = readl(drvdata->base + TRCEXTINSELR);
+
+       for (i = 0; i < drvdata->nr_cntr; i++) {
+               state->trccntrldvr[i] = readl(drvdata->base + TRCCNTRLDVRn(i));
+               state->trccntctlr[i] = readl(drvdata->base + TRCCNTCTLRn(i));
+               state->trccntvr[i] = readl(drvdata->base + TRCCNTVRn(i));
+       }
+
+       for (i = 0; i < drvdata->nr_resource * 2; i++)
+               state->trcrsctlr[i] = readl(drvdata->base + TRCRSCTLRn(i));
+
+       for (i = 0; i < drvdata->nr_ss_cmp; i++) {
+               state->trcssccr[i] = readl(drvdata->base + TRCSSCCRn(i));
+               state->trcsscsr[i] = readl(drvdata->base + TRCSSCSRn(i));
+               state->trcsspcicr[i] = readl(drvdata->base + TRCSSPCICRn(i));
+       }
+
+       /*
+        * NOTE(review): trcacvr[], trcacatr[] and trccidcvr[] are declared
+        * u64 in the save state but are filled with 32-bit readl() here -
+        * confirm whether the upper halves of these registers need saving.
+        */
+       for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
+               state->trcacvr[i] = readl(drvdata->base + TRCACVRn(i));
+               state->trcacatr[i] = readl(drvdata->base + TRCACATRn(i));
+       }
+
+       /*
+        * Data trace stream is architecturally prohibited for A profile cores
+        * so we don't save (or later restore) trcdvcvr and trcdvcmr - As per
+        * section 1.3.4 ("Possible functional configurations of an ETMv4 trace
+        * unit") of ARM IHI 0064D.
+        */
+
+       for (i = 0; i < drvdata->numcidc; i++)
+               state->trccidcvr[i] = readl(drvdata->base + TRCCIDCVRn(i));
+
+       for (i = 0; i < drvdata->numvmidc; i++)
+               state->trcvmidcvr[i] = readl(drvdata->base + TRCVMIDCVRn(i));
+
+       state->trccidcctlr0 = readl(drvdata->base + TRCCIDCCTLR0);
+       state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1);
+
+       /* Each VMID control register goes into its own slot */
+       state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR0);
+       state->trcvmidcctlr1 = readl(drvdata->base + TRCVMIDCCTLR1);
+
+       /* The claim tags are read through TRCCLAIMCLR */
+       state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR);
+
+       state->trcpdcr = readl(drvdata->base + TRCPDCR);
+
+       /* wait for TRCSTATR.IDLE to go up */
+       if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1)) {
+               dev_err(etm_dev,
+                       "timeout while waiting for Idle Trace Status\n");
+               etm4_os_unlock(drvdata);
+               ret = -EBUSY;
+               goto out;
+       }
+
+       drvdata->state_needs_restore = true;
+
+       /*
+        * Power can be removed from the trace unit now. We do this to
+        * potentially save power on systems that respect the TRCPDCR_PU
+        * despite requesting software to save/restore state.
+        */
+       writel_relaxed((state->trcpdcr & ~TRCPDCR_PU),
+                       drvdata->base + TRCPDCR);
+
+out:
+       CS_LOCK(drvdata->base);
+       return ret;
+}
+
+/*
+ * etm4_cpu_restore - reprogram the trace unit from drvdata->save_state
+ * @drvdata: ETMv4 instance whose registers were captured in save_state
+ *
+ * Writes every saved register back, clears state_needs_restore, issues the
+ * dsb/isb synchronization and only then releases the OS lock.
+ */
+static void etm4_cpu_restore(struct etmv4_drvdata *drvdata)
+{
+       int i;
+       struct etmv4_save_state *state = drvdata->save_state;
+
+       CS_UNLOCK(drvdata->base);
+
+       writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
+
+       writel_relaxed(state->trcprgctlr, drvdata->base + TRCPRGCTLR);
+       writel_relaxed(state->trcprocselr, drvdata->base + TRCPROCSELR);
+       writel_relaxed(state->trcconfigr, drvdata->base + TRCCONFIGR);
+       writel_relaxed(state->trcauxctlr, drvdata->base + TRCAUXCTLR);
+       writel_relaxed(state->trceventctl0r, drvdata->base + TRCEVENTCTL0R);
+       writel_relaxed(state->trceventctl1r, drvdata->base + TRCEVENTCTL1R);
+       writel_relaxed(state->trcstallctlr, drvdata->base + TRCSTALLCTLR);
+       writel_relaxed(state->trctsctlr, drvdata->base + TRCTSCTLR);
+       writel_relaxed(state->trcsyncpr, drvdata->base + TRCSYNCPR);
+       writel_relaxed(state->trcccctlr, drvdata->base + TRCCCCTLR);
+       writel_relaxed(state->trcbbctlr, drvdata->base + TRCBBCTLR);
+       writel_relaxed(state->trctraceidr, drvdata->base + TRCTRACEIDR);
+       writel_relaxed(state->trcqctlr, drvdata->base + TRCQCTLR);
+
+       writel_relaxed(state->trcvictlr, drvdata->base + TRCVICTLR);
+       writel_relaxed(state->trcviiectlr, drvdata->base + TRCVIIECTLR);
+       writel_relaxed(state->trcvissctlr, drvdata->base + TRCVISSCTLR);
+       writel_relaxed(state->trcvipcssctlr, drvdata->base + TRCVIPCSSCTLR);
+       writel_relaxed(state->trcvdctlr, drvdata->base + TRCVDCTLR);
+       writel_relaxed(state->trcvdsacctlr, drvdata->base + TRCVDSACCTLR);
+       writel_relaxed(state->trcvdarcctlr, drvdata->base + TRCVDARCCTLR);
+
+       for (i = 0; i < drvdata->nrseqstate; i++)
+               writel_relaxed(state->trcseqevr[i],
+                              drvdata->base + TRCSEQEVRn(i));
+
+       writel_relaxed(state->trcseqrstevr, drvdata->base + TRCSEQRSTEVR);
+       writel_relaxed(state->trcseqstr, drvdata->base + TRCSEQSTR);
+       writel_relaxed(state->trcextinselr, drvdata->base + TRCEXTINSELR);
+
+       for (i = 0; i < drvdata->nr_cntr; i++) {
+               writel_relaxed(state->trccntrldvr[i],
+                              drvdata->base + TRCCNTRLDVRn(i));
+               writel_relaxed(state->trccntctlr[i],
+                              drvdata->base + TRCCNTCTLRn(i));
+               writel_relaxed(state->trccntvr[i],
+                              drvdata->base + TRCCNTVRn(i));
+       }
+
+       for (i = 0; i < drvdata->nr_resource * 2; i++)
+               writel_relaxed(state->trcrsctlr[i],
+                              drvdata->base + TRCRSCTLRn(i));
+
+       for (i = 0; i < drvdata->nr_ss_cmp; i++) {
+               writel_relaxed(state->trcssccr[i],
+                              drvdata->base + TRCSSCCRn(i));
+               writel_relaxed(state->trcsscsr[i],
+                              drvdata->base + TRCSSCSRn(i));
+               writel_relaxed(state->trcsspcicr[i],
+                              drvdata->base + TRCSSPCICRn(i));
+       }
+
+       for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
+               writel_relaxed(state->trcacvr[i],
+                              drvdata->base + TRCACVRn(i));
+               writel_relaxed(state->trcacatr[i],
+                              drvdata->base + TRCACATRn(i));
+       }
+
+       for (i = 0; i < drvdata->numcidc; i++)
+               writel_relaxed(state->trccidcvr[i],
+                              drvdata->base + TRCCIDCVRn(i));
+
+       for (i = 0; i < drvdata->numvmidc; i++)
+               writel_relaxed(state->trcvmidcvr[i],
+                              drvdata->base + TRCVMIDCVRn(i));
+
+       writel_relaxed(state->trccidcctlr0, drvdata->base + TRCCIDCCTLR0);
+       writel_relaxed(state->trccidcctlr1, drvdata->base + TRCCIDCCTLR1);
+
+       /* Each VMID control register is restored from its own slot */
+       writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR0);
+       writel_relaxed(state->trcvmidcctlr1, drvdata->base + TRCVMIDCCTLR1);
+
+       writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
+
+       writel_relaxed(state->trcpdcr, drvdata->base + TRCPDCR);
+
+       drvdata->state_needs_restore = false;
+
+       /*
+        * As recommended by section 4.3.7 ("Synchronization when using the
+        * memory-mapped interface") of ARM IHI 0064D
+        */
+       dsb(sy);
+       isb();
+
+       /* Unlock the OS lock to re-enable trace and external debug access */
+       etm4_os_unlock(drvdata);
+       CS_LOCK(drvdata->base);
+}
+
+/*
+ * CPU PM notifier callback for the CPU it runs on: saves the ETM state on
+ * CPU_PM_ENTER while self-hosted coresight is in use, and restores it on
+ * CPU_PM_EXIT or CPU_PM_ENTER_FAILED when a save was recorded. Any other
+ * command yields NOTIFY_DONE.
+ */
+static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
+                             void *v)
+{
+       unsigned int this_cpu = smp_processor_id();
+       struct etmv4_drvdata *etm = etmdrvdata[this_cpu];
+
+       /* Nothing to do without a registered ETM or without a save area */
+       if (!etm || !etm->save_state)
+               return NOTIFY_OK;
+
+       if (WARN_ON_ONCE(etm->cpu != this_cpu))
+               return NOTIFY_BAD;
+
+       if (cmd == CPU_PM_ENTER) {
+               /* save the state if self-hosted coresight is in use */
+               if (local_read(&etm->mode) && etm4_cpu_save(etm))
+                       return NOTIFY_BAD;
+               return NOTIFY_OK;
+       }
+
+       if (cmd == CPU_PM_EXIT || cmd == CPU_PM_ENTER_FAILED) {
+               if (etm->state_needs_restore)
+                       etm4_cpu_restore(etm);
+               return NOTIFY_OK;
+       }
+
+       return NOTIFY_DONE;
+}
+
+static struct notifier_block etm4_cpu_pm_nb = {
+       .notifier_call = etm4_cpu_pm_notify, /* CPU_PM_ENTER/EXIT/ENTER_FAILED */
+};
+
+static int etm4_cpu_pm_register(void)
+{
+       return cpu_pm_register_notifier(&etm4_cpu_pm_nb); /* result is passed up */
+}
+
+static void etm4_cpu_pm_unregister(void)
+{
+       cpu_pm_unregister_notifier(&etm4_cpu_pm_nb); /* return value not used */
+}
+#else
+static int etm4_cpu_pm_register(void) { return 0; } /* !CONFIG_CPU_PM stub */
+static void etm4_cpu_pm_unregister(void) { } /* !CONFIG_CPU_PM stub */
+#endif
+
 static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
 {
        int ret;
@@ -1101,6 +1402,17 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
 
        dev_set_drvdata(dev, drvdata);
 
+       if (pm_save_enable == PARAM_PM_SAVE_FIRMWARE)
+               pm_save_enable = coresight_loses_context_with_cpu(dev) ?
+                              PARAM_PM_SAVE_SELF_HOSTED : PARAM_PM_SAVE_NEVER;
+
+       if (pm_save_enable != PARAM_PM_SAVE_NEVER) {
+               drvdata->save_state = devm_kmalloc(dev,
+                               sizeof(struct etmv4_save_state), GFP_KERNEL);
+               if (!drvdata->save_state)
+                       return -ENOMEM;
+       }
+
        /* Validity for the resource is already checked by the AMBA core */
        base = devm_ioremap_resource(dev, res);
        if (IS_ERR(base))
@@ -1135,6 +1447,10 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
                if (ret < 0)
                        goto err_arch_supported;
                hp_online = ret;
+
+               ret = etm4_cpu_pm_register();
+               if (ret)
+                       goto err_arch_supported;
        }
 
        cpus_read_unlock();
@@ -1185,6 +1501,8 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
 
 err_arch_supported:
        if (--etm4_count == 0) {
+               etm4_cpu_pm_unregister();
+
                cpuhp_remove_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING);
                if (hp_online)
                        cpuhp_remove_state_nocalls(hp_online);
index 4523f10ddd0fd038746ea788e9628d0f3d14a4e2..546d790cb01b32d682a334d4dd47914000d49a50 100644 (file)
                                         ETM_MODE_EXCL_USER)
 
 #define TRCSTATR_IDLE_BIT              0
+#define TRCSTATR_PMSTABLE_BIT          1
 #define ETM_DEFAULT_ADDR_COMP          0
 
 /* PowerDown Control Register bits */
@@ -281,6 +282,65 @@ struct etmv4_config {
        u32                             ext_inp;
 };
 
+/**
+ * struct etmv4_save_state - state to be preserved when ETM is without power
+ *
+ * Members named after a TRC register hold that register's contents as
+ * captured by etm4_cpu_save() and written back by etm4_cpu_restore(); the
+ * array members hold one entry per instance of a numbered register.
+ *
+ * NOTE(review): cntr_val, seq_state, vinst_ctrl and ss_status are not
+ * referenced by the save/restore code visible alongside this definition -
+ * confirm whether they are still needed.
+ */
+struct etmv4_save_state {
+       u32     trcprgctlr;
+       u32     trcprocselr;
+       u32     trcconfigr;
+       u32     trcauxctlr;
+       u32     trceventctl0r;
+       u32     trceventctl1r;
+       u32     trcstallctlr;
+       u32     trctsctlr;
+       u32     trcsyncpr;
+       u32     trcccctlr;
+       u32     trcbbctlr;
+       u32     trctraceidr;
+       u32     trcqctlr;
+
+       u32     trcvictlr;
+       u32     trcviiectlr;
+       u32     trcvissctlr;
+       u32     trcvipcssctlr;
+       u32     trcvdctlr;
+       u32     trcvdsacctlr;
+       u32     trcvdarcctlr;
+
+       u32     trcseqevr[ETM_MAX_SEQ_STATES];
+       u32     trcseqrstevr;
+       u32     trcseqstr;
+       u32     trcextinselr;
+       u32     trccntrldvr[ETMv4_MAX_CNTR];
+       u32     trccntctlr[ETMv4_MAX_CNTR];
+       u32     trccntvr[ETMv4_MAX_CNTR];
+
+       u32     trcrsctlr[ETM_MAX_RES_SEL * 2];
+
+       u32     trcssccr[ETM_MAX_SS_CMP];
+       u32     trcsscsr[ETM_MAX_SS_CMP];
+       u32     trcsspcicr[ETM_MAX_SS_CMP];
+
+       u64     trcacvr[ETM_MAX_SINGLE_ADDR_CMP];
+       u64     trcacatr[ETM_MAX_SINGLE_ADDR_CMP];
+       u64     trccidcvr[ETMv4_MAX_CTXID_CMP];
+       u32     trcvmidcvr[ETM_MAX_VMID_CMP];
+       u32     trccidcctlr0;
+       u32     trccidcctlr1;
+       u32     trcvmidcctlr0;
+       u32     trcvmidcctlr1;
+
+       u32     trcclaimset;    /* read via TRCCLAIMCLR, written to TRCCLAIMSET */
+
+       u32     cntr_val[ETMv4_MAX_CNTR];
+       u32     seq_state;
+       u32     vinst_ctrl;
+       u32     ss_status[ETM_MAX_SS_CMP];
+
+       u32     trcpdcr;
+};
+
 /**
  * struct etm4_drvdata - specifics associated to an ETM component
  * @base:       Memory mapped base address for this component.
@@ -336,6 +396,8 @@ struct etmv4_config {
  * @atbtrig:   If the implementation can support ATB triggers
  * @lpoverride:        If the implementation can support low-power state over.
  * @config:    structure holding configuration parameters.
+ * @save_state:        State to be preserved across power loss
+ * @state_needs_restore: True when there is context to restore after PM exit
  */
 struct etmv4_drvdata {
        void __iomem                    *base;
@@ -381,6 +443,8 @@ struct etmv4_drvdata {
        bool                            atbtrig;
        bool                            lpoverride;
        struct etmv4_config             config;
+       struct etmv4_save_state         *save_state;
+       bool                            state_needs_restore;
 };
 
 /* Address comparator access types */
index 6453c67a4d010f77509fff9256ad34c3b889ec53..e6ca899fea4e2b7a308cc8c1192cd338243f13a3 100644 (file)
@@ -1308,6 +1308,12 @@ static inline int coresight_search_device_idx(struct coresight_dev_list *dict,
        return -ENOENT;
 }
 
+/*
+ * coresight_loses_context_with_cpu - test for the firmware property
+ * "arm,coresight-loses-context-with-cpu" on @dev's firmware node.
+ *
+ * Return: true when the property is present, false otherwise.
+ */
+bool coresight_loses_context_with_cpu(struct device *dev)
+{
+       struct fwnode_handle *node = dev_fwnode(dev);
+
+       /* Only the presence of the property is tested */
+       return fwnode_property_present(node,
+                                      "arm,coresight-loses-context-with-cpu");
+}
+
 /*
  * coresight_alloc_device_name - Get an index for a given device in the
  * device index list specific to a driver. An index is allocated for a
index a2b68823717bc7269e4de9e8e9cef2df57743312..44e552de419cac06ddbc7024b5fcac17316fce01 100644 (file)
@@ -285,6 +285,8 @@ extern void coresight_disclaim_device(void __iomem *base);
 extern void coresight_disclaim_device_unlocked(void __iomem *base);
 extern char *coresight_alloc_device_name(struct coresight_dev_list *devs,
                                         struct device *dev);
+
+extern bool coresight_loses_context_with_cpu(struct device *dev);
 #else
 static inline struct coresight_device *
 coresight_register(struct coresight_desc *desc) { return NULL; }
@@ -307,6 +309,10 @@ static inline int coresight_claim_device(void __iomem *base)
 static inline void coresight_disclaim_device(void __iomem *base) {}
 static inline void coresight_disclaim_device_unlocked(void __iomem *base) {}
 
+static inline bool coresight_loses_context_with_cpu(struct device *dev)
+{
+       return false; /* stub for the #else branch: always reports "no" */
+}
 #endif
 
 extern int coresight_get_cpu(struct device *dev);