powercap/drivers/dtpm: Add CPU energy model based support
author Daniel Lezcano <daniel.lezcano@linaro.org>
Tue, 8 Dec 2020 16:41:45 +0000 (17:41 +0100)
committer Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Tue, 22 Dec 2020 18:50:40 +0000 (19:50 +0100)
With the powercap dtpm controller, we are able to plug devices with
power limitation features in the tree.

The following patch introduces the CPU power limitation based on the
energy model and the performance states.

The power limitation is done at the performance domain level. If some
CPUs are unplugged, the corresponding power will be subtracted from
the performance domain total power.

It is up to the platform to initialize the dtpm tree and add the CPU.

Here is an example to create a simple tree with one root node called
"pkg" and the CPU's performance domains.

static int dtpm_register_pkg(struct dtpm_descr *descr)
{
struct dtpm *pkg;
int ret;

pkg = dtpm_alloc(NULL);
if (!pkg)
return -ENOMEM;

ret = dtpm_register(descr->name, pkg, descr->parent);
if (ret)
return ret;

return dtpm_register_cpu(pkg);
}

static struct dtpm_descr descr = {
.name = "pkg",
.init = dtpm_register_pkg,
};
DTPM_DECLARE(descr);

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Reviewed-by: Lukasz Luba <lukasz.luba@arm.com>
Tested-by: Lukasz Luba <lukasz.luba@arm.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
drivers/powercap/Kconfig
drivers/powercap/Makefile
drivers/powercap/dtpm_cpu.c [new file with mode: 0644]
include/linux/cpuhotplug.h
include/linux/dtpm.h

index cc1953bd8bedd90c3d5a944ef46035bd674cc2c5..20b4325c6161cbc2c39f29e3338beaa83f85b724 100644 (file)
@@ -49,4 +49,11 @@ config DTPM
        help
          This enables support for the power capping for the dynamic
          thermal power management userspace engine.
+
+config DTPM_CPU
+       bool "Add CPU power capping based on the energy model"
+       depends on DTPM && ENERGY_MODEL
+       help
+         This enables support for CPU power limitation based on
+         the energy model.
 endif
index 6482ac52054d43aaa90098feca5403fdcc77c162..fabcf388a8d39fc5b7152de9f3c2ca651fab5986 100644 (file)
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_DTPM) += dtpm.o
+obj-$(CONFIG_DTPM_CPU) += dtpm_cpu.o
 obj-$(CONFIG_POWERCAP) += powercap_sys.o
 obj-$(CONFIG_INTEL_RAPL_CORE) += intel_rapl_common.o
 obj-$(CONFIG_INTEL_RAPL) += intel_rapl_msr.o
diff --git a/drivers/powercap/dtpm_cpu.c b/drivers/powercap/dtpm_cpu.c
new file mode 100644 (file)
index 0000000..6933c78
--- /dev/null
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2020 Linaro Limited
+ *
+ * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
+ *
+ * The DTPM CPU is based on the energy model. It hooks the CPU into the
+ * DTPM tree, which in turn updates the power numbers by propagating the
+ * power information from the CPU energy model to the parents.
+ *
+ * The association between the power and the performance state allows
+ * setting the power of the CPU at the OPP granularity.
+ *
+ * The CPU hotplug is supported and the power numbers will be updated
+ * if a CPU is hot plugged / unplugged.
+ */
+#include <linux/cpumask.h>
+#include <linux/cpufreq.h>
+#include <linux/cpuhotplug.h>
+#include <linux/dtpm.h>
+#include <linux/energy_model.h>
+#include <linux/pm_qos.h>
+#include <linux/slab.h>
+#include <linux/units.h>
+
+/*
+ * Parent node passed to dtpm_register() for every CPU node created by
+ * the online callback. It is set by dtpm_register_cpu().
+ */
+static struct dtpm *__parent;
+
+/*
+ * Per-CPU pointer to the dtpm node the CPU belongs to. The online
+ * callback stores the same node for every CPU of policy->related_cpus
+ * and the offline callback resets the pointers to NULL when the node
+ * is unregistered.
+ */
+static DEFINE_PER_CPU(struct dtpm *, dtpm_per_cpu);
+
+/*
+ * Private data of a CPU dtpm node, reachable through dtpm->private.
+ *
+ * @qos_req: FREQ_QOS_MAX request added to the cpufreq policy
+ *           constraints, updated when a power limit is set
+ * @cpu: CPU that was coming online when the node was created, used to
+ *       look up the energy model and the current frequency
+ */
+struct dtpm_cpu {
+       struct freq_qos_request qos_req;
+       int cpu;
+};
+
+/*
+ * Account for one more CPU in the node: the power of the first and of
+ * the last entry of the energy model table, converted from milliwatts
+ * to microwatts, are added to the current min and max power of the
+ * node, then the result is pushed to the dtpm tree.
+ *
+ * Returns the value returned by dtpm_update_power().
+ */
+static int power_add(struct dtpm *dtpm, struct em_perf_domain *em)
+{
+       u64 lowest = em->table[0].power;
+       u64 highest = em->table[em->nr_perf_states - 1].power;
+
+       lowest = dtpm->power_min + lowest * MICROWATT_PER_MILLIWATT;
+       highest = dtpm->power_max + highest * MICROWATT_PER_MILLIWATT;
+
+       return dtpm_update_power(dtpm, lowest, highest);
+}
+
+/*
+ * Remove the contribution of one CPU from the node: the power of the
+ * first and of the last entry of the energy model table, converted
+ * from milliwatts to microwatts, are subtracted from the current min
+ * and max power of the node, then the result is pushed to the dtpm
+ * tree.
+ *
+ * Returns the value returned by dtpm_update_power().
+ */
+static int power_sub(struct dtpm *dtpm, struct em_perf_domain *em)
+{
+       u64 lowest = em->table[0].power;
+       u64 highest = em->table[em->nr_perf_states - 1].power;
+
+       lowest = dtpm->power_min - lowest * MICROWATT_PER_MILLIWATT;
+       highest = dtpm->power_max - highest * MICROWATT_PER_MILLIWATT;
+
+       return dtpm_update_power(dtpm, lowest, highest);
+}
+
+/*
+ * Apply a power limit, in microwatts, to the performance domain of the
+ * node.
+ *
+ * The energy model table is walked from its first entry and the last
+ * state whose power, multiplied by the number of online CPUs of the
+ * domain, does not exceed @power_limit is selected (this assumes the
+ * table is sorted by increasing power). Its frequency is applied
+ * through the frequency QoS request of the node.
+ *
+ * If even the first state exceeds @power_limit, the first state is
+ * used instead of indexing the table at -1.
+ *
+ * Returns the power in microwatts of the selected state for the online
+ * CPUs of the domain.
+ */
+static u64 set_pd_power_limit(struct dtpm *dtpm, u64 power_limit)
+{
+       struct dtpm_cpu *dtpm_cpu = dtpm->private;
+       struct em_perf_domain *pd;
+       struct cpumask cpus;
+       unsigned long freq;
+       u64 power;
+       int i, nr_cpus;
+
+       pd = em_cpu_get(dtpm_cpu->cpu);
+
+       cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
+
+       nr_cpus = cpumask_weight(&cpus);
+
+       for (i = 0; i < pd->nr_perf_states; i++) {
+
+               /* Widen first so the product is computed in 64 bits */
+               power = (u64)pd->table[i].power *
+                       MICROWATT_PER_MILLIWATT * nr_cpus;
+
+               if (power > power_limit)
+                       break;
+       }
+
+       /*
+        * 'i' is one past the selected state. When no state fits the
+        * limit, 'i' is zero: stay on the first state.
+        */
+       if (i > 0)
+               i--;
+
+       freq = pd->table[i].frequency;
+
+       freq_qos_update_request(&dtpm_cpu->qos_req, freq);
+
+       power_limit = (u64)pd->table[i].power *
+               MICROWATT_PER_MILLIWATT * nr_cpus;
+
+       return power_limit;
+}
+
+/*
+ * Return the power in microwatts of the performance domain of the
+ * node.
+ *
+ * The frequency reported by cpufreq_quick_get() is matched against the
+ * energy model table: the power of the first entry whose frequency is
+ * not below it is multiplied by the number of online CPUs of the
+ * domain. Zero is returned when no entry matches.
+ */
+static u64 get_pd_power_uw(struct dtpm *dtpm)
+{
+       struct dtpm_cpu *dtpm_cpu = dtpm->private;
+       struct em_perf_domain *pd;
+       struct cpumask cpus;
+       unsigned long freq;
+       int i, nr_cpus;
+
+       pd = em_cpu_get(dtpm_cpu->cpu);
+       freq = cpufreq_quick_get(dtpm_cpu->cpu);
+       cpumask_and(&cpus, cpu_online_mask, to_cpumask(pd->cpus));
+       nr_cpus = cpumask_weight(&cpus);
+
+       for (i = 0; i < pd->nr_perf_states; i++) {
+
+               if (pd->table[i].frequency < freq)
+                       continue;
+
+               /* Widen first so the product is computed in 64 bits */
+               return (u64)pd->table[i].power *
+                       MICROWATT_PER_MILLIWATT * nr_cpus;
+       }
+
+       return 0;
+}
+
+/*
+ * Release callback of the node: drop the frequency QoS request if it
+ * is still active, then free the private data of the node.
+ */
+static void pd_release(struct dtpm *dtpm)
+{
+       struct dtpm_cpu *priv = dtpm->private;
+       struct freq_qos_request *req = &priv->qos_req;
+
+       if (freq_qos_request_active(req))
+               freq_qos_remove_request(req);
+
+       kfree(priv);
+}
+
+/*
+ * Callbacks of a CPU dtpm node, passed to dtpm_alloc() by the online
+ * callback: set a power limit, read the current power and release the
+ * private data of the node.
+ */
+static struct dtpm_ops dtpm_ops = {
+       .set_power_uw = set_pd_power_limit,
+       .get_power_uw = get_pd_power_uw,
+       .release = pd_release,
+};
+
+/*
+ * CPU hotplug offline callback.
+ *
+ * Subtract the power contribution of @cpu from its dtpm node. When
+ * @cpu is the only CPU left in policy->cpus, the per-CPU pointers of
+ * all the related CPUs are cleared and the node is unregistered.
+ *
+ * Returns 0 when there is nothing to do or on success, -EINVAL if the
+ * CPU has a cpufreq policy but no energy model.
+ */
+static int cpuhp_dtpm_cpu_offline(unsigned int cpu)
+{
+       struct cpufreq_policy *policy;
+       struct em_perf_domain *pd;
+       struct dtpm *dtpm;
+       int ret = 0;
+
+       policy = cpufreq_cpu_get(cpu);
+
+       if (!policy)
+               return 0;
+
+       pd = em_cpu_get(cpu);
+       if (!pd) {
+               ret = -EINVAL;
+               goto out_put_policy;
+       }
+
+       /*
+        * No node was set up for this CPU (the online callback bailed
+        * out or failed): nothing to subtract or unregister.
+        */
+       dtpm = per_cpu(dtpm_per_cpu, cpu);
+       if (!dtpm)
+               goto out_put_policy;
+
+       power_sub(dtpm, pd);
+
+       if (cpumask_weight(policy->cpus) != 1)
+               goto out_put_policy;
+
+       for_each_cpu(cpu, policy->related_cpus)
+               per_cpu(dtpm_per_cpu, cpu) = NULL;
+
+       dtpm_unregister(dtpm);
+
+out_put_policy:
+       /* Balance the reference taken by cpufreq_cpu_get() */
+       cpufreq_cpu_put(policy);
+
+       return ret;
+}
+
+/*
+ * CPU hotplug online callback.
+ *
+ * If a dtpm node already exists for @cpu, only its power contribution
+ * is added. Otherwise a node named "cpu<N>" is allocated, stored in
+ * the per-CPU pointer of every CPU of policy->related_cpus, registered
+ * under __parent, and a FREQ_QOS_MAX request initialized with the
+ * frequency of the last energy model entry is added to the policy
+ * constraints.
+ *
+ * Returns 0 when the CPU has no cpufreq policy or on success, a
+ * negative error code otherwise.
+ */
+static int cpuhp_dtpm_cpu_online(unsigned int cpu)
+{
+       struct dtpm *dtpm;
+       struct dtpm_cpu *dtpm_cpu;
+       struct cpufreq_policy *policy;
+       struct em_perf_domain *pd;
+       char name[CPUFREQ_NAME_LEN];
+       int ret = -ENOMEM;
+
+       policy = cpufreq_cpu_get(cpu);
+
+       if (!policy)
+               return 0;
+
+       pd = em_cpu_get(cpu);
+       if (!pd) {
+               ret = -EINVAL;
+               goto out_put_policy;
+       }
+
+       dtpm = per_cpu(dtpm_per_cpu, cpu);
+       if (dtpm) {
+               ret = power_add(dtpm, pd);
+               goto out_put_policy;
+       }
+
+       dtpm = dtpm_alloc(&dtpm_ops);
+       if (!dtpm) {
+               ret = -EINVAL;
+               goto out_put_policy;
+       }
+
+       /* Size of the structure, not of the pointer to it */
+       dtpm_cpu = kzalloc(sizeof(*dtpm_cpu), GFP_KERNEL);
+       if (!dtpm_cpu)
+               goto out_kfree_dtpm;
+
+       dtpm->private = dtpm_cpu;
+       dtpm_cpu->cpu = cpu;
+
+       for_each_cpu(cpu, policy->related_cpus)
+               per_cpu(dtpm_per_cpu, cpu) = dtpm;
+
+       snprintf(name, sizeof(name), "cpu%d", dtpm_cpu->cpu);
+
+       ret = dtpm_register(name, dtpm, __parent);
+       if (ret)
+               goto out_kfree_dtpm_cpu;
+
+       ret = power_add(dtpm, pd);
+       if (ret)
+               goto out_dtpm_unregister;
+
+       /*
+        * freq_qos_add_request() returns a positive value when the
+        * effective constraint changed: only negative values are
+        * errors.
+        */
+       ret = freq_qos_add_request(&policy->constraints,
+                                  &dtpm_cpu->qos_req, FREQ_QOS_MAX,
+                                  pd->table[pd->nr_perf_states - 1].frequency);
+       if (ret < 0)
+               goto out_power_sub;
+
+       cpufreq_cpu_put(policy);
+
+       return 0;
+
+out_power_sub:
+       power_sub(dtpm, pd);
+
+out_dtpm_unregister:
+       dtpm_unregister(dtpm);
+       /* Cleared so the kfree() calls below become no-ops */
+       dtpm_cpu = NULL;
+       dtpm = NULL;
+
+out_kfree_dtpm_cpu:
+       for_each_cpu(cpu, policy->related_cpus)
+               per_cpu(dtpm_per_cpu, cpu) = NULL;
+       kfree(dtpm_cpu);
+
+out_kfree_dtpm:
+       kfree(dtpm);
+
+out_put_policy:
+       /* Balance the reference taken by cpufreq_cpu_get() */
+       cpufreq_cpu_put(policy);
+
+       return ret;
+}
+
+/*
+ * Register the CPU support in the dtpm tree.
+ *
+ * @parent is recorded first because the online callback passes it to
+ * dtpm_register() for every node it creates, then the online/offline
+ * callbacks are installed for the CPUHP_AP_DTPM_CPU_ONLINE state.
+ *
+ * Returns the value returned by cpuhp_setup_state().
+ */
+int dtpm_register_cpu(struct dtpm *parent)
+{
+       int ret;
+
+       __parent = parent;
+
+       ret = cpuhp_setup_state(CPUHP_AP_DTPM_CPU_ONLINE, "dtpm_cpu:online",
+                               cpuhp_dtpm_cpu_online, cpuhp_dtpm_cpu_offline);
+
+       return ret;
+}
index 0042ef362511d431ff74d322373db1abe0c6726a..ee09a39627d6ecf8af667c58cc968144b5b8af45 100644 (file)
@@ -193,6 +193,7 @@ enum cpuhp_state {
        CPUHP_AP_ONLINE_DYN_END         = CPUHP_AP_ONLINE_DYN + 30,
        CPUHP_AP_X86_HPET_ONLINE,
        CPUHP_AP_X86_KVM_CLK_ONLINE,
+       CPUHP_AP_DTPM_CPU_ONLINE,
        CPUHP_AP_ACTIVE,
        CPUHP_ONLINE,
 };
index 7a1d0b50e334f4d5d8d06bc771d14a0a82865d9e..e80a332e3d8a1f5dd1c099434a42bccba502a7d1 100644 (file)
@@ -72,4 +72,6 @@ void dtpm_unregister(struct dtpm *dtpm);
 
 int dtpm_register(const char *name, struct dtpm *dtpm, struct dtpm *parent);
 
+int dtpm_register_cpu(struct dtpm *parent);
+
 #endif