From 82586a721595fde2c1bc0dac7deed2fe9d42c106 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 13 Apr 2022 17:30:32 +0200
Subject: [PATCH] PM: runtime: Avoid device usage count underflows

A PM-runtime device usage count underflow is potentially critical,
because it may cause a device to be suspended when it is expected to
be operational.  It is also a programming problem that would be good
to catch and warn about.

For this reason, (1) make rpm_check_suspend_allowed() return an error
when the device usage count is negative to prevent devices from being
suspended in that case, (2) introduce rpm_drop_usage_count() that will
detect device usage count underflows, warn about them and fix them up,
and (3) use it to drop the usage count in a few places instead of
atomic_dec_and_test().

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Reviewed-by: Ulf Hansson <ulf.hansson@linaro.org>
---
 drivers/base/power/runtime.c | 44 ++++++++++++++++++++++++++++++------
 1 file changed, 37 insertions(+), 7 deletions(-)

diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index d4059e6ffeaec..23bea81160e5c 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -263,7 +263,7 @@ static int rpm_check_suspend_allowed(struct device *dev)
 		retval = -EINVAL;
 	else if (dev->power.disable_depth > 0)
 		retval = -EACCES;
-	else if (atomic_read(&dev->power.usage_count) > 0)
+	else if (atomic_read(&dev->power.usage_count))
 		retval = -EAGAIN;
 	else if (!dev->power.ignore_children &&
 			atomic_read(&dev->power.child_count))
@@ -1039,13 +1039,33 @@ int pm_schedule_suspend(struct device *dev, unsigned int delay)
 }
 EXPORT_SYMBOL_GPL(pm_schedule_suspend);
 
+static int rpm_drop_usage_count(struct device *dev)
+{
+	int ret;
+
+	ret = atomic_sub_return(1, &dev->power.usage_count);
+	if (ret >= 0)
+		return ret;
+
+	/*
+	 * Because rpm_resume() does not check the usage counter, it will resume
+	 * the device even if the usage counter is 0 or negative, so it is
+	 * sufficient to increment the usage counter here to reverse the change
+	 * made above.
+	 */
+	atomic_inc(&dev->power.usage_count);
+	dev_warn(dev, "Runtime PM usage count underflow!\n");
+	return -EINVAL;
+}
+
 /**
  * __pm_runtime_idle - Entry point for runtime idle operations.
  * @dev: Device to send idle notification for.
  * @rpmflags: Flag bits.
  *
  * If the RPM_GET_PUT flag is set, decrement the device's usage count and
- * return immediately if it is larger than zero.  Then carry out an idle
+ * return immediately if it is larger than zero (if it becomes negative, log a
+ * warning, increment it, and return an error).  Then carry out an idle
  * notification, either synchronous or asynchronous.
  *
  * This routine may be called in atomic context if the RPM_ASYNC flag is set,
@@ -1057,7 +1077,10 @@ int __pm_runtime_idle(struct device *dev, int rpmflags)
 	int retval;
 
 	if (rpmflags & RPM_GET_PUT) {
-		if (!atomic_dec_and_test(&dev->power.usage_count)) {
+		retval = rpm_drop_usage_count(dev);
+		if (retval < 0) {
+			return retval;
+		} else if (retval > 0) {
 			trace_rpm_usage_rcuidle(dev, rpmflags);
 			return 0;
 		}
@@ -1079,7 +1102,8 @@ EXPORT_SYMBOL_GPL(__pm_runtime_idle);
  * @rpmflags: Flag bits.
  *
  * If the RPM_GET_PUT flag is set, decrement the device's usage count and
- * return immediately if it is larger than zero.  Then carry out a suspend,
+ * return immediately if it is larger than zero (if it becomes negative, log a
+ * warning, increment it, and return an error).  Then carry out a suspend,
  * either synchronous or asynchronous.
  *
  * This routine may be called in atomic context if the RPM_ASYNC flag is set,
@@ -1091,7 +1115,10 @@ int __pm_runtime_suspend(struct device *dev, int rpmflags)
 	int retval;
 
 	if (rpmflags & RPM_GET_PUT) {
-		if (!atomic_dec_and_test(&dev->power.usage_count)) {
+		retval = rpm_drop_usage_count(dev);
+		if (retval < 0) {
+			return retval;
+		} else if (retval > 0) {
 			trace_rpm_usage_rcuidle(dev, rpmflags);
 			return 0;
 		}
@@ -1527,14 +1554,17 @@ EXPORT_SYMBOL_GPL(pm_runtime_forbid);
  */
 void pm_runtime_allow(struct device *dev)
 {
+	int ret;
+
 	spin_lock_irq(&dev->power.lock);
 	if (dev->power.runtime_auto)
 		goto out;
 
 	dev->power.runtime_auto = true;
-	if (atomic_dec_and_test(&dev->power.usage_count))
+	ret = rpm_drop_usage_count(dev);
+	if (ret == 0)
 		rpm_idle(dev, RPM_AUTO | RPM_ASYNC);
-	else
+	else if (ret > 0)
 		trace_rpm_usage_rcuidle(dev, RPM_AUTO | RPM_ASYNC);
 
  out:
-- 
2.30.2