!context_tracking_enabled())
return 0;
- prev_ctx = this_cpu_read(context_tracking.state);
+ prev_ctx = __ct_state();
if (prev_ctx != CONTEXT_KERNEL)
ct_user_exit(prev_ctx);
__ct_user_exit(CONTEXT_GUEST);
}
-/**
- * ct_state() - return the current context tracking state if known
- *
- * Returns the current cpu's context tracking state if context tracking
- * is enabled. If context tracking is disabled, returns
- * CONTEXT_DISABLED. This should be used primarily for debugging.
- */
-static __always_inline enum ctx_state ct_state(void)
-{
- return context_tracking_enabled() ?
- this_cpu_read(context_tracking.state) : CONTEXT_DISABLED;
-}
+#define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond))
+
#else
static inline void user_enter(void) { }
static inline void user_exit(void) { }
static inline void user_enter_irqoff(void) { }
static inline void user_exit_irqoff(void) { }
-static inline enum ctx_state exception_enter(void) { return 0; }
+static inline int exception_enter(void) { return 0; }
static inline void exception_exit(enum ctx_state prev_ctx) { }
-static inline enum ctx_state ct_state(void) { return CONTEXT_DISABLED; }
+static inline int ct_state(void) { return -1; }
static __always_inline bool context_tracking_guest_enter(void) { return false; }
static inline void context_tracking_guest_exit(void) { }
-
+#define CT_WARN_ON(cond) do { } while (0)
#endif /* !CONFIG_CONTEXT_TRACKING_USER */
-#define CT_WARN_ON(cond) WARN_ON(context_tracking_enabled() && (cond))
-
#ifdef CONFIG_CONTEXT_TRACKING_USER_FORCE
extern void context_tracking_init(void);
#else
*/
static __always_inline bool rcu_dynticks_curr_cpu_in_eqs(void)
{
- return !(arch_atomic_read(this_cpu_ptr(&context_tracking.dynticks)) & 0x1);
+ return !(arch_atomic_read(this_cpu_ptr(&context_tracking.state)) & RCU_DYNTICKS_IDX);
}
/*
- * Increment the current CPU's context_tracking structure's ->dynticks field
+ * Increment the current CPU's context_tracking structure's ->state field
* with ordering. Return the new value.
*/
-static __always_inline unsigned long rcu_dynticks_inc(int incby)
+static __always_inline unsigned long ct_state_inc(int incby)
{
- return arch_atomic_add_return(incby, this_cpu_ptr(&context_tracking.dynticks));
+ return arch_atomic_add_return(incby, this_cpu_ptr(&context_tracking.state));
}
#else
#include <linux/static_key.h>
#include <linux/context_tracking_irq.h>
+/* Offset to allow distinguishing irq vs. task-based idle entry/exit. */
+#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1)
+
enum ctx_state {
- CONTEXT_DISABLED = -1, /* returned by ct_state() if unknown */
- CONTEXT_KERNEL = 0,
- CONTEXT_USER,
- CONTEXT_GUEST,
+ CONTEXT_DISABLED = -1, /* returned by ct_state() if unknown */
+ CONTEXT_KERNEL = 0,
+ CONTEXT_IDLE = 1,
+ CONTEXT_USER = 2,
+ CONTEXT_GUEST = 3,
+ CONTEXT_MAX = 4,
};
-/* Offset to allow distinguishing irq vs. task-based idle entry/exit. */
-#define DYNTICK_IRQ_NONIDLE ((LONG_MAX / 2) + 1)
+/* Lowest dynticks counter bit in ->state: clear in an extended quiescent state, else set. */
+#define RCU_DYNTICKS_IDX CONTEXT_MAX
+
+#define CT_STATE_MASK (CONTEXT_MAX - 1)
+#define CT_DYNTICKS_MASK (~CT_STATE_MASK)
struct context_tracking {
#ifdef CONFIG_CONTEXT_TRACKING_USER
*/
bool active;
int recursion;
- enum ctx_state state;
+#endif
+#ifdef CONFIG_CONTEXT_TRACKING
+ atomic_t state;
#endif
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
- atomic_t dynticks; /* Even value for idle, else odd. */
long dynticks_nesting; /* Track process nesting level. */
long dynticks_nmi_nesting; /* Track irq/NMI nesting level. */
#endif
#ifdef CONFIG_CONTEXT_TRACKING
DECLARE_PER_CPU(struct context_tracking, context_tracking);
+
+static __always_inline int __ct_state(void)
+{
+ return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_STATE_MASK;
+}
#endif
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
static __always_inline int ct_dynticks(void)
{
- return atomic_read(this_cpu_ptr(&context_tracking.dynticks));
+ return atomic_read(this_cpu_ptr(&context_tracking.state)) & CT_DYNTICKS_MASK;
}
static __always_inline int ct_dynticks_cpu(int cpu)
{
struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu);
- return atomic_read(&ct->dynticks);
+ return atomic_read(&ct->state) & CT_DYNTICKS_MASK;
}
static __always_inline int ct_dynticks_cpu_acquire(int cpu)
{
struct context_tracking *ct = per_cpu_ptr(&context_tracking, cpu);
- return atomic_read_acquire(&ct->dynticks);
+ return atomic_read_acquire(&ct->state) & CT_DYNTICKS_MASK;
}
static __always_inline long ct_dynticks_nesting(void)
return context_tracking_enabled() && __this_cpu_read(context_tracking.active);
}
+/**
+ * ct_state() - return the current context tracking state if known
+ *
+ * Returns the current CPU's context tracking state if context tracking
+ * is enabled. If context tracking is disabled, returns
+ * CONTEXT_DISABLED. This should be used primarily for debugging.
+ *
+ * The state is read through __ct_state() between preempt_disable() and
+ * preempt_enable(). Preemption is enabled again before returning, so the
+ * result is presumably best treated as a snapshot of the CPU the caller
+ * was running on at the time of the read.
+ */
+static __always_inline int ct_state(void)
+{
+ int ret;
+
+ if (!context_tracking_enabled())
+ return CONTEXT_DISABLED;
+
+ preempt_disable();
+ ret = __ct_state();
+ preempt_enable();
+
+ return ret;
+}
+
#else
static __always_inline bool context_tracking_enabled(void) { return false; }
static __always_inline bool context_tracking_enabled_cpu(int cpu) { return false; }
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
.dynticks_nesting = 1,
.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
- .dynticks = ATOMIC_INIT(1),
#endif
+ .state = ATOMIC_INIT(RCU_DYNTICKS_IDX),
};
EXPORT_SYMBOL_GPL(context_tracking);
* RCU is watching prior to the call to this function and is no longer
* watching upon return.
*/
-static noinstr void rcu_dynticks_eqs_enter(void)
+static noinstr void ct_kernel_exit_state(int offset)
{
int seq;
* next idle sojourn.
*/
rcu_dynticks_task_trace_enter(); // Before ->dynticks update!
- seq = rcu_dynticks_inc(1);
+ seq = ct_state_inc(offset);
// RCU is no longer watching. Better be in extended quiescent state!
- WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1));
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & RCU_DYNTICKS_IDX));
}
/*
* called from an extended quiescent state, that is, RCU is not watching
* prior to the call to this function and is watching upon return.
*/
-static noinstr void rcu_dynticks_eqs_exit(void)
+static noinstr void ct_kernel_enter_state(int offset)
{
int seq;
* and we also must force ordering with the next RCU read-side
* critical section.
*/
- seq = rcu_dynticks_inc(1);
+ seq = ct_state_inc(offset);
// RCU is now watching. Better not be in an extended quiescent state!
rcu_dynticks_task_trace_exit(); // After ->dynticks update!
- WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1));
+ WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & RCU_DYNTICKS_IDX));
}
/*
* the possibility of usermode upcalls having messed up our count
* of interrupt nesting level during the prior busy period.
*/
-static void noinstr rcu_eqs_enter(bool user)
+static void noinstr ct_kernel_exit(bool user, int offset)
{
struct context_tracking *ct = this_cpu_ptr(&context_tracking);
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
rcu_preempt_deferred_qs(current);
- // instrumentation for the noinstr rcu_dynticks_eqs_enter()
- instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
+ // instrumentation for the noinstr ct_kernel_exit_state()
+ instrument_atomic_write(&ct->state, sizeof(ct->state));
instrumentation_end();
WRITE_ONCE(ct->dynticks_nesting, 0); /* Avoid irq-access tearing. */
// RCU is watching here ...
- rcu_dynticks_eqs_enter();
+ ct_kernel_exit_state(offset);
// ... but is no longer watching here.
rcu_dynticks_task_enter();
}
* allow for the possibility of usermode upcalls messing up our count of
* interrupt nesting level during the busy period that is just now starting.
*/
-static void noinstr rcu_eqs_exit(bool user)
+static void noinstr ct_kernel_enter(bool user, int offset)
{
struct context_tracking *ct = this_cpu_ptr(&context_tracking);
long oldval;
}
rcu_dynticks_task_exit();
// RCU is not watching here ...
- rcu_dynticks_eqs_exit();
+ ct_kernel_enter_state(offset);
// ... but is watching here.
instrumentation_begin();
- // instrumentation for the noinstr rcu_dynticks_eqs_exit()
- instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
+ // instrumentation for the noinstr ct_kernel_enter_state()
+ instrument_atomic_write(&ct->state, sizeof(ct->state));
trace_rcu_dyntick(TPS("End"), ct_dynticks_nesting(), 1, ct_dynticks());
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
* ct_nmi_exit - inform RCU of exit from NMI context
*
* If we are returning from the outermost NMI handler that interrupted an
- * RCU-idle period, update ct->dynticks and ct->dynticks_nmi_nesting
+ * RCU-idle period, update ct->state and ct->dynticks_nmi_nesting
* to let the RCU grace-period handling know that the CPU is back to
* being RCU-idle.
*
trace_rcu_dyntick(TPS("Startirq"), ct_dynticks_nmi_nesting(), 0, ct_dynticks());
WRITE_ONCE(ct->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
- // instrumentation for the noinstr rcu_dynticks_eqs_enter()
- instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
+ // instrumentation for the noinstr ct_kernel_exit_state()
+ instrument_atomic_write(&ct->state, sizeof(ct->state));
instrumentation_end();
// RCU is watching here ...
- rcu_dynticks_eqs_enter();
+ ct_kernel_exit_state(RCU_DYNTICKS_IDX);
// ... but is no longer watching here.
if (!in_nmi())
/**
* ct_nmi_enter - inform RCU of entry to NMI context
*
- * If the CPU was idle from RCU's viewpoint, update ct->dynticks and
+ * If the CPU was idle from RCU's viewpoint, update ct->state and
* ct->dynticks_nmi_nesting to let the RCU grace-period handling know
* that the CPU is active. This implementation permits nested NMIs, as
* long as the nesting level does not overflow an int. (You will probably
rcu_dynticks_task_exit();
// RCU is not watching here ...
- rcu_dynticks_eqs_exit();
+ ct_kernel_enter_state(RCU_DYNTICKS_IDX);
// ... but is watching here.
instrumentation_begin();
// instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
- instrument_atomic_read(&ct->dynticks, sizeof(ct->dynticks));
- // instrumentation for the noinstr rcu_dynticks_eqs_exit()
- instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
+ instrument_atomic_read(&ct->state, sizeof(ct->state));
+ // instrumentation for the noinstr ct_kernel_enter_state()
+ instrument_atomic_write(&ct->state, sizeof(ct->state));
incby = 1;
} else if (!in_nmi()) {
void noinstr ct_idle_enter(void)
{
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
- rcu_eqs_enter(false);
+ ct_kernel_exit(false, RCU_DYNTICKS_IDX + CONTEXT_IDLE);
}
EXPORT_SYMBOL_GPL(ct_idle_enter);
unsigned long flags;
raw_local_irq_save(flags);
- rcu_eqs_exit(false);
+ ct_kernel_enter(false, RCU_DYNTICKS_IDX - CONTEXT_IDLE);
raw_local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(ct_idle_exit);
local_irq_restore(flags);
}
#else
-static __always_inline void rcu_eqs_enter(bool user) { }
-static __always_inline void rcu_eqs_exit(bool user) { }
+static __always_inline void ct_kernel_exit(bool user, int offset) { }
+static __always_inline void ct_kernel_enter(bool user, int offset) { }
#endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */
#ifdef CONFIG_CONTEXT_TRACKING_USER
*/
void noinstr __ct_user_enter(enum ctx_state state)
{
+ struct context_tracking *ct = this_cpu_ptr(&context_tracking);
lockdep_assert_irqs_disabled();
/* Kernel threads aren't supposed to go to userspace */
if (!context_tracking_recursion_enter())
return;
- if ( __this_cpu_read(context_tracking.state) != state) {
- if (__this_cpu_read(context_tracking.active)) {
+ if (__ct_state() != state) {
+ if (ct->active) {
/*
* At this stage, only low level arch entry code remains and
* then we'll run in userspace. We can assume there won't be
* that will fire and reschedule once we resume in user/guest mode.
*/
rcu_irq_work_resched();
+
/*
* Enter RCU idle mode right before resuming userspace. No use of RCU
* is permitted between this call and rcu_eqs_exit(). This way the
* CPU doesn't need to maintain the tick for RCU maintenance purposes
* when the CPU runs in userspace.
*/
- rcu_eqs_enter(true);
+ ct_kernel_exit(true, RCU_DYNTICKS_IDX + state);
+
+ /*
+ * Special case if we only track user <-> kernel transitions for tickless
+ * cputime accounting but we don't support RCU extended quiescent state.
+ * In this case we don't care about any concurrency/ordering.
+ */
+ if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
+ atomic_set(&ct->state, state);
+ } else {
+ /*
+ * Even if context tracking is disabled on this CPU, because it's outside
+ * the full dynticks mask for example, we still have to keep track of the
+ * context transitions and states to prevent inconsistency on those of
+ * other CPUs.
+ * If a task triggers an exception in userspace, sleep on the exception
+ * handler and then migrate to another CPU, that new CPU must know where
+ * the exception returns by the time we call exception_exit().
+ * This information can only be provided by the previous CPU when it called
+ * exception_enter().
+ * OTOH we can spare the calls to vtime and RCU when context_tracking.active
+ * is false because we know that CPU is not tickless.
+ */
+ if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
+ /* Tracking for vtime only, no concurrent RCU EQS accounting */
+ atomic_set(&ct->state, state);
+ } else {
+ /*
+ * Tracking for vtime and RCU EQS. Make sure we don't race
+ * with NMIs. OTOH we don't care about ordering here since
+ * RCU only requires RCU_DYNTICKS_IDX increments to be fully
+ * ordered.
+ */
+ atomic_add(state, &ct->state);
+ }
}
- /*
- * Even if context tracking is disabled on this CPU, because it's outside
- * the full dynticks mask for example, we still have to keep track of the
- * context transitions and states to prevent inconsistency on those of
- * other CPUs.
- * If a task triggers an exception in userspace, sleep on the exception
- * handler and then migrate to another CPU, that new CPU must know where
- * the exception returns by the time we call exception_exit().
- * This information can only be provided by the previous CPU when it called
- * exception_enter().
- * OTOH we can spare the calls to vtime and RCU when context_tracking.active
- * is false because we know that CPU is not tickless.
- */
- __this_cpu_write(context_tracking.state, state);
}
context_tracking_recursion_exit();
}
*/
void noinstr __ct_user_exit(enum ctx_state state)
{
+ struct context_tracking *ct = this_cpu_ptr(&context_tracking);
+
if (!context_tracking_recursion_enter())
return;
- if (__this_cpu_read(context_tracking.state) == state) {
- if (__this_cpu_read(context_tracking.active)) {
+ if (__ct_state() == state) {
+ if (ct->active) {
/*
* Exit RCU idle mode while entering the kernel because it can
* run a RCU read side critical section anytime.
*/
- rcu_eqs_exit(true);
+ ct_kernel_enter(true, RCU_DYNTICKS_IDX - state);
if (state == CONTEXT_USER) {
instrumentation_begin();
vtime_user_exit(current);
trace_user_exit(0);
instrumentation_end();
}
+
+ /*
+ * Special case if we only track user <-> kernel transitions for tickless
+ * cputime accounting but we don't support RCU extended quiescent state.
+ * In this case we don't care about any concurrency/ordering.
+ */
+ if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
+ atomic_set(&ct->state, CONTEXT_KERNEL);
+
+ } else {
+ if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
+ /* Tracking for vtime only, no concurrent RCU EQS accounting */
+ atomic_set(&ct->state, CONTEXT_KERNEL);
+ } else {
+ /*
+ * Tracking for vtime and RCU EQS. Make sure we don't race
+ * with NMIs. OTOH we don't care about ordering here since
+ * RCU only requires RCU_DYNTICKS_IDX increments to be fully
+ * ordered.
+ */
+ atomic_sub(state, &ct->state);
+ }
}
- __this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
}
context_tracking_recursion_exit();
}