rcu: Add full-sized polling for get_completed*() and poll_state*()
author Paul E. McKenney <paulmck@kernel.org>
Thu, 28 Jul 2022 22:37:05 +0000 (15:37 -0700)
committer Paul E. McKenney <paulmck@kernel.org>
Wed, 31 Aug 2022 12:08:07 +0000 (05:08 -0700)
The get_completed_synchronize_rcu() and poll_state_synchronize_rcu()
APIs compress the combined expedited and normal grace-period states into a
single unsigned long, which conserves storage, but can miss grace periods
in certain cases involving overlapping normal and expedited grace periods.
Missing the occasional grace period is usually not a problem, but there
are use cases that care about each and every grace period.

This commit therefore adds the first members of the full-state RCU
grace-period polling API, namely the get_completed_synchronize_rcu_full()
and poll_state_synchronize_rcu_full() functions.  These use up to three
times the storage (an rcu_gp_oldstate structure instead of an unsigned
long), but are guaranteed not to miss grace periods, at least in
situations where the single-CPU grace-period optimization does not apply.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
include/linux/rcupdate.h
include/linux/rcutiny.h
include/linux/rcutree.h
kernel/rcu/rcutorture.c
kernel/rcu/tiny.c
kernel/rcu/tree.c

index f527f27e643878308bc2f288bad0c07226544ffe..faaa174dfb27c9aa43384058324bdb0e1529024f 100644 (file)
@@ -42,7 +42,10 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func);
 void rcu_barrier_tasks(void);
 void rcu_barrier_tasks_rude(void);
 void synchronize_rcu(void);
+
+struct rcu_gp_oldstate;
 unsigned long get_completed_synchronize_rcu(void);
+void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp);
 
 #ifdef CONFIG_PREEMPT_RCU
 
index 62815c0a2dcefc9c53cfc8b3e838bcab7a5c3ee3..1fbff8600d92db75ff393a62eca1c22e23104246 100644 (file)
 
 #include <asm/param.h> /* for HZ */
 
+struct rcu_gp_oldstate { /* Full-state grace-period polling cookie; this variant carries a single value. */
+       unsigned long rgos_norm; /* Cookie value that poll_state_synchronize_rcu_full() hands to poll_state_synchronize_rcu(). */
+};
+
 unsigned long get_state_synchronize_rcu(void);
 unsigned long start_poll_synchronize_rcu(void);
 bool poll_state_synchronize_rcu(unsigned long oldstate);
 
+static inline bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{ /* Full-state poll for the single-field rcu_gp_oldstate: @rgosp must be non-NULL, as it is dereferenced unconditionally. */
+       return poll_state_synchronize_rcu(rgosp->rgos_norm); /* Result is exactly that of the unsigned-long poll applied to ->rgos_norm. */
+}
+
 static inline void cond_synchronize_rcu(unsigned long oldstate)
 {
        might_sleep();
index 47eaa4cb0df729a9b7e4ef242ae5a6d153fda057..4ccbc3aa9dc20895b1f45fd05985beb78aa7fcca 100644 (file)
@@ -40,11 +40,19 @@ bool rcu_eqs_special_set(int cpu);
 void rcu_momentary_dyntick_idle(void);
 void kfree_rcu_scheduler_running(void);
 bool rcu_gp_might_be_stalled(void);
+
+struct rcu_gp_oldstate { /* Full-state grace-period polling cookie: one recorded value per counter that gets polled. */
+       unsigned long rgos_norm; /* Checked against the root rcu_node structure's ->gp_seq. */
+       unsigned long rgos_exp; /* Checked against rcu_state.expedited_sequence. */
+       unsigned long rgos_polled; /* Checked against rcu_state.gp_seq_polled. */
+};
+
 unsigned long start_poll_synchronize_rcu_expedited(void);
 void cond_synchronize_rcu_expedited(unsigned long oldstate);
 unsigned long get_state_synchronize_rcu(void);
 unsigned long start_poll_synchronize_rcu(void);
 bool poll_state_synchronize_rcu(unsigned long oldstate);
+bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp);
 void cond_synchronize_rcu(unsigned long oldstate);
 
 bool rcu_is_idle_cpu(int cpu);
index d8e1b270a065f9d5133f2b3a053da5c7f5e0e3fe..b31e6ed64d1b96d0a788e78d21fdad07243d047f 100644 (file)
@@ -336,8 +336,10 @@ struct rcu_torture_ops {
        void (*cond_sync_exp)(unsigned long oldstate);
        unsigned long (*get_gp_state)(void);
        unsigned long (*get_gp_completed)(void);
+       void (*get_gp_completed_full)(struct rcu_gp_oldstate *rgosp);
        unsigned long (*start_gp_poll)(void);
        bool (*poll_gp_state)(unsigned long oldstate);
+       bool (*poll_gp_state_full)(struct rcu_gp_oldstate *rgosp);
        void (*cond_sync)(unsigned long oldstate);
        call_rcu_func_t call;
        void (*cb_barrier)(void);
@@ -503,8 +505,10 @@ static struct rcu_torture_ops rcu_ops = {
        .exp_sync               = synchronize_rcu_expedited,
        .get_gp_state           = get_state_synchronize_rcu,
        .get_gp_completed       = get_completed_synchronize_rcu,
+       .get_gp_completed_full  = get_completed_synchronize_rcu_full,
        .start_gp_poll          = start_poll_synchronize_rcu,
        .poll_gp_state          = poll_state_synchronize_rcu,
+       .poll_gp_state_full     = poll_state_synchronize_rcu_full,
        .cond_sync              = cond_synchronize_rcu,
        .get_gp_state_exp       = get_state_synchronize_rcu,
        .start_gp_poll_exp      = start_poll_synchronize_rcu_expedited,
@@ -1212,6 +1216,7 @@ rcu_torture_writer(void *arg)
        bool boot_ended;
        bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal();
        unsigned long cookie;
+       struct rcu_gp_oldstate cookie_full;
        int expediting = 0;
        unsigned long gp_snap;
        int i;
@@ -1277,6 +1282,10 @@ rcu_torture_writer(void *arg)
                                }
                                cur_ops->readunlock(idx);
                        }
+                       if (cur_ops->get_gp_completed_full && cur_ops->poll_gp_state_full) {
+                               cur_ops->get_gp_completed_full(&cookie_full);
+                               WARN_ON_ONCE(!cur_ops->poll_gp_state_full(&cookie_full));
+                       }
                        switch (synctype[torture_random(&rand) % nsynctypes]) {
                        case RTWS_DEF_FREE:
                                rcu_torture_writer_state = RTWS_DEF_FREE;
index f0561ee16b9c25bc82ce26cd750a1a70962e058b..435edc785412c3e2e08ef8f93434e556c2e32a9c 100644 (file)
@@ -183,6 +183,16 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
+/*
+ * Store a grace-period-counter "cookie" into the structure referenced
+ * by @rgosp, which must be non-NULL.  The only field of this variant
+ * of the structure, ->rgos_norm, is set to RCU_GET_STATE_COMPLETED,
+ * the value used to denote a grace period that is intended to be
+ * reported as already completed by poll_state_synchronize_rcu_full().
+ * For more information, see the Tree RCU header comment.
+ */
+void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+       rgosp->rgos_norm = RCU_GET_STATE_COMPLETED;
+}
+EXPORT_SYMBOL_GPL(get_completed_synchronize_rcu_full);
+
 /*
  * Return a grace-period-counter "cookie".  For more information,
  * see the Tree RCU header comment.
index 79aea7df4345e417ec4ed75646902f9eb2227f1a..d47c9b6d8106617db1f821e72bfa4e59c4ed6ac0 100644 (file)
@@ -3522,6 +3522,22 @@ void synchronize_rcu(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
+/**
+ * get_completed_synchronize_rcu_full - Return a full pre-completed polled state cookie
+ * @rgosp: Place to put state cookie; must be non-NULL
+ *
+ * Stores into @rgosp a value that will always be treated by functions
+ * like poll_state_synchronize_rcu_full() as a cookie whose grace period
+ * has already completed.
+ *
+ * All three fields (->rgos_norm, ->rgos_exp, and ->rgos_polled) are set
+ * to RCU_GET_STATE_COMPLETED, so that the resulting cookie does not
+ * depend on the current value of any grace-period counter.  Nothing is
+ * returned; the result is delivered solely through *@rgosp.
+ */
+void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+       rgosp->rgos_norm = RCU_GET_STATE_COMPLETED;
+       rgosp->rgos_exp = RCU_GET_STATE_COMPLETED;
+       rgosp->rgos_polled = RCU_GET_STATE_COMPLETED;
+}
+EXPORT_SYMBOL_GPL(get_completed_synchronize_rcu_full);
+
 /**
  * get_state_synchronize_rcu - Snapshot current RCU state
  *
@@ -3580,7 +3596,7 @@ unsigned long start_poll_synchronize_rcu(void)
 EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
 
 /**
- * poll_state_synchronize_rcu - Conditionally wait for an RCU grace period
+ * poll_state_synchronize_rcu - Has the specified RCU grace period completed?
  *
  * @oldstate: value from get_state_synchronize_rcu() or start_poll_synchronize_rcu()
  *
@@ -3595,9 +3611,10 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
  * But counter wrap is harmless.  If the counter wraps, we have waited for
  * more than a billion grace periods (and way more on a 64-bit system!).
  * Those needing to keep oldstate values for very long time periods
- * (many hours even on 32-bit systems) should check them occasionally
- * and either refresh them or set a flag indicating that the grace period
- * has completed.
+ * (many hours even on 32-bit systems) should check them occasionally and
+ * either refresh them or set a flag indicating that the grace period has
+ * completed.  Alternatively, they can use get_completed_synchronize_rcu()
+ * to get a guaranteed-completed grace-period state.
  *
  * This function provides the same memory-ordering guarantees that
  * would be provided by a synchronize_rcu() that was invoked at the call
@@ -3615,6 +3632,57 @@ bool poll_state_synchronize_rcu(unsigned long oldstate)
 }
 EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
 
+/**
+ * poll_state_synchronize_rcu_full - Has the specified RCU grace period completed?
+ * @rgosp: value from get_completed_synchronize_rcu_full(), get_state_synchronize_rcu_full(), or start_poll_synchronize_rcu_full()
+ *
+ * If a full RCU grace period has elapsed since the earlier call from
+ * which *rgosp was obtained, return @true, otherwise return @false.
+ * If @false is returned, it is the caller's responsibility to invoke this
+ * function later on until it does return @true.  Alternatively, the caller
+ * can explicitly wait for a grace period, for example, by directly
+ * invoking synchronize_rcu().  Note that cond_synchronize_rcu() takes an
+ * unsigned long cookie rather than a pointer to an rcu_gp_oldstate
+ * structure, so @rgosp cannot be handed to it as-is.
+ *
+ * The three fields of *rgosp are examined independently, and it suffices
+ * for any one of them to indicate completion.  A field indicates
+ * completion if it holds RCU_GET_STATE_COMPLETED or if
+ * rcu_seq_done_exact() returns true for that field and its counter:
+ * the root rcu_node structure's ->gp_seq for ->rgos_norm,
+ * rcu_state.expedited_sequence for ->rgos_exp, and
+ * rcu_state.gp_seq_polled for ->rgos_polled.
+ *
+ * Yes, this function does not take counter wrap into account.
+ * But counter wrap is harmless.  If the counter wraps, we have waited
+ * for more than a billion grace periods (and way more on a 64-bit
+ * system!).  Those needing to keep rcu_gp_oldstate values for very
+ * long time periods (many hours even on 32-bit systems) should check
+ * them occasionally and either refresh them or set a flag indicating
+ * that the grace period has completed.  Alternatively, they can use
+ * get_completed_synchronize_rcu_full() to get a guaranteed-completed
+ * grace-period state.
+ *
+ * This function provides the same memory-ordering guarantees that would
+ * be provided by a synchronize_rcu() that was invoked at the call to
+ * the function that provided @rgosp, and that returned at the end of this
+ * function.  And this guarantee requires that the root rcu_node structure's
+ * ->gp_seq field be checked instead of that of the rcu_state structure.
+ * The problem is that the just-ending grace-period's callbacks can be
+ * invoked between the time that the root rcu_node structure's ->gp_seq
+ * field is updated and the time that the rcu_state structure's ->gp_seq
+ * field is updated.  Therefore, if a single synchronize_rcu() is to
+ * cause a subsequent poll_state_synchronize_rcu_full() to return @true,
+ * then the root rcu_node structure is the one that needs to be polled.
+ *
+ * Return: @true if any field of *rgosp indicates a completed grace
+ * period (in which case a full memory barrier is executed before
+ * returning), otherwise @false.
+ */
+bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+       struct rcu_node *rnp = rcu_get_root();
+
+       smp_mb(); // Order against root rcu_node structure grace-period cleanup.
+       if (rgosp->rgos_norm == RCU_GET_STATE_COMPLETED ||
+           rcu_seq_done_exact(&rnp->gp_seq, rgosp->rgos_norm) ||
+           rgosp->rgos_exp == RCU_GET_STATE_COMPLETED ||
+           rcu_seq_done_exact(&rcu_state.expedited_sequence, rgosp->rgos_exp) ||
+           rgosp->rgos_polled == RCU_GET_STATE_COMPLETED ||
+           rcu_seq_done_exact(&rcu_state.gp_seq_polled, rgosp->rgos_polled)) {
+               smp_mb(); /* Ensure GP ends before subsequent accesses. */
+               return true;
+       }
+       return false;
+}
+EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu_full);
+
 /**
  * cond_synchronize_rcu - Conditionally wait for an RCU grace period
  *