*  |--------------------------------------------------------------------------|
  *  |                           SEGCBLIST_RCU_CORE   |                         |
  *  |                           SEGCBLIST_LOCKING    |                         |
+ *  |                           SEGCBLIST_OFFLOADED  |                         |
+ *  |                           SEGCBLIST_KTHREAD_CB |                         |
+ *  |                           SEGCBLIST_KTHREAD_GP                           |
+ *  |                                                                          |
+ *  |   CB/GP kthreads handle callbacks holding nocb_lock, local rcu_core()    |
+ *  |   handles callbacks concurrently. Bypass enqueue is enabled.             |
+ *  |   Invoke RCU core so that if we preempted it in the middle, urgent       |
+ *  |   work is not left unattended for a whole jiffy.                         |
+ *  ----------------------------------------------------------------------------
+ *                                      |
+ *                                      v
+ *  |--------------------------------------------------------------------------|
+ *  |                           SEGCBLIST_RCU_CORE   |                         |
+ *  |                           SEGCBLIST_LOCKING    |                         |
  *  |                           SEGCBLIST_KTHREAD_CB |                         |
  *  |                           SEGCBLIST_KTHREAD_GP                           |
  *  |                                                                          |
 
  */
 void rcu_segcblist_offload(struct rcu_segcblist *rsclp, bool offload)
 {
-       if (offload) {
+       if (offload)
                rcu_segcblist_set_flags(rsclp, SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED);
-       } else {
-               rcu_segcblist_set_flags(rsclp, SEGCBLIST_RCU_CORE);
+       else
                rcu_segcblist_clear_flags(rsclp, SEGCBLIST_OFFLOADED);
-       }
 }
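
For reference, the flag accessors used in rcu_segcblist_offload() are plain bitmask helpers operating on rsclp->flags. The sketch below approximates their shape in include/linux/rcu_segcblist.h and is not part of this patch; the real tree may differ in detail (e.g. READ_ONCE()/WRITE_ONCE() annotations).

/* Sketch of the flag helpers (approximate, not taken from this patch). */
static inline void rcu_segcblist_set_flags(struct rcu_segcblist *rsclp,
                                           int flags)
{
        rsclp->flags |= flags;          /* e.g. SEGCBLIST_LOCKING | SEGCBLIST_OFFLOADED */
}

static inline void rcu_segcblist_clear_flags(struct rcu_segcblist *rsclp,
                                             int flags)
{
        rsclp->flags &= ~flags;         /* e.g. drop SEGCBLIST_OFFLOADED on deoffload */
}

static inline bool rcu_segcblist_test_flags(struct rcu_segcblist *rsclp,
                                            int flags)
{
        return READ_ONCE(rsclp->flags) & flags;
}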
 
 /*
 
        unsigned long flags;
        struct rcu_data *rdp = raw_cpu_ptr(&rcu_data);
        struct rcu_node *rnp = rdp->mynode;
+       /*
+        * On RT rcu_core() can be preempted when IRQs aren't disabled.
+        * Therefore this function can race with concurrent NOCB (de-)offloading
+        * on this CPU and the below condition must be considered volatile.
+        * However if we race with:
+        *
+        * _ Offloading:   In the worst case we accelerate or process callbacks
+        *                 concurrently with NOCB kthreads. We are guaranteed to
+        *                 call rcu_nocb_lock() if that happens.
+        *
+        * _ Deoffloading: In the worst case we miss callback acceleration or
+        *                 processing. This is fine because the early stage
+        *                 of deoffloading invokes rcu_core() after setting
+        *                 SEGCBLIST_RCU_CORE. So we guarantee that we'll process
+        *                 what could have been dismissed without the need to wait
+        *                 for the next rcu_pending() check in the next jiffy.
+        */
        const bool do_batch = !rcu_segcblist_completely_offloaded(&rdp->cblist);
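
The do_batch condition above is the racy read that the preceding comment hedges about. Once SEGCBLIST_RCU_CORE exists, "completely offloaded" presumably reduces to "local rcu_core() processing is not requested"; the helper below is a sketch of that assumed shape, not code quoted from the patch.

/* Assumed shape of the test behind do_batch (sketch only). */
static inline bool rcu_segcblist_completely_offloaded(struct rcu_segcblist *rsclp)
{
        if (IS_ENABLED(CONFIG_RCU_NOCB_CPU) &&
            !rcu_segcblist_test_flags(rsclp, SEGCBLIST_RCU_CORE))
                return true;

        return false;
}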
 
        if (cpu_is_offline(smp_processor_id()))
 
         * will refuse to put anything into the bypass.
         */
        WARN_ON_ONCE(!rcu_nocb_flush_bypass(rdp, NULL, jiffies));
+       /*
+        * Start with invoking rcu_core() early. This way if the current thread
+        * happens to preempt an ongoing call to rcu_core() in the middle,
+        * leaving some work dismissed because rcu_core() still thinks the rdp is
+        * completely offloaded, a future instance of rcu_core() is guaranteed
+        * to run soon and catch up on that work.
+        */
+       rcu_segcblist_set_flags(cblist, SEGCBLIST_RCU_CORE);
+       invoke_rcu_core();
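
An aside on why a future rcu_core() instance is guaranteed at this point: invoke_rcu_core() either raises RCU_SOFTIRQ or wakes the per-CPU rcuc kthread on the local CPU. The sketch below is recalled from kernel/rcu/tree.c rather than taken from this patch, and may not match this kernel version exactly.

/* Sketch of invoke_rcu_core() from kernel/rcu/tree.c (approximate). */
static void invoke_rcu_core(void)
{
        if (!cpu_online(smp_processor_id()))
                return;
        if (use_softirq)                        /* rcutree.use_softirq boot parameter */
                raise_softirq(RCU_SOFTIRQ);     /* rcu_core() runs from softirq context... */
        else
                invoke_rcu_core_kthread();      /* ...or from the per-CPU rcuc kthread */
}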
        ret = rdp_offload_toggle(rdp, false, flags);
        swait_event_exclusive(rdp->nocb_state_wq,
                              !rcu_segcblist_test_flags(cblist, SEGCBLIST_KTHREAD_CB |