net: dev: Make rps_lock() disable interrupts.
author Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Fri, 11 Feb 2022 23:38:39 +0000 (00:38 +0100)
committer David S. Miller <davem@davemloft.net>
Mon, 14 Feb 2022 13:38:35 +0000 (13:38 +0000)
Disabling interrupts and, in the RPS case, locking input_pkt_queue is
split into local_irq_disable() and an optional spin_lock().

This breaks on PREEMPT_RT because the spinlock_t typed lock cannot be
acquired with interrupts disabled.
The sections in which the lock is held are usually short, in the sense
that they do not cause long and unbounded latencies. One exception is
the skb_flow_limit() invocation, which may run a BPF program (and may
require sleeping locks).
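
For illustration only (a sketch of the pre-patch pattern, matching the
lines removed in the diff below):

	unsigned long flags;

	local_irq_save(flags);
	rps_lock(sd);		/* spin_lock() of input_pkt_queue.lock if CONFIG_RPS */
	/* ... enqueue to sd->input_pkt_queue ... */
	rps_unlock(sd);
	local_irq_restore(flags);

On PREEMPT_RT, spinlock_t is a sleeping lock, so it must not be acquired
inside the local_irq_save() section.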

By moving local_irq_disable() + spin_lock() into rps_lock(), we can keep
interrupts disabled on !PREEMPT_RT and enabled on PREEMPT_RT kernels.
Without RPS on a PREEMPT_RT kernel, the needed synchronisation happens
as part of local_bh_disable() on the local CPU.
____napi_schedule() is only invoked if sd belongs to the local CPU. Replace
it with __napi_schedule_irqoff(), which already disables interrupts on
PREEMPT_RT as needed. Move this call to rps_ipi_queued() and rename the
function to napi_schedule_rps(), as suggested by Jakub.
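
With the new helpers, callers follow roughly this pattern (illustrative
sketch; the helper names are the ones added in the diff below):

	unsigned long flags;

	rps_lock_irqsave(sd, &flags);
	/* ... enqueue to sd->input_pkt_queue, schedule backlog NAPI ... */
	rps_unlock_irq_restore(sd, &flags);

On !PREEMPT_RT this disables interrupts (and takes input_pkt_queue.lock if
CONFIG_RPS is enabled); on PREEMPT_RT only the spinlock_t is acquired (with
CONFIG_RPS) and interrupts stay enabled.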

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/core/dev.c

diff --git a/net/core/dev.c b/net/core/dev.c
index ce5047e8db1f0e2cebba1fba6688f96c34183778..909fb381591084a91f7be2da981d813abb5f4b53 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -216,18 +216,38 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
        return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 }
 
-static inline void rps_lock(struct softnet_data *sd)
+static inline void rps_lock_irqsave(struct softnet_data *sd,
+                                   unsigned long *flags)
 {
-#ifdef CONFIG_RPS
-       spin_lock(&sd->input_pkt_queue.lock);
-#endif
+       if (IS_ENABLED(CONFIG_RPS))
+               spin_lock_irqsave(&sd->input_pkt_queue.lock, *flags);
+       else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_irq_save(*flags);
 }
 
-static inline void rps_unlock(struct softnet_data *sd)
+static inline void rps_lock_irq_disable(struct softnet_data *sd)
 {
-#ifdef CONFIG_RPS
-       spin_unlock(&sd->input_pkt_queue.lock);
-#endif
+       if (IS_ENABLED(CONFIG_RPS))
+               spin_lock_irq(&sd->input_pkt_queue.lock);
+       else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_irq_disable();
+}
+
+static inline void rps_unlock_irq_restore(struct softnet_data *sd,
+                                         unsigned long *flags)
+{
+       if (IS_ENABLED(CONFIG_RPS))
+               spin_unlock_irqrestore(&sd->input_pkt_queue.lock, *flags);
+       else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_irq_restore(*flags);
+}
+
+static inline void rps_unlock_irq_enable(struct softnet_data *sd)
+{
+       if (IS_ENABLED(CONFIG_RPS))
+               spin_unlock_irq(&sd->input_pkt_queue.lock);
+       else if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_irq_enable();
 }
 
 static struct netdev_name_node *netdev_name_node_alloc(struct net_device *dev,
@@ -4456,11 +4476,11 @@ static void rps_trigger_softirq(void *data)
  * If yes, queue it to our IPI list and return 1
  * If no, return 0
  */
-static int rps_ipi_queued(struct softnet_data *sd)
+static int napi_schedule_rps(struct softnet_data *sd)
 {
-#ifdef CONFIG_RPS
        struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
 
+#ifdef CONFIG_RPS
        if (sd != mysd) {
                sd->rps_ipi_next = mysd->rps_ipi_list;
                mysd->rps_ipi_list = sd;
@@ -4469,6 +4489,7 @@ static int rps_ipi_queued(struct softnet_data *sd)
                return 1;
        }
 #endif /* CONFIG_RPS */
+       __napi_schedule_irqoff(&mysd->backlog);
        return 0;
 }
 
@@ -4525,9 +4546,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 
        sd = &per_cpu(softnet_data, cpu);
 
-       local_irq_save(flags);
-
-       rps_lock(sd);
+       rps_lock_irqsave(sd, &flags);
        if (!netif_running(skb->dev))
                goto drop;
        qlen = skb_queue_len(&sd->input_pkt_queue);
@@ -4536,26 +4555,21 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 enqueue:
                        __skb_queue_tail(&sd->input_pkt_queue, skb);
                        input_queue_tail_incr_save(sd, qtail);
-                       rps_unlock(sd);
-                       local_irq_restore(flags);
+                       rps_unlock_irq_restore(sd, &flags);
                        return NET_RX_SUCCESS;
                }
 
                /* Schedule NAPI for backlog device
                 * We can use non atomic operation since we own the queue lock
                 */
-               if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state)) {
-                       if (!rps_ipi_queued(sd))
-                               ____napi_schedule(sd, &sd->backlog);
-               }
+               if (!__test_and_set_bit(NAPI_STATE_SCHED, &sd->backlog.state))
+                       napi_schedule_rps(sd);
                goto enqueue;
        }
 
 drop:
        sd->dropped++;
-       rps_unlock(sd);
-
-       local_irq_restore(flags);
+       rps_unlock_irq_restore(sd, &flags);
 
        atomic_long_inc(&skb->dev->rx_dropped);
        kfree_skb(skb);
@@ -5638,8 +5652,7 @@ static void flush_backlog(struct work_struct *work)
        local_bh_disable();
        sd = this_cpu_ptr(&softnet_data);
 
-       local_irq_disable();
-       rps_lock(sd);
+       rps_lock_irq_disable(sd);
        skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
                if (skb->dev->reg_state == NETREG_UNREGISTERING) {
                        __skb_unlink(skb, &sd->input_pkt_queue);
@@ -5647,8 +5660,7 @@ static void flush_backlog(struct work_struct *work)
                        input_queue_head_incr(sd);
                }
        }
-       rps_unlock(sd);
-       local_irq_enable();
+       rps_unlock_irq_enable(sd);
 
        skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
                if (skb->dev->reg_state == NETREG_UNREGISTERING) {
@@ -5666,16 +5678,14 @@ static bool flush_required(int cpu)
        struct softnet_data *sd = &per_cpu(softnet_data, cpu);
        bool do_flush;
 
-       local_irq_disable();
-       rps_lock(sd);
+       rps_lock_irq_disable(sd);
 
        /* as insertion into process_queue happens with the rps lock held,
         * process_queue access may race only with dequeue
         */
        do_flush = !skb_queue_empty(&sd->input_pkt_queue) ||
                   !skb_queue_empty_lockless(&sd->process_queue);
-       rps_unlock(sd);
-       local_irq_enable();
+       rps_unlock_irq_enable(sd);
 
        return do_flush;
 #endif
@@ -5790,8 +5800,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 
                }
 
-               local_irq_disable();
-               rps_lock(sd);
+               rps_lock_irq_disable(sd);
                if (skb_queue_empty(&sd->input_pkt_queue)) {
                        /*
                         * Inline a custom version of __napi_complete().
@@ -5807,8 +5816,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
                        skb_queue_splice_tail_init(&sd->input_pkt_queue,
                                                   &sd->process_queue);
                }
-               rps_unlock(sd);
-               local_irq_enable();
+               rps_unlock_irq_enable(sd);
        }
 
        return work;