net: rps: change input_queue_tail_incr_save()
author		Eric Dumazet <edumazet@google.com>	Fri, 29 Mar 2024 15:42:23 +0000 (15:42 +0000)
committer	David S. Miller <davem@davemloft.net>	Mon, 1 Apr 2024 10:28:32 +0000 (11:28 +0100)
input_queue_tail_incr_save() increments the sd queue_tail
and saves it in the flow's last_qtail.

Two issues here:

- No lock protects the write to last_qtail; we should use appropriate
  READ_ONCE()/WRITE_ONCE() annotations.

- We can perform this write after releasing the per-cpu backlog lock,
  to shorten the lock hold duration (moving the cache line miss out of
  the critical section); see the sketch after this list.
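A minimal userspace sketch of the resulting pattern (hypothetical
names; a pthread mutex stands in for the per-cpu backlog lock, and
volatile casts stand in for the kernel's READ_ONCE()/WRITE_ONCE()):

    #include <pthread.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Userspace stand-ins for the kernel's data-race annotations. */
    #define WRITE_ONCE(x, val) (*(volatile typeof(x) *)&(x) = (val))
    #define READ_ONCE(x)       (*(volatile typeof(x) *)&(x))

    struct backlog {                    /* stand-in for struct softnet_data */
            pthread_mutex_t lock;
            uint32_t input_queue_tail;
    };

    struct flow {                       /* stand-in for the rps flow entry */
            uint32_t last_qtail;        /* read/written locklessly */
    };

    static void enqueue(struct backlog *sd, struct flow *rflow)
    {
            uint32_t tail;

            pthread_mutex_lock(&sd->lock);
            /* ... queue the packet ... */
            tail = ++sd->input_queue_tail;  /* protected by the lock */
            pthread_mutex_unlock(&sd->lock);

            /* Publish the snapshot outside the critical section, so the
             * store to the shared (possibly cache-cold) flow entry does
             * not extend the lock hold time. */
            WRITE_ONCE(rflow->last_qtail, tail);
    }

    int main(void)
    {
            struct backlog sd = { .lock = PTHREAD_MUTEX_INITIALIZER };
            struct flow rflow = { 0 };

            enqueue(&sd, &rflow);
            printf("last_qtail = %u\n", READ_ONCE(rflow.last_qtail));
            return 0;
    }

The patch below splits this into rps_input_queue_tail_incr() (called
under the lock) and rps_input_queue_tail_save() (called after the
unlock), with READ_ONCE() on the reader side in get_rps_cpu() and
rps_may_expire_flow().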

Also move input_queue_head_incr() and the related rps helpers to
include/net/rps.h, adding a rps_ prefix to better reflect their role.

v2: Fixed a build issue (Jakub and kernel build bots)

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/netdevice.h
include/net/rps.h
net/core/dev.c

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index be9f071a340d2ac3a0b02f85d6abe2b61fee0267..2c11c295cf64a8f527064d254450e34dc73a4764 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3250,21 +3250,6 @@ struct softnet_data {
        call_single_data_t      defer_csd;
 };
 
-static inline void input_queue_head_incr(struct softnet_data *sd)
-{
-#ifdef CONFIG_RPS
-       sd->input_queue_head++;
-#endif
-}
-
-static inline void input_queue_tail_incr_save(struct softnet_data *sd,
-                                             unsigned int *qtail)
-{
-#ifdef CONFIG_RPS
-       *qtail = ++sd->input_queue_tail;
-#endif
-}
-
 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 
 static inline int dev_recursion_level(void)
diff --git a/include/net/rps.h b/include/net/rps.h
index 7660243e905b92651a41292e04caf72c5f12f26e..10ca25731c1ef766715fe7ee415ad0b71ec643a8 100644
--- a/include/net/rps.h
+++ b/include/net/rps.h
@@ -122,4 +122,27 @@ static inline void sock_rps_record_flow(const struct sock *sk)
 #endif
 }
 
+static inline u32 rps_input_queue_tail_incr(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+       return ++sd->input_queue_tail;
+#else
+       return 0;
+#endif
+}
+
+static inline void rps_input_queue_tail_save(u32 *dest, u32 tail)
+{
+#ifdef CONFIG_RPS
+       WRITE_ONCE(*dest, tail);
+#endif
+}
+
+static inline void rps_input_queue_head_incr(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+       sd->input_queue_head++;
+#endif
+}
+
 #endif /* _NET_RPS_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index 0a8ccb0451c30a39f8f8b45d26b7e5548b8bfba4..79073bbc9a644049cacf8433310f4641745049e9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4611,7 +4611,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                if (unlikely(tcpu != next_cpu) &&
                    (tcpu >= nr_cpu_ids || !cpu_online(tcpu) ||
                     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
-                     rflow->last_qtail)) >= 0)) {
+                     READ_ONCE(rflow->last_qtail))) >= 0)) {
                        tcpu = next_cpu;
                        rflow = set_rps_cpu(dev, skb, rflow, next_cpu);
                }
@@ -4666,7 +4666,7 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index,
                cpu = READ_ONCE(rflow->cpu);
                if (rflow->filter == filter_id && cpu < nr_cpu_ids &&
                    ((int)(per_cpu(softnet_data, cpu).input_queue_head -
-                          rflow->last_qtail) <
+                          READ_ONCE(rflow->last_qtail)) <
                     (int)(10 * flow_table->mask)))
                        expire = false;
        }
@@ -4801,6 +4801,7 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
        unsigned long flags;
        unsigned int qlen;
        int max_backlog;
+       u32 tail;
 
        reason = SKB_DROP_REASON_DEV_READY;
        if (!netif_running(skb->dev))
@@ -4825,8 +4826,11 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
                                napi_schedule_rps(sd);
                }
                __skb_queue_tail(&sd->input_pkt_queue, skb);
-               input_queue_tail_incr_save(sd, qtail);
+               tail = rps_input_queue_tail_incr(sd);
                backlog_unlock_irq_restore(sd, &flags);
+
+               /* save the tail outside of the critical section */
+               rps_input_queue_tail_save(qtail, tail);
                return NET_RX_SUCCESS;
        }
 
@@ -5904,7 +5908,7 @@ static void flush_backlog(struct work_struct *work)
                if (skb->dev->reg_state == NETREG_UNREGISTERING) {
                        __skb_unlink(skb, &sd->input_pkt_queue);
                        dev_kfree_skb_irq(skb);
-                       input_queue_head_incr(sd);
+                       rps_input_queue_head_incr(sd);
                }
        }
        backlog_unlock_irq_enable(sd);
@@ -5913,7 +5917,7 @@ static void flush_backlog(struct work_struct *work)
                if (skb->dev->reg_state == NETREG_UNREGISTERING) {
                        __skb_unlink(skb, &sd->process_queue);
                        kfree_skb(skb);
-                       input_queue_head_incr(sd);
+                       rps_input_queue_head_incr(sd);
                }
        }
        local_bh_enable();
@@ -6041,7 +6045,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
                        rcu_read_lock();
                        __netif_receive_skb(skb);
                        rcu_read_unlock();
-                       input_queue_head_incr(sd);
+                       rps_input_queue_head_incr(sd);
                        if (++work >= quota)
                                return work;
 
@@ -11455,11 +11459,11 @@ static int dev_cpu_dead(unsigned int oldcpu)
        /* Process offline CPU's input_pkt_queue */
        while ((skb = __skb_dequeue(&oldsd->process_queue))) {
                netif_rx(skb);
-               input_queue_head_incr(oldsd);
+               rps_input_queue_head_incr(oldsd);
        }
        while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
                netif_rx(skb);
-               input_queue_head_incr(oldsd);
+               rps_input_queue_head_incr(oldsd);
        }
 
        return 0;