tcp: record last received ipv6 flowlabel
author David Morley <morleyd@google.com>
Fri, 6 Oct 2023 01:18:40 +0000 (01:18 +0000)
committer Paolo Abeni <pabeni@redhat.com>
Tue, 10 Oct 2023 08:02:59 +0000 (10:02 +0200)
In order to better estimate whether a data packet has been
retransmitted or is the result of a TLP, we save the last received
ipv6 flowlabel.

To make space for this field we shrink the "ato" field in
inet_connection_sock to ATO_BITS (8) bits, as the current value of
TCP_DELACK_MAX can be fully contained in 8 bits, and add a
static_assert ensuring that TCP_DELACK_MAX still fits in the field.

v2: addressed kernel bot feedback about dccp_delack_timer()
v3: addressed build error introduced by commit bbf80d713fe7 ("tcp:
derive delack_max from rto_min")

Signed-off-by: David Morley <morleyd@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Tested-by: David Morley <morleyd@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
include/net/inet_connection_sock.h
include/net/tcp.h
net/dccp/timer.c
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_timer.c

index 5d2fcc137b8814bf43eb72b1159446093f7da755..d6d9d1c1985a03e8d07e483d8891538a0f2765f8 100644 (file)
@@ -114,7 +114,10 @@ struct inet_connection_sock {
                __u8              quick;         /* Scheduled number of quick acks         */
                __u8              pingpong;      /* The session is interactive             */
                __u8              retry;         /* Number of attempts                     */
-               __u32             ato;           /* Predicted tick of soft clock           */
+               #define ATO_BITS 8
+               __u32             ato:ATO_BITS,  /* Predicted tick of soft clock           */
+                                 lrcv_flowlabel:20, /* last received ipv6 flowlabel       */
+                                 unused:4;
                unsigned long     timeout;       /* Currently scheduled timeout            */
                __u32             lrcvtime;      /* timestamp of last received data packet */
                __u16             last_seg_size; /* Size of last incoming segment          */
index 9eb0a28553119c9812cda8a8c79de68cdb0acc9d..7fdedf5c71f0ca3168cc1bf48757f1d91cc1ab5a 100644 (file)
@@ -131,6 +131,8 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */
 
 #define TCP_DELACK_MAX ((unsigned)(HZ/5))      /* maximal time to delay before sending an ACK */
+static_assert((1 << ATO_BITS) > TCP_DELACK_MAX);
+
 #if HZ >= 100
 #define TCP_DELACK_MIN ((unsigned)(HZ/25))     /* minimal time to delay before sending an ACK */
 #define TCP_ATO_MIN    ((unsigned)(HZ/25))
index b3255e87cc7e130bbcbfd1cd4aa98ba03d7cefaf..a4cfb47b60e523bd4a8f1f47cfedc11e633945c6 100644 (file)
@@ -196,8 +196,8 @@ static void dccp_delack_timer(struct timer_list *t)
        if (inet_csk_ack_scheduled(sk)) {
                if (!inet_csk_in_pingpong_mode(sk)) {
                        /* Delayed ACK missed: inflate ATO. */
-                       icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
-                                                icsk->icsk_rto);
+                       icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1,
+                                                  icsk->icsk_rto);
                } else {
                        /* Delayed ACK missed: leave pingpong mode and
                         * deflate ATO.
index 9a8b134d8ada9624253f3b3d13e9253b20271675..faabb5a4a3784da6888fb98a44c7291d4a5fb570 100644 (file)
@@ -3756,8 +3756,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
                info->tcpi_options |= TCPI_OPT_SYN_DATA;
 
        info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto);
-       info->tcpi_ato = jiffies_to_usecs(min(icsk->icsk_ack.ato,
-                                             tcp_delack_max(sk)));
+       info->tcpi_ato = jiffies_to_usecs(min_t(u32, icsk->icsk_ack.ato,
+                                               tcp_delack_max(sk)));
        info->tcpi_snd_mss = tp->mss_cache;
        info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss;
 
index 4b8f2e74d71d82e9d321501e8d8853aee2fe9b73..2ae4b211c12583c8ce0337134912620e49a92957 100644 (file)
@@ -778,6 +778,16 @@ new_measure:
        tp->rcvq_space.time = tp->tcp_mstamp;
 }
 
+static void tcp_save_lrcv_flowlabel(struct sock *sk, const struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       if (skb->protocol == htons(ETH_P_IPV6))
+               icsk->icsk_ack.lrcv_flowlabel = ntohl(ip6_flowlabel(ipv6_hdr(skb)));
+#endif
+}
+
 /* There is something which you must keep in mind when you analyze the
  * behavior of the tp->ato delayed ack timeout interval.  When a
  * connection starts up, we want to ack as quickly as possible.  The
@@ -826,6 +836,7 @@ static void tcp_event_data_recv(struct sock *sk, struct sk_buff *skb)
                }
        }
        icsk->icsk_ack.lrcvtime = now;
+       tcp_save_lrcv_flowlabel(sk, skb);
 
        tcp_ecn_check_ce(sk, skb);
 
@@ -4519,6 +4530,9 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
        if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
            sk_rethink_txhash(sk))
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
+
+       /* Save last flowlabel after a spurious retrans. */
+       tcp_save_lrcv_flowlabel(sk, skb);
 }
 
 static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
@@ -4835,6 +4849,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb)
        u32 seq, end_seq;
        bool fragstolen;
 
+       tcp_save_lrcv_flowlabel(sk, skb);
        tcp_ecn_check_ce(sk, skb);
 
        if (unlikely(tcp_try_rmem_schedule(sk, skb, skb->truesize))) {
index 3f61c6a70a1ff9b8f310aea30a91306e963216f5..0862b73dd3b5299d2b201e9e93dbef8a0617f75b 100644 (file)
@@ -322,7 +322,7 @@ void tcp_delack_timer_handler(struct sock *sk)
        if (inet_csk_ack_scheduled(sk)) {
                if (!inet_csk_in_pingpong_mode(sk)) {
                        /* Delayed ACK missed: inflate ATO. */
-                       icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1, icsk->icsk_rto);
+                       icsk->icsk_ack.ato = min_t(u32, icsk->icsk_ack.ato << 1, icsk->icsk_rto);
                } else {
                        /* Delayed ACK missed: leave pingpong mode and
                         * deflate ATO.