tcp: annotate data-races around tp->window_clamp
author Eric Dumazet <edumazet@google.com>
Thu, 4 Apr 2024 11:42:31 +0000 (11:42 +0000)
committer Jakub Kicinski <kuba@kernel.org>
Sat, 6 Apr 2024 05:32:37 +0000 (22:32 -0700)
tp->window_clamp can be read locklessly, add READ_ONCE()
and WRITE_ONCE() annotations.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jason Xing <kerneljasonxing@gmail.com>
Link: https://lore.kernel.org/r/20240404114231.2195171-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
net/ipv4/syncookies.c
net/ipv4/tcp.c
net/ipv4/tcp_input.c
net/ipv4/tcp_output.c
net/ipv6/syncookies.c
net/mptcp/protocol.c
net/mptcp/sockopt.c

index 500f665f98cbce4a3d681f8e39ecd368fe4013b1..b61d36810fe3fd62b1e5c5885bbaf20185f1abf0 100644 (file)
@@ -462,7 +462,8 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb)
        }
 
        /* Try to redo what tcp_v4_send_synack did. */
-       req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW);
+       req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :
+                               dst_metric(&rt->dst, RTAX_WINDOW);
        /* limit the window selection if the user enforce a smaller rx buffer */
        full_space = tcp_full_space(sk);
        if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
index e767721b3a588b5d56567ae7badf5dffcd35a76a..92ee60492314a1483cfbfa2f73d32fcad5632773 100644 (file)
@@ -1721,7 +1721,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
        space = tcp_space_from_win(sk, val);
        if (space > sk->sk_rcvbuf) {
                WRITE_ONCE(sk->sk_rcvbuf, space);
-               tcp_sk(sk)->window_clamp = val;
+               WRITE_ONCE(tcp_sk(sk)->window_clamp, val);
        }
        return 0;
 }
@@ -3379,7 +3379,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
        if (!val) {
                if (sk->sk_state != TCP_CLOSE)
                        return -EINVAL;
-               tp->window_clamp = 0;
+               WRITE_ONCE(tp->window_clamp, 0);
        } else {
                u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp;
                u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
@@ -3388,7 +3388,7 @@ int tcp_set_window_clamp(struct sock *sk, int val)
                if (new_window_clamp == old_window_clamp)
                        return 0;
 
-               tp->window_clamp = new_window_clamp;
+               WRITE_ONCE(tp->window_clamp, new_window_clamp);
                if (new_window_clamp < old_window_clamp) {
                        /* need to apply the reserved mem provisioning only
                         * when shrinking the window clamp
@@ -4057,7 +4057,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
                                      TCP_RTO_MAX / HZ);
                break;
        case TCP_WINDOW_CLAMP:
-               val = tp->window_clamp;
+               val = READ_ONCE(tp->window_clamp);
                break;
        case TCP_INFO: {
                struct tcp_info info;
index 1b6cd384001202df5f8e8e8c73adff0db89ece63..8d44ab5671eacd4bc06647c7cca387a79e346618 100644 (file)
@@ -563,19 +563,20 @@ static void tcp_init_buffer_space(struct sock *sk)
        maxwin = tcp_full_space(sk);
 
        if (tp->window_clamp >= maxwin) {
-               tp->window_clamp = maxwin;
+               WRITE_ONCE(tp->window_clamp, maxwin);
 
                if (tcp_app_win && maxwin > 4 * tp->advmss)
-                       tp->window_clamp = max(maxwin -
-                                              (maxwin >> tcp_app_win),
-                                              4 * tp->advmss);
+                       WRITE_ONCE(tp->window_clamp,
+                                  max(maxwin - (maxwin >> tcp_app_win),
+                                      4 * tp->advmss));
        }
 
        /* Force reservation of one segment. */
        if (tcp_app_win &&
            tp->window_clamp > 2 * tp->advmss &&
            tp->window_clamp + tp->advmss > maxwin)
-               tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss);
+               WRITE_ONCE(tp->window_clamp,
+                          max(2 * tp->advmss, maxwin - tp->advmss));
 
        tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp);
        tp->snd_cwnd_stamp = tcp_jiffies32;
@@ -773,7 +774,8 @@ void tcp_rcv_space_adjust(struct sock *sk)
                        WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
 
                        /* Make the window clamp follow along.  */
-                       tp->window_clamp = tcp_win_from_space(sk, rcvbuf);
+                       WRITE_ONCE(tp->window_clamp,
+                                  tcp_win_from_space(sk, rcvbuf));
                }
        }
        tp->rcvq_space.space = copied;
@@ -6426,7 +6428,8 @@ consume:
 
                if (!tp->rx_opt.wscale_ok) {
                        tp->rx_opt.snd_wscale = tp->rx_opt.rcv_wscale = 0;
-                       tp->window_clamp = min(tp->window_clamp, 65535U);
+                       WRITE_ONCE(tp->window_clamp,
+                                  min(tp->window_clamp, 65535U));
                }
 
                if (tp->rx_opt.saw_tstamp) {
index e3167ad965676facaacd8f82848c52cf966f97c3..9282fafc0e6109f3ac86d1641740f24588b2d75d 100644 (file)
@@ -203,16 +203,17 @@ static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt)
  * This MUST be enforced by all callers.
  */
 void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
-                              __u32 *rcv_wnd, __u32 *window_clamp,
+                              __u32 *rcv_wnd, __u32 *__window_clamp,
                               int wscale_ok, __u8 *rcv_wscale,
                               __u32 init_rcv_wnd)
 {
        unsigned int space = (__space < 0 ? 0 : __space);
+       u32 window_clamp = READ_ONCE(*__window_clamp);
 
        /* If no clamp set the clamp to the max possible scaled window */
-       if (*window_clamp == 0)
-               (*window_clamp) = (U16_MAX << TCP_MAX_WSCALE);
-       space = min(*window_clamp, space);
+       if (window_clamp == 0)
+               window_clamp = (U16_MAX << TCP_MAX_WSCALE);
+       space = min(window_clamp, space);
 
        /* Quantize space offering to a multiple of mss if possible. */
        if (space > mss)
@@ -239,12 +240,13 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss,
                /* Set window scaling on max possible window */
                space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
                space = max_t(u32, space, READ_ONCE(sysctl_rmem_max));
-               space = min_t(u32, space, *window_clamp);
+               space = min_t(u32, space, window_clamp);
                *rcv_wscale = clamp_t(int, ilog2(space) - 15,
                                      0, TCP_MAX_WSCALE);
        }
        /* Set the clamp no higher than max representable value */
-       (*window_clamp) = min_t(__u32, U16_MAX << (*rcv_wscale), *window_clamp);
+       WRITE_ONCE(*__window_clamp,
+                  min_t(__u32, U16_MAX << (*rcv_wscale), window_clamp));
 }
 EXPORT_SYMBOL(tcp_select_initial_window);
 
@@ -3855,7 +3857,7 @@ static void tcp_connect_init(struct sock *sk)
        tcp_ca_dst_init(sk, dst);
 
        if (!tp->window_clamp)
-               tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
+               WRITE_ONCE(tp->window_clamp, dst_metric(dst, RTAX_WINDOW));
        tp->advmss = tcp_mss_clamp(tp, dst_metric_advmss(dst));
 
        tcp_initialize_rcv_mss(sk);
@@ -3863,7 +3865,7 @@ static void tcp_connect_init(struct sock *sk)
        /* limit the window selection if the user enforce a smaller rx buffer */
        if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
            (tp->window_clamp > tcp_full_space(sk) || tp->window_clamp == 0))
-               tp->window_clamp = tcp_full_space(sk);
+               WRITE_ONCE(tp->window_clamp, tcp_full_space(sk));
 
        rcv_wnd = tcp_rwnd_init_bpf(sk);
        if (rcv_wnd == 0)
index 6d8286c299c9d139938ef6751d9958c80d3031e9..bfad1e89b6a6bb99c28b9ef14c142a6c4aeae54b 100644 (file)
@@ -246,7 +246,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
                }
        }
 
-       req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW);
+       req->rsk_window_clamp = READ_ONCE(tp->window_clamp) ? :dst_metric(dst, RTAX_WINDOW);
        /* limit the window selection if the user enforce a smaller rx buffer */
        full_space = tcp_full_space(sk);
        if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
index 7e74b812e366ae311f52615e9b304d6fe8b924b8..995b53cd021c84e821f242ce0a679e2bff9c1937 100644 (file)
@@ -2056,7 +2056,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
                                ssk = mptcp_subflow_tcp_sock(subflow);
                                slow = lock_sock_fast(ssk);
                                WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf);
-                               tcp_sk(ssk)->window_clamp = window_clamp;
+                               WRITE_ONCE(tcp_sk(ssk)->window_clamp, window_clamp);
                                tcp_cleanup_rbuf(ssk, 1);
                                unlock_sock_fast(ssk, slow);
                        }
index 73fdf423de44eef5d2c3085515ad475bf63fb718..9d5d42a77bcc355468b79fd7492c92a3d7968304 100644 (file)
@@ -1523,7 +1523,7 @@ int mptcp_set_rcvlowat(struct sock *sk, int val)
 
                slow = lock_sock_fast(ssk);
                WRITE_ONCE(ssk->sk_rcvbuf, space);
-               tcp_sk(ssk)->window_clamp = val;
+               WRITE_ONCE(tcp_sk(ssk)->window_clamp, val);
                unlock_sock_fast(ssk, slow);
        }
        return 0;