mptcp: trigger msk processing even for OoO data
authorPaolo Abeni <pabeni@redhat.com>
Mon, 14 Sep 2020 08:01:09 +0000 (10:01 +0200)
committerDavid S. Miller <davem@davemloft.net>
Mon, 14 Sep 2020 20:28:02 +0000 (13:28 -0700)
This is a prerequisite to allow receiving data from multiple
subflows without re-injection.

Instead of dropping the OoO - "future" data in
subflow_check_data_avail(), call into __mptcp_move_skbs()
and let the msk drop that.

To avoid code duplication factor out the mptcp_subflow_discard_data()
helper.

Note that __mptcp_move_skbs() can now find multiple subflows
with data avail (comprising to-be-discarded data), so must
update the byte counter incrementally.

v1 -> v2:
 - fix checkpatch issues (unsigned -> unsigned int)

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/mptcp/protocol.c
net/mptcp/protocol.h
net/mptcp/subflow.c

index 854a8b3b9ecd2d1ad9cddca1a55718ee5a80c4e7..95573c6f7762a07fa425dc8ef11254f3749167de 100644 (file)
@@ -167,7 +167,8 @@ static bool mptcp_subflow_dsn_valid(const struct mptcp_sock *msk,
                return true;
 
        subflow->data_avail = 0;
-       return mptcp_subflow_data_available(ssk);
+       mptcp_subflow_data_available(ssk);
+       return subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL;
 }
 
 static void mptcp_check_data_fin_ack(struct sock *sk)
@@ -313,11 +314,18 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
        struct tcp_sock *tp;
        bool done = false;
 
-       if (!mptcp_subflow_dsn_valid(msk, ssk)) {
-               *bytes = 0;
+       pr_debug("msk=%p ssk=%p data avail=%d valid=%d empty=%d",
+                msk, ssk, subflow->data_avail,
+                mptcp_subflow_dsn_valid(msk, ssk),
+                !skb_peek(&ssk->sk_receive_queue));
+       if (subflow->data_avail == MPTCP_SUBFLOW_OOO_DATA) {
+               mptcp_subflow_discard_data(ssk, subflow->map_data_len);
                return false;
        }
 
+       if (!mptcp_subflow_dsn_valid(msk, ssk))
+               return false;
+
        tp = tcp_sk(ssk);
        do {
                u32 map_remaining, offset;
@@ -376,7 +384,7 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk,
                }
        } while (more_data_avail);
 
-       *bytes = moved;
+       *bytes += moved;
 
        /* If the moves have caught up with the DATA_FIN sequence number
         * it's time to ack the DATA_FIN and change socket state, but
@@ -415,9 +423,17 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk)
 
 void mptcp_data_ready(struct sock *sk, struct sock *ssk)
 {
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
        struct mptcp_sock *msk = mptcp_sk(sk);
+       bool wake;
 
-       set_bit(MPTCP_DATA_READY, &msk->flags);
+       /* move_skbs_to_msk below can legitly clear the data_avail flag,
+        * but we will need later to properly woke the reader, cache its
+        * value
+        */
+       wake = subflow->data_avail == MPTCP_SUBFLOW_DATA_AVAIL;
+       if (wake)
+               set_bit(MPTCP_DATA_READY, &msk->flags);
 
        if (atomic_read(&sk->sk_rmem_alloc) < READ_ONCE(sk->sk_rcvbuf) &&
            move_skbs_to_msk(msk, ssk))
@@ -438,7 +454,8 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
                move_skbs_to_msk(msk, ssk);
        }
 wake:
-       sk->sk_data_ready(sk);
+       if (wake)
+               sk->sk_data_ready(sk);
 }
 
 static void __mptcp_flush_join_list(struct mptcp_sock *msk)
@@ -1281,6 +1298,9 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
                set_bit(MPTCP_DATA_READY, &msk->flags);
        }
 out_err:
+       pr_debug("msk=%p data_ready=%d rx queue empty=%d copied=%d",
+                msk, test_bit(MPTCP_DATA_READY, &msk->flags),
+                skb_queue_empty(&sk->sk_receive_queue), copied);
        mptcp_rcv_space_adjust(msk, copied);
 
        release_sock(sk);
@@ -2308,6 +2328,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
        sock_poll_wait(file, sock, wait);
 
        state = inet_sk_state_load(sk);
+       pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
        if (state == TCP_LISTEN)
                return mptcp_check_readable(msk);
 
index 60b27d44c1846c10f6d1f7020678147795cab974..794997852c5e1d476d98e330030da57ae47614c2 100644 (file)
@@ -268,6 +268,12 @@ mptcp_subflow_rsk(const struct request_sock *rsk)
        return (struct mptcp_subflow_request_sock *)rsk;
 }
 
+enum mptcp_data_avail {
+       MPTCP_SUBFLOW_NODATA,
+       MPTCP_SUBFLOW_DATA_AVAIL,
+       MPTCP_SUBFLOW_OOO_DATA
+};
+
 /* MPTCP subflow context */
 struct mptcp_subflow_context {
        struct  list_head node;/* conn_list of subflows */
@@ -292,10 +298,10 @@ struct mptcp_subflow_context {
                map_valid : 1,
                mpc_map : 1,
                backup : 1,
-               data_avail : 1,
                rx_eof : 1,
                use_64bit_ack : 1, /* Set when we received a 64-bit DSN */
                can_ack : 1;        /* only after processing the remote a key */
+       enum mptcp_data_avail data_avail;
        u32     remote_nonce;
        u64     thmac;
        u32     local_nonce;
@@ -347,6 +353,7 @@ int mptcp_is_enabled(struct net *net);
 void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow,
                                     struct mptcp_options_received *mp_opt);
 bool mptcp_subflow_data_available(struct sock *sk);
+int mptcp_subflow_discard_data(struct sock *sk, unsigned int limit);
 void __init mptcp_subflow_init(void);
 
 /* called with sk socket lock held */
index 53b455c3c22951732f9cae2b048b7f104df77279..071ee54b3c9fe359dd711794ea08f3db1fbcd031 100644 (file)
@@ -816,6 +816,40 @@ static int subflow_read_actor(read_descriptor_t *desc,
        return copy_len;
 }
 
+int mptcp_subflow_discard_data(struct sock *ssk, unsigned int limit)
+{
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+       u32 map_remaining;
+       size_t delta;
+
+       map_remaining = subflow->map_data_len -
+                       mptcp_subflow_get_map_offset(subflow);
+       delta = min_t(size_t, limit, map_remaining);
+
+       /* discard mapped data */
+       pr_debug("discarding %zu bytes, current map len=%d", delta,
+                map_remaining);
+       if (delta) {
+               read_descriptor_t desc = {
+                       .count = delta,
+               };
+               int ret;
+
+               ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
+               if (ret < 0) {
+                       ssk->sk_err = -ret;
+                       return ret;
+               }
+               if (ret < delta)
+                       return 0;
+               if (delta == map_remaining) {
+                       subflow->data_avail = 0;
+                       subflow->map_valid = 0;
+               }
+       }
+       return 0;
+}
+
 static bool subflow_check_data_avail(struct sock *ssk)
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
@@ -832,8 +866,6 @@ static bool subflow_check_data_avail(struct sock *ssk)
 
        msk = mptcp_sk(subflow->conn);
        for (;;) {
-               u32 map_remaining;
-               size_t delta;
                u64 ack_seq;
                u64 old_ack;
 
@@ -851,7 +883,7 @@ static bool subflow_check_data_avail(struct sock *ssk)
                        subflow->map_data_len = skb->len;
                        subflow->map_subflow_seq = tcp_sk(ssk)->copied_seq -
                                                   subflow->ssn_offset;
-                       subflow->data_avail = 1;
+                       subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
                        return true;
                }
 
@@ -880,43 +912,19 @@ static bool subflow_check_data_avail(struct sock *ssk)
                pr_debug("msk ack_seq=%llx subflow ack_seq=%llx", old_ack,
                         ack_seq);
                if (ack_seq == old_ack) {
-                       subflow->data_avail = 1;
+                       subflow->data_avail = MPTCP_SUBFLOW_DATA_AVAIL;
+                       break;
+               } else if (after64(ack_seq, old_ack)) {
+                       subflow->data_avail = MPTCP_SUBFLOW_OOO_DATA;
                        break;
                }
 
                /* only accept in-sequence mapping. Old values are spurious
-                * retransmission; we can hit "future" values on active backup
-                * subflow switch, we relay on retransmissions to get
-                * in-sequence data.
-                * Cuncurrent subflows support will require subflow data
-                * reordering
+                * retransmission
                 */
-               map_remaining = subflow->map_data_len -
-                               mptcp_subflow_get_map_offset(subflow);
-               if (before64(ack_seq, old_ack))
-                       delta = min_t(size_t, old_ack - ack_seq, map_remaining);
-               else
-                       delta = min_t(size_t, ack_seq - old_ack, map_remaining);
-
-               /* discard mapped data */
-               pr_debug("discarding %zu bytes, current map len=%d", delta,
-                        map_remaining);
-               if (delta) {
-                       read_descriptor_t desc = {
-                               .count = delta,
-                       };
-                       int ret;
-
-                       ret = tcp_read_sock(ssk, &desc, subflow_read_actor);
-                       if (ret < 0) {
-                               ssk->sk_err = -ret;
-                               goto fatal;
-                       }
-                       if (ret < delta)
-                               return false;
-                       if (delta == map_remaining)
-                               subflow->map_valid = 0;
-               }
+               if (mptcp_subflow_discard_data(ssk, old_ack - ack_seq))
+                       goto fatal;
+               return false;
        }
        return true;