9p/client: fix data race on req->status
authorDominique Martinet <asmadeus@codewreck.org>
Mon, 5 Dec 2022 12:39:01 +0000 (21:39 +0900)
committerDominique Martinet <asmadeus@codewreck.org>
Tue, 13 Dec 2022 04:02:15 +0000 (13:02 +0900)
KCSAN reported a race between writing req->status in p9_client_cb and
accessing it in p9_client_rpc's wait_event.

Accesses to req itself are protected by the data barrier (writing req
fields, write barrier, writing status // reading status, read barrier,
reading other req fields), but the status accesses themselves apparently
also must be annotated properly with WRITE_ONCE/READ_ONCE when we
access it without locks.

Follows:
 - error paths writing status in various threads can all notify
p9_client_rpc, so these all also need WRITE_ONCE
 - there's a similar read loop in trans_virtio for zc case that also
needs READ_ONCE
 - other reads in trans_fd should be protected by the trans_fd lock and
lists state machine, as corresponding writers all are within trans_fd
and should be under the same lock. If KCSAN complains on them we likely
will have something else to fix as well, so it's better to leave them
unmarked and look again if required.

Link: https://lkml.kernel.org/r/20221205124756.426350-1-asmadeus@codewreck.org
Reported-by: Naresh Kamboju <naresh.kamboju@linaro.org>
Suggested-by: Marco Elver <elver@google.com>
Acked-by: Marco Elver <elver@google.com>
Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com>
Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
net/9p/client.c
net/9p/trans_fd.c
net/9p/trans_rdma.c
net/9p/trans_virtio.c
net/9p/trans_xen.c

index 7b2a997662d9cc5f79aab8f46ccecafda30849f0..fef6516a0639f0024e7d1a80f234a01a3301894c 100644 (file)
@@ -443,7 +443,7 @@ void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
         * the status change is visible to another thread
         */
        smp_wmb();
-       req->status = status;
+       WRITE_ONCE(req->status, status);
 
        wake_up(&req->wq);
        p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc.tag);
@@ -604,7 +604,7 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
        /* if we haven't received a response for oldreq,
         * remove it from the list
         */
-       if (oldreq->status == REQ_STATUS_SENT) {
+       if (READ_ONCE(oldreq->status) == REQ_STATUS_SENT) {
                if (c->trans_mod->cancelled)
                        c->trans_mod->cancelled(c, oldreq);
        }
@@ -704,7 +704,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
        }
 again:
        /* Wait for the response */
-       err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD);
+       err = wait_event_killable(req->wq,
+                                 READ_ONCE(req->status) >= REQ_STATUS_RCVD);
 
        /* Make sure our req is coherent with regard to updates in other
         * threads - echoes to wmb() in the callback
@@ -718,7 +719,7 @@ again:
                goto again;
        }
 
-       if (req->status == REQ_STATUS_ERROR) {
+       if (READ_ONCE(req->status) == REQ_STATUS_ERROR) {
                p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
                err = req->t_err;
        }
@@ -731,7 +732,7 @@ again:
                        p9_client_flush(c, req);
 
                /* if we received the response anyway, don't signal error */
-               if (req->status == REQ_STATUS_RCVD)
+               if (READ_ONCE(req->status) == REQ_STATUS_RCVD)
                        err = 0;
        }
 recalc_sigpending:
@@ -803,7 +804,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
                if (err != -ERESTARTSYS)
                        goto recalc_sigpending;
        }
-       if (req->status == REQ_STATUS_ERROR) {
+       if (READ_ONCE(req->status) == REQ_STATUS_ERROR) {
                p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
                err = req->t_err;
        }
@@ -816,7 +817,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
                        p9_client_flush(c, req);
 
                /* if we received the response anyway, don't signal error */
-               if (req->status == REQ_STATUS_RCVD)
+               if (READ_ONCE(req->status) == REQ_STATUS_RCVD)
                        err = 0;
        }
 recalc_sigpending:
index 06ec9f7d3318c944af18a9cc4ecddab243f9012a..f8899745571cf7f0c20bfa62f1509f3254f64f8f 100644 (file)
@@ -201,11 +201,11 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
 
        list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
                list_move(&req->req_list, &cancel_list);
-               req->status = REQ_STATUS_ERROR;
+               WRITE_ONCE(req->status, REQ_STATUS_ERROR);
        }
        list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
                list_move(&req->req_list, &cancel_list);
-               req->status = REQ_STATUS_ERROR;
+               WRITE_ONCE(req->status, REQ_STATUS_ERROR);
        }
 
        spin_unlock(&m->req_lock);
@@ -466,7 +466,7 @@ static void p9_write_work(struct work_struct *work)
 
                req = list_entry(m->unsent_req_list.next, struct p9_req_t,
                               req_list);
-               req->status = REQ_STATUS_SENT;
+               WRITE_ONCE(req->status, REQ_STATUS_SENT);
                p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);
                list_move_tail(&req->req_list, &m->req_list);
 
@@ -675,7 +675,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
                return m->err;
 
        spin_lock(&m->req_lock);
-       req->status = REQ_STATUS_UNSENT;
+       WRITE_ONCE(req->status, REQ_STATUS_UNSENT);
        list_add_tail(&req->req_list, &m->unsent_req_list);
        spin_unlock(&m->req_lock);
 
@@ -702,7 +702,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
 
        if (req->status == REQ_STATUS_UNSENT) {
                list_del(&req->req_list);
-               req->status = REQ_STATUS_FLSHD;
+               WRITE_ONCE(req->status, REQ_STATUS_FLSHD);
                p9_req_put(client, req);
                ret = 0;
        }
@@ -731,7 +731,7 @@ static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
         * remove it from the list.
         */
        list_del(&req->req_list);
-       req->status = REQ_STATUS_FLSHD;
+       WRITE_ONCE(req->status, REQ_STATUS_FLSHD);
        spin_unlock(&m->req_lock);
 
        p9_req_put(client, req);
index 33a9ac6f2d552ad1bf2ca7bcdd113734391a9c6a..83f9100d46bff763a2c2d4af6d706a42a6fa5e83 100644 (file)
@@ -506,7 +506,7 @@ dont_need_post_recv:
         * because doing if after could erase the REQ_STATUS_RCVD
         * status in case of a very fast reply.
         */
-       req->status = REQ_STATUS_SENT;
+       WRITE_ONCE(req->status, REQ_STATUS_SENT);
        err = ib_post_send(rdma->qp, &wr, NULL);
        if (err)
                goto send_error;
@@ -516,7 +516,7 @@ dont_need_post_recv:
 
  /* Handle errors that happened during or while preparing the send: */
  send_error:
-       req->status = REQ_STATUS_ERROR;
+       WRITE_ONCE(req->status, REQ_STATUS_ERROR);
        kfree(c);
        p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
 
index 19bccfa0d593e312a70af85e5f39bbe016d8f9fd..3c27ffb781e3e03f05e13e45c23c61af29d617b9 100644 (file)
@@ -262,7 +262,7 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
 
        p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n");
 
-       req->status = REQ_STATUS_SENT;
+       WRITE_ONCE(req->status, REQ_STATUS_SENT);
 req_retry:
        spin_lock_irqsave(&chan->lock, flags);
 
@@ -468,7 +468,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
                        inlen = n;
                }
        }
-       req->status = REQ_STATUS_SENT;
+       WRITE_ONCE(req->status, REQ_STATUS_SENT);
 req_retry_pinned:
        spin_lock_irqsave(&chan->lock, flags);
 
@@ -531,9 +531,10 @@ req_retry_pinned:
        spin_unlock_irqrestore(&chan->lock, flags);
        kicked = 1;
        p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");
-       err = wait_event_killable(req->wq, req->status >= REQ_STATUS_RCVD);
+       err = wait_event_killable(req->wq,
+                                 READ_ONCE(req->status) >= REQ_STATUS_RCVD);
        // RERROR needs reply (== error string) in static data
-       if (req->status == REQ_STATUS_RCVD &&
+       if (READ_ONCE(req->status) == REQ_STATUS_RCVD &&
            unlikely(req->rc.sdata[4] == P9_RERROR))
                handle_rerror(req, in_hdr_len, offs, in_pages);
 
index de2d2ca8819a1952a5ab635ebe4b57047ce11d71..9630b1275557978dd45a5e4a0322192bbc30af42 100644 (file)
@@ -157,7 +157,7 @@ again:
                              &masked_prod, masked_cons,
                              XEN_9PFS_RING_SIZE(ring));
 
-       p9_req->status = REQ_STATUS_SENT;
+       WRITE_ONCE(p9_req->status, REQ_STATUS_SENT);
        virt_wmb();                     /* write ring before updating pointer */
        prod += size;
        ring->intf->out_prod = prod;
@@ -212,7 +212,7 @@ static void p9_xen_response(struct work_struct *work)
                        dev_warn(&priv->dev->dev,
                                 "requested packet size too big: %d for tag %d with capacity %zd\n",
                                 h.size, h.tag, req->rc.capacity);
-                       req->status = REQ_STATUS_ERROR;
+                       WRITE_ONCE(req->status, REQ_STATUS_ERROR);
                        goto recv_error;
                }