SUNRPC don't update timeout value on connection reset
authorOlga Kornievskaia <olga.kornievskaia@gmail.com>
Wed, 15 Jul 2020 17:17:52 +0000 (13:17 -0400)
committerTrond Myklebust <trond.myklebust@hammerspace.com>
Wed, 5 Aug 2020 03:17:11 +0000 (23:17 -0400)
Current behaviour: every time a v3 operation is re-sent to the server
we update (double) the timeout. No distinction is made as to whether
or not the previous timer had expired before the re-send happened.

Here's the scenario:
1. Client sends a v3 operation
2. Server RST-s the connection (prior to the timeout) (e.g., the
connection is immediately reset)
3. Client re-sends a v3 operation but the timeout is now 120sec.

As a result, an application sees a 2-minute pause before a retry in
case the server again does not reply.

Instead, this patch proposes to keep track of when the minor timeout
should happen and, if it has not happened yet, to leave the timeout
unchanged. The value is updated based on the previous value to make
timeouts predictable.

Signed-off-by: Olga Kornievskaia <kolga@netapp.com>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
include/linux/sunrpc/xprt.h
net/sunrpc/xprt.c

index e64bd8222f5508223ef5079f59c29a6eba34144f..a603d48d2b2cd55095e2467fca40da52ab26a6b6 100644 (file)
@@ -101,6 +101,7 @@ struct rpc_rqst {
                                                         * used in the softirq.
                                                         */
        unsigned long           rq_majortimeo;  /* major timeout alarm */
+       unsigned long           rq_minortimeo;  /* minor timeout alarm */
        unsigned long           rq_timeout;     /* Current timeout value */
        ktime_t                 rq_rtt;         /* round-trip time */
        unsigned int            rq_retries;     /* # of retries */
index d5cc5db9dbf39c3dfd888c87b6196daa182dd73f..6ba9d58426291c52f8dea999edc62d49427bc2b8 100644 (file)
@@ -607,6 +607,11 @@ static void xprt_reset_majortimeo(struct rpc_rqst *req)
        req->rq_majortimeo += xprt_calc_majortimeo(req);
 }
 
+static void xprt_reset_minortimeo(struct rpc_rqst *req)
+{
+       req->rq_minortimeo += req->rq_timeout;
+}
+
 static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
 {
        unsigned long time_init;
@@ -618,6 +623,7 @@ static void xprt_init_majortimeo(struct rpc_task *task, struct rpc_rqst *req)
                time_init = xprt_abs_ktime_to_jiffies(task->tk_start);
        req->rq_timeout = task->tk_client->cl_timeout->to_initval;
        req->rq_majortimeo = time_init + xprt_calc_majortimeo(req);
+       req->rq_minortimeo = time_init + req->rq_timeout;
 }
 
 /**
@@ -631,6 +637,8 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
        const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
        int status = 0;
 
+       if (time_before(jiffies, req->rq_minortimeo))
+               return status;
        if (time_before(jiffies, req->rq_majortimeo)) {
                if (to->to_exponential)
                        req->rq_timeout <<= 1;
@@ -649,6 +657,7 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
                spin_unlock(&xprt->transport_lock);
                status = -ETIMEDOUT;
        }
+       xprt_reset_minortimeo(req);
 
        if (req->rq_timeout == 0) {
                printk(KERN_WARNING "xprt_adjust_timeout: rq_timeout = 0!\n");