NFS: Fix another fsync() issue after a server reboot
author Trond Myklebust <trond.myklebust@hammerspace.com>
Sat, 13 Aug 2022 12:22:25 +0000 (08:22 -0400)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Thu, 15 Sep 2022 09:30:03 +0000 (11:30 +0200)
[ Upstream commit 67f4b5dc49913abcdb5cc736e73674e2f352f81d ]

Currently, when the writeback code detects a server reboot, it redirties
any pages that were not committed to disk, and it sets the flag
NFS_CONTEXT_RESEND_WRITES in the nfs_open_context of the file descriptor
that dirtied the file. While this allows the file descriptor in question
to redrive its own writes, it violates the fsync() requirement that we
should be synchronising all writes to disk.
While the problem is infrequent, we do see corner cases where an
untimely server reboot causes the fsync() call to abandon its attempt to
sync data to disk, causing data corruption issues due to missed error
conditions or similar.

In order to tighten up the client's ability to deal with this situation
without introducing livelocks, add a counter that records the number of
times pages are redirtied due to a server reboot-like condition, and use
that in fsync() to redrive the sync to disk.

Fixes: 2197e9b06c22 ("NFS: Fix up fsync() when the server rebooted")
Cc: stable@vger.kernel.org
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
fs/nfs/file.c
fs/nfs/inode.c
fs/nfs/write.c
include/linux/nfs_fs.h

index a8693cc50c7cadef9fac1a784afb0e50ad0c7cde..ad5114e480097a78c560789cddbc9348278ccd0c 100644 (file)
@@ -223,8 +223,10 @@ nfs_file_fsync_commit(struct file *file, int datasync)
 int
 nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
-       struct nfs_open_context *ctx = nfs_file_open_context(file);
        struct inode *inode = file_inode(file);
+       struct nfs_inode *nfsi = NFS_I(inode);
+       long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages);
+       long nredirtied;
        int ret;
 
        trace_nfs_fsync_enter(inode);
@@ -239,15 +241,10 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
                ret = pnfs_sync_inode(inode, !!datasync);
                if (ret != 0)
                        break;
-               if (!test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags))
+               nredirtied = atomic_long_read(&nfsi->redirtied_pages);
+               if (nredirtied == save_nredirtied)
                        break;
-               /*
-                * If nfs_file_fsync_commit detected a server reboot, then
-                * resend all dirty pages that might have been covered by
-                * the NFS_CONTEXT_RESEND_WRITES flag
-                */
-               start = 0;
-               end = LLONG_MAX;
+               save_nredirtied = nredirtied;
        }
 
        trace_nfs_fsync_exit(inode, ret);
index dc057ab6b30d103bbe77cb62eeac0fbcc2e47de4..e4524635a129a080823d018f31ad200114eb3693 100644 (file)
@@ -434,6 +434,7 @@ nfs_ilookup(struct super_block *sb, struct nfs_fattr *fattr, struct nfs_fh *fh)
 static void nfs_inode_init_regular(struct nfs_inode *nfsi)
 {
        atomic_long_set(&nfsi->nrequests, 0);
+       atomic_long_set(&nfsi->redirtied_pages, 0);
        INIT_LIST_HEAD(&nfsi->commit_info.list);
        atomic_long_set(&nfsi->commit_info.ncommit, 0);
        atomic_set(&nfsi->commit_info.rpcs_out, 0);
index cdb29fd235492b584e0f36f9bb8f0b3ab89ce5fa..be70874bc329262815b2e65fee1f18f492e330d9 100644 (file)
@@ -1394,10 +1394,12 @@ static void nfs_initiate_write(struct nfs_pgio_header *hdr,
  */
 static void nfs_redirty_request(struct nfs_page *req)
 {
+       struct nfs_inode *nfsi = NFS_I(page_file_mapping(req->wb_page)->host);
+
        /* Bump the transmission count */
        req->wb_nio++;
        nfs_mark_request_dirty(req);
-       set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
+       atomic_long_inc(&nfsi->redirtied_pages);
        nfs_end_page_writeback(req);
        nfs_release_request(req);
 }
@@ -1870,7 +1872,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
                /* We have a mismatch. Write the page again */
                dprintk_cont(" mismatch\n");
                nfs_mark_request_dirty(req);
-               set_bit(NFS_CONTEXT_RESEND_WRITES, &nfs_req_openctx(req)->flags);
+               atomic_long_inc(&NFS_I(data->inode)->redirtied_pages);
        next:
                nfs_unlock_and_release_request(req);
                /* Latency breaker */
index d0855352cd6fcad2f28557708f909c6de2aa5575..71467d661fb6611ec74304aaaa0b5fd1678f80e6 100644 (file)
@@ -180,6 +180,7 @@ struct nfs_inode {
                /* Regular file */
                struct {
                        atomic_long_t   nrequests;
+                       atomic_long_t   redirtied_pages;
                        struct nfs_mds_commit_info commit_info;
                        struct mutex    commit_mutex;
                };