bcachefs: Switch fsync to use bi_journal_seq
author: Kent Overstreet <kent.overstreet@gmail.com>
Fri, 5 Nov 2021 19:17:13 +0000 (15:17 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:16 +0000 (17:09 -0400)
Now that we're recording in each inode the journal sequence number of
the most recent update, fsync becomes a lot simpler and we can delete
all the plumbing for ei_journal_seq.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/acl.c
fs/bcachefs/fs-io.c
fs/bcachefs/fs.c
fs/bcachefs/fs.h
fs/bcachefs/io.c
fs/bcachefs/io.h
fs/bcachefs/reflink.c
fs/bcachefs/reflink.h
fs/bcachefs/xattr.c

index 2afa15b267009cd28a012c4e15b7a586437bbfc9..51a0b48a5313800310bb9a6c86b768c4d9644c58 100644 (file)
@@ -330,8 +330,7 @@ retry:
        inode_u.bi_mode         = mode;
 
        ret =   bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
-               bch2_trans_commit(&trans, NULL,
-                                 &inode->ei_journal_seq, 0);
+               bch2_trans_commit(&trans, NULL, NULL, 0);
 btree_err:
        bch2_trans_iter_exit(&trans, &inode_iter);
 
index f4c97fc0e3d1080ac72e14b1788328573f51afb6..7de6b7a7aa6081413b7e604034c27fbef736e59c 100644 (file)
@@ -1096,7 +1096,6 @@ static void bch2_writepage_io_alloc(struct bch_fs *c,
        op                      = &w->io->op;
        bch2_write_op_init(op, c, w->opts);
        op->target              = w->opts.foreground_target;
-       op_journal_seq_set(op, &inode->ei_journal_seq);
        op->nr_replicas         = nr_replicas;
        op->res.nr_replicas     = nr_replicas;
        op->write_point         = writepoint_hashed(inode->ei_last_dirtied);
@@ -1947,7 +1946,6 @@ static long bch2_dio_write_loop(struct dio_write *dio)
                bch2_write_op_init(&dio->op, c, io_opts(c, &inode->ei_inode));
                dio->op.end_io          = bch2_dio_write_loop_async;
                dio->op.target          = dio->op.opts.foreground_target;
-               op_journal_seq_set(&dio->op, &inode->ei_journal_seq);
                dio->op.write_point     = writepoint_hashed((unsigned long) current);
                dio->op.nr_replicas     = dio->op.opts.data_replicas;
                dio->op.subvol          = inode->ei_subvol;
@@ -2164,29 +2162,36 @@ unlock:
 
 /* fsync: */
 
-int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+/*
+ * inode->ei_inode.bi_journal_seq won't be up to date since it's set in an
+ * insert trigger: look up the btree inode instead
+ */
+static int bch2_flush_inode(struct bch_fs *c, subvol_inum inum)
 {
-       struct bch_inode_info *inode = file_bch_inode(file);
-       struct bch_fs *c = inode->v.i_sb->s_fs_info;
-       int ret, ret2;
+       struct bch_inode_unpacked inode;
+       int ret;
 
-       ret = file_write_and_wait_range(file, start, end);
+       if (c->opts.journal_flush_disabled)
+               return 0;
+
+       ret = bch2_inode_find_by_inum(c, inum, &inode);
        if (ret)
                return ret;
 
-       if (datasync && !(inode->v.i_state & I_DIRTY_DATASYNC))
-               goto out;
+       return bch2_journal_flush_seq(&c->journal, inode.bi_journal_seq);
+}
 
-       ret = sync_inode_metadata(&inode->v, 1);
-       if (ret)
-               return ret;
-out:
-       if (!c->opts.journal_flush_disabled)
-               ret = bch2_journal_flush_seq(&c->journal,
-                                            inode->ei_journal_seq);
-       ret2 = file_check_and_advance_wb_err(file);
+int bch2_fsync(struct file *file, loff_t start, loff_t end, int datasync)
+{
+       struct bch_inode_info *inode = file_bch_inode(file);
+       struct bch_fs *c = inode->v.i_sb->s_fs_info;
+       int ret, ret2, ret3;
+
+       ret = file_write_and_wait_range(file, start, end);
+       ret2 = sync_inode_metadata(&inode->v, 1);
+       ret3 = bch2_flush_inode(c, inode_inum(inode));
 
-       return ret ?: ret2;
+       return ret ?: ret2 ?: ret3;
 }
 
 /* truncate: */
@@ -2448,7 +2453,7 @@ int bch2_truncate(struct mnt_idmap *idmap,
 
        ret = bch2_fpunch(c, inode_inum(inode),
                        round_up(iattr->ia_size, block_bytes(c)) >> 9,
-                       U64_MAX, &inode->ei_journal_seq, &i_sectors_delta);
+                       U64_MAX, &i_sectors_delta);
        i_sectors_acct(c, inode, NULL, i_sectors_delta);
 
        if (unlikely(ret))
@@ -2508,7 +2513,6 @@ static long bchfs_fpunch(struct bch_inode_info *inode, loff_t offset, loff_t len
 
                ret = bch2_fpunch(c, inode_inum(inode),
                                  discard_start, discard_end,
-                                 &inode->ei_journal_seq,
                                  &i_sectors_delta);
                i_sectors_acct(c, inode, NULL, i_sectors_delta);
        }
@@ -2587,7 +2591,6 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
 
                ret = bch2_fpunch(c, inode_inum(inode),
                                  offset >> 9, (offset + len) >> 9,
-                                 &inode->ei_journal_seq,
                                  &i_sectors_delta);
                i_sectors_acct(c, inode, NULL, i_sectors_delta);
 
@@ -2691,8 +2694,7 @@ reassemble:
                ret =   bch2_btree_iter_traverse(&del) ?:
                        bch2_trans_update(&trans, &del, &delete, trigger_flags) ?:
                        bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?:
-                       bch2_trans_commit(&trans, &disk_res,
-                                         &inode->ei_journal_seq,
+                       bch2_trans_commit(&trans, &disk_res, NULL,
                                          BTREE_INSERT_NOFAIL);
                bch2_disk_reservation_put(c, &disk_res);
 
@@ -2803,7 +2805,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
 
                ret = bch2_extent_update(&trans, inode_inum(inode), &iter,
                                         &reservation.k_i,
-                               &disk_res, &inode->ei_journal_seq,
+                               &disk_res, NULL,
                                0, &i_sectors_delta, true);
                i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
 bkey_err:
@@ -3003,7 +3005,6 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
                               inode_inum(dst), pos_dst >> 9,
                               inode_inum(src), pos_src >> 9,
                               aligned_len >> 9,
-                              &dst->ei_journal_seq,
                               pos_dst + len, &i_sectors_delta);
        if (ret < 0)
                goto err;
@@ -3021,10 +3022,9 @@ loff_t bch2_remap_file_range(struct file *file_src, loff_t pos_src,
                i_size_write(&dst->v, pos_dst + ret);
        spin_unlock(&dst->v.i_lock);
 
-       if (((file_dst->f_flags & (__O_SYNC | O_DSYNC)) ||
-            IS_SYNC(file_inode(file_dst))) &&
-           !c->opts.journal_flush_disabled)
-               ret = bch2_journal_flush_seq(&c->journal, dst->ei_journal_seq);
+       if ((file_dst->f_flags & (__O_SYNC | O_DSYNC)) ||
+           IS_SYNC(file_inode(file_dst)))
+               ret = bch2_flush_inode(c, inode_inum(dst));
 err:
        bch2_unlock_inodes(INODE_LOCK|INODE_PAGECACHE_BLOCK, src, dst);
 
index 12178bd15c344ba5dcc0121e3735de7e722a9c1b..92919b16f2f51ce6b664b0d83f479b32aee37bc8 100644 (file)
@@ -41,25 +41,6 @@ static void bch2_vfs_inode_init(struct bch_fs *, subvol_inum,
                                struct bch_inode_info *,
                                struct bch_inode_unpacked *);
 
-static void journal_seq_copy(struct bch_fs *c,
-                            struct bch_inode_info *dst,
-                            u64 journal_seq)
-{
-       /*
-        * atomic64_cmpxchg has a fallback for archs that don't support it,
-        * cmpxchg does not:
-        */
-       atomic64_t *dst_seq = (void *) &dst->ei_journal_seq;
-       u64 old, v = READ_ONCE(dst->ei_journal_seq);
-
-       do {
-               old = v;
-
-               if (old >= journal_seq)
-                       break;
-       } while ((v = atomic64_cmpxchg(dst_seq, old, journal_seq)) != old);
-}
-
 static void __pagecache_lock_put(struct pagecache_lock *lock, long i)
 {
        BUG_ON(atomic_long_read(&lock->v) == 0);
@@ -152,9 +133,7 @@ retry:
                                BTREE_ITER_INTENT) ?:
                (set ? set(inode, &inode_u, p) : 0) ?:
                bch2_inode_write(&trans, &iter, &inode_u) ?:
-               bch2_trans_commit(&trans, NULL,
-                                 &inode->ei_journal_seq,
-                                 BTREE_INSERT_NOFAIL);
+               bch2_trans_commit(&trans, NULL, NULL, BTREE_INSERT_NOFAIL);
 
        /*
         * the btree node lock protects inode->ei_inode, not ei_update_lock;
@@ -329,7 +308,6 @@ err_before_quota:
        if (!(flags & BCH_CREATE_TMPFILE)) {
                bch2_inode_update_after_write(c, dir, &dir_u,
                                              ATTR_MTIME|ATTR_CTIME);
-               journal_seq_copy(c, dir, journal_seq);
                mutex_unlock(&dir->ei_update_lock);
        }
 
@@ -337,7 +315,6 @@ err_before_quota:
        inum.inum = inode_u.bi_inum;
 
        bch2_vfs_inode_init(c, inum, inode, &inode_u);
-       journal_seq_copy(c, inode, journal_seq);
 
        set_cached_acl(&inode->v, ACL_TYPE_ACCESS, acl);
        set_cached_acl(&inode->v, ACL_TYPE_DEFAULT, default_acl);
@@ -362,7 +339,6 @@ err_before_quota:
                 * We raced, another process pulled the new inode into cache
                 * before us:
                 */
-               journal_seq_copy(c, old, journal_seq);
                make_bad_inode(&inode->v);
                iput(&inode->v);
 
@@ -446,7 +422,7 @@ static int __bch2_link(struct bch_fs *c,
        mutex_lock(&inode->ei_update_lock);
        bch2_trans_init(&trans, c, 4, 1024);
 
-       ret = __bch2_trans_do(&trans, NULL, &inode->ei_journal_seq, 0,
+       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
                        bch2_link_trans(&trans,
                                        inode_inum(dir),   &dir_u,
                                        inode_inum(inode), &inode_u,
@@ -455,7 +431,6 @@ static int __bch2_link(struct bch_fs *c,
        if (likely(!ret)) {
                BUG_ON(inode_u.bi_inum != inode->v.i_ino);
 
-               journal_seq_copy(c, inode, dir->ei_journal_seq);
                bch2_inode_update_after_write(c, dir, &dir_u,
                                              ATTR_MTIME|ATTR_CTIME);
                bch2_inode_update_after_write(c, inode, &inode_u, ATTR_CTIME);
@@ -498,7 +473,7 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
        bch2_lock_inodes(INODE_UPDATE_LOCK, dir, inode);
        bch2_trans_init(&trans, c, 4, 1024);
 
-       ret = __bch2_trans_do(&trans, NULL, &dir->ei_journal_seq,
+       ret = __bch2_trans_do(&trans, NULL, NULL,
                              BTREE_INSERT_NOFAIL,
                        bch2_unlink_trans(&trans,
                                          inode_inum(dir), &dir_u,
@@ -508,7 +483,6 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
        if (likely(!ret)) {
                BUG_ON(inode_u.bi_inum != inode->v.i_ino);
 
-               journal_seq_copy(c, inode, dir->ei_journal_seq);
                bch2_inode_update_after_write(c, dir, &dir_u,
                                              ATTR_MTIME|ATTR_CTIME);
                bch2_inode_update_after_write(c, inode, &inode_u,
@@ -550,8 +524,6 @@ static int bch2_symlink(struct mnt_idmap *idmap,
        if (unlikely(ret))
                goto err;
 
-       journal_seq_copy(c, dir, inode->ei_journal_seq);
-
        ret = __bch2_link(c, inode, dir, dentry);
        if (unlikely(ret))
                goto err;
@@ -586,7 +558,6 @@ static int bch2_rename2(struct mnt_idmap *idmap,
                ? BCH_RENAME_EXCHANGE
                : dst_dentry->d_inode
                ? BCH_RENAME_OVERWRITE : BCH_RENAME;
-       u64 journal_seq = 0;
        int ret;
 
        if (flags & ~(RENAME_NOREPLACE|RENAME_EXCHANGE))
@@ -626,7 +597,7 @@ static int bch2_rename2(struct mnt_idmap *idmap,
                        goto err;
        }
 
-       ret = __bch2_trans_do(&trans, NULL, &journal_seq, 0,
+       ret = __bch2_trans_do(&trans, NULL, NULL, 0,
                        bch2_rename_trans(&trans,
                                          inode_inum(src_dir), &src_dir_u,
                                          inode_inum(dst_dir), &dst_dir_u,
@@ -644,23 +615,17 @@ static int bch2_rename2(struct mnt_idmap *idmap,
 
        bch2_inode_update_after_write(c, src_dir, &src_dir_u,
                                      ATTR_MTIME|ATTR_CTIME);
-       journal_seq_copy(c, src_dir, journal_seq);
 
-       if (src_dir != dst_dir) {
+       if (src_dir != dst_dir)
                bch2_inode_update_after_write(c, dst_dir, &dst_dir_u,
                                              ATTR_MTIME|ATTR_CTIME);
-               journal_seq_copy(c, dst_dir, journal_seq);
-       }
 
        bch2_inode_update_after_write(c, src_inode, &src_inode_u,
                                      ATTR_CTIME);
-       journal_seq_copy(c, src_inode, journal_seq);
 
-       if (dst_inode) {
+       if (dst_inode)
                bch2_inode_update_after_write(c, dst_inode, &dst_inode_u,
                                              ATTR_CTIME);
-               journal_seq_copy(c, dst_inode, journal_seq);
-       }
 err:
        bch2_trans_exit(&trans);
 
@@ -767,8 +732,7 @@ retry:
        }
 
        ret =   bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
-               bch2_trans_commit(&trans, NULL,
-                                 &inode->ei_journal_seq,
+               bch2_trans_commit(&trans, NULL, NULL,
                                  BTREE_INSERT_NOFAIL);
 btree_err:
        bch2_trans_iter_exit(&trans, &inode_iter);
@@ -1203,7 +1167,6 @@ static void bch2_vfs_inode_init(struct bch_fs *c, subvol_inum inum,
        inode->v.i_size         = bi->bi_size;
 
        inode->ei_flags         = 0;
-       inode->ei_journal_seq   = bi->bi_journal_seq;
        inode->ei_quota_reserved = 0;
        inode->ei_qid           = bch_qid(bi);
        inode->ei_subvol        = inum.subvol;
@@ -1242,7 +1205,6 @@ static struct inode *bch2_alloc_inode(struct super_block *sb)
        mutex_init(&inode->ei_update_lock);
        pagecache_lock_init(&inode->ei_pagecache_lock);
        mutex_init(&inode->ei_quota_lock);
-       inode->ei_journal_seq = 0;
 
        return &inode->v;
 }
index 38c04282da6475565b111279bb2708a28ac16559..1c8936df9fbb9cf1dc962bad94d5d65ef18c0f19 100644 (file)
@@ -36,7 +36,6 @@ struct bch_inode_info {
        unsigned long           ei_flags;
 
        struct mutex            ei_update_lock;
-       u64                     ei_journal_seq;
        u64                     ei_quota_reserved;
        unsigned long           ei_last_dirtied;
        struct pagecache_lock   ei_pagecache_lock;
index 0a9cb4d489f417e643e489efaf99b86bf2e7c4cf..dc41286c229e6f8e9562654ee50d51ff673c3ec5 100644 (file)
@@ -393,7 +393,7 @@ err:
  */
 int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
                   subvol_inum inum, u64 end,
-                  u64 *journal_seq, s64 *i_sectors_delta)
+                  s64 *i_sectors_delta)
 {
        struct bch_fs *c        = trans->c;
        unsigned max_sectors    = KEY_SIZE_MAX & (~0 << c->block_bits);
@@ -431,7 +431,7 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
                bch2_cut_back(end_pos, &delete);
 
                ret = bch2_extent_update(trans, inum, iter, &delete,
-                               &disk_res, journal_seq,
+                               &disk_res, NULL,
                                0, i_sectors_delta, false);
                bch2_disk_reservation_put(c, &disk_res);
 btree_err:
@@ -450,7 +450,7 @@ btree_err:
 }
 
 int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
-               u64 *journal_seq, s64 *i_sectors_delta)
+               s64 *i_sectors_delta)
 {
        struct btree_trans trans;
        struct btree_iter iter;
@@ -461,8 +461,7 @@ int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
                             POS(inum.inum, start),
                             BTREE_ITER_INTENT);
 
-       ret = bch2_fpunch_at(&trans, &iter, inum, end,
-                            journal_seq, i_sectors_delta);
+       ret = bch2_fpunch_at(&trans, &iter, inum, end, i_sectors_delta);
 
        bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
index ebb0944b4ca3b14b978479a6e43ea0bab6ae8d32..8be77561badbb2c08bb399436983e186c4e4b8fd 100644 (file)
@@ -68,12 +68,6 @@ static inline u64 *op_journal_seq(struct bch_write_op *op)
                ? op->journal_seq_p : &op->journal_seq;
 }
 
-static inline void op_journal_seq_set(struct bch_write_op *op, u64 *journal_seq)
-{
-       op->journal_seq_p = journal_seq;
-       op->flags |= BCH_WRITE_JOURNAL_SEQ_PTR;
-}
-
 static inline struct workqueue_struct *index_update_wq(struct bch_write_op *op)
 {
        return op->alloc_reserve == RESERVE_MOVINGGC
@@ -88,8 +82,8 @@ int bch2_extent_update(struct btree_trans *, subvol_inum,
                       struct disk_reservation *, u64 *, u64, s64 *, bool);
 
 int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
-                  subvol_inum, u64, u64 *, s64 *);
-int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, u64 *, s64 *);
+                  subvol_inum, u64, s64 *);
+int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
 
 static inline void bch2_write_op_init(struct bch_write_op *op, struct bch_fs *c,
                                      struct bch_io_opts opts)
index 8e66e6390e6249221f7de700706c41cd8df8b397..d003f4088dfc3fb49e33ee621c349121a294419f 100644 (file)
@@ -210,7 +210,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
 s64 bch2_remap_range(struct bch_fs *c,
                     subvol_inum dst_inum, u64 dst_offset,
                     subvol_inum src_inum, u64 src_offset,
-                    u64 remap_sectors, u64 *journal_seq,
+                    u64 remap_sectors,
                     u64 new_i_size, s64 *i_sectors_delta)
 {
        struct btree_trans trans;
@@ -281,7 +281,7 @@ s64 bch2_remap_range(struct bch_fs *c,
                                        min(dst_end.offset,
                                            dst_iter.pos.offset +
                                            src_iter.pos.offset - src_want.offset),
-                                       journal_seq, i_sectors_delta);
+                                       i_sectors_delta);
                        continue;
                }
 
@@ -320,7 +320,7 @@ s64 bch2_remap_range(struct bch_fs *c,
                                    dst_end.offset - dst_iter.pos.offset));
 
                ret = bch2_extent_update(&trans, dst_inum, &dst_iter,
-                                        new_dst.k, &disk_res, journal_seq,
+                                        new_dst.k, &disk_res, NULL,
                                         new_i_size, i_sectors_delta,
                                         true);
                bch2_disk_reservation_put(c, &disk_res);
@@ -347,7 +347,7 @@ s64 bch2_remap_range(struct bch_fs *c,
                    inode_u.bi_size < new_i_size) {
                        inode_u.bi_size = new_i_size;
                        ret2  = bch2_inode_write(&trans, &inode_iter, &inode_u) ?:
-                               bch2_trans_commit(&trans, NULL, journal_seq, 0);
+                               bch2_trans_commit(&trans, NULL, NULL, 0);
                }
 
                bch2_trans_iter_exit(&trans, &inode_iter);
index 4c1b82860b0b9ca31aae88213d2dcb7916bfcfc1..3745873fd88d90947f610de256931cecec4d9181 100644 (file)
@@ -58,6 +58,6 @@ static inline __le64 *bkey_refcount(struct bkey_i *k)
 }
 
 s64 bch2_remap_range(struct bch_fs *, subvol_inum, u64,
-                    subvol_inum, u64, u64, u64 *, u64, s64 *);
+                    subvol_inum, u64, u64, u64, s64 *);
 
 #endif /* _BCACHEFS_REFLINK_H */
index 181af89b0553c538ef5d226cfa644e075a8dd24e..21823ce6923797ac57f4ae758f9ce9584bdcf99d 100644 (file)
@@ -165,8 +165,24 @@ int bch2_xattr_set(struct btree_trans *trans, subvol_inum inum,
                   const char *name, const void *value, size_t size,
                   int type, int flags)
 {
+       struct btree_iter inode_iter = { NULL };
+       struct bch_inode_unpacked inode_u;
        int ret;
 
+       /*
+        * We need to do an inode update so that bi_journal_seq gets updated
+        * and fsync works:
+        *
+        * Perhaps we should be updating bi_mtime too?
+        */
+
+       ret   = bch2_inode_peek(trans, &inode_iter, &inode_u, inum, BTREE_ITER_INTENT) ?:
+               bch2_inode_write(trans, &inode_iter, &inode_u);
+       bch2_trans_iter_exit(trans, &inode_iter);
+
+       if (ret)
+               return ret;
+
        if (value) {
                struct bkey_i_xattr *xattr;
                unsigned namelen = strlen(name);
@@ -352,7 +368,7 @@ static int bch2_xattr_set_handler(const struct xattr_handler *handler,
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct bch_hash_info hash = bch2_hash_info_init(c, &inode->ei_inode);
 
-       return bch2_trans_do(c, NULL, &inode->ei_journal_seq, 0,
+       return bch2_trans_do(c, NULL, NULL, 0,
                        bch2_xattr_set(&trans, inode_inum(inode), &hash,
                                       name, value, size,
                                       handler->flags, flags));