bcachefs: Make deferred inode updates a mount option
author: Kent Overstreet <kent.overstreet@gmail.com>
date: Wed, 13 Mar 2019 17:31:02 +0000 (13:31 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
commit date: Sun, 22 Oct 2023 21:08:17 +0000 (17:08 -0400)
Journal reclaim may still need performance tuning

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/fs-io.c
fs/bcachefs/fs.c
fs/bcachefs/opts.h

index bb067e4f627e8c512bde4d9416d34832fa7efda9..0ea4bebdd0af53f23824ba268fbb84984eb00992 100644 (file)
@@ -953,6 +953,13 @@ retry:
        a->k.p = iter->pos;
        bch2_alloc_pack(a, u);
 
+       /*
+        * XXX:
+        * when using deferred btree updates, we have journal reclaim doing
+        * btree updates and thus requiring the allocator to make forward
+        * progress, and here the allocator is requiring space in the journal -
+        * so we need a journal pre-reservation:
+        */
        ret = bch2_btree_insert_at(c, NULL,
                        invalidating_cached_data ? journal_seq : NULL,
                        BTREE_INSERT_ATOMIC|
@@ -960,7 +967,6 @@ retry:
                        BTREE_INSERT_NOFAIL|
                        BTREE_INSERT_USE_RESERVE|
                        BTREE_INSERT_USE_ALLOC_RESERVE|
-                       BTREE_INSERT_JOURNAL_RESERVED|
                        flags,
                        BTREE_INSERT_ENTRY(iter, &a->k_i));
        if (ret == -EINTR)
index eda6d71646e10f649a28801be18e7ccf14784638..251c811abeda47f95bc741d7f3ee177b1c146dbf 100644 (file)
@@ -287,6 +287,8 @@ static int bch2_extent_update(struct btree_trans *trans,
                              bool direct,
                              s64 *total_delta)
 {
+       struct bch_fs *c = trans->c;
+       struct btree_iter *inode_iter = NULL;
        struct bch_inode_unpacked inode_u;
        struct bkey_inode_buf inode_p;
        bool allocating = false;
@@ -319,35 +321,62 @@ static int bch2_extent_update(struct btree_trans *trans,
        /* XXX: inode->i_size locking */
        if (i_sectors_delta ||
            new_i_size > inode->ei_inode.bi_size) {
-               bch2_btree_iter_unlock(extent_iter);
-               mutex_lock(&inode->ei_update_lock);
+               if (c->opts.new_inode_updates) {
+                       bch2_btree_iter_unlock(extent_iter);
+                       mutex_lock(&inode->ei_update_lock);
 
-               if (!bch2_btree_iter_relock(extent_iter)) {
-                       mutex_unlock(&inode->ei_update_lock);
-                       return -EINTR;
-               }
+                       if (!bch2_btree_iter_relock(extent_iter)) {
+                               mutex_unlock(&inode->ei_update_lock);
+                               return -EINTR;
+                       }
 
-               inode_locked = true;
+                       inode_locked = true;
 
-               if (!inode->ei_inode_update)
-                       inode->ei_inode_update =
-                               bch2_deferred_update_alloc(trans->c,
-                                                       BTREE_ID_INODES, 64);
+                       if (!inode->ei_inode_update)
+                               inode->ei_inode_update =
+                                       bch2_deferred_update_alloc(c,
+                                                               BTREE_ID_INODES, 64);
 
-               inode_u = inode->ei_inode;
-               inode_u.bi_sectors += i_sectors_delta;
+                       inode_u = inode->ei_inode;
+                       inode_u.bi_sectors += i_sectors_delta;
 
-               /* XXX: this is slightly suspect */
-               if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
-                   new_i_size > inode_u.bi_size) {
-                       inode_u.bi_size = new_i_size;
-                       extended = true;
-               }
+                       /* XXX: this is slightly suspect */
+                       if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
+                           new_i_size > inode_u.bi_size) {
+                               inode_u.bi_size = new_i_size;
+                               extended = true;
+                       }
 
-               bch2_inode_pack(&inode_p, &inode_u);
-               bch2_trans_update(trans,
-                       BTREE_INSERT_DEFERRED(inode->ei_inode_update,
-                                             &inode_p.inode.k_i));
+                       bch2_inode_pack(&inode_p, &inode_u);
+                       bch2_trans_update(trans,
+                               BTREE_INSERT_DEFERRED(inode->ei_inode_update,
+                                                     &inode_p.inode.k_i));
+               } else {
+                       inode_iter = bch2_trans_get_iter(trans,
+                               BTREE_ID_INODES,
+                               POS(k->k.p.inode, 0),
+                               BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+                       if (IS_ERR(inode_iter))
+                               return PTR_ERR(inode_iter);
+
+                       ret = bch2_btree_iter_traverse(inode_iter);
+                       if (ret)
+                               goto err;
+
+                       inode_u = inode->ei_inode;
+                       inode_u.bi_sectors += i_sectors_delta;
+
+                       /* XXX: this is slightly suspect */
+                       if (!(inode_u.bi_flags & BCH_INODE_I_SIZE_DIRTY) &&
+                           new_i_size > inode_u.bi_size) {
+                               inode_u.bi_size = new_i_size;
+                               extended = true;
+                       }
+
+                       bch2_inode_pack(&inode_p, &inode_u);
+                       bch2_trans_update(trans,
+                               BTREE_INSERT_ENTRY(inode_iter, &inode_p.inode.k_i));
+               }
        }
 
        ret = bch2_trans_commit(trans, disk_res,
@@ -376,11 +405,13 @@ static int bch2_extent_update(struct btree_trans *trans,
        }
 
        if (direct)
-               i_sectors_acct(trans->c, inode, quota_res, i_sectors_delta);
+               i_sectors_acct(c, inode, quota_res, i_sectors_delta);
 
        if (total_delta)
                *total_delta += i_sectors_delta;
 err:
+       if (!IS_ERR_OR_NULL(inode_iter))
+               bch2_trans_iter_put(trans, inode_iter);
        if (inode_locked)
                mutex_unlock(&inode->ei_update_lock);
 
index 5f93ea76785f2652b697705634a0bcf5b96b80b1..dc55d36ecfd5a75167fb00e469f537bf163716c8 100644 (file)
@@ -163,22 +163,24 @@ int __must_check bch2_write_inode_trans(struct btree_trans *trans,
 
        lockdep_assert_held(&inode->ei_update_lock);
 
+       if (c->opts.new_inode_updates) {
        /* XXX: Don't do this with btree locks held */
        if (!inode->ei_inode_update)
                inode->ei_inode_update =
                        bch2_deferred_update_alloc(c, BTREE_ID_INODES, 64);
-#if 0
-       iter = bch2_trans_get_iter(trans, BTREE_ID_INODES,
-                       POS(inode->v.i_ino, 0),
-                       BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-       if (IS_ERR(iter))
-               return PTR_ERR(iter);
-
-       /* The btree node lock is our lock on the inode: */
-       ret = bch2_btree_iter_traverse(iter);
-       if (ret)
-               return ret;
-#endif
+       } else {
+               iter = bch2_trans_get_iter(trans, BTREE_ID_INODES,
+                                          POS(inode->v.i_ino, 0),
+                                          BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+               if (IS_ERR(iter))
+                       return PTR_ERR(iter);
+
+               /* The btree node lock is our lock on the inode: */
+               ret = bch2_btree_iter_traverse(iter);
+               if (ret)
+                       return ret;
+       }
+
        *inode_u = inode->ei_inode;
 
        if (set) {
index f4cb0625c3ccd3c7dbc3c63f8d7e862fff490d94..53bf06e70cd5115e794d04c2b171a3c602bab5bc 100644 (file)
@@ -295,7 +295,12 @@ enum opt_type {
          OPT_UINT(0, BCH_REPLICAS_MAX),                                \
          NO_SB_OPT,                    1,                              \
          "n",          "Data written to this device will be considered\n"\
-                       "to have already been replicated n times")
+                       "to have already been replicated n times")      \
+       x(new_inode_updates,            u8,                             \
+         OPT_MOUNT,                                                    \
+         OPT_BOOL(),                                                   \
+         NO_SB_OPT,                    false,                          \
+         NULL,         "Enable new btree write-cache for inode updates")
 
 
 struct bch_opts {