bcachefs: Centralize marking of replicas in btree update path
author Kent Overstreet <kent.overstreet@gmail.com>
Wed, 7 Nov 2018 22:48:32 +0000 (17:48 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:11 +0000 (17:08 -0400)
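Instead of requiring every caller of the btree update path (the write path,
bch2_dev_usrdata_drop(), bch2_migrate_index_update()) to call
bch2_mark_bkey_replicas() itself before inserting, do the check in
btree_key_can_insert(): if the key's replicas aren't yet marked in the
superblock, the insert fails with the new BTREE_INSERT_NEED_MARK_REPLICAS,
and __bch2_btree_insert_at() drops locks, marks the replicas, and retries.

This makes the BCH_WRITE_NOMARK_REPLICAS flag unnecessary, and
bch2_replicas_marked()/bch2_bkey_replicas_marked() gain a flag controlling
whether the in-progress gc replicas table is checked as well.
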
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_gc.c
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/extents.c
fs/bcachefs/io.c
fs/bcachefs/io.h
fs/bcachefs/journal_io.c
fs/bcachefs/migrate.c
fs/bcachefs/move.c
fs/bcachefs/replicas.c
fs/bcachefs/replicas.h

diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 0fb89e03fac8d872afb4d62b39ef57f968d1e94c..b0d04ed5f2a619e048b743d5f4b53bc7d2c8537e 100644
@@ -155,7 +155,7 @@ static int bch2_btree_mark_ptrs_initial(struct bch_fs *c, enum bkey_type type,
               k.k->version.lo > journal_cur_seq(&c->journal));
 
        if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
-           fsck_err_on(!bch2_bkey_replicas_marked(c, type, k), c,
+           fsck_err_on(!bch2_bkey_replicas_marked(c, type, k, false), c,
                        "superblock not marked as containing replicas (type %u)",
                        data_type)) {
                ret = bch2_mark_bkey_replicas(c, type, k);
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 467c619f7f6d5417f9e97f2eaa16e03678f124ba..7e9ba60288aaac0486c79000e34a4ca79c6b27d0 100644
@@ -440,11 +440,11 @@ enum btree_insert_ret {
        BTREE_INSERT_OK,
        /* extent spanned multiple leaf nodes: have to traverse to next node: */
        BTREE_INSERT_NEED_TRAVERSE,
-       /* write lock held for too long */
        /* leaf node needs to be split */
        BTREE_INSERT_BTREE_NODE_FULL,
        BTREE_INSERT_ENOSPC,
        BTREE_INSERT_NEED_GC_LOCK,
+       BTREE_INSERT_NEED_MARK_REPLICAS,
 };
 
 enum btree_gc_coalesce_fail_reason {
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 44501e98a4ac5a34e9e4e5ca28aebf2e3184d82f..093e480977c713eb47c38c917f26c39e716cceea 100644
@@ -12,6 +12,7 @@
 #include "journal.h"
 #include "journal_reclaim.h"
 #include "keylist.h"
+#include "replicas.h"
 #include "trace.h"
 
 #include <linux/sort.h>
@@ -301,8 +302,8 @@ static inline int btree_trans_cmp(struct btree_insert_entry l,
 
 static enum btree_insert_ret
 btree_key_can_insert(struct btree_insert *trans,
-                     struct btree_insert_entry *insert,
-                     unsigned *u64s)
+                    struct btree_insert_entry *insert,
+                    unsigned *u64s)
 {
        struct bch_fs *c = trans->c;
        struct btree *b = insert->iter->l[0].b;
@@ -311,6 +312,12 @@ btree_key_can_insert(struct btree_insert *trans,
        if (unlikely(btree_node_fake(b)))
                return BTREE_INSERT_BTREE_NODE_FULL;
 
+       if (!bch2_bkey_replicas_marked(c,
+                       insert->iter->btree_id,
+                       bkey_i_to_s_c(insert->k),
+                       true))
+               return BTREE_INSERT_NEED_MARK_REPLICAS;
+
        ret = !btree_node_is_extents(b)
                ? BTREE_INSERT_OK
                : bch2_extent_can_insert(trans, insert, u64s);
@@ -327,8 +334,7 @@ btree_key_can_insert(struct btree_insert *trans,
  * Get journal reservation, take write locks, and attempt to do btree update(s):
  */
 static inline int do_btree_insert_at(struct btree_insert *trans,
-                                    struct btree_iter **split,
-                                    bool *cycle_gc_lock)
+                                    struct btree_insert_entry **stopped_at)
 {
        struct bch_fs *c = trans->c;
        struct btree_insert_entry *i;
@@ -372,22 +378,10 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
                        u64s = 0;
 
                u64s += i->k->k.u64s;
-               switch (btree_key_can_insert(trans, i, &u64s)) {
-               case BTREE_INSERT_OK:
-                       break;
-               case BTREE_INSERT_BTREE_NODE_FULL:
-                       ret = -EINTR;
-                       *split = i->iter;
-                       goto out;
-               case BTREE_INSERT_ENOSPC:
-                       ret = -ENOSPC;
+               ret = btree_key_can_insert(trans, i, &u64s);
+               if (ret) {
+                       *stopped_at = i;
                        goto out;
-               case BTREE_INSERT_NEED_GC_LOCK:
-                       ret = -EINTR;
-                       *cycle_gc_lock = true;
-                       goto out;
-               default:
-                       BUG();
                }
        }
 
@@ -445,8 +439,7 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
 {
        struct bch_fs *c = trans->c;
        struct btree_insert_entry *i;
-       struct btree_iter *linked, *split = NULL;
-       bool cycle_gc_lock = false;
+       struct btree_iter *linked;
        unsigned flags;
        int ret;
 
@@ -466,9 +459,6 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
        if (unlikely(!percpu_ref_tryget(&c->writes)))
                return -EROFS;
 retry:
-       split = NULL;
-       cycle_gc_lock = false;
-
        trans_for_each_entry(trans, i) {
                unsigned old_locks_want = i->iter->locks_want;
                unsigned old_uptodate = i->iter->uptodate;
@@ -486,7 +476,7 @@ retry:
                }
        }
 
-       ret = do_btree_insert_at(trans, &split, &cycle_gc_lock);
+       ret = do_btree_insert_at(trans, &i);
        if (unlikely(ret))
                goto err;
 
@@ -521,8 +511,9 @@ err:
        if (!trans->did_work)
                flags &= ~BTREE_INSERT_NOUNLOCK;
 
-       if (split) {
-               ret = bch2_btree_split_leaf(c, split, flags);
+       switch (ret) {
+       case BTREE_INSERT_BTREE_NODE_FULL:
+               ret = bch2_btree_split_leaf(c, i->iter, flags);
 
                /*
                 * if the split succeeded without dropping locks the insert will
@@ -547,9 +538,10 @@ err:
                        trans_restart(" (split)");
                        ret = -EINTR;
                }
-       }
+               break;
+       case BTREE_INSERT_NEED_GC_LOCK:
+               ret = -EINTR;
 
-       if (cycle_gc_lock) {
                if (!down_read_trylock(&c->gc_lock)) {
                        if (flags & BTREE_INSERT_NOUNLOCK)
                                goto out;
@@ -558,6 +550,24 @@ err:
                        down_read(&c->gc_lock);
                }
                up_read(&c->gc_lock);
+               break;
+       case BTREE_INSERT_ENOSPC:
+               ret = -ENOSPC;
+               break;
+       case BTREE_INSERT_NEED_MARK_REPLICAS:
+               if (flags & BTREE_INSERT_NOUNLOCK) {
+                       ret = -EINTR;
+                       goto out;
+               }
+
+               bch2_btree_iter_unlock(trans->entries[0].iter);
+               ret = bch2_mark_bkey_replicas(c, i->iter->btree_id,
+                                             bkey_i_to_s_c(i->k))
+                       ?: -EINTR;
+               break;
+       default:
+               BUG_ON(ret >= 0);
+               break;
        }
 
        if (ret == -EINTR) {
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 0cf3436247934bfe2cc02a316faaa38af0083b7f..df0ca1fcf2e85acc6890f8ac276670f3ed01bd8c 100644
@@ -675,7 +675,8 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
        }
 
        if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
-           !bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) {
+           !bch2_bkey_replicas_marked(c, btree_node_type(b),
+                                      e.s_c, false)) {
                bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k);
                bch2_fs_bug(c,
                        "btree key bad (replicas not marked in superblock):\n%s",
@@ -1635,7 +1636,8 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
        }
 
        if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
-           !bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) {
+           !bch2_bkey_replicas_marked(c, btree_node_type(b),
+                                      e.s_c, false)) {
                bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b),
                                      e.s_c);
                bch2_fs_bug(c,
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index 133b702299dd8118a7f4537ef104b3e26fbe0b6c..fbd0a82fdeac4d9b4355cc0b674bc53bf48a6c73 100644
@@ -23,7 +23,6 @@
 #include "keylist.h"
 #include "move.h"
 #include "rebalance.h"
-#include "replicas.h"
 #include "super.h"
 #include "super-io.h"
 #include "trace.h"
@@ -336,13 +335,6 @@ static void __bch2_write_index(struct bch_write_op *op)
                        goto err;
                }
 
-               if (!(op->flags & BCH_WRITE_NOMARK_REPLICAS)) {
-                       ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
-                                                     e.s_c);
-                       if (ret)
-                               goto err;
-               }
-
                dst = bkey_next(dst);
        }
 
diff --git a/fs/bcachefs/io.h b/fs/bcachefs/io.h
index 8a7f246e88235b618bb949c206e2c3caf32a420d..84070b674187e0154a8abe7f0bd59bc729e36adc 100644
@@ -35,10 +35,9 @@ enum bch_write_flags {
        BCH_WRITE_PAGES_OWNED           = (1 << 5),
        BCH_WRITE_ONLY_SPECIFIED_DEVS   = (1 << 6),
        BCH_WRITE_NOPUT_RESERVATION     = (1 << 7),
-       BCH_WRITE_NOMARK_REPLICAS       = (1 << 8),
 
        /* Internal: */
-       BCH_WRITE_JOURNAL_SEQ_PTR       = (1 << 9),
+       BCH_WRITE_JOURNAL_SEQ_PTR       = (1 << 8),
 };
 
 static inline u64 *op_journal_seq(struct bch_write_op *op)
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index 4555d55b23dd90a0c1a02f10bd06865bf20ce17b..b1f6433cf9e928d9e1379abaff04531e23c736fe 100644
@@ -785,7 +785,7 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
                if (!degraded &&
                    (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
                     fsck_err_on(!bch2_replicas_marked(c, BCH_DATA_JOURNAL,
-                                                      i->devs), c,
+                                                      i->devs, false), c,
                                 "superblock not marked as containing replicas (type %u)",
                                 BCH_DATA_JOURNAL))) {
                        ret = bch2_mark_replicas(c, BCH_DATA_JOURNAL, i->devs);
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index 38b392472521d79fac3301e28e2758524141d843..46878590327d5aeecfea5fdea3fec723230639b9 100644
@@ -72,11 +72,6 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
                 */
                bch2_extent_normalize(c, e.s);
 
-               ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
-                                             bkey_i_to_s_c(&tmp.key));
-               if (ret)
-                       break;
-
                iter.pos = bkey_start_pos(&tmp.key.k);
 
                ret = bch2_btree_insert_at(c, NULL, NULL,
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 1f6bad1ae388be09d0e83d9f3f28fb852be4b37a..7de3c6c475beb5e7551ed2f77755155a23ed7936 100644
@@ -150,11 +150,6 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
                        goto next;
                }
 
-               ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
-                                             extent_i_to_s_c(insert).s_c);
-               if (ret)
-                       break;
-
                ret = bch2_btree_insert_at(c, &op->res,
                                op_journal_seq(op),
                                BTREE_INSERT_ATOMIC|
@@ -239,8 +234,7 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
        m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS|
                BCH_WRITE_PAGES_STABLE|
                BCH_WRITE_PAGES_OWNED|
-               BCH_WRITE_DATA_ENCODED|
-               BCH_WRITE_NOMARK_REPLICAS;
+               BCH_WRITE_DATA_ENCODED;
 
        m->op.nr_replicas       = 1;
        m->op.nr_replicas_required = 1;
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index ef62756e8908f0cc897d02c1bbd69927528e6c60..83fc9c93d295fa1275aed842a4c0fcce34ab4458 100644
@@ -160,8 +160,8 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
        return new;
 }
 
-static bool replicas_has_entry(struct bch_replicas_cpu *r,
-                              struct bch_replicas_entry *search)
+static bool __replicas_has_entry(struct bch_replicas_cpu *r,
+                                struct bch_replicas_entry *search)
 {
        return replicas_entry_bytes(search) <= r->entry_size &&
                eytzinger0_find(r->entries, r->nr,
@@ -169,6 +169,24 @@ static bool replicas_has_entry(struct bch_replicas_cpu *r,
                                memcmp, search) < r->nr;
 }
 
+static bool replicas_has_entry(struct bch_fs *c,
+                              struct bch_replicas_entry *search,
+                              bool check_gc_replicas)
+{
+       struct bch_replicas_cpu *r, *gc_r;
+       bool marked;
+
+       rcu_read_lock();
+       r = rcu_dereference(c->replicas);
+       marked = __replicas_has_entry(r, search) &&
+               (!check_gc_replicas ||
+                likely(!(gc_r = rcu_dereference(c->replicas_gc))) ||
+                __replicas_has_entry(gc_r, search));
+       rcu_read_unlock();
+
+       return marked;
+}
+
 noinline
 static int bch2_mark_replicas_slowpath(struct bch_fs *c,
                                struct bch_replicas_entry *new_entry)
@@ -180,7 +198,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 
        old_gc = rcu_dereference_protected(c->replicas_gc,
                                           lockdep_is_held(&c->sb_lock));
-       if (old_gc && !replicas_has_entry(old_gc, new_entry)) {
+       if (old_gc && !__replicas_has_entry(old_gc, new_entry)) {
                new_gc = cpu_replicas_add_entry(old_gc, new_entry);
                if (!new_gc)
                        goto err;
@@ -188,7 +206,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
 
        old_r = rcu_dereference_protected(c->replicas,
                                          lockdep_is_held(&c->sb_lock));
-       if (!replicas_has_entry(old_r, new_entry)) {
+       if (!__replicas_has_entry(old_r, new_entry)) {
                new_r = cpu_replicas_add_entry(old_r, new_entry);
                if (!new_r)
                        goto err;
@@ -227,17 +245,8 @@ err:
 static int __bch2_mark_replicas(struct bch_fs *c,
                                struct bch_replicas_entry *devs)
 {
-       struct bch_replicas_cpu *r, *gc_r;
-       bool marked;
-
-       rcu_read_lock();
-       r = rcu_dereference(c->replicas);
-       gc_r = rcu_dereference(c->replicas_gc);
-       marked = replicas_has_entry(r, devs) &&
-               (!likely(gc_r) || replicas_has_entry(gc_r, devs));
-       rcu_read_unlock();
-
-       return likely(marked) ? 0
+       return likely(replicas_has_entry(c, devs, true))
+               ? 0
                : bch2_mark_replicas_slowpath(c, devs);
 }
 
@@ -666,10 +675,10 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
 
 bool bch2_replicas_marked(struct bch_fs *c,
                          enum bch_data_type data_type,
-                         struct bch_devs_list devs)
+                         struct bch_devs_list devs,
+                         bool check_gc_replicas)
 {
        struct bch_replicas_entry_padded search;
-       bool ret;
 
        if (!devs.nr)
                return true;
@@ -678,19 +687,15 @@ bool bch2_replicas_marked(struct bch_fs *c,
 
        devlist_to_replicas(devs, data_type, &search.e);
 
-       rcu_read_lock();
-       ret = replicas_has_entry(rcu_dereference(c->replicas), &search.e);
-       rcu_read_unlock();
-
-       return ret;
+       return replicas_has_entry(c, &search.e, check_gc_replicas);
 }
 
 bool bch2_bkey_replicas_marked(struct bch_fs *c,
                               enum bkey_type type,
-                              struct bkey_s_c k)
+                              struct bkey_s_c k,
+                              bool check_gc_replicas)
 {
        struct bch_replicas_entry_padded search;
-       bool ret;
 
        memset(&search, 0, sizeof(search));
 
@@ -700,20 +705,16 @@ bool bch2_bkey_replicas_marked(struct bch_fs *c,
 
                for (i = 0; i < cached.nr; i++)
                        if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
-                                       bch2_dev_list_single(cached.devs[i])))
+                                       bch2_dev_list_single(cached.devs[i]),
+                                       check_gc_replicas))
                                return false;
        }
 
        bkey_to_replicas(type, k, &search.e);
 
-       if (!search.e.nr_devs)
-               return true;
-
-       rcu_read_lock();
-       ret = replicas_has_entry(rcu_dereference(c->replicas), &search.e);
-       rcu_read_unlock();
-
-       return ret;
+       return search.e.nr_devs
+               ? replicas_has_entry(c, &search.e, check_gc_replicas)
+               : true;
 }
 
 struct replicas_status __bch2_replicas_status(struct bch_fs *c,
diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h
index a343dd9cd97ff43401a444a51499d504b4b93d32..e22d2d7cd08a5dfd4221245808856a3d958328ce 100644
@@ -5,9 +5,9 @@
 #include "replicas_types.h"
 
 bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type,
-                         struct bch_devs_list);
+                         struct bch_devs_list, bool);
 bool bch2_bkey_replicas_marked(struct bch_fs *, enum bkey_type,
-                              struct bkey_s_c);
+                              struct bkey_s_c, bool);
 int bch2_mark_replicas(struct bch_fs *, enum bch_data_type,
                       struct bch_devs_list);
 int bch2_mark_bkey_replicas(struct bch_fs *, enum bkey_type,