bcachefs: Btree update path cleanup
author     Kent Overstreet <kent.overstreet@gmail.com>
           Fri, 15 Mar 2019 21:11:58 +0000 (17:11 -0400)
committer  Kent Overstreet <kent.overstreet@linux.dev>
           Sun, 22 Oct 2023 21:08:18 +0000 (17:08 -0400)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update.h
fs/bcachefs/btree_update_leaf.c

diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 674a617a8018a11044afc8cb0a5a51ccb66e4855..2904239b79477ccfeea00c272a3bfe36d1d456cc 100644
@@ -289,12 +289,12 @@ struct btree_trans {
 
        struct btree_iter       *iters;
        struct btree_insert_entry *updates;
-       struct disk_reservation *disk_res;
 
        /* update path: */
        struct journal_res      journal_res;
        struct journal_preres   journal_preres;
        u64                     *journal_seq;
+       struct disk_reservation *disk_res;
        unsigned                flags;
 
        struct btree_iter       iters_onstack[2];
@@ -501,6 +501,7 @@ enum btree_insert_ret {
        BTREE_INSERT_BTREE_NODE_FULL,
        BTREE_INSERT_ENOSPC,
        BTREE_INSERT_NEED_MARK_REPLICAS,
+       BTREE_INSERT_NEED_JOURNAL_RES,
 };
 
 enum btree_gc_coalesce_fail_reason {
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 876b0e78f982ae9efd8d8c4ef2f1e9c380b9815f..4d7cef75a017a3027f2499fef0b7257bd9d9df22 100644
@@ -128,4 +128,39 @@ int bch2_trans_commit(struct btree_trans *,
        _ret;                                                           \
 })
 
+/*
+ * We sort transaction entries so that if multiple iterators point to the same
+ * leaf node they'll be adjacent:
+ */
+static inline bool same_leaf_as_prev(struct btree_trans *trans,
+                                    struct btree_insert_entry *i)
+{
+       return i != trans->updates &&
+               !i->deferred &&
+               i[0].iter->l[0].b == i[-1].iter->l[0].b;
+}
+
+#define __trans_next_update(_trans, _i, _filter)                       \
+({                                                                     \
+       while ((_i) < (_trans)->updates + (_trans)->nr_updates && !(_filter))\
+               (_i)++;                                                 \
+                                                                       \
+       (_i) < (_trans)->updates + (_trans)->nr_updates;                \
+})
+
+#define __trans_for_each_update(_trans, _i, _filter)                   \
+       for ((_i) = (_trans)->updates;                                  \
+            __trans_next_update(_trans, _i, _filter);                  \
+            (_i)++)
+
+#define trans_for_each_update(trans, i)                                        \
+       __trans_for_each_update(trans, i, true)
+
+#define trans_for_each_update_iter(trans, i)                           \
+       __trans_for_each_update(trans, i, !(i)->deferred)
+
+#define trans_for_each_update_leaf(trans, i)                           \
+       __trans_for_each_update(trans, i, !(i)->deferred &&             \
+                              !same_leaf_as_prev(trans, i))
+
 #endif /* _BCACHEFS_BTREE_UPDATE_H */
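
The `_filter` argument to these macros is an expression over the cursor `(_i)`,
re-evaluated each time the cursor advances, so each wrapper
(trans_for_each_update_iter, trans_for_each_update_leaf) just supplies a
different predicate. A standalone sketch of the same idiom, using a toy entry
type rather than the bcachefs structs:

	struct toy_entry {
		int	deferred;
	};

	#define toy_next_match(_v, _n, _i, _filter)			\
	({								\
		while ((_i) < (_v) + (_n) && !(_filter))		\
			(_i)++;						\
									\
		(_i) < (_v) + (_n);					\
	})

	#define toy_for_each_match(_v, _n, _i, _filter)			\
		for ((_i) = (_v); toy_next_match(_v, _n, _i, _filter); (_i)++)

	/* count entries the "!deferred" filter lets through: */
	static unsigned toy_count_nondeferred(struct toy_entry *v, unsigned n)
	{
		struct toy_entry *i;
		unsigned ret = 0;

		toy_for_each_match(v, n, i, !(i)->deferred)
			ret++;
		return ret;
	}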
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index d239aff7c13c8344199602adc24f09494b9e150e..42fdb6c2963aae6cfc674a3707d3da9ec7b57570 100644
 
 #include <linux/sort.h>
 
-static bool btree_trans_relock(struct btree_trans *);
-static void btree_trans_unlock(struct btree_trans *);
+inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
+                                           struct btree_iter *iter)
+{
+       bch2_btree_node_lock_write(b, iter);
+
+       if (btree_node_just_written(b) &&
+           bch2_btree_post_write_cleanup(c, b))
+               bch2_btree_iter_reinit_node(iter, b);
+
+       /*
+        * If the last bset has been written, or if it's gotten too big - start
+        * a new bset to insert into:
+        */
+       if (want_new_bset(c, b))
+               bch2_btree_init_next(c, b, iter);
+}
+
+static void btree_trans_lock_write(struct bch_fs *c, struct btree_trans *trans)
+{
+       struct btree_insert_entry *i;
+
+       trans_for_each_update_leaf(trans, i)
+               bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
+}
+
+static void btree_trans_unlock_write(struct btree_trans *trans)
+{
+       struct btree_insert_entry *i;
+
+       trans_for_each_update_leaf(trans, i)
+               bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
+}
+
+static bool btree_trans_relock(struct btree_trans *trans)
+{
+       struct btree_insert_entry *i;
+
+       trans_for_each_update_iter(trans, i)
+               return bch2_btree_iter_relock(i->iter);
+       return true;
+}
+
+static void btree_trans_unlock(struct btree_trans *trans)
+{
+       struct btree_insert_entry *i;
+
+       trans_for_each_update_iter(trans, i) {
+               bch2_btree_iter_unlock(i->iter);
+               break;
+       }
+}
+
+static inline int btree_trans_cmp(struct btree_insert_entry l,
+                                 struct btree_insert_entry r)
+{
+       return (l.deferred > r.deferred) - (l.deferred < r.deferred) ?:
+               btree_iter_cmp(l.iter, r.iter);
+}
 
 /* Inserting into a given leaf node (last stage of insert): */
 
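btree_trans_cmp() above sorts non-deferred (iterator) updates ahead of
deferred ones: (l.deferred > r.deferred) - (l.deferred < r.deferred) is the
branch-free -1/0/+1 compare idiom, and the GNU "a ?: b" extension falls
through to btree_iter_cmp() only on ties. For illustration, the same
two-level compare with plain ints (toy code, not the bcachefs types):

	static inline int toy_cmp(int l, int r)
	{
		/* -1, 0 or +1, with no branches and no signed overflow: */
		return (l > r) - (l < r);
	}

	static inline int toy_cmp_pair(int l1, int l2, int r1, int r2)
	{
		/* secondary key breaks ties on the primary key: */
		return toy_cmp(l1, r1) ?: toy_cmp(l2, r2);
	}
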
@@ -350,103 +406,86 @@ bch2_deferred_update_alloc(struct bch_fs *c,
        return d;
 }
 
-/* struct btree_insert operations: */
+/* Normal update interface: */
 
-/*
- * We sort transaction entries so that if multiple iterators point to the same
- * leaf node they'll be adjacent:
- */
-static bool same_leaf_as_prev(struct btree_trans *trans,
-                             struct btree_insert_entry *i)
+static inline void btree_insert_entry_checks(struct bch_fs *c,
+                                            struct btree_insert_entry *i)
 {
-       return i != trans->updates &&
-               !i->deferred &&
-               i[0].iter->l[0].b == i[-1].iter->l[0].b;
-}
-
-#define __trans_next_entry(_trans, _i, _filter)                                \
-({                                                                     \
-       while ((_i) < (_trans)->updates + (_trans->nr_updates) && !(_filter))\
-               (_i)++;                                                 \
-                                                                       \
-       (_i) < (_trans)->updates + (_trans->nr_updates);                \
-})
-
-#define __trans_for_each_entry(_trans, _i, _filter)                    \
-       for ((_i) = (_trans)->updates;                                  \
-            __trans_next_entry(_trans, _i, _filter);                   \
-            (_i)++)
+       enum btree_id btree_id = !i->deferred
+               ? i->iter->btree_id
+               : i->d->btree_id;
 
-#define trans_for_each_entry(trans, i)                                 \
-       __trans_for_each_entry(trans, i, true)
+       if (!i->deferred) {
+               BUG_ON(i->iter->level);
+               BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
+               EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) &&
+                       !bch2_extent_is_atomic(i->k, i->iter));
 
-#define trans_for_each_iter(trans, i)                                  \
-       __trans_for_each_entry(trans, i, !(i)->deferred)
+               bch2_btree_iter_verify_locks(i->iter);
+       }
 
-#define trans_for_each_leaf(trans, i)                                  \
-       __trans_for_each_entry(trans, i, !(i)->deferred &&              \
-                              !same_leaf_as_prev(trans, i))
+       BUG_ON(debug_check_bkeys(c) &&
+              !bkey_deleted(&i->k->k) &&
+              bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), btree_id));
+}
 
-inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
-                                           struct btree_iter *iter)
+static int bch2_trans_journal_preres_get(struct btree_trans *trans)
 {
-       bch2_btree_node_lock_write(b, iter);
+       struct bch_fs *c = trans->c;
+       struct btree_insert_entry *i;
+       unsigned u64s = 0;
+       int ret;
 
-       if (btree_node_just_written(b) &&
-           bch2_btree_post_write_cleanup(c, b))
-               bch2_btree_iter_reinit_node(iter, b);
+       trans_for_each_update(trans, i)
+               if (i->deferred)
+                       u64s += jset_u64s(i->k->k.u64s);
 
-       /*
-        * If the last bset has been written, or if it's gotten too big - start
-        * a new bset to insert into:
-        */
-       if (want_new_bset(c, b))
-               bch2_btree_init_next(c, b, iter);
-}
+       if (!u64s)
+               return 0;
 
-static void multi_lock_write(struct bch_fs *c, struct btree_trans *trans)
-{
-       struct btree_insert_entry *i;
+       ret = bch2_journal_preres_get(&c->journal,
+                       &trans->journal_preres, u64s,
+                       JOURNAL_RES_GET_NONBLOCK);
+       if (ret != -EAGAIN)
+               return ret;
 
-       trans_for_each_leaf(trans, i)
-               bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
-}
+       btree_trans_unlock(trans);
 
-static void multi_unlock_write(struct btree_trans *trans)
-{
-       struct btree_insert_entry *i;
+       ret = bch2_journal_preres_get(&c->journal,
+                       &trans->journal_preres, u64s, 0);
+       if (ret)
+               return ret;
 
-       trans_for_each_leaf(trans, i)
-               bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
-}
+       if (!btree_trans_relock(trans)) {
+               trans_restart(" (iter relock after journal preres get blocked)");
+               return -EINTR;
+       }
 
-static inline int btree_trans_cmp(struct btree_insert_entry l,
-                                 struct btree_insert_entry r)
-{
-       return (l.deferred > r.deferred) - (l.deferred < r.deferred) ?:
-               btree_iter_cmp(l.iter, r.iter);
+       return 0;
 }
 
-static bool btree_trans_relock(struct btree_trans *trans)
+static int bch2_trans_journal_res_get(struct btree_trans *trans,
+                                     unsigned flags)
 {
+       struct bch_fs *c = trans->c;
        struct btree_insert_entry *i;
+       unsigned u64s = 0;
+       int ret;
 
-       trans_for_each_iter(trans, i)
-               return bch2_btree_iter_relock(i->iter);
-       return true;
-}
+       if (unlikely(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
+               return 0;
 
-static void btree_trans_unlock(struct btree_trans *trans)
-{
-       struct btree_insert_entry *i;
+       if (trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
+               flags |= JOURNAL_RES_GET_RESERVED;
 
-       trans_for_each_iter(trans, i) {
-               bch2_btree_iter_unlock(i->iter);
-               break;
-       }
-}
+       trans_for_each_update(trans, i)
+               u64s += jset_u64s(i->k->k.u64s);
 
-/* Normal update interface: */
+       ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
+                                  u64s, flags);
+
+       return ret == -EAGAIN ? BTREE_INSERT_NEED_JOURNAL_RES : ret;
+}
 
 static enum btree_insert_ret
 btree_key_can_insert(struct btree_trans *trans,
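
Both helpers above size their reservation in u64s of journal space;
jset_u64s() adds per-entry header overhead on top of each key's size (one
extra u64, assuming the contemporaneous definition where the struct
jset_entry header occupies 8 bytes). The preres helper also shows the
lock-aware two-phase pattern: try nonblocking while btree locks are held,
and only on -EAGAIN unlock, block, and relock, restarting the transaction
with -EINTR if the relock races. A self-contained sketch of that pattern
with toy types (not the bcachefs API):

	#include <errno.h>
	#include <stdbool.h>

	struct toy_trans {
		bool	locked;
	};

	static int toy_reserve(struct toy_trans *t, bool nonblock)
	{
		(void) t;
		/* pretend the nonblocking path always has to wait: */
		return nonblock ? -EAGAIN : 0;
	}

	static bool toy_relock(struct toy_trans *t)
	{
		t->locked = true;
		return true;
	}

	static int toy_reserve_with_relock(struct toy_trans *t)
	{
		int ret = toy_reserve(t, true);

		if (ret != -EAGAIN)
			return ret;	/* got it, or a hard error */

		t->locked = false;	/* can't block holding node locks */

		ret = toy_reserve(t, false);
		if (ret)
			return ret;

		/* -EINTR tells the caller to restart the transaction: */
		return toy_relock(t) ? 0 : -EINTR;
	}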
@@ -477,6 +516,29 @@ btree_key_can_insert(struct btree_trans *trans,
        return BTREE_INSERT_OK;
 }
 
+static int btree_trans_check_can_insert(struct btree_trans *trans,
+                                       struct btree_insert_entry **stopped_at)
+{
+       struct btree_insert_entry *i;
+       unsigned u64s = 0;
+       int ret;
+
+       trans_for_each_update_iter(trans, i) {
+               /* Multiple inserts might go to same leaf: */
+               if (!same_leaf_as_prev(trans, i))
+                       u64s = 0;
+
+               u64s += i->k->k.u64s;
+               ret = btree_key_can_insert(trans, i, &u64s);
+               if (ret) {
+                       *stopped_at = i;
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
 static inline void do_btree_insert_one(struct btree_trans *trans,
                                       struct btree_insert_entry *insert)
 {
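
Because the updates are sorted, entries landing in the same leaf are
adjacent, so btree_trans_check_can_insert() can reset its u64s accumulator
at each leaf boundary and pass the running total to btree_key_can_insert().
A worked example, sizes invented for illustration:

	/*
	 * Updates A (a u64s) and B (b u64s) sorted onto leaf L1,
	 * update C (c u64s) onto leaf L2:
	 *
	 *	A: u64s = 0 + a    -> L1 must fit a
	 *	B: u64s = a + b    -> L1 must fit a + b (same leaf: cumulative)
	 *	C: u64s = 0 + c    -> L2 must fit c     (new leaf: reset)
	 *
	 * so a leaf is only accepted when it has room for every key
	 * headed to it in this transaction.
	 */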
@@ -495,15 +557,12 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
        struct bch_fs *c = trans->c;
        struct btree_insert_entry *i;
        struct btree_iter *linked;
-       unsigned u64s;
        int ret;
-retry:
-       trans_for_each_iter(trans, i)
-               BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
 
-       memset(&trans->journal_res, 0, sizeof(trans->journal_res));
+       trans_for_each_update_iter(trans, i)
+               BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
 
-       multi_lock_write(c, trans);
+       btree_trans_lock_write(c, trans);
 
        if (race_fault()) {
                ret = -EINTR;
@@ -516,59 +575,24 @@ retry:
         * held, otherwise another thread could write the node changing the
         * amount of space available:
         */
-       u64s = 0;
-       trans_for_each_iter(trans, i) {
-               /* Multiple inserts might go to same leaf: */
-               if (!same_leaf_as_prev(trans, i))
-                       u64s = 0;
-
-               u64s += i->k->k.u64s;
-               ret = btree_key_can_insert(trans, i, &u64s);
-               if (ret) {
-                       *stopped_at = i;
-                       goto out;
-               }
-       }
-
-       if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
-               unsigned flags = (trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
-                       ? JOURNAL_RES_GET_RESERVED : 0;
-
-               u64s = 0;
-               trans_for_each_entry(trans, i)
-                       u64s += jset_u64s(i->k->k.u64s);
-
-               ret = bch2_journal_res_get(&c->journal,
-                               &trans->journal_res, u64s,
-                               flags|JOURNAL_RES_GET_NONBLOCK);
-               if (likely(!ret))
-                       goto got_journal_res;
-               if (ret != -EAGAIN)
-                       goto out;
-
-               multi_unlock_write(trans);
-               btree_trans_unlock(trans);
-
-               ret = bch2_journal_res_get(&c->journal,
-                               &trans->journal_res, u64s,
-                               flags|JOURNAL_RES_GET_CHECK);
-               if (ret)
-                       return ret;
+       ret = btree_trans_check_can_insert(trans, stopped_at);
+       if (ret)
+               goto out;
 
-               if (!btree_trans_relock(trans)) {
-                       trans_restart(" (iter relock after journal res get blocked)");
-                       return -EINTR;
-               }
+       /*
+        * Don't get journal reservation until after we know insert will
+        * succeed:
+        */
+       ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_NONBLOCK);
+       if (ret)
+               goto out;
 
-               goto retry;
-       }
-got_journal_res:
        if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
                if (journal_seq_verify(c))
-                       trans_for_each_entry(trans, i)
+                       trans_for_each_update(trans, i)
                                i->k->k.version.lo = trans->journal_res.seq;
                else if (inject_invalid_keys(c))
-                       trans_for_each_entry(trans, i)
+                       trans_for_each_update(trans, i)
                                i->k->k.version = MAX_VERSION;
        }
 
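The reordering here is the heart of the cleanup: space in the btree node is
now verified before any journal reservation is taken, so a failed can-insert
check never consumes journal space, and the reservation itself is
nonblocking because write locks are already held. The resulting fast path,
summarized from the code above:

	/*
	 * do_btree_insert_at(), in order:
	 *
	 *	1. btree_trans_lock_write():       write-lock each distinct leaf
	 *	2. btree_trans_check_can_insert(): every key fits its leaf
	 *	3. bch2_trans_journal_res_get(NONBLOCK): may fail with
	 *	   BTREE_INSERT_NEED_JOURNAL_RES, handled in the error path
	 *	4. do_btree_insert_one() for each update
	 *	5. btree_trans_unlock_write(), journal res put
	 */
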
@@ -578,7 +602,7 @@ got_journal_res:
                 * have been traversed/locked, depending on what the caller was
                 * doing:
                 */
-               trans_for_each_iter(trans, i) {
+               trans_for_each_update_iter(trans, i) {
                        for_each_btree_iter(i->iter, linked)
                                if (linked->uptodate < BTREE_ITER_NEED_RELOCK)
                                        linked->flags |= BTREE_ITER_NOUNLOCK;
@@ -586,40 +610,19 @@ got_journal_res:
                }
        }
 
-       trans_for_each_entry(trans, i)
+       trans_for_each_update(trans, i)
                do_btree_insert_one(trans, i);
 out:
        BUG_ON(ret &&
               (trans->flags & BTREE_INSERT_JOURNAL_RESERVED) &&
               trans->journal_res.ref);
 
-       multi_unlock_write(trans);
+       btree_trans_unlock_write(trans);
        bch2_journal_res_put(&c->journal, &trans->journal_res);
 
        return ret;
 }
 
-static inline void btree_insert_entry_checks(struct bch_fs *c,
-                                            struct btree_insert_entry *i)
-{
-       enum btree_id btree_id = !i->deferred
-               ? i->iter->btree_id
-               : i->d->btree_id;
-
-       if (!i->deferred) {
-               BUG_ON(i->iter->level);
-               BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
-               EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) &&
-                       !bch2_extent_is_atomic(i->k, i->iter));
-
-               bch2_btree_iter_verify_locks(i->iter);
-       }
-
-       BUG_ON(debug_check_bkeys(c) &&
-              !bkey_deleted(&i->k->k) &&
-              bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), btree_id));
-}
-
 /**
  * __bch_btree_insert_at - insert keys at given iterator positions
  *
@@ -631,60 +634,15 @@ static inline void btree_insert_entry_checks(struct bch_fs *c,
  * -EROFS: filesystem read only
  * -EIO: journal or btree node IO error
  */
-static int __bch2_btree_insert_at(struct btree_trans *trans)
+static int __bch2_trans_commit(struct btree_trans *trans)
 {
        struct bch_fs *c = trans->c;
        struct btree_insert_entry *i;
        struct btree_iter *linked;
-       unsigned flags, u64s = 0;
+       unsigned flags;
        int ret;
-
-       BUG_ON(!trans->nr_updates);
-
-       /* for the sake of sanity: */
-       BUG_ON(trans->nr_updates > 1 && !(trans->flags & BTREE_INSERT_ATOMIC));
-
-       if (trans->flags & BTREE_INSERT_GC_LOCK_HELD)
-               lockdep_assert_held(&c->gc_lock);
-
-       memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
-
-       bubble_sort(trans->updates, trans->nr_updates, btree_trans_cmp);
-
-       trans_for_each_entry(trans, i)
-               btree_insert_entry_checks(c, i);
-
-       trans_for_each_entry(trans, i)
-               if (i->deferred)
-                       u64s += jset_u64s(i->k->k.u64s);
-
-       if (u64s) {
-               ret = bch2_journal_preres_get(&c->journal,
-                               &trans->journal_preres, u64s,
-                               JOURNAL_RES_GET_NONBLOCK);
-               if (!ret)
-                       goto got_journal_preres;
-               if (ret != -EAGAIN)
-                       return ret;
-
-               btree_trans_unlock(trans);
-               ret = bch2_journal_preres_get(&c->journal,
-                               &trans->journal_preres, u64s, 0);
-               if (ret)
-                       return ret;
-
-               if (!btree_trans_relock(trans)) {
-                       trans_restart(" (iter relock after journal preres get blocked)");
-                       bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-                       return -EINTR;
-               }
-       }
-got_journal_preres:
-       if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
-                    !percpu_ref_tryget(&c->writes)))
-               return -EROFS;
 retry:
-       trans_for_each_iter(trans, i) {
+       trans_for_each_update_iter(trans, i) {
                unsigned old_locks_want = i->iter->locks_want;
                unsigned old_uptodate = i->iter->uptodate;
 
@@ -705,24 +663,19 @@ retry:
        if (unlikely(ret))
                goto err;
 
-       trans_for_each_leaf(trans, i)
+       trans_for_each_update_leaf(trans, i)
                bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags);
 
-       trans_for_each_iter(trans, i)
+       trans_for_each_update_iter(trans, i)
                bch2_btree_iter_downgrade(i->iter);
 out:
-       bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-
-       if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
-               percpu_ref_put(&c->writes);
-
        /* make sure we didn't drop or screw up locks: */
-       trans_for_each_iter(trans, i) {
+       trans_for_each_update_iter(trans, i) {
                bch2_btree_iter_verify_locks(i->iter);
                break;
        }
 
-       trans_for_each_iter(trans, i) {
+       trans_for_each_update_iter(trans, i) {
                for_each_btree_iter(i->iter, linked)
                        linked->flags &= ~BTREE_ITER_NOUNLOCK;
                break;
@@ -784,12 +737,25 @@ err:
                bch2_trans_unlock(trans);
                ret = -EINTR;
 
-               trans_for_each_iter(trans, i) {
+               trans_for_each_update_iter(trans, i) {
                        int ret2 = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k));
                        if (ret2)
                                ret = ret2;
                }
                break;
+       case BTREE_INSERT_NEED_JOURNAL_RES:
+               btree_trans_unlock(trans);
+
+               ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
+               if (ret)
+                       goto out;
+
+               if (btree_trans_relock(trans))
+                       goto retry;
+
+               trans_restart(" (iter relock after journal res get blocked)");
+               ret = -EINTR;
+               break;
        default:
                BUG_ON(ret >= 0);
                break;
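
The new error case mirrors the NEED_MARK_REPLICAS handling: drop the btree
locks before blocking. JOURNAL_RES_GET_CHECK (assuming its usual semantics
here) waits until a reservation would succeed without actually taking one -
a reservation taken without the node locks couldn't safely be used - and the
retry then re-acquires it nonblocking under locks. As a flow summary:

	/*
	 *	do_btree_insert_at() -> BTREE_INSERT_NEED_JOURNAL_RES
	 *	err path:  btree_trans_unlock()
	 *		   bch2_trans_journal_res_get(JOURNAL_RES_GET_CHECK)
	 *			(block until space would be available)
	 *		   relock ok   -> goto retry (retry re-takes the
	 *				  reservation nonblocking, under locks)
	 *		   relock fail -> -EINTR, transaction restart
	 */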
@@ -801,7 +767,7 @@ err:
                        goto out;
                }
 
-               trans_for_each_iter(trans, i) {
+               trans_for_each_update_iter(trans, i) {
                        int ret2 = bch2_btree_iter_traverse(i->iter);
                        if (ret2) {
                                ret = ret2;
@@ -830,16 +796,44 @@ int bch2_trans_commit(struct btree_trans *trans,
                      u64 *journal_seq,
                      unsigned flags)
 {
+       struct bch_fs *c = trans->c;
+       struct btree_insert_entry *i;
        int ret;
 
        if (!trans->nr_updates)
                return 0;
 
+       /* for the sake of sanity: */
+       BUG_ON(trans->nr_updates > 1 && !(flags & BTREE_INSERT_ATOMIC));
+
+       if (flags & BTREE_INSERT_GC_LOCK_HELD)
+               lockdep_assert_held(&c->gc_lock);
+
+       memset(&trans->journal_res, 0, sizeof(trans->journal_res));
+       memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
        trans->disk_res         = disk_res;
        trans->journal_seq      = journal_seq;
        trans->flags            = flags;
 
-       ret = __bch2_btree_insert_at(trans);
+       bubble_sort(trans->updates, trans->nr_updates, btree_trans_cmp);
+
+       trans_for_each_update(trans, i)
+               btree_insert_entry_checks(c, i);
+
+       if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
+                    !percpu_ref_tryget(&c->writes)))
+               return -EROFS;
+
+       ret = bch2_trans_journal_preres_get(trans);
+       if (ret)
+               goto err;
+
+       ret = __bch2_trans_commit(trans);
+err:
+       bch2_journal_preres_put(&c->journal, &trans->journal_preres);
+
+       if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
+               percpu_ref_put(&c->writes);
 
        trans->nr_updates = 0;
 
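With the preres/res bookkeeping and the c->writes reference all hoisted into
bch2_trans_commit(), a caller only builds up trans->updates and commits. A
hypothetical single-update caller, assuming the contemporaneous helpers
bch2_trans_init()/bch2_trans_get_iter()/bch2_trans_update() and the
BTREE_INSERT_ENTRY() initializer (signatures may differ in-tree; error
handling elided):

	static int example_insert_one_key(struct bch_fs *c, struct bkey_i *k,
					  struct disk_reservation *disk_res,
					  u64 *journal_seq)
	{
		struct btree_trans trans;
		struct btree_iter *iter;
		int ret;

		bch2_trans_init(&trans, c);

		iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
					   bkey_start_pos(&k->k),
					   BTREE_ITER_INTENT);

		bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k));

		ret = bch2_trans_commit(&trans, disk_res, journal_seq, 0);

		bch2_trans_exit(&trans);
		return ret;
	}
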
@@ -861,7 +855,7 @@ int bch2_btree_delete_at(struct btree_trans *trans,
 }
 
 /**
- * bch_btree_insert - insert keys into the extent btree
+ * bch2_btree_insert - insert keys into the extent btree
  * @c:                 pointer to struct bch_fs
  * @id:                        btree to insert into
  * @insert_keys:       list of keys to insert