bcachefs: Fix restart handling in for_each_btree_key()
authorKent Overstreet <kent.overstreet@gmail.com>
Thu, 21 Oct 2021 16:05:21 +0000 (12:05 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:14 +0000 (17:09 -0400)
Code that uses for_each_btree_key often wants transaction restarts to be
handled locally and not returned. Originally, we wouldn't return
transaction restarts if there was a single iterator in the transaction -
the reasoning being if there weren't other iterators being invalidated,
and the current iterator was being advanced/retraversed, there weren't
any locks or iterators we were required to preserve.

But with the btree_path conversion that approach doesn't work anymore -
even when we're using for_each_btree_key() with a single iterator there
will still be two paths in the transaction, since we now always preserve
the path at the pos the iterator was initialized at - the reason being
that on restart we often restart from the same place.

And it turns out there's now a lot of for_each_btree_key() uses that _do
not_ want transaction restarts handled locally, and should be returning
them.

This patch splits out for_each_btree_key_norestart() and
for_each_btree_key_continue_norestart(), and converts existing users as
appropriate. for_each_btree_key(), for_each_btree_key_continue(), and
for_each_btree_node() now handle transaction restarts themselves by
calling bch2_trans_begin() when necessary - and the old hack to not
return transaction restarts when there's a single path in the
transaction has been deleted.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/bcachefs_format.h
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_iter.h
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/dirent.c
fs/bcachefs/extent_update.c
fs/bcachefs/fs-io.c
fs/bcachefs/io.c
fs/bcachefs/reflink.c
fs/bcachefs/str_hash.h
fs/bcachefs/xattr.c

index 4b2bf8f7b28a7b6f1a86bbc50f41b66a97dc9d39..6ed3ca075ba00f8ed8b553d34fb9bd09b54eb254 100644 (file)
@@ -1041,8 +1041,6 @@ LE64_BITMASK(BCH_MEMBER_DATA_ALLOWED,     struct bch_member, flags[0], 15, 20)
 LE64_BITMASK(BCH_MEMBER_GROUP,         struct bch_member, flags[0], 20, 28)
 LE64_BITMASK(BCH_MEMBER_DURABILITY,    struct bch_member, flags[0], 28, 30)
 
-#define BCH_TIER_MAX                   4U
-
 #if 0
 LE64_BITMASK(BCH_MEMBER_NR_READ_ERRORS,        struct bch_member, flags[1], 0,  20);
 LE64_BITMASK(BCH_MEMBER_NR_WRITE_ERRORS,struct bch_member, flags[1], 20, 40);
index 25ed4f2ce19c31d60b86538203a386e162d056b2..2b51245dd5fe8c1c103f3c7ef296ec5267923665 100644 (file)
@@ -1511,19 +1511,11 @@ static int __btree_path_traverse_all(struct btree_trans *, int, unsigned long);
 int __must_check bch2_btree_path_traverse(struct btree_trans *trans,
                                          struct btree_path *path, unsigned flags)
 {
-       int ret;
-
        if (path->uptodate < BTREE_ITER_NEED_RELOCK)
                return 0;
 
-       ret =   bch2_trans_cond_resched(trans) ?:
+       return  bch2_trans_cond_resched(trans) ?:
                btree_path_traverse_one(trans, path, flags, _RET_IP_);
-       if (unlikely(ret) && hweight64(trans->paths_allocated) == 1) {
-               ret = __btree_path_traverse_all(trans, ret, _RET_IP_);
-               BUG_ON(ret == -EINTR);
-       }
-
-       return ret;
 }
 
 static void btree_path_copy(struct btree_trans *trans, struct btree_path *dst,
@@ -1937,10 +1929,6 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter)
        if (!btree_path_node(path, path->level))
                goto out;
 
-       ret = bch2_trans_cond_resched(trans);
-       if (ret)
-               goto err;
-
        btree_node_unlock(path, path->level);
        path->l[path->level].b = BTREE_ITER_NO_NODE_UP;
        path->level++;
index 4cd05fd06e6404a220aac55b3345b43c25402307..fea1101155be72493a832b69a3ba0e4b740ed0ea 100644 (file)
@@ -284,11 +284,39 @@ static inline int bch2_trans_cond_resched(struct btree_trans *trans)
        }
 }
 
+void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);
+void bch2_trans_iter_init(struct btree_trans *, struct btree_iter *,
+                         unsigned, struct bpos, unsigned);
+void bch2_trans_node_iter_init(struct btree_trans *, struct btree_iter *,
+                              enum btree_id, struct bpos,
+                              unsigned, unsigned, unsigned);
+void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *);
+
+static inline void set_btree_iter_dontneed(struct btree_iter *iter)
+{
+       iter->path->preserve = false;
+}
+
+void *bch2_trans_kmalloc(struct btree_trans *, size_t);
+void bch2_trans_begin(struct btree_trans *);
+
+static inline struct btree *
+__btree_iter_peek_node_and_restart(struct btree_trans *trans, struct btree_iter *iter)
+{
+       struct btree *b;
+
+       while (b = bch2_btree_iter_peek_node(iter),
+              PTR_ERR_OR_ZERO(b) == -EINTR)
+               bch2_trans_begin(trans);
+
+       return b;
+}
+
 #define __for_each_btree_node(_trans, _iter, _btree_id, _start,                \
                              _locks_want, _depth, _flags, _b, _ret)    \
        for (bch2_trans_node_iter_init((_trans), &(_iter), (_btree_id), \
-                               _start, _locks_want, _depth, _flags),   \
-            _b = bch2_btree_iter_peek_node(&(_iter));                  \
+                               _start, _locks_want, _depth, _flags);   \
+            (_b) = __btree_iter_peek_node_and_restart((_trans), &(_iter)),\
             !((_ret) = PTR_ERR_OR_ZERO(_b)) && (_b);                   \
             (_b) = bch2_btree_iter_next_node(&(_iter)))
 
@@ -297,6 +325,11 @@ static inline int bch2_trans_cond_resched(struct btree_trans *trans)
        __for_each_btree_node(_trans, _iter, _btree_id, _start,         \
                              0, 0, _flags, _b, _ret)
 
+static inline int bkey_err(struct bkey_s_c k)
+{
+       return PTR_ERR_OR_ZERO(k.k);
+}
+
 static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter,
                                                     unsigned flags)
 {
@@ -305,51 +338,50 @@ static inline struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter,
                : bch2_btree_iter_peek(iter);
 }
 
-static inline struct bkey_s_c __bch2_btree_iter_next(struct btree_iter *iter,
-                                                    unsigned flags)
+static inline struct bkey_s_c
+__bch2_btree_iter_peek_and_restart(struct btree_trans *trans,
+                                  struct btree_iter *iter, unsigned flags)
 {
-       return flags & BTREE_ITER_SLOTS
-               ? bch2_btree_iter_next_slot(iter)
-               : bch2_btree_iter_next(iter);
-}
+       struct bkey_s_c k;
 
-static inline int bkey_err(struct bkey_s_c k)
-{
-       return PTR_ERR_OR_ZERO(k.k);
+       while (k = __bch2_btree_iter_peek(iter, flags),
+              bkey_err(k) == -EINTR)
+               bch2_trans_begin(trans);
+
+       return k;
 }
 
 #define for_each_btree_key(_trans, _iter, _btree_id,                   \
                           _start, _flags, _k, _ret)                    \
        for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),      \
-                                 (_start), (_flags)),                  \
-            (_k) = __bch2_btree_iter_peek(&(_iter), _flags);           \
+                                 (_start), (_flags));                  \
+            (_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\
             !((_ret) = bkey_err(_k)) && (_k).k;                        \
-            (_k) = __bch2_btree_iter_next(&(_iter), _flags))
+            bch2_btree_iter_advance(&(_iter)))
 
-#define for_each_btree_key_continue(_iter, _flags, _k, _ret)           \
-       for ((_k) = __bch2_btree_iter_peek(&(_iter), _flags);           \
+#define for_each_btree_key_norestart(_trans, _iter, _btree_id,         \
+                          _start, _flags, _k, _ret)                    \
+       for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),      \
+                                 (_start), (_flags));                  \
+            (_k) = __bch2_btree_iter_peek(&(_iter), _flags),           \
             !((_ret) = bkey_err(_k)) && (_k).k;                        \
-            (_k) = __bch2_btree_iter_next(&(_iter), _flags))
+            bch2_btree_iter_advance(&(_iter)))
 
-/* new multiple iterator interface: */
-
-void bch2_dump_trans_paths_updates(struct btree_trans *);
+#define for_each_btree_key_continue(_trans, _iter, _flags, _k, _ret)   \
+       for (;                                                          \
+            (_k) = __bch2_btree_iter_peek_and_restart((_trans), &(_iter), _flags),\
+            !((_ret) = bkey_err(_k)) && (_k).k;                        \
+            bch2_btree_iter_advance(&(_iter)))
 
-void bch2_trans_iter_exit(struct btree_trans *, struct btree_iter *);
-void bch2_trans_iter_init(struct btree_trans *, struct btree_iter *,
-                         unsigned, struct bpos, unsigned);
-void bch2_trans_node_iter_init(struct btree_trans *, struct btree_iter *,
-                              enum btree_id, struct bpos,
-                              unsigned, unsigned, unsigned);
-void bch2_trans_copy_iter(struct btree_iter *, struct btree_iter *);
+#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \
+       for (;                                                          \
+            (_k) = __bch2_btree_iter_peek(&(_iter), _flags),           \
+            !((_ret) = bkey_err(_k)) && (_k).k;                        \
+            bch2_btree_iter_advance(&(_iter)))
 
-static inline void set_btree_iter_dontneed(struct btree_iter *iter)
-{
-       iter->path->preserve = false;
-}
+/* new multiple iterator interface: */
 
-void *bch2_trans_kmalloc(struct btree_trans *, size_t);
-void bch2_trans_begin(struct btree_trans *);
+void bch2_dump_trans_paths_updates(struct btree_trans *);
 void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
 void bch2_trans_exit(struct btree_trans *);
 
index b4a2f2e322489584df60ce34205494c87b762a3b..b344979ca7ed18de4d01a215a931ca7f3eabd846 100644 (file)
@@ -1216,7 +1216,7 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans,
 
        pos.snapshot++;
 
-       for_each_btree_key(trans, iter, btree_id, pos,
+       for_each_btree_key_norestart(trans, iter, btree_id, pos,
                           BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
                if (bkey_cmp(k.k->p, pos))
                        break;
index 26df20ad090cd91956803e03ebcf163c3196b977..00dac68701f54a21d185da64b5ad657db1688193 100644 (file)
@@ -432,7 +432,7 @@ int bch2_empty_dir_trans(struct btree_trans *trans, subvol_inum dir)
        if (ret)
                return ret;
 
-       for_each_btree_key(trans, iter, BTREE_ID_dirents,
+       for_each_btree_key_norestart(trans, iter, BTREE_ID_dirents,
                           SPOS(dir.inum, 0, snapshot), 0, k, ret) {
                if (k.k->p.inode > dir.inum)
                        break;
@@ -464,7 +464,7 @@ retry:
        if (ret)
                goto err;
 
-       for_each_btree_key(&trans, iter, BTREE_ID_dirents,
+       for_each_btree_key_norestart(&trans, iter, BTREE_ID_dirents,
                           SPOS(inum.inum, ctx->pos, snapshot), 0, k, ret) {
                if (k.k->p.inode > inum.inum)
                        break;
index 9d959b053defd6c23a43188325db20ad145c3ac3..58b2c96f450c9ba8a4787431665e5e42b7b9833f 100644 (file)
@@ -61,7 +61,7 @@ static int count_iters_for_insert(struct btree_trans *trans,
                struct btree_iter iter;
                struct bkey_s_c r_k;
 
-               for_each_btree_key(trans, iter,
+               for_each_btree_key_norestart(trans, iter,
                                   BTREE_ID_reflink, POS(0, idx + offset),
                                   BTREE_ITER_SLOTS, r_k, ret2) {
                        if (bkey_cmp(bkey_start_pos(r_k.k),
@@ -120,7 +120,7 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
 
        bch2_trans_copy_iter(&copy, iter);
 
-       for_each_btree_key_continue(copy, 0, k, ret) {
+       for_each_btree_key_continue_norestart(copy, 0, k, ret) {
                unsigned offset = 0;
 
                if (bkey_cmp(bkey_start_pos(k.k), *end) >= 0)
index 079c20cbf10e4282420cf30672b7919907d76b2d..f4c97fc0e3d1080ac72e14b1788328573f51afb6 100644 (file)
@@ -1815,7 +1815,7 @@ retry:
        if (err)
                goto err;
 
-       for_each_btree_key(&trans, iter, BTREE_ID_extents,
+       for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
                           SPOS(inum.inum, offset, snapshot),
                           BTREE_ITER_SLOTS, k, err) {
                if (bkey_cmp(bkey_start_pos(k.k), POS(inum.inum, end)) >= 0)
@@ -2208,7 +2208,7 @@ retry:
        if (ret)
                goto err;
 
-       for_each_btree_key(&trans, iter, BTREE_ID_extents, start, 0, k, ret) {
+       for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents, start, 0, k, ret) {
                if (bkey_cmp(bkey_start_pos(k.k), end) >= 0)
                        break;
 
@@ -3111,7 +3111,7 @@ retry:
        if (ret)
                goto err;
 
-       for_each_btree_key(&trans, iter, BTREE_ID_extents,
+       for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
                           SPOS(inode->v.i_ino, offset >> 9, snapshot), 0, k, ret) {
                if (k.k->p.inode != inode->v.i_ino) {
                        break;
@@ -3218,7 +3218,7 @@ retry:
        if (ret)
                goto err;
 
-       for_each_btree_key(&trans, iter, BTREE_ID_extents,
+       for_each_btree_key_norestart(&trans, iter, BTREE_ID_extents,
                           SPOS(inode->v.i_ino, offset >> 9, snapshot),
                           BTREE_ITER_SLOTS, k, ret) {
                if (k.k->p.inode != inode->v.i_ino) {
index ea2adcc213d08f577a565be2ff9fb8d697b2689e..772fdeb722c7c2c14348b300e46c19b74a4b2647 100644 (file)
@@ -216,7 +216,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
 
        bch2_trans_copy_iter(&iter, extent_iter);
 
-       for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, old, ret) {
+       for_each_btree_key_continue_norestart(iter, BTREE_ITER_SLOTS, old, ret) {
                s64 sectors = min(new->k.p.offset, old.k->p.offset) -
                        max(bkey_start_offset(&new->k),
                            bkey_start_offset(old.k));
index 2827d0ef10195dd36a45ac87dca7522ebd5dab9b..8e66e6390e6249221f7de700706c41cd8df8b397 100644 (file)
@@ -131,7 +131,7 @@ static int bch2_make_extent_indirect(struct btree_trans *trans,
        if (orig->k.type == KEY_TYPE_inline_data)
                bch2_check_set_feature(c, BCH_FEATURE_reflink_inline_data);
 
-       for_each_btree_key(trans, reflink_iter, BTREE_ID_reflink,
+       for_each_btree_key_norestart(trans, reflink_iter, BTREE_ID_reflink,
                           POS(0, c->reflink_hint),
                           BTREE_ITER_INTENT|BTREE_ITER_SLOTS, k, ret) {
                if (reflink_iter.pos.inode) {
@@ -194,7 +194,7 @@ static struct bkey_s_c get_next_src(struct btree_iter *iter, struct bpos end)
        struct bkey_s_c k;
        int ret;
 
-       for_each_btree_key_continue(*iter, 0, k, ret) {
+       for_each_btree_key_continue_norestart(*iter, 0, k, ret) {
                if (bkey_cmp(iter->pos, end) >= 0)
                        break;
 
index 6486e709b700de4f8160334ad54e0b08f7a871d7..3e54d0b0fb5c603aa352e989a5c44def1437085e 100644 (file)
@@ -156,7 +156,7 @@ bch2_hash_lookup(struct btree_trans *trans,
        if (ret)
                return ret;
 
-       for_each_btree_key(trans, *iter, desc.btree_id,
+       for_each_btree_key_norestart(trans, *iter, desc.btree_id,
                           SPOS(inum.inum, desc.hash_key(info, key), snapshot),
                           BTREE_ITER_SLOTS|flags, k, ret) {
                if (iter->pos.inode != inum.inum)
@@ -192,7 +192,7 @@ bch2_hash_hole(struct btree_trans *trans,
        if (ret)
                return ret;
 
-       for_each_btree_key(trans, *iter, desc.btree_id,
+       for_each_btree_key_norestart(trans, *iter, desc.btree_id,
                           SPOS(inum.inum, desc.hash_key(info, key), snapshot),
                           BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
                if (iter->pos.inode != inum.inum)
@@ -220,7 +220,7 @@ int bch2_hash_needs_whiteout(struct btree_trans *trans,
 
        bch2_btree_iter_advance(&iter);
 
-       for_each_btree_key_continue(iter, BTREE_ITER_SLOTS, k, ret) {
+       for_each_btree_key_continue_norestart(iter, BTREE_ITER_SLOTS, k, ret) {
                if (k.k->type != desc.key_type &&
                    k.k->type != KEY_TYPE_hash_whiteout)
                        break;
@@ -253,7 +253,7 @@ int bch2_hash_set(struct btree_trans *trans,
        if (ret)
                return ret;
 
-       for_each_btree_key(trans, iter, desc.btree_id,
+       for_each_btree_key_norestart(trans, iter, desc.btree_id,
                           SPOS(inum.inum,
                                desc.hash_bkey(info, bkey_i_to_s_c(insert)),
                                snapshot),
index dcd2f6a91a7269bc326acb3738dd09a2ccf389ce..181af89b0553c538ef5d226cfa644e075a8dd24e 100644 (file)
@@ -295,7 +295,7 @@ retry:
        if (ret)
                goto err;
 
-       for_each_btree_key(&trans, iter, BTREE_ID_xattrs,
+       for_each_btree_key_norestart(&trans, iter, BTREE_ID_xattrs,
                           SPOS(inum, offset, snapshot), 0, k, ret) {
                BUG_ON(k.k->p.inode < inum);