bcachefs: bch2_btree_iter_peek_all_levels()
author     Kent Overstreet <kent.overstreet@gmail.com>
Tue, 12 Apr 2022 22:04:08 +0000 (18:04 -0400)
committer  Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:32 +0000 (17:09 -0400)
This adds bch2_btree_iter_peek_all_levels(), which returns keys from
every level of the btree - interior nodes included - in monotonically
increasing order, soon to be used by the backpointers check & repair
code.

 - BTREE_ITER_ALL_LEVELS can now be passed to for_each_btree_key() to
   iterate this way, much like BTREE_ITER_SLOTS (see the usage sketch
   below)

 - The existing algorithm in bch2_btree_iter_advance() doesn't work with
   peek_all_levels(): we have to defer the actual advancing until the
   next call to peek, when the btree path has been traversed and is up
   to date. So, we add an advanced bit to btree_iter; when
   BTREE_ITER_ALL_LEVELS is set, bch2_btree_iter_advance() just marks
   the iterator as advanced.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_iter.h
fs/bcachefs/btree_types.h

fs/bcachefs/btree_iter.c
index 74d41fe5c074d16a84d0c786fe13f544e4bca212..cd85c3ad2ab74a306ff53034f4ef54123f45139f 100644
@@ -2181,15 +2181,23 @@ err:
 
 inline bool bch2_btree_iter_advance(struct btree_iter *iter)
 {
-       struct bpos pos = iter->k.p;
-       bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS
-                   ? bpos_cmp(pos, SPOS_MAX)
-                   : bkey_cmp(pos, SPOS_MAX)) != 0;
+       if (likely(!(iter->flags & BTREE_ITER_ALL_LEVELS))) {
+               struct bpos pos = iter->k.p;
+               bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS
+                           ? bpos_cmp(pos, SPOS_MAX)
+                           : bkey_cmp(pos, SPOS_MAX)) != 0;
+
+               if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
+                       pos = bkey_successor(iter, pos);
+               bch2_btree_iter_set_pos(iter, pos);
+               return ret;
+       } else {
+               if (!btree_path_node(iter->path, iter->path->level))
+                       return true;
 
-       if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS))
-               pos = bkey_successor(iter, pos);
-       bch2_btree_iter_set_pos(iter, pos);
-       return ret;
+               iter->advanced = true;
+               return false;
+       }
 }
 
 inline bool bch2_btree_iter_rewind(struct btree_iter *iter)
@@ -2396,6 +2404,8 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e
        struct bpos iter_pos;
        int ret;
 
+       EBUG_ON(iter->flags & BTREE_ITER_ALL_LEVELS);
+
        if (iter->update_path) {
                bch2_path_put(trans, iter->update_path,
                              iter->flags & BTREE_ITER_INTENT);
@@ -2510,6 +2520,99 @@ out:
        return k;
 }
 
+/**
+ * bch2_btree_iter_peek_all_levels: returns the first key greater than or equal
+ * to iterator's current position, returning keys from every level of the btree.
+ * For keys at different levels of the btree that compare equal, the key from
+ * the lower level (leaf) is returned first.
+ */
+struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter)
+{
+       struct btree_trans *trans = iter->trans;
+       struct bkey_s_c k;
+       int ret;
+
+       EBUG_ON(iter->path->cached);
+       bch2_btree_iter_verify(iter);
+       BUG_ON(iter->path->level < iter->min_depth);
+       BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS));
+       EBUG_ON(!(iter->flags & BTREE_ITER_ALL_LEVELS));
+
+       while (1) {
+               iter->path = bch2_btree_path_set_pos(trans, iter->path, iter->pos,
+                                       iter->flags & BTREE_ITER_INTENT);
+
+               ret = bch2_btree_path_traverse(trans, iter->path, iter->flags);
+               if (unlikely(ret)) {
+                       /* ensure that iter->k is consistent with iter->pos: */
+                       bch2_btree_iter_set_pos(iter, iter->pos);
+                       k = bkey_s_c_err(ret);
+                       goto out;
+               }
+
+               /* Already at end? */
+               if (!btree_path_node(iter->path, iter->path->level)) {
+                       k = bkey_s_c_null;
+                       goto out;
+               }
+
+               k = btree_path_level_peek_all(trans->c,
+                               &iter->path->l[iter->path->level], &iter->k);
+
+               /* Check if we should go up to the parent node: */
+               if (!k.k ||
+                   (iter->advanced &&
+                    !bpos_cmp(path_l(iter->path)->b->key.k.p, iter->pos))) {
+                       iter->pos = path_l(iter->path)->b->key.k.p;
+                       btree_path_set_level_up(iter->path);
+                       iter->advanced = false;
+                       continue;
+               }
+
+               /*
+                * Check if we should go back down to a leaf:
+                * If we're not in a leaf node, we only return the current key
+                * if it exactly matches iter->pos - otherwise we first have to
+                * go back to the leaf:
+                */
+               if (iter->path->level != iter->min_depth &&
+                   (iter->advanced ||
+                    !k.k ||
+                    bpos_cmp(iter->pos, k.k->p))) {
+                       btree_path_set_level_down(trans, iter->path, iter->min_depth);
+                       iter->pos = bpos_successor(iter->pos);
+                       iter->advanced = false;
+                       continue;
+               }
+
+               /* Check if we should go to the next key: */
+               if (iter->path->level == iter->min_depth &&
+                   iter->advanced &&
+                   k.k &&
+                   !bpos_cmp(iter->pos, k.k->p)) {
+                       iter->pos = bpos_successor(iter->pos);
+                       iter->advanced = false;
+                       continue;
+               }
+
+               if (iter->advanced &&
+                   iter->path->level == iter->min_depth &&
+                   bpos_cmp(k.k->p, iter->pos))
+                       iter->advanced = false;
+
+               BUG_ON(iter->advanced);
+               BUG_ON(!k.k);
+               break;
+       }
+
+       iter->pos = k.k->p;
+out:
+       iter->path->should_be_locked = true;
+       bch2_btree_iter_verify(iter);
+
+       return k;
+}
+
 /**
  * bch2_btree_iter_next: returns first key greater than iterator's current
  * position
@@ -2665,6 +2768,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
 
        bch2_btree_iter_verify(iter);
        bch2_btree_iter_verify_entry_exit(iter);
+       EBUG_ON(iter->flags & BTREE_ITER_ALL_LEVELS);
        EBUG_ON(iter->path->level && (iter->flags & BTREE_ITER_WITH_KEY_CACHE));
 
        /* extents can't span inode numbers: */
@@ -2935,6 +3039,9 @@ static void __bch2_trans_iter_init(struct btree_trans *trans,
 {
        EBUG_ON(trans->restarted);
 
+       if (flags & BTREE_ITER_ALL_LEVELS)
+               flags |= BTREE_ITER_ALL_SNAPSHOTS|__BTREE_ITER_ALL_SNAPSHOTS;
+
        if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) &&
            btree_node_type_is_extents(btree_id))
                flags |= BTREE_ITER_IS_EXTENTS;
fs/bcachefs/btree_iter.h
index 29c1df83b35ebffa57b8117c841a5cddf4a79fd3..dc6f07492bc9a8711f5e69bd41a819dd70aefc74 100644
@@ -249,6 +249,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *);
 struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos);
 struct bkey_s_c bch2_btree_iter_next(struct btree_iter *);
 
+struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *);
+
 static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter)
 {
        return bch2_btree_iter_peek_upto(iter, SPOS_MAX);
@@ -350,9 +352,9 @@ static inline int bkey_err(struct bkey_s_c k)
 static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter,
                                                        unsigned flags)
 {
-       return flags & BTREE_ITER_SLOTS
-               ? bch2_btree_iter_peek_slot(iter)
-               : bch2_btree_iter_peek(iter);
+       return  flags & BTREE_ITER_ALL_LEVELS ? bch2_btree_iter_peek_all_levels(iter) :
+               flags & BTREE_ITER_SLOTS      ? bch2_btree_iter_peek_slot(iter) :
+                                               bch2_btree_iter_peek(iter);
 }
 
 static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter,
fs/bcachefs/btree_types.h
index a475b1a9467a2bb063bee7b676f34701ff10be0e..4f359ff79334ffbccb03b7bb3a253fb6b64a8622 100644
@@ -182,22 +182,16 @@ struct btree_node_iter {
  * Iterate over all possible positions, synthesizing deleted keys for holes:
  */
 #define BTREE_ITER_SLOTS               (1 << 0)
+#define BTREE_ITER_ALL_LEVELS          (1 << 1)
 /*
  * Indicates that intent locks should be taken on leaf nodes, because we expect
  * to be doing updates:
  */
-#define BTREE_ITER_INTENT              (1 << 1)
+#define BTREE_ITER_INTENT              (1 << 2)
 /*
  * Causes the btree iterator code to prefetch additional btree nodes from disk:
  */
-#define BTREE_ITER_PREFETCH            (1 << 2)
-/*
- * Indicates that this iterator should not be reused until transaction commit,
- * either because a pending update references it or because the update depends
- * on that particular key being locked (e.g. by the str_hash code, for hash
- * table consistency)
- */
-#define BTREE_ITER_KEEP_UNTIL_COMMIT   (1 << 3)
+#define BTREE_ITER_PREFETCH            (1 << 3)
 /*
  * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for
  * @pos or the first key strictly greater than @pos
@@ -282,7 +276,8 @@ struct btree_iter {
        struct btree_path       *key_cache_path;
 
        enum btree_id           btree_id:4;
-       unsigned                min_depth:4;
+       unsigned                min_depth:3;
+       unsigned                advanced:1;
 
        /* btree_iter_copy starts here: */
        u16                     flags;