From b0babf2a34233c651060e54b68fa3cd0b9e7a6e7 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Tue, 12 Apr 2022 18:04:08 -0400 Subject: [PATCH] bcachefs: bch2_btree_iter_peek_all_levels() This adds bch2_btree_iter_peek_all_levels(), which returns keys from every level of the btree - interior nodes included - in monotonically increasing order, soon to be used by the backpointers check & repair code. - BTREE_ITER_ALL_LEVELS can now be passed to for_each_btree_key() to iterate thusly, much like BTREE_ITER_SLOTS - The existing algorithm in bch2_btree_iter_advance() doesn't work with peek_all_levels(): we have to defer the actual advancing until the next time we call peek, where we have the btree path traversed and uptodate. So, we add an advanced bit to btree_iter; when BTREE_ITER_ALL_LEVELS is set bch2_btree_iter_advanced() just marks the iterator as advanced. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_iter.c | 123 +++++++++++++++++++++++++++++++++++--- fs/bcachefs/btree_iter.h | 8 ++- fs/bcachefs/btree_types.h | 15 ++--- 3 files changed, 125 insertions(+), 21 deletions(-) diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index 74d41fe5c074d..cd85c3ad2ab74 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2181,15 +2181,23 @@ err: inline bool bch2_btree_iter_advance(struct btree_iter *iter) { - struct bpos pos = iter->k.p; - bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS - ? bpos_cmp(pos, SPOS_MAX) - : bkey_cmp(pos, SPOS_MAX)) != 0; + if (likely(!(iter->flags & BTREE_ITER_ALL_LEVELS))) { + struct bpos pos = iter->k.p; + bool ret = (iter->flags & BTREE_ITER_ALL_SNAPSHOTS + ? bpos_cmp(pos, SPOS_MAX) + : bkey_cmp(pos, SPOS_MAX)) != 0; + + if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) + pos = bkey_successor(iter, pos); + bch2_btree_iter_set_pos(iter, pos); + return ret; + } else { + if (!btree_path_node(iter->path, iter->path->level)) + return true; - if (ret && !(iter->flags & BTREE_ITER_IS_EXTENTS)) - pos = bkey_successor(iter, pos); - bch2_btree_iter_set_pos(iter, pos); - return ret; + iter->advanced = true; + return false; + } } inline bool bch2_btree_iter_rewind(struct btree_iter *iter) @@ -2396,6 +2404,8 @@ struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *iter, struct bpos e struct bpos iter_pos; int ret; + EBUG_ON(iter->flags & BTREE_ITER_ALL_LEVELS); + if (iter->update_path) { bch2_path_put(trans, iter->update_path, iter->flags & BTREE_ITER_INTENT); @@ -2510,6 +2520,99 @@ out: return k; } +/** + * bch2_btree_iter_peek_all_levels: returns the first key greater than or equal + * to iterator's current position, returning keys from every level of the btree. + * For keys at different levels of the btree that compare equal, the key from + * the lower level (leaf) is returned first. + */ +struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *iter) +{ + struct btree_trans *trans = iter->trans; + struct bkey_s_c k; + int ret; + + EBUG_ON(iter->path->cached); + bch2_btree_iter_verify(iter); + BUG_ON(iter->path->level < iter->min_depth); + BUG_ON(!(iter->flags & BTREE_ITER_ALL_SNAPSHOTS)); + EBUG_ON(!(iter->flags & BTREE_ITER_ALL_LEVELS)); + + while (1) { + iter->path = bch2_btree_path_set_pos(trans, iter->path, iter->pos, + iter->flags & BTREE_ITER_INTENT); + + ret = bch2_btree_path_traverse(trans, iter->path, iter->flags); + if (unlikely(ret)) { + /* ensure that iter->k is consistent with iter->pos: */ + bch2_btree_iter_set_pos(iter, iter->pos); + k = bkey_s_c_err(ret); + goto out; + } + + /* Already at end? */ + if (!btree_path_node(iter->path, iter->path->level)) { + k = bkey_s_c_null; + goto out; + } + + k = btree_path_level_peek_all(trans->c, + &iter->path->l[iter->path->level], &iter->k); + + /* Check if we should go up to the parent node: */ + if (!k.k || + (iter->advanced && + !bpos_cmp(path_l(iter->path)->b->key.k.p, iter->pos))) { + iter->pos = path_l(iter->path)->b->key.k.p; + btree_path_set_level_up(iter->path); + iter->advanced = false; + continue; + } + + /* + * Check if we should go back down to a leaf: + * If we're not in a leaf node, we only return the current key + * if it exactly matches iter->pos - otherwise we first have to + * go back to the leaf: + */ + if (iter->path->level != iter->min_depth && + (iter->advanced || + !k.k || + bpos_cmp(iter->pos, k.k->p))) { + btree_path_set_level_down(trans, iter->path, iter->min_depth); + iter->pos = bpos_successor(iter->pos); + iter->advanced = false; + continue; + } + + /* Check if we should go to the next key: */ + if (iter->path->level == iter->min_depth && + iter->advanced && + k.k && + !bpos_cmp(iter->pos, k.k->p)) { + iter->pos = bpos_successor(iter->pos); + iter->advanced = false; + continue; + } + + if (iter->advanced && + iter->path->level == iter->min_depth && + bpos_cmp(k.k->p, iter->pos)) + iter->advanced = false; + + BUG_ON(iter->advanced); + BUG_ON(!k.k); + break; + } + + iter->pos = k.k->p; +out: + iter->path->should_be_locked = true; + bch2_btree_iter_verify(iter); + + return k; +} + /** * bch2_btree_iter_next: returns first key greater than iterator's current * position @@ -2665,6 +2768,7 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter) bch2_btree_iter_verify(iter); bch2_btree_iter_verify_entry_exit(iter); + EBUG_ON(iter->flags & BTREE_ITER_ALL_LEVELS); EBUG_ON(iter->path->level && (iter->flags & BTREE_ITER_WITH_KEY_CACHE)); /* extents can't span inode numbers: */ @@ -2935,6 +3039,9 @@ static void __bch2_trans_iter_init(struct btree_trans *trans, { EBUG_ON(trans->restarted); + if (flags & BTREE_ITER_ALL_LEVELS) + flags |= BTREE_ITER_ALL_SNAPSHOTS|__BTREE_ITER_ALL_SNAPSHOTS; + if (!(flags & (BTREE_ITER_ALL_SNAPSHOTS|BTREE_ITER_NOT_EXTENTS)) && btree_node_type_is_extents(btree_id)) flags |= BTREE_ITER_IS_EXTENTS; diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h index 29c1df83b35eb..dc6f07492bc9a 100644 --- a/fs/bcachefs/btree_iter.h +++ b/fs/bcachefs/btree_iter.h @@ -249,6 +249,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *); struct bkey_s_c bch2_btree_iter_peek_upto(struct btree_iter *, struct bpos); struct bkey_s_c bch2_btree_iter_next(struct btree_iter *); +struct bkey_s_c bch2_btree_iter_peek_all_levels(struct btree_iter *); + static inline struct bkey_s_c bch2_btree_iter_peek(struct btree_iter *iter) { return bch2_btree_iter_peek_upto(iter, SPOS_MAX); @@ -350,9 +352,9 @@ static inline int bkey_err(struct bkey_s_c k) static inline struct bkey_s_c bch2_btree_iter_peek_type(struct btree_iter *iter, unsigned flags) { - return flags & BTREE_ITER_SLOTS - ? bch2_btree_iter_peek_slot(iter) - : bch2_btree_iter_peek(iter); + return flags & BTREE_ITER_ALL_LEVELS ? bch2_btree_iter_peek_all_levels(iter) : + flags & BTREE_ITER_SLOTS ? bch2_btree_iter_peek_slot(iter) : + bch2_btree_iter_peek(iter); } static inline struct bkey_s_c bch2_btree_iter_peek_upto_type(struct btree_iter *iter, diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h index a475b1a9467a2..4f359ff79334f 100644 --- a/fs/bcachefs/btree_types.h +++ b/fs/bcachefs/btree_types.h @@ -182,22 +182,16 @@ struct btree_node_iter { * Iterate over all possible positions, synthesizing deleted keys for holes: */ #define BTREE_ITER_SLOTS (1 << 0) +#define BTREE_ITER_ALL_LEVELS (1 << 1) /* * Indicates that intent locks should be taken on leaf nodes, because we expect * to be doing updates: */ -#define BTREE_ITER_INTENT (1 << 1) +#define BTREE_ITER_INTENT (1 << 2) /* * Causes the btree iterator code to prefetch additional btree nodes from disk: */ -#define BTREE_ITER_PREFETCH (1 << 2) -/* - * Indicates that this iterator should not be reused until transaction commit, - * either because a pending update references it or because the update depends - * on that particular key being locked (e.g. by the str_hash code, for hash - * table consistency) - */ -#define BTREE_ITER_KEEP_UNTIL_COMMIT (1 << 3) +#define BTREE_ITER_PREFETCH (1 << 3) /* * Used in bch2_btree_iter_traverse(), to indicate whether we're searching for * @pos or the first key strictly greater than @pos @@ -282,7 +276,8 @@ struct btree_iter { struct btree_path *key_cache_path; enum btree_id btree_id:4; - unsigned min_depth:4; + unsigned min_depth:3; + unsigned advanced:1; /* btree_iter_copy starts here: */ u16 flags; -- 2.30.2