bcachefs: BTREE_ITER_WITH_KEY_CACHE
authorKent Overstreet <kent.overstreet@gmail.com>
Mon, 7 Feb 2022 04:15:12 +0000 (23:15 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:23 +0000 (17:09 -0400)
This is the start of cache coherency with the btree key cache - this
adds a btree iterator flag that causes lookups to also check the key
cache when we're iterating over the btree (not iterating over the key
cache).

Note that we could still race with another thread creating an item in
the key cache and updating it, since we aren't holding the key cache
locked if it wasn't found. The next patch for the update path will
address this by causing the transaction to restart if the key cache is
found to be dirty.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_iter.h
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_types.h

index 200108c0c778a87679456ab696f63e32ccf03889..ff98024e76fc772413a8516134cf76af14c425ab 100644 (file)
@@ -1964,13 +1964,13 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
 
        struct bkey_s_c k;
 
-       BUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
-
        if (!path->cached) {
                struct btree_path_level *l = path_l(path);
-               struct bkey_packed *_k =
-                       bch2_btree_node_iter_peek_all(&l->iter, l->b);
+               struct bkey_packed *_k;
+
+               EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
 
+               _k = bch2_btree_node_iter_peek_all(&l->iter, l->b);
                k = _k ? bkey_disassemble(l->b, _k, u) : bkey_s_c_null;
 
                EBUG_ON(k.k && bkey_deleted(k.k) && bpos_cmp(k.k->p, path->pos) == 0);
@@ -1980,12 +1980,15 @@ inline struct bkey_s_c bch2_btree_path_peek_slot(struct btree_path *path, struct
        } else {
                struct bkey_cached *ck = (void *) path->l[0].b;
 
-               EBUG_ON(path->btree_id != ck->key.btree_id ||
-                       bkey_cmp(path->pos, ck->key.pos));
+               EBUG_ON(ck &&
+                       (path->btree_id != ck->key.btree_id ||
+                        bkey_cmp(path->pos, ck->key.pos)));
 
-               /* BTREE_ITER_CACHED_NOFILL? */
-               if (unlikely(!ck->valid))
-                       goto hole;
+               /* BTREE_ITER_CACHED_NOFILL|BTREE_ITER_CACHED_NOCREATE? */
+               if (unlikely(!ck || !ck->valid))
+                       return bkey_s_c_null;
+
+               EBUG_ON(path->uptodate != BTREE_ITER_UPTODATE);
 
                *u = ck->k->k;
                k = bkey_i_to_s_c(ck->k);
@@ -2233,11 +2236,43 @@ struct bkey_s_c btree_trans_peek_journal(struct btree_trans *trans,
        return k;
 }
 
+/*
+ * Checks btree key cache for key at iter->pos and returns it if present, or
+ * bkey_s_c_null:
+ */
+static noinline
+struct bkey_s_c btree_trans_peek_key_cache(struct btree_iter *iter, struct bpos pos)
+{
+       struct btree_trans *trans = iter->trans;
+       struct bch_fs *c = trans->c;
+       struct bkey u;
+       int ret;
+
+       if (!bch2_btree_key_cache_find(c, iter->btree_id, pos))
+               return bkey_s_c_null;
+
+       if (!iter->key_cache_path)
+               iter->key_cache_path = bch2_path_get(trans, iter->btree_id, pos,
+                                                    iter->flags & BTREE_ITER_INTENT, 0,
+                                                    iter->flags|BTREE_ITER_CACHED);
+
+       iter->key_cache_path = bch2_btree_path_set_pos(trans, iter->key_cache_path, pos,
+                                       iter->flags & BTREE_ITER_INTENT);
+
+       ret = bch2_btree_path_traverse(trans, iter->key_cache_path, iter->flags|BTREE_ITER_CACHED);
+       if (unlikely(ret))
+               return bkey_s_c_err(ret);
+
+       iter->key_cache_path->should_be_locked = true;
+
+       return bch2_btree_path_peek_slot(iter->key_cache_path, &u);
+}
+
 static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bpos search_key)
 {
        struct btree_trans *trans = iter->trans;
        struct bkey_i *next_update;
-       struct bkey_s_c k;
+       struct bkey_s_c k, k2;
        int ret;
 
        EBUG_ON(iter->path->cached || iter->path->level);
@@ -2255,8 +2290,24 @@ static struct bkey_s_c __bch2_btree_iter_peek(struct btree_iter *iter, struct bp
                        goto out;
                }
 
+               iter->path->should_be_locked = true;
+
                k = btree_path_level_peek_all(trans->c, &iter->path->l[0], &iter->k);
 
+               if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
+                   k.k &&
+                   (k2 = btree_trans_peek_key_cache(iter, k.k->p)).k) {
+                       ret = bkey_err(k2);
+                       if (ret) {
+                               k = k2;
+                               bch2_btree_iter_set_pos(iter, iter->pos);
+                               goto out;
+                       }
+
+                       k = k2;
+                       iter->k = *k.k;
+               }
+
                if (unlikely(iter->flags & BTREE_ITER_WITH_JOURNAL))
                        k = btree_trans_peek_journal(trans, iter, k);
 
@@ -2603,6 +2654,13 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
                    (k = btree_trans_peek_slot_journal(trans, iter)).k)
                        goto out;
 
+               if (unlikely(iter->flags & BTREE_ITER_WITH_KEY_CACHE) &&
+                   (k = btree_trans_peek_key_cache(iter, iter->pos)).k) {
+                       if (!bkey_err(k))
+                               iter->k = *k.k;
+                       goto out;
+               }
+
                k = bch2_btree_path_peek_slot(iter->path, &iter->k);
        } else {
                struct bpos next;
@@ -2806,8 +2864,12 @@ void bch2_trans_iter_exit(struct btree_trans *trans, struct btree_iter *iter)
        if (iter->update_path)
                bch2_path_put(trans, iter->update_path,
                              iter->flags & BTREE_ITER_INTENT);
+       if (iter->key_cache_path)
+               bch2_path_put(trans, iter->key_cache_path,
+                             iter->flags & BTREE_ITER_INTENT);
        iter->path = NULL;
        iter->update_path = NULL;
+       iter->key_cache_path = NULL;
 }
 
 static void __bch2_trans_iter_init(struct btree_trans *trans,
@@ -2834,12 +2896,16 @@ static void __bch2_trans_iter_init(struct btree_trans *trans,
        if (trans->journal_replay_not_finished)
                flags |= BTREE_ITER_WITH_JOURNAL;
 
-       if (!btree_id_cached(trans->c, btree_id))
+       if (!btree_id_cached(trans->c, btree_id)) {
                flags &= ~BTREE_ITER_CACHED;
+               flags &= ~BTREE_ITER_WITH_KEY_CACHE;
+       } else if (!(flags & BTREE_ITER_CACHED))
+               flags |= BTREE_ITER_WITH_KEY_CACHE;
 
        iter->trans     = trans;
        iter->path      = NULL;
        iter->update_path = NULL;
+       iter->key_cache_path = NULL;
        iter->btree_id  = btree_id;
        iter->min_depth = depth;
        iter->flags     = flags;
@@ -2887,6 +2953,7 @@ void bch2_trans_copy_iter(struct btree_iter *dst, struct btree_iter *src)
                __btree_path_get(src->path, src->flags & BTREE_ITER_INTENT);
        if (src->update_path)
                __btree_path_get(src->update_path, src->flags & BTREE_ITER_INTENT);
+       dst->key_cache_path = NULL;
 }
 
 void *bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
index 3f8aaccc52086ffaddc4d6ab8e5671ca79ce9fc8..1e3172a2885ae03013524d072a301b85b551db7d 100644 (file)
@@ -50,11 +50,6 @@ static inline struct btree *btree_node_parent(struct btree_path *path,
        return btree_path_node(path, b->c.level + 1);
 }
 
-static inline int btree_iter_err(const struct btree_iter *iter)
-{
-       return iter->flags & BTREE_ITER_ERROR ? -EIO : 0;
-}
-
 /* Iterate over paths within a transaction: */
 
 void __bch2_btree_trans_sort_paths(struct btree_trans *);
index ba50cad14757192ce0a611d3d408b2caa4a8d694..29d46d0aa5d3d474258b16b4bafbd6df16e684a9 100644 (file)
@@ -209,19 +209,20 @@ static int btree_key_cache_fill(struct btree_trans *trans,
                                struct btree_path *ck_path,
                                struct bkey_cached *ck)
 {
-       struct btree_iter iter;
+       struct btree_path *path;
        struct bkey_s_c k;
        unsigned new_u64s = 0;
        struct bkey_i *new_k = NULL;
+       struct bkey u;
        int ret;
 
-       bch2_trans_iter_init(trans, &iter, ck->key.btree_id,
-                            ck->key.pos, BTREE_ITER_SLOTS);
-       k = bch2_btree_iter_peek_slot(&iter);
-       ret = bkey_err(k);
+       path = bch2_path_get(trans, ck->key.btree_id, ck->key.pos, 0, 0, 0);
+       ret = bch2_btree_path_traverse(trans, path, 0);
        if (ret)
                goto err;
 
+       k = bch2_btree_path_peek_slot(path, &u);
+
        if (!bch2_btree_node_relock(trans, ck_path, 0)) {
                trace_trans_restart_relock_key_cache_fill(trans->fn,
                                _THIS_IP_, ck_path->btree_id, &ck_path->pos);
@@ -262,9 +263,9 @@ static int btree_key_cache_fill(struct btree_trans *trans,
        bch2_btree_node_unlock_write(trans, ck_path, ck_path->l[0].b);
 
        /* We're not likely to need this iterator again: */
-       set_btree_iter_dontneed(&iter);
+       path->preserve = false;
 err:
-       bch2_trans_iter_exit(trans, &iter);
+       bch2_path_put(trans, path, 0);
        return ret;
 }
 
@@ -385,6 +386,8 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
                             BTREE_ITER_CACHED_NOFILL|
                             BTREE_ITER_CACHED_NOCREATE|
                             BTREE_ITER_INTENT);
+       b_iter.flags &= ~BTREE_ITER_WITH_KEY_CACHE;
+
        ret = bch2_btree_iter_traverse(&c_iter);
        if (ret)
                goto out;
index 587307ff5321f70e10d220ba3a14b1c65c0ccce0..6db2ac49ee3fa12394fcc0a42172157ff35fcaf8 100644 (file)
@@ -202,10 +202,10 @@ struct btree_node_iter {
  */
 #define BTREE_ITER_IS_EXTENTS          (1 << 4)
 #define BTREE_ITER_NOT_EXTENTS         (1 << 5)
-#define BTREE_ITER_ERROR               (1 << 6)
-#define BTREE_ITER_CACHED              (1 << 7)
-#define BTREE_ITER_CACHED_NOFILL       (1 << 8)
-#define BTREE_ITER_CACHED_NOCREATE     (1 << 9)
+#define BTREE_ITER_CACHED              (1 << 6)
+#define BTREE_ITER_CACHED_NOFILL       (1 << 7)
+#define BTREE_ITER_CACHED_NOCREATE     (1 << 8)
+#define BTREE_ITER_WITH_KEY_CACHE      (1 << 9)
 #define BTREE_ITER_WITH_UPDATES                (1 << 10)
 #define BTREE_ITER_WITH_JOURNAL                (1 << 11)
 #define __BTREE_ITER_ALL_SNAPSHOTS     (1 << 12)
@@ -277,6 +277,7 @@ struct btree_iter {
        struct btree_trans      *trans;
        struct btree_path       *path;
        struct btree_path       *update_path;
+       struct btree_path       *key_cache_path;
 
        enum btree_id           btree_id:4;
        unsigned                min_depth:4;