From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Sun, 22 Sep 2019 23:10:21 +0000 (-0400)
Subject: bcachefs: Use cached iterators for inode updates
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=8cad3e2f73f5c6ad39e9da5564382a2a737a201c;p=linux.git

bcachefs: Use cached iterators for inode updates

This switches inode updates to use cached btree iterators - which should
be a nice performance boost, since lock contention on the inodes btree
can be a bottleneck on multithreaded workloads.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
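
[Editor's note, not part of the patch:] For illustration, a minimal sketch
of the lookup pattern this change adopts, using only interfaces touched by
the diff below. The helper name example_inode_lookup is hypothetical:

	/*
	 * Hypothetical helper, for illustration only: look up an inode
	 * through the btree key cache instead of the inodes btree itself.
	 */
	static int example_inode_lookup(struct btree_trans *trans, u64 inum,
					struct bch_inode_unpacked *inode)
	{
		struct btree_iter *iter;
		struct bkey_s_c k;
		int ret;

		/* BTREE_ITER_CACHED routes the lookup through the key cache: */
		iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, inum),
					   BTREE_ITER_CACHED);
		if (IS_ERR(iter))
			return PTR_ERR(iter);

		/* Cached iterators are read with peek_cached(), not peek_slot(): */
		k = bch2_btree_iter_peek_cached(iter);
		ret = bkey_err(k);
		if (ret)
			return ret;

		/*
		 * Iterator lifetime is owned by the enclosing transaction,
		 * as in bch2_inode_peek below.
		 */
		return k.k->type == KEY_TYPE_inode
			? bch2_inode_unpack(bkey_s_c_to_inode(k), inode)
			: -ENOENT;
	}

Note the contrast with bch2_inode_rm below: it keeps a BTREE_ITER_SLOTS
iterator on the btree proper, but calls bch2_btree_key_cache_flush first so
the generation number it reads is coherent with any pending cached update.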

diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 1be01035869f3..52b657030755b 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -28,8 +28,8 @@ static const struct rhashtable_params bch2_btree_key_cache_params = {
 };
 
 __flatten
-static inline struct bkey_cached *
-btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos)
+inline struct bkey_cached *
+bch2_btree_key_cache_find(struct bch_fs *c, enum btree_id btree_id, struct bpos pos)
 {
 	struct bkey_cached_key key = {
 		.btree_id	= btree_id,
@@ -218,7 +218,7 @@ int bch2_btree_iter_traverse_cached(struct btree_iter *iter)
 		goto fill;
 	}
 retry:
-	ck = btree_key_cache_find(c, iter->btree_id, iter->pos);
+	ck = bch2_btree_key_cache_find(c, iter->btree_id, iter->pos);
 	if (!ck) {
 		if (iter->flags & BTREE_ITER_CACHED_NOCREATE) {
 			iter->l[0].b = NULL;
@@ -415,7 +415,7 @@ int bch2_btree_key_cache_flush(struct btree_trans *trans,
 	struct bkey_cached_key key = { id, pos };
 
 	/* Fastpath - assume it won't be found: */
-	if (!btree_key_cache_find(c, id, pos))
+	if (!bch2_btree_key_cache_find(c, id, pos))
 		return 0;
 
 	return btree_key_cache_flush_pos(trans, key, 0, true);
@@ -462,7 +462,7 @@ bool bch2_btree_insert_key_cached(struct btree_trans *trans,
 void bch2_btree_key_cache_verify_clean(struct btree_trans *trans,
 			       enum btree_id id, struct bpos pos)
 {
-	BUG_ON(btree_key_cache_find(trans->c, id, pos));
+	BUG_ON(bch2_btree_key_cache_find(trans->c, id, pos));
 }
 #endif
 
diff --git a/fs/bcachefs/btree_key_cache.h b/fs/bcachefs/btree_key_cache.h
index b1756c6c622ce..d448264abcc89 100644
--- a/fs/bcachefs/btree_key_cache.h
+++ b/fs/bcachefs/btree_key_cache.h
@@ -1,6 +1,9 @@
 #ifndef _BCACHEFS_BTREE_KEY_CACHE_H
 #define _BCACHEFS_BTREE_KEY_CACHE_H
 
+struct bkey_cached *
+bch2_btree_key_cache_find(struct bch_fs *, enum btree_id, struct bpos);
+
 int bch2_btree_iter_traverse_cached(struct btree_iter *);
 
 bool bch2_btree_insert_key_cached(struct btree_trans *,
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 71670f415d66d..631c60bb2facf 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
+#include "btree_key_cache.h"
 #include "bkey_methods.h"
 #include "btree_update.h"
 #include "error.h"
@@ -189,11 +190,11 @@ struct btree_iter *bch2_inode_peek(struct btree_trans *trans,
 	int ret;
 
 	iter = bch2_trans_get_iter(trans, BTREE_ID_INODES, POS(0, inum),
-				   BTREE_ITER_SLOTS|flags);
+				   BTREE_ITER_CACHED|flags);
 	if (IS_ERR(iter))
 		return iter;
 
-	k = bch2_btree_iter_peek_slot(iter);
+	k = bch2_btree_iter_peek_cached(iter);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;
@@ -390,7 +391,17 @@ again:
 		if (bkey_cmp(iter->pos, POS(0, max)) > 0)
 			break;
 
-		if (k.k->type != KEY_TYPE_inode)
+		/*
+		 * There's a potential cache coherency issue with the btree key
+		 * cache code here - we're iterating over the btree, skipping
+		 * that cache. We should never see an empty slot that isn't
+		 * actually empty due to a pending update in the key cache
+		 * because the update that creates the inode isn't done with a
+		 * cached iterator, but - better safe than sorry, check the
+		 * cache before using a slot:
+		 */
+		if (k.k->type != KEY_TYPE_inode &&
+		    !bch2_btree_key_cache_find(trans->c, BTREE_ID_INODES, iter->pos))
 			goto found_slot;
 	}
 
@@ -424,6 +435,8 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
 	struct bkey_i_inode_generation delete;
 	struct bpos start = POS(inode_nr, 0);
 	struct bpos end = POS(inode_nr + 1, 0);
+	struct bkey_s_c k;
+	u64 bi_generation;
 	int ret;
 
 	/*
@@ -444,51 +457,62 @@ int bch2_inode_rm(struct bch_fs *c, u64 inode_nr)
 		return ret;
 
 	bch2_trans_init(&trans, c, 0, 0);
+retry:
+	bch2_trans_begin(&trans);
+
+	bi_generation = 0;
+
+	ret = bch2_btree_key_cache_flush(&trans, BTREE_ID_INODES, POS(0, inode_nr));
+	if (ret) {
+		if (ret != -EINTR)
+			bch_err(c, "error flushing btree key cache: %i", ret);
+		goto err;
+	}
 
 	iter = bch2_trans_get_iter(&trans, BTREE_ID_INODES, POS(0, inode_nr),
 				   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
-	do {
-		struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
-		u32 bi_generation = 0;
+	k = bch2_btree_iter_peek_slot(iter);
 
-		ret = bkey_err(k);
-		if (ret)
-			break;
+	ret = bkey_err(k);
+	if (ret)
+		goto err;
 
-		bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c,
-					"inode %llu not found when deleting",
-					inode_nr);
+	bch2_fs_inconsistent_on(k.k->type != KEY_TYPE_inode, c,
+				"inode %llu not found when deleting",
+				inode_nr);
 
-		switch (k.k->type) {
-		case KEY_TYPE_inode: {
-			struct bch_inode_unpacked inode_u;
+	switch (k.k->type) {
+	case KEY_TYPE_inode: {
+		struct bch_inode_unpacked inode_u;
 
-			if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u))
-				bi_generation = inode_u.bi_generation + 1;
-			break;
-		}
-		case KEY_TYPE_inode_generation: {
-			struct bkey_s_c_inode_generation g =
-				bkey_s_c_to_inode_generation(k);
-			bi_generation = le32_to_cpu(g.v->bi_generation);
-			break;
-		}
-		}
+		if (!bch2_inode_unpack(bkey_s_c_to_inode(k), &inode_u))
+			bi_generation = inode_u.bi_generation + 1;
+		break;
+	}
+	case KEY_TYPE_inode_generation: {
+		struct bkey_s_c_inode_generation g =
+			bkey_s_c_to_inode_generation(k);
+		bi_generation = le32_to_cpu(g.v->bi_generation);
+		break;
+	}
+	}
 
-		if (!bi_generation) {
-			bkey_init(&delete.k);
-			delete.k.p.offset = inode_nr;
-		} else {
-			bkey_inode_generation_init(&delete.k_i);
-			delete.k.p.offset = inode_nr;
-			delete.v.bi_generation = cpu_to_le32(bi_generation);
-		}
+	if (!bi_generation) {
+		bkey_init(&delete.k);
+		delete.k.p.offset = inode_nr;
+	} else {
+		bkey_inode_generation_init(&delete.k_i);
+		delete.k.p.offset = inode_nr;
+		delete.v.bi_generation = cpu_to_le32(bi_generation);
+	}
 
-		bch2_trans_update(&trans, iter, &delete.k_i, 0);
+	bch2_trans_update(&trans, iter, &delete.k_i, 0);
 
-		ret = bch2_trans_commit(&trans, NULL, NULL,
-					BTREE_INSERT_NOFAIL);
-	} while (ret == -EINTR);
+	ret = bch2_trans_commit(&trans, NULL, NULL,
+				BTREE_INSERT_NOFAIL);
+err:
+	if (ret == -EINTR)
+		goto retry;
 
 	bch2_trans_exit(&trans);
 	return ret;
@@ -502,11 +526,11 @@ int bch2_inode_find_by_inum_trans(struct btree_trans *trans, u64 inode_nr,
 	int ret;
 
 	iter = bch2_trans_get_iter(trans, BTREE_ID_INODES,
-			POS(0, inode_nr), BTREE_ITER_SLOTS);
+			POS(0, inode_nr), BTREE_ITER_CACHED);
 	if (IS_ERR(iter))
 		return PTR_ERR(iter);
 
-	k = bch2_btree_iter_peek_slot(iter);
+	k = bch2_btree_iter_peek_cached(iter);
 	ret = bkey_err(k);
 	if (ret)
 		goto err;