From 495aabede3ff594c5eda98cb9f4463502cb48cad Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Tue, 2 Jun 2020 16:36:11 -0400
Subject: [PATCH] bcachefs: Add debug code to print btree transactions

Intended to help debug deadlocks, since we can't use lockdep to check
btree node lock ordering.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/bcachefs.h      |  3 ++
 fs/bcachefs/btree_iter.c    | 62 ++++++++++++++++++++++++++++++++++++-
 fs/bcachefs/btree_iter.h    |  2 ++
 fs/bcachefs/btree_locking.h | 12 ++++++-
 fs/bcachefs/btree_types.h   |  4 +++
 fs/bcachefs/clock.c         |  2 +-
 fs/bcachefs/journal.c       |  4 +--
 fs/bcachefs/sysfs.c         |  8 +++++
 8 files changed, 92 insertions(+), 5 deletions(-)

diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index a900725088196..dd34f30f01e52 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -628,6 +628,9 @@ struct bch_fs {
 	struct workqueue_struct	*btree_interior_update_worker;
 	struct work_struct	btree_interior_update_work;
 
+	/* btree_iter.c: */
+	struct mutex		btree_trans_lock;
+	struct list_head	btree_trans_list;
 	mempool_t		btree_iters_pool;
 
 	struct workqueue_struct	*wq;
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index ca775e63b4c64..f7de2def58df4 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -1912,7 +1912,7 @@ static struct btree_iter *btree_trans_iter_alloc(struct btree_trans *trans)
 			struct btree_iter *iter;
 
 			trans_for_each_iter(trans, iter) {
-				pr_err("iter: btree %s pos %llu:%llu%s%s%s %pf",
+				pr_err("iter: btree %s pos %llu:%llu%s%s%s %ps",
 				       bch2_btree_ids[iter->btree_id],
 				       iter->pos.inode,
 				       iter->pos.offset,
@@ -2193,12 +2193,24 @@ void bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
 
 	if (expected_mem_bytes)
 		bch2_trans_preload_mem(trans, expected_mem_bytes);
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+	mutex_lock(&c->btree_trans_lock);
+	list_add(&trans->list, &c->btree_trans_list);
+	mutex_unlock(&c->btree_trans_lock);
+#endif
 }
 
 int bch2_trans_exit(struct btree_trans *trans)
 {
 	bch2_trans_unlock(trans);
 
+#ifdef CONFIG_BCACHEFS_DEBUG
+	mutex_lock(&trans->c->btree_trans_lock);
+	list_del(&trans->list);
+	mutex_unlock(&trans->c->btree_trans_lock);
+#endif
+
 	kfree(trans->fs_usage_deltas);
 	kfree(trans->mem);
 	if (trans->used_mempool)
@@ -2211,6 +2223,51 @@ int bch2_trans_exit(struct btree_trans *trans)
 	return trans->error ? -EIO : 0;
 }
 
+void bch2_btree_trans_to_text(struct printbuf *out, struct bch_fs *c)
+{
+#ifdef CONFIG_BCACHEFS_DEBUG
+	struct btree_trans *trans;
+	struct btree_iter *iter;
+	struct btree *b;
+	unsigned l;
+
+	mutex_lock(&c->btree_trans_lock);
+	list_for_each_entry(trans, &c->btree_trans_list, list) {
+		pr_buf(out, "%ps\n", (void *) trans->ip);
+
+		trans_for_each_iter(trans, iter) {
+			if (!iter->nodes_locked)
+				continue;
+
+			pr_buf(out, "  iter %s:", bch2_btree_ids[iter->btree_id]);
+			bch2_bpos_to_text(out, iter->pos);
+			pr_buf(out, "\n");
+
+			for (l = 0; l < BTREE_MAX_DEPTH; l++) {
+				if (btree_node_locked(iter, l)) {
+					b = iter->l[l].b;
+
+					pr_buf(out, "    %p l=%u %s ",
+					       b, l, btree_node_intent_locked(iter, l) ? "i" : "r");
+					bch2_bpos_to_text(out, b->key.k.p);
+					pr_buf(out, "\n");
+				}
+			}
+		}
+
+		b = READ_ONCE(trans->locking);
+		if (b) {
+			pr_buf(out, "  locking %px l=%u %s:",
+			       b, b->c.level,
+			       bch2_btree_ids[b->c.btree_id]);
+			bch2_bpos_to_text(out, b->key.k.p);
+			pr_buf(out, "\n");
+		}
+	}
+	mutex_unlock(&c->btree_trans_lock);
+#endif
+}
+
 void bch2_fs_btree_iter_exit(struct bch_fs *c)
 {
 	mempool_exit(&c->btree_iters_pool);
@@ -2220,6 +2277,9 @@ int bch2_fs_btree_iter_init(struct bch_fs *c)
 {
 	unsigned nr = BTREE_ITER_MAX;
 
+	INIT_LIST_HEAD(&c->btree_trans_list);
+	mutex_init(&c->btree_trans_lock);
+
 	return mempool_init_kmalloc_pool(&c->btree_iters_pool, 1,
 			sizeof(struct btree_iter) * nr +
 			sizeof(struct btree_insert_entry) * nr +
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index 928170afe3b51..80577853a04ef 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -303,6 +303,8 @@ void *bch2_trans_kmalloc(struct btree_trans *, size_t);
 void bch2_trans_init(struct btree_trans *, struct bch_fs *, unsigned, size_t);
 int bch2_trans_exit(struct btree_trans *);
 
+void bch2_btree_trans_to_text(struct printbuf *, struct bch_fs *);
+
 void bch2_fs_btree_iter_exit(struct bch_fs *);
 int bch2_fs_btree_iter_init(struct bch_fs *);
 
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index 3b199e2e1e9e9..cf1801ee14a2a 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -181,11 +181,21 @@ static inline bool btree_node_lock(struct btree *b, struct bpos pos,
 				   struct btree_iter *iter,
 				   enum six_lock_type type)
 {
+	bool ret;
+
 	EBUG_ON(level >= BTREE_MAX_DEPTH);
+#ifdef CONFIG_BCACHEFS_DEBUG
+	iter->trans->locking = b;
+#endif
 
-	return likely(six_trylock_type(&b->c.lock, type)) ||
+	ret = likely(six_trylock_type(&b->c.lock, type)) ||
 		btree_node_lock_increment(iter, b, level, type) ||
 		__bch2_btree_node_lock(b, pos, level, iter, type);
+
+#ifdef CONFIG_BCACHEFS_DEBUG
+	iter->trans->locking = NULL;
+#endif
+	return ret;
 }
 
 bool __bch2_btree_node_relock(struct btree_iter *, unsigned);
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 0ecd004757128..316461e2e0168 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -284,6 +284,10 @@ struct btree_insert_entry {
 
 struct btree_trans {
 	struct bch_fs		*c;
+#ifdef CONFIG_BCACHEFS_DEBUG
+	struct list_head	list;
+	struct btree		*locking;
+#endif
 	unsigned long		ip;
 
 	u64			iters_linked;
diff --git a/fs/bcachefs/clock.c b/fs/bcachefs/clock.c
index 51286520c5c73..163058173252a 100644
--- a/fs/bcachefs/clock.c
+++ b/fs/bcachefs/clock.c
@@ -162,7 +162,7 @@ ssize_t bch2_io_timers_show(struct io_clock *clock, char *buf)
 	now = atomic_long_read(&clock->now);
 
 	for (i = 0; i < clock->timers.used; i++)
-		pr_buf(&out, "%pf:\t%li\n",
+		pr_buf(&out, "%ps:\t%li\n",
 		       clock->timers.data[i]->fn,
 		       clock->timers.data[i]->expire - now);
 	spin_unlock(&clock->timer_lock);
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 5c84569c34040..48607e01bd22d 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1237,14 +1237,14 @@ ssize_t bch2_journal_print_pins(struct journal *j, char *buf)
 		       i, atomic_read(&pin_list->count));
 
 		list_for_each_entry(pin, &pin_list->list, list)
-			pr_buf(&out, "\t%p %pf\n",
+			pr_buf(&out, "\t%px %ps\n",
 			       pin, pin->flush);
 
 		if (!list_empty(&pin_list->flushed))
 			pr_buf(&out, "flushed:\n");
 
 		list_for_each_entry(pin, &pin_list->flushed, list)
-			pr_buf(&out, "\t%p %pf\n",
+			pr_buf(&out, "\t%px %ps\n",
 			       pin, pin->flush);
 	}
 	spin_unlock(&j->lock);
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 662c84b913232..06b59e9913128 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -166,6 +166,7 @@ read_attribute(journal_debug);
 read_attribute(journal_pins);
 read_attribute(btree_updates);
 read_attribute(dirty_btree_nodes);
+read_attribute(btree_transactions);
 
 read_attribute(internal_uuid);
 
@@ -401,6 +402,12 @@ SHOW(bch2_fs)
 
 	if (attr == &sysfs_dirty_btree_nodes)
 		return bch2_dirty_btree_nodes_print(c, buf);
+	if (attr == &sysfs_btree_transactions) {
+		struct printbuf out = _PBUF(buf, PAGE_SIZE);
+
+		bch2_btree_trans_to_text(&out, c);
+		return out.pos - buf;
+	}
 
 	if (attr == &sysfs_compression_stats)
 		return bch2_compression_stats(c, buf);
@@ -571,6 +578,7 @@ struct attribute *bch2_fs_internal_files[] = {
 	&sysfs_journal_pins,
 	&sysfs_btree_updates,
 	&sysfs_dirty_btree_nodes,
+	&sysfs_btree_transactions,
 
 	&sysfs_read_realloc_races,
 	&sysfs_extent_migrate_done,
-- 
2.30.2