From eeb83e25bb07ff1d00297c541c03e35c8c3c762c Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Thu, 22 Nov 2018 22:50:35 -0500
Subject: [PATCH] bcachefs: Hold usage_lock over mark_key and fs_usage_apply

Fixes an inconsistency at the end of gc: previously, a key was marked
and the resulting usage delta applied via bch2_fs_usage_apply() in
separate usage_lock sections, so the in-memory usage counters could be
observed in an intermediate state. Add bch2_mark_key_locked(), which
expects usage_lock to be held by the caller, and hold usage_lock (and,
for the interior btree update paths, btree_interior_update_lock) across
both the mark and bch2_fs_usage_apply(); bch2_mark_update() likewise
now takes usage_lock around marking and applying.
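
The converted callers follow the pattern below (abridged sketch taken
from bch2_btree_set_root_inmem() as changed by this patch; most
arguments elided with "..." for brevity):

	mutex_lock(&c->btree_interior_update_lock);
	percpu_down_read(&c->usage_lock);

	bch2_mark_key_locked(c, BKEY_TYPE_BTREE,
			     bkey_i_to_s_c(&b->key), ...);
	/* ... */
	/* bch2_fs_usage_apply() now asserts usage_lock is held: */
	bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res,
			    gc_pos_btree_root(b->btree_id));

	percpu_up_read(&c->usage_lock);
	mutex_unlock(&c->btree_interior_update_lock);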

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/bkey_methods.h          | 12 ++++
 fs/bcachefs/btree_gc.c              | 12 ----
 fs/bcachefs/btree_update_interior.c | 44 +++++++-------
 fs/bcachefs/buckets.c               | 93 ++++++++++++++++++++---------
 fs/bcachefs/buckets.h               |  3 +
 5 files changed, 104 insertions(+), 60 deletions(-)

diff --git a/fs/bcachefs/bkey_methods.h b/fs/bcachefs/bkey_methods.h
index be6041e92c05d..62b86a8e2ba85 100644
--- a/fs/bcachefs/bkey_methods.h
+++ b/fs/bcachefs/bkey_methods.h
@@ -54,6 +54,18 @@ struct bkey_ops {
 	bool		is_extents;
 };
 
+static inline bool bkey_type_needs_gc(enum bkey_type type)
+{
+	switch (type) {
+	case BKEY_TYPE_BTREE:
+	case BKEY_TYPE_EXTENTS:
+	case BKEY_TYPE_EC:
+		return true;
+	default:
+		return false;
+	}
+}
+
 const char *bch2_bkey_val_invalid(struct bch_fs *, enum bkey_type,
 				  struct bkey_s_c);
 const char *__bch2_bkey_invalid(struct bch_fs *, enum bkey_type, struct bkey_s_c);
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index f350634ce7a0b..73775cbd1daf9 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -112,18 +112,6 @@ static void btree_node_range_checks(struct bch_fs *c, struct btree *b,
 
 /* marking of btree keys/nodes: */
 
-static bool bkey_type_needs_gc(enum bkey_type type)
-{
-	switch (type) {
-	case BKEY_TYPE_BTREE:
-	case BKEY_TYPE_EXTENTS:
-	case BKEY_TYPE_EC:
-		return true;
-	default:
-		return false;
-	}
-}
-
 static void ptr_gen_recalc_oldest(struct bch_fs *c,
 				  const struct bch_extent_ptr *ptr,
 				  u8 *max_stale)
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 4fcda31290b2c..7d7a021416f30 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -160,12 +160,7 @@ static void bch2_btree_node_free_index(struct btree_update *as, struct btree *b,
 {
 	struct bch_fs *c = as->c;
 	struct pending_btree_node_free *d;
-
-	/*
-	 * btree_update lock is only needed here to avoid racing with
-	 * gc:
-	 */
-	mutex_lock(&c->btree_interior_update_lock);
+	struct gc_pos pos = { 0 };
 
 	for (d = as->pending; d < as->pending + as->nr_pending; d++)
 		if (!bkey_cmp(k.k->p, d->key.k.p) &&
@@ -201,20 +196,11 @@ found:
 	if (gc_pos_cmp(c->gc_pos, b
 		       ? gc_pos_btree_node(b)
 		       : gc_pos_btree_root(as->btree_id)) >= 0 &&
-	    gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0) {
-		struct gc_pos pos = { 0 };
-
-		bch2_mark_key(c, BKEY_TYPE_BTREE,
+	    gc_pos_cmp(c->gc_pos, gc_phase(GC_PHASE_PENDING_DELETE)) < 0)
+		bch2_mark_key_locked(c, BKEY_TYPE_BTREE,
 			      bkey_i_to_s_c(&d->key),
 			      false, 0, pos,
 			      NULL, 0, BCH_BUCKET_MARK_GC);
-		/*
-		 * Don't apply tmp - pending deletes aren't tracked in
-		 * bch_alloc_stats:
-		 */
-	}
-
-	mutex_unlock(&c->btree_interior_update_lock);
 }
 
 static void __btree_node_free(struct bch_fs *c, struct btree *b)
@@ -1083,7 +1069,10 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
 
 	__bch2_btree_set_root_inmem(c, b);
 
-	bch2_mark_key(c, BKEY_TYPE_BTREE,
+	mutex_lock(&c->btree_interior_update_lock);
+	percpu_down_read(&c->usage_lock);
+
+	bch2_mark_key_locked(c, BKEY_TYPE_BTREE,
 		      bkey_i_to_s_c(&b->key),
 		      true, 0,
 		      gc_pos_btree_root(b->btree_id),
@@ -1095,6 +1084,9 @@ static void bch2_btree_set_root_inmem(struct btree_update *as, struct btree *b)
 					   &stats);
 	bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res,
 			    gc_pos_btree_root(b->btree_id));
+
+	percpu_up_read(&c->usage_lock);
+	mutex_unlock(&c->btree_interior_update_lock);
 }
 
 static void bch2_btree_set_root_ondisk(struct bch_fs *c, struct btree *b, int rw)
@@ -1171,8 +1163,11 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
 
 	BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, b));
 
+	mutex_lock(&c->btree_interior_update_lock);
+	percpu_down_read(&c->usage_lock);
+
 	if (bkey_extent_is_data(&insert->k))
-		bch2_mark_key(c, BKEY_TYPE_BTREE,
+		bch2_mark_key_locked(c, BKEY_TYPE_BTREE,
 			      bkey_i_to_s_c(insert),
 			      true, 0,
 			      gc_pos_btree_node(b), &stats, 0, 0);
@@ -1193,6 +1188,9 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
 	bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res,
 			    gc_pos_btree_node(b));
 
+	percpu_up_read(&c->usage_lock);
+	mutex_unlock(&c->btree_interior_update_lock);
+
 	bch2_btree_bset_insert_key(iter, b, node_iter, insert);
 	set_btree_node_dirty(b);
 	set_btree_node_need_write(b);
@@ -1977,7 +1975,10 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
 
 		bch2_btree_node_lock_write(b, iter);
 
-		bch2_mark_key(c, BKEY_TYPE_BTREE,
+		mutex_lock(&c->btree_interior_update_lock);
+		percpu_down_read(&c->usage_lock);
+
+		bch2_mark_key_locked(c, BKEY_TYPE_BTREE,
 			      bkey_i_to_s_c(&new_key->k_i),
 			      true, 0,
 			      gc_pos_btree_root(b->btree_id),
@@ -1988,6 +1989,9 @@ static void __bch2_btree_node_update_key(struct bch_fs *c,
 		bch2_fs_usage_apply(c, &stats, &as->reserve->disk_res,
 				    gc_pos_btree_root(b->btree_id));
 
+		percpu_up_read(&c->usage_lock);
+		mutex_unlock(&c->btree_interior_update_lock);
+
 		if (PTR_HASH(&new_key->k_i) != PTR_HASH(&b->key)) {
 			mutex_lock(&c->btree_cache.lock);
 			bch2_btree_node_hash_remove(&c->btree_cache, b);
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 87ff4b2c8434c..3f4bbf280a787 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -323,6 +323,8 @@ void bch2_fs_usage_apply(struct bch_fs *c,
 	s64 added = sum.data + sum.reserved;
 	s64 should_not_have_added;
 
+	percpu_rwsem_assert_held(&c->usage_lock);
+
 	/*
 	 * Not allowed to reduce sectors_available except by getting a
 	 * reservation:
@@ -339,7 +341,6 @@ void bch2_fs_usage_apply(struct bch_fs *c,
 		stats->online_reserved	-= added;
 	}
 
-	percpu_down_read(&c->usage_lock);
 	preempt_disable();
 	/* online_reserved not subject to gc: */
 	this_cpu_add(c->usage[0]->online_reserved, stats->online_reserved);
@@ -352,7 +353,6 @@ void bch2_fs_usage_apply(struct bch_fs *c,
 
 	bch2_fs_stats_verify(c);
 	preempt_enable();
-	percpu_up_read(&c->usage_lock);
 
 	memset(stats, 0, sizeof(*stats));
 }
@@ -406,7 +406,24 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
 	bch2_dev_stats_verify(ca);
 }
 
-#define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr)		\
+void bch2_dev_usage_from_buckets(struct bch_fs *c, struct bch_dev *ca)
+{
+	struct bucket_mark old = { .v.counter = 0 };
+	struct bch_fs_usage *fs_usage;
+	struct bucket_array *buckets;
+	struct bucket *g;
+
+	percpu_down_read(&c->usage_lock);
+	fs_usage = this_cpu_ptr(c->usage[0]);
+	buckets = bucket_array(ca);
+
+	for_each_bucket(g, buckets)
+		if (g->mark.data_type)
+			bch2_dev_usage_update(c, ca, fs_usage, old, g->mark, false);
+	percpu_up_read(&c->usage_lock);
+}
+
+#define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr)	\
 ({								\
 	struct bucket_mark _old = bucket_cmpxchg(g, new, expr);	\
 								\
@@ -490,12 +507,12 @@ static void __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
 {
 	struct bch_fs_usage *fs_usage = this_cpu_ptr(c->usage[gc]);
 	struct bucket *g = __bucket(ca, b, gc);
-	struct bucket_mark old, new;
+	struct bucket_mark new;
 
 	BUG_ON(type != BCH_DATA_SB &&
 	       type != BCH_DATA_JOURNAL);
 
-	old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
+	bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
 		new.data_type	= type;
 		checked_add(new.dirty_sectors, sectors);
 	}));
@@ -876,16 +893,14 @@ static int __bch2_mark_key(struct bch_fs *c,
 	return ret;
 }
 
-int bch2_mark_key(struct bch_fs *c,
-		  enum bkey_type type, struct bkey_s_c k,
-		  bool inserting, s64 sectors,
-		  struct gc_pos pos,
-		  struct bch_fs_usage *stats,
-		  u64 journal_seq, unsigned flags)
+int bch2_mark_key_locked(struct bch_fs *c,
+		   enum bkey_type type, struct bkey_s_c k,
+		   bool inserting, s64 sectors,
+		   struct gc_pos pos,
+		   struct bch_fs_usage *stats,
+		   u64 journal_seq, unsigned flags)
 {
-	int ret = 0;
-
-	percpu_down_read(&c->usage_lock);
+	int ret;
 
 	if (!(flags & BCH_BUCKET_MARK_GC)) {
 		if (!stats)
@@ -894,7 +909,7 @@ int bch2_mark_key(struct bch_fs *c,
 		ret = __bch2_mark_key(c, type, k, inserting, sectors,
 				      stats, journal_seq, flags, false);
 		if (ret)
-			goto out;
+			return ret;
 	}
 
 	if ((flags & BCH_BUCKET_MARK_GC) ||
@@ -903,9 +918,24 @@ int bch2_mark_key(struct bch_fs *c,
 				      this_cpu_ptr(c->usage[1]),
 				      journal_seq, flags, true);
 		if (ret)
-			goto out;
+			return ret;
 	}
-out:
+
+	return 0;
+}
+
+int bch2_mark_key(struct bch_fs *c,
+		  enum bkey_type type, struct bkey_s_c k,
+		  bool inserting, s64 sectors,
+		  struct gc_pos pos,
+		  struct bch_fs_usage *stats,
+		  u64 journal_seq, unsigned flags)
+{
+	int ret;
+
+	percpu_down_read(&c->usage_lock);
+	ret = bch2_mark_key_locked(c, type, k, inserting, sectors,
+				   pos, stats, journal_seq, flags);
 	percpu_up_read(&c->usage_lock);
 
 	return ret;
@@ -922,12 +952,17 @@ void bch2_mark_update(struct btree_insert *trans,
 	struct gc_pos		pos = gc_pos_btree_node(b);
 	struct bkey_packed	*_k;
 
+	if (!bkey_type_needs_gc(iter->btree_id))
+		return;
+
+	percpu_down_read(&c->usage_lock);
+
 	if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
-		bch2_mark_key(c, btree_node_type(b), bkey_i_to_s_c(insert->k),
-			      true,
-			      bpos_min(insert->k->k.p, b->key.k.p).offset -
-			      bkey_start_offset(&insert->k->k),
-			      pos, &stats, trans->journal_res.seq, 0);
+		bch2_mark_key_locked(c, btree_node_type(b),
+			bkey_i_to_s_c(insert->k), true,
+			bpos_min(insert->k->k.p, b->key.k.p).offset -
+			bkey_start_offset(&insert->k->k),
+			pos, &stats, trans->journal_res.seq, 0);
 
 	while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
 						      KEY_TYPE_DISCARD))) {
@@ -959,9 +994,9 @@ void bch2_mark_update(struct btree_insert *trans,
 				sectors = k.k->p.offset - insert->k->k.p.offset;
 				BUG_ON(sectors <= 0);
 
-				bch2_mark_key(c, btree_node_type(b), k,
-					      true, sectors,
-					      pos, &stats, trans->journal_res.seq, 0);
+				bch2_mark_key_locked(c, btree_node_type(b),
+					k, true, sectors, pos, &stats,
+					trans->journal_res.seq, 0);
 
 				sectors = bkey_start_offset(&insert->k->k) -
 					k.k->p.offset;
@@ -971,14 +1006,16 @@ void bch2_mark_update(struct btree_insert *trans,
 			BUG_ON(sectors >= 0);
 		}
 
-		bch2_mark_key(c, btree_node_type(b), k,
-			      false, sectors,
-			      pos, &stats, trans->journal_res.seq, 0);
+		bch2_mark_key_locked(c, btree_node_type(b),
+			k, false, sectors, pos, &stats,
+			trans->journal_res.seq, 0);
 
 		bch2_btree_node_iter_advance(&node_iter, b);
 	}
 
 	bch2_fs_usage_apply(c, &stats, trans->disk_res, pos);
+
+	percpu_up_read(&c->usage_lock);
 }
 
 /* Disk reservations: */
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 4eec96101bf6e..884041b53eb93 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -220,6 +220,9 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
 #define BCH_BUCKET_MARK_NOATOMIC		(1 << 0)
 #define BCH_BUCKET_MARK_GC			(1 << 1)
 
+int bch2_mark_key_locked(struct bch_fs *, enum bkey_type, struct bkey_s_c,
+		  bool, s64, struct gc_pos,
+		  struct bch_fs_usage *, u64, unsigned);
 int bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c,
 		  bool, s64, struct gc_pos,
 		  struct bch_fs_usage *, u64, unsigned);
-- 
2.30.2