From ad520141b155786800261cc7e02ec02f0afe2643 Mon Sep 17 00:00:00 2001
From: Kent Overstreet <kent.overstreet@linux.dev>
Date: Sat, 27 May 2023 23:19:13 -0400
Subject: [PATCH] bcachefs: Fix corruption with writeable snapshots

When partially overwriting an extent in an older snapshot, the existing
extent has to be split.

If the existing extent was overwritten in a different (sibling)
snapshot, we have to ensure that the split won't be visible in the
sibling snapshot.

data_update.c already has code for this, insert_snapshot_whiteouts() -
we just need to move it into btree_update_leaf.c (as
bch2_insert_snapshot_whiteouts()) and change bch2_trans_update_extent()
to use it as well.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
---
 fs/bcachefs/btree_update.h      | 23 ++++++++
 fs/bcachefs/btree_update_leaf.c | 73 +++++++++++++++++++++++--
 fs/bcachefs/data_update.c       | 94 +++------------------------------
 3 files changed, 99 insertions(+), 91 deletions(-)

diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 1ac3a81e0af6f..e90cf292f80bf 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -83,6 +83,28 @@ int bch2_btree_node_update_key(struct btree_trans *, struct btree_iter *,
 int bch2_btree_node_update_key_get_iter(struct btree_trans *,
 				struct btree *, struct bkey_i *, bool);
 
+int __bch2_insert_snapshot_whiteouts(struct btree_trans *, enum btree_id,
+				     struct bpos, struct bpos);
+
+/*
+ * Helper for splitting an extent that lives in an existing snapshot: when
+ * @old_pos belongs to an interior snapshot node, each descendant snapshot in
+ * which @old_pos has been overwritten (and so is not visible) gets a whiteout
+ * at @new_pos. Returns 0 without doing anything when @btree has no snapshots
+ * or when @old_pos and @new_pos are equal.
+ */
+static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
+						 enum btree_id btree,
+						 struct bpos old_pos,
+						 struct bpos new_pos)
+{
+	if (btree_type_has_snapshots(btree) &&
+	    !bkey_eq(old_pos, new_pos))
+		return __bch2_insert_snapshot_whiteouts(trans, btree,
+							old_pos, new_pos);
+	return 0;
+}
+
 int bch2_trans_update_extent(struct btree_trans *, struct btree_iter *,
 			     struct bkey_i *, enum btree_update_flags);
 
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 33693467810b2..25d73db9adc69 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -1343,6 +1343,83 @@ static int need_whiteout_for_snapshot(struct btree_trans *trans,
 
 	return ret;
 }
+
+/*
+ * Out-of-line part of bch2_insert_snapshot_whiteouts().
+ *
+ * Walks backwards from @old_pos over the keys stored at that position in all
+ * snapshots. For each one whose snapshot has @old_pos.snapshot as an ancestor
+ * (and is not already covered by an ancestor recorded in @s), a whiteout is
+ * queued at @new_pos in that snapshot, provided the slot there is currently
+ * KEY_TYPE_deleted.
+ *
+ * Returns 0 on success, or the first error hit while iterating or updating.
+ */
+int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
+				   enum btree_id id,
+				   struct bpos old_pos,
+				   struct bpos new_pos)
+{
+	struct bch_fs *c = trans->c;
+	struct btree_iter old_iter, new_iter;
+	struct bkey_s_c old_k, new_k;
+	snapshot_id_list s;
+	struct bkey_i *update;
+	/* Must start at 0: the loop can stop on a NULL key before assigning it */
+	int ret = 0;
+
+	if (!bch2_snapshot_has_children(c, old_pos.snapshot))
+		return 0;
+
+	darray_init(&s);
+
+	bch2_trans_iter_init(trans, &old_iter, id, old_pos,
+			     BTREE_ITER_NOT_EXTENTS|
+			     BTREE_ITER_ALL_SNAPSHOTS);
+	while ((old_k = bch2_btree_iter_prev(&old_iter)).k &&
+	       !(ret = bkey_err(old_k)) &&
+	       bkey_eq(old_pos, old_k.k->p)) {
+		struct bpos whiteout_pos =
+			SPOS(new_pos.inode, new_pos.offset, old_k.k->p.snapshot);
+
+		if (!bch2_snapshot_is_ancestor(c, old_k.k->p.snapshot, old_pos.snapshot) ||
+		    snapshot_list_has_ancestor(c, &s, old_k.k->p.snapshot))
+			continue;
+
+		new_k = bch2_bkey_get_iter(trans, &new_iter, id, whiteout_pos,
+					   BTREE_ITER_NOT_EXTENTS|
+					   BTREE_ITER_INTENT);
+		ret = bkey_err(new_k);
+
+		if (!ret && new_k.k->type == KEY_TYPE_deleted) {
+			update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
+			ret = PTR_ERR_OR_ZERO(update);
+			if (!ret) {
+				bkey_init(&update->k);
+				update->k.p		= whiteout_pos;
+				update->k.type		= KEY_TYPE_whiteout;
+
+				ret = bch2_trans_update(trans, &new_iter, update,
+							BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
+			}
+		}
+		/* Always drop new_iter, including when the steps above failed */
+		bch2_trans_iter_exit(trans, &new_iter);
+
+		/* Check before snapshot_list_add() so it can't overwrite an error */
+		if (ret)
+			break;
+
+		ret = snapshot_list_add(c, &s, old_k.k->p.snapshot);
+		if (ret)
+			break;
+	}
+	bch2_trans_iter_exit(trans, &old_iter);
+	darray_exit(&s);
+
+	return ret;
+}
+
 int bch2_trans_update_extent(struct btree_trans *trans,
 			     struct btree_iter *orig_iter,
 			     struct bkey_i *insert,
@@ -1396,8 +1459,10 @@ int bch2_trans_update_extent(struct btree_trans *trans,
 
 			bch2_cut_back(start, update);
 
-			ret = bch2_btree_insert_nonextent(trans, btree_id, update,
-						  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
+			ret =   bch2_insert_snapshot_whiteouts(trans, btree_id,
+						k.k->p, update->k.p) ?:
+				bch2_btree_insert_nonextent(trans, btree_id, update,
+						BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
 			if (ret)
 				goto err;
 		}
@@ -1411,7 +1476,9 @@ int bch2_trans_update_extent(struct btree_trans *trans,
 			bch2_cut_front(start, update);
 			bch2_cut_back(insert->k.p, update);
 
-			ret = bch2_btree_insert_nonextent(trans, btree_id, update,
+			ret =   bch2_insert_snapshot_whiteouts(trans, btree_id,
+						k.k->p, update->k.p) ?:
+				bch2_btree_insert_nonextent(trans, btree_id, update,
 						  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE|flags);
 			if (ret)
 				goto err;
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index ae7e60d6e5836..c89ee14f8b6b8 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -16,81 +16,6 @@
 #include "subvolume.h"
 #include "trace.h"
 
-static int insert_snapshot_whiteouts(struct btree_trans *trans,
-				     enum btree_id id,
-				     struct bpos old_pos,
-				     struct bpos new_pos)
-{
-	struct bch_fs *c = trans->c;
-	struct btree_iter iter, iter2;
-	struct bkey_s_c k, k2;
-	snapshot_id_list s;
-	struct bkey_i *update;
-	int ret;
-
-	if (!btree_type_has_snapshots(id))
-		return 0;
-
-	darray_init(&s);
-
-	if (!bch2_snapshot_has_children(c, old_pos.snapshot))
-		return 0;
-
-	bch2_trans_iter_init(trans, &iter, id, old_pos,
-			     BTREE_ITER_NOT_EXTENTS|
-			     BTREE_ITER_ALL_SNAPSHOTS);
-	while (1) {
-		k = bch2_btree_iter_prev(&iter);
-		ret = bkey_err(k);
-		if (ret)
-			break;
-
-		if (!k.k)
-			break;
-
-		if (!bkey_eq(old_pos, k.k->p))
-			break;
-
-		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, old_pos.snapshot) &&
-		    !snapshot_list_has_ancestor(c, &s, k.k->p.snapshot)) {
-			struct bpos whiteout_pos = new_pos;
-
-			whiteout_pos.snapshot = k.k->p.snapshot;
-
-			k2 = bch2_bkey_get_iter(trans, &iter2, id, whiteout_pos,
-						BTREE_ITER_NOT_EXTENTS|
-						BTREE_ITER_INTENT);
-			ret = bkey_err(k2);
-
-			if (!ret && k2.k->type == KEY_TYPE_deleted) {
-				update = bch2_trans_kmalloc(trans, sizeof(struct bkey_i));
-				ret = PTR_ERR_OR_ZERO(update);
-				if (ret)
-					break;
-
-				bkey_init(&update->k);
-				update->k.p		= whiteout_pos;
-				update->k.type		= KEY_TYPE_whiteout;
-
-				ret = bch2_trans_update(trans, &iter2, update,
-							BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
-			}
-			bch2_trans_iter_exit(trans, &iter2);
-
-			if (ret)
-				break;
-
-			ret = snapshot_list_add(c, &s, k.k->p.snapshot);
-			if (ret)
-				break;
-		}
-	}
-	bch2_trans_iter_exit(trans, &iter);
-	darray_exit(&s);
-
-	return ret;
-}
-
 static void trace_move_extent_finish2(struct bch_fs *c, struct bkey_s_c k)
 {
 	if (trace_move_extent_finish_enabled()) {
@@ -327,19 +252,12 @@ restart_drop_extra_replicas:
 
 		next_pos = insert->k.p;
 
-		if (!bkey_eq(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) {
-			ret = insert_snapshot_whiteouts(trans, m->btree_id, k.k->p,
-							bkey_start_pos(&insert->k));
-			if (ret)
-				goto err;
-		}
-
-		if (!bkey_eq(insert->k.p, k.k->p)) {
-			ret = insert_snapshot_whiteouts(trans, m->btree_id,
-							k.k->p, insert->k.p);
-			if (ret)
-				goto err;
-		}
+		ret =   bch2_insert_snapshot_whiteouts(trans, m->btree_id,
+						k.k->p, bkey_start_pos(&insert->k)) ?:
+			bch2_insert_snapshot_whiteouts(trans, m->btree_id,
+						k.k->p, insert->k.p);
+		if (ret)
+			goto err;
 
 		ret   = bch2_trans_update(trans, &iter, insert,
 				BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
-- 
2.30.2