bcachefs: Fix for leaking of reflinked extents
authorKent Overstreet <kent.overstreet@gmail.com>
Thu, 14 Oct 2021 13:54:47 +0000 (09:54 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:14 +0000 (17:09 -0400)
When a reflink pointer points to only part of an indirect extent, and
then that indirect extent is fragmented (e.g. by copygc), if the reflink
pointer only points to one of the fragments we leak a reference.

Fix this by storing front/back pad values in reflink pointers - when
inserting reflink pointesr, we initialize them to cover the full range
of the indirect extents we reference.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/bcachefs_format.h
fs/bcachefs/buckets.c
fs/bcachefs/fsck.c
fs/bcachefs/reflink.c

index 579acb69115d4eadf5ba2d0979756e54b6405f75..4b2bf8f7b28a7b6f1a86bbc50f41b66a97dc9d39 100644 (file)
@@ -917,15 +917,24 @@ struct bch_stripe {
 struct bch_reflink_p {
        struct bch_val          v;
        __le64                  idx;
-       __le64                  v2;
-};
+       /*
+        * A reflink pointer might point to an indirect extent which is then
+        * later split (by copygc or rebalance). If we only pointed to part of
+        * the original indirect extent, and then one of the fragments is
+        * outside the range we point to, we'd leak a refcount: so when creating
+        * reflink pointers, we need to store pad values to remember the full
+        * range we were taking a reference on.
+        */
+       __le32                  front_pad;
+       __le32                  back_pad;
+} __attribute__((packed, aligned(8)));
 
 struct bch_reflink_v {
        struct bch_val          v;
        __le64                  refcount;
        union bch_extent_entry  start[0];
        __u64                   _data[0];
-};
+} __attribute__((packed, aligned(8)));
 
 struct bch_indirect_inline_data {
        struct bch_val          v;
index 9c5d18b4efaa78ff8b4baf907e0823e476f5ac49..ee1c71e011c7b6287940add8358d71a13247baad 100644 (file)
@@ -1180,8 +1180,10 @@ static int bch2_mark_reflink_p(struct bch_fs *c,
        struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
        struct reflink_gc *ref;
        size_t l, r, m;
-       u64 idx = le64_to_cpu(p.v->idx);
-       unsigned sectors = p.k->size;
+       u64 idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
+       u64 sectors = (u64) le32_to_cpu(p.v->front_pad) +
+                           le32_to_cpu(p.v->back_pad) +
+                           p.k->size;
        s64 ret = 0;
 
        BUG_ON((flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)) ==
@@ -1758,12 +1760,33 @@ static int __bch2_trans_mark_reflink_p(struct btree_trans *trans,
                bch2_fs_inconsistent(c,
                        "%llu:%llu len %u points to nonexistent indirect extent %llu",
                        p.k->p.inode, p.k->p.offset, p.k->size, idx);
-               bch2_inconsistent_error(c);
                ret = -EIO;
                goto err;
        }
 
-       BUG_ON(!*refcount && (flags & BTREE_TRIGGER_OVERWRITE));
+       if (!*refcount && (flags & BTREE_TRIGGER_OVERWRITE)) {
+               bch2_fs_inconsistent(c,
+                       "%llu:%llu len %u idx %llu indirect extent refcount underflow",
+                       p.k->p.inode, p.k->p.offset, p.k->size, idx);
+               ret = -EIO;
+               goto err;
+       }
+
+       if (flags & BTREE_TRIGGER_INSERT) {
+               struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
+               u64 pad;
+
+               pad = max_t(s64, le32_to_cpu(v->front_pad),
+                           le64_to_cpu(v->idx) - bkey_start_offset(k.k));
+               BUG_ON(pad > U32_MAX);
+               v->front_pad = cpu_to_le32(pad);
+
+               pad = max_t(s64, le32_to_cpu(v->back_pad),
+                           k.k->p.offset - p.k->size - le64_to_cpu(v->idx));
+               BUG_ON(pad > U32_MAX);
+               v->back_pad = cpu_to_le32(pad);
+       }
+
        le64_add_cpu(refcount, add);
 
        if (!*refcount) {
@@ -1786,10 +1809,20 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
                                     struct bkey_s_c k, unsigned flags)
 {
        struct bkey_s_c_reflink_p p = bkey_s_c_to_reflink_p(k);
-       u64 idx = le64_to_cpu(p.v->idx);
-       unsigned sectors = p.k->size;
+       u64 idx, sectors;
        s64 ret = 0;
 
+       if (flags & BTREE_TRIGGER_INSERT) {
+               struct bch_reflink_p *v = (struct bch_reflink_p *) p.v;
+
+               v->front_pad = v->back_pad = 0;
+       }
+
+       idx = le64_to_cpu(p.v->idx) - le32_to_cpu(p.v->front_pad);
+       sectors = (u64) le32_to_cpu(p.v->front_pad) +
+                       le32_to_cpu(p.v->back_pad) +
+                       p.k->size;
+
        while (sectors) {
                ret = __bch2_trans_mark_reflink_p(trans, p, idx, flags);
                if (ret < 0)
index b43c31b95dfff339b25b03ba85b2f06cd1cee944..c99e1514fd4f7e394e6e76d977b6f631b3150183 100644 (file)
@@ -2174,7 +2174,7 @@ static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter)
 
        p = bkey_s_c_to_reflink_p(k);
 
-       if (!p.v->v2)
+       if (!p.v->front_pad && !p.v->back_pad)
                return 0;
 
        u = bch2_trans_kmalloc(trans, sizeof(*u));
@@ -2183,7 +2183,8 @@ static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter)
                return ret;
 
        bkey_reassemble(&u->k_i, k);
-       u->v.v2 = 0;
+       u->v.front_pad  = 0;
+       u->v.back_pad   = 0;
 
        return bch2_trans_update(trans, iter, &u->k_i, 0);
 }
index 9bcf4216a286bd63abe96d0f6d6f08b22ab43aa3..2827d0ef10195dd36a45ac87dca7522ebd5dab9b 100644 (file)
@@ -32,6 +32,10 @@ const char *bch2_reflink_p_invalid(const struct bch_fs *c, struct bkey_s_c k)
        if (bkey_val_bytes(p.k) != sizeof(*p.v))
                return "incorrect value size";
 
+       if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix &&
+           le64_to_cpu(p.v->idx) < le32_to_cpu(p.v->front_pad))
+               return "idx < front_pad";
+
        return NULL;
 }