From b547d005d54209dc3a14ffd7924c73e32ba2e3a2 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 29 Nov 2021 16:38:27 -0500 Subject: [PATCH] bcachefs: Erasure coding fixes When we added the stripe and stripe_redundancy fields to alloc keys, we neglected to add them to the functions that convert back and forth with the in-memory types. Signed-off-by: Kent Overstreet --- fs/bcachefs/alloc_background.c | 3 + fs/bcachefs/alloc_background.h | 2 + fs/bcachefs/btree_gc.c | 16 +++-- fs/bcachefs/buckets.c | 119 +++++++++++++++++++++++---------- fs/bcachefs/ec.c | 39 +++++++++-- 5 files changed, 130 insertions(+), 49 deletions(-) diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 10514476cffe2..dc1e09b138b62 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -336,6 +336,9 @@ static int bch2_alloc_read_fn(struct btree_trans *trans, struct bkey_s_c k) g->_mark.data_type = u.data_type; g->_mark.dirty_sectors = u.dirty_sectors; g->_mark.cached_sectors = u.cached_sectors; + g->_mark.stripe = u.stripe != 0; + g->stripe = u.stripe; + g->stripe_redundancy = u.stripe_redundancy; g->io_time[READ] = u.read_time; g->io_time[WRITE] = u.write_time; g->oldest_gen = u.oldest_gen; diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h index 370573f8e05d7..b1efc1494dc4f 100644 --- a/fs/bcachefs/alloc_background.h +++ b/fs/bcachefs/alloc_background.h @@ -65,6 +65,8 @@ alloc_mem_to_key(struct btree_iter *iter, .cached_sectors = m.cached_sectors, .read_time = g->io_time[READ], .write_time = g->io_time[WRITE], + .stripe = g->stripe, + .stripe_redundancy = g->stripe_redundancy, }; } diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index b692451f91b58..6cde4234f5e94 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -1176,14 +1176,14 @@ static int bch2_gc_done(struct bch_fs *c, set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \ } #define copy_bucket_field(_f) \ - if (dst->b[b].mark._f != src->b[b].mark._f) { \ + if (dst->b[b]._f != src->b[b]._f) { \ if (verify) \ fsck_err(c, "bucket %u:%zu gen %u data type %s has wrong " #_f \ ": got %u, should be %u", dev, b, \ dst->b[b].mark.gen, \ bch2_data_types[dst->b[b].mark.data_type],\ - dst->b[b].mark._f, src->b[b].mark._f); \ - dst->b[b]._mark._f = src->b[b].mark._f; \ + dst->b[b]._f, src->b[b]._f); \ + dst->b[b]._f = src->b[b]._f; \ set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags); \ } #define copy_dev_field(_f, _msg, ...) \ @@ -1229,11 +1229,13 @@ static int bch2_gc_done(struct bch_fs *c, size_t b; for (b = 0; b < src->nbuckets; b++) { - copy_bucket_field(gen); - copy_bucket_field(data_type); + copy_bucket_field(_mark.gen); + copy_bucket_field(_mark.data_type); + copy_bucket_field(_mark.stripe); + copy_bucket_field(_mark.dirty_sectors); + copy_bucket_field(_mark.cached_sectors); + copy_bucket_field(stripe_redundancy); copy_bucket_field(stripe); - copy_bucket_field(dirty_sectors); - copy_bucket_field(cached_sectors); dst->b[b].oldest_gen = src->b[b].oldest_gen; } diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c index c4d72a4999553..66f0729051733 100644 --- a/fs/bcachefs/buckets.c +++ b/fs/bcachefs/buckets.c @@ -810,6 +810,8 @@ static int mark_stripe_bucket(struct btree_trans *trans, const struct bch_stripe *s = bkey_s_c_to_stripe(k).v; unsigned nr_data = s->nr_blocks - s->nr_redundant; bool parity = ptr_idx >= nr_data; + enum bch_data_type data_type = parity ? BCH_DATA_parity : 0; + s64 sectors = parity ? le16_to_cpu(s->sectors) : 0; const struct bch_extent_ptr *ptr = s->ptrs + ptr_idx; bool gc = flags & BTREE_TRIGGER_GC; struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev); @@ -818,10 +820,13 @@ static int mark_stripe_bucket(struct btree_trans *trans, char buf[200]; int ret = 0; + /* * XXX doesn't handle deletion */ + percpu_down_read(&c->mark_lock); g = PTR_BUCKET(ca, ptr, gc); - if (g->stripe && g->stripe != k.k->p.offset) { + if (g->mark.dirty_sectors || + (g->stripe && g->stripe != k.k->p.offset)) { bch2_fs_inconsistent(c, "bucket %u:%zu gen %u: multiple stripes using same bucket\n%s", ptr->dev, PTR_BUCKET_NR(ca, ptr), g->mark.gen, @@ -831,20 +836,22 @@ static int mark_stripe_bucket(struct btree_trans *trans, } old = bucket_cmpxchg(g, new, ({ - ret = check_bucket_ref(c, k, ptr, 0, 0, new.gen, new.data_type, + ret = check_bucket_ref(c, k, ptr, sectors, data_type, + new.gen, new.data_type, new.dirty_sectors, new.cached_sectors); if (ret) goto err; - if (parity) { - new.data_type = BCH_DATA_parity; - new.dirty_sectors = le16_to_cpu(s->sectors); - } + new.dirty_sectors += sectors; + if (data_type) + new.data_type = data_type; if (journal_seq) { new.journal_seq_valid = 1; new.journal_seq = journal_seq; } + + new.stripe = true; })); g->stripe = k.k->p.offset; @@ -1124,6 +1131,11 @@ static int bch2_mark_stripe(struct btree_trans *trans, } if (gc) { + /* + * This will be wrong when we bring back runtime gc: we should + * be unmarking the old key and then marking the new key + */ + /* * gc recalculates this field from stripe ptr * references: @@ -1656,50 +1668,75 @@ static int bch2_trans_mark_extent(struct btree_trans *trans, return 0; } -static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans, - struct bkey_s_c_stripe s, - unsigned idx, bool deleting) +static int bch2_trans_mark_stripe_bucket(struct btree_trans *trans, + struct bkey_s_c_stripe s, + unsigned idx, bool deleting) { struct bch_fs *c = trans->c; const struct bch_extent_ptr *ptr = &s.v->ptrs[idx]; struct bkey_alloc_buf *a; struct btree_iter iter; struct bkey_alloc_unpacked u; - bool parity = idx >= s.v->nr_blocks - s.v->nr_redundant; + enum bch_data_type data_type = idx >= s.v->nr_blocks - s.v->nr_redundant + ? BCH_DATA_parity : 0; + s64 sectors = data_type ? le16_to_cpu(s.v->sectors) : 0; int ret = 0; + if (deleting) + sectors = -sectors; + a = bch2_trans_start_alloc_update(trans, &iter, ptr, &u); if (IS_ERR(a)) return PTR_ERR(a); - if (parity) { - s64 sectors = le16_to_cpu(s.v->sectors); - - if (deleting) - sectors = -sectors; - - u.dirty_sectors += sectors; - u.data_type = u.dirty_sectors - ? BCH_DATA_parity - : 0; - } + ret = check_bucket_ref(c, s.s_c, ptr, sectors, data_type, + u.gen, u.data_type, + u.dirty_sectors, u.cached_sectors); + if (ret) + goto err; if (!deleting) { - if (bch2_fs_inconsistent_on(u.stripe && u.stripe != s.k->p.offset, c, - "bucket %llu:%llu gen %u: multiple stripes using same bucket (%u, %llu)", + if (bch2_fs_inconsistent_on(u.stripe || + u.stripe_redundancy, c, + "bucket %llu:%llu gen %u data type %s dirty_sectors %u: multiple stripes using same bucket (%u, %llu)", iter.pos.inode, iter.pos.offset, u.gen, + bch2_data_types[u.data_type], + u.dirty_sectors, u.stripe, s.k->p.offset)) { ret = -EIO; goto err; } + if (bch2_fs_inconsistent_on(data_type && u.dirty_sectors, c, + "bucket %llu:%llu gen %u data type %s dirty_sectors %u: data already in stripe bucket %llu", + iter.pos.inode, iter.pos.offset, u.gen, + bch2_data_types[u.data_type], + u.dirty_sectors, + s.k->p.offset)) { + ret = -EIO; + goto err; + } + u.stripe = s.k->p.offset; u.stripe_redundancy = s.v->nr_redundant; } else { + if (bch2_fs_inconsistent_on(u.stripe != s.k->p.offset || + u.stripe_redundancy != s.v->nr_redundant, c, + "bucket %llu:%llu gen %u: not marked as stripe when deleting stripe %llu (got %u)", + iter.pos.inode, iter.pos.offset, u.gen, + s.k->p.offset, u.stripe)) { + ret = -EIO; + goto err; + } + u.stripe = 0; u.stripe_redundancy = 0; } + u.dirty_sectors += sectors; + if (data_type) + u.data_type = !deleting ? data_type : 0; + bch2_alloc_pack(c, a, u); bch2_trans_update(trans, &iter, &a->k, 0); err: @@ -1714,7 +1751,7 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans, struct bkey_s_c_stripe old_s = { .k = NULL }; struct bkey_s_c_stripe new_s = { .k = NULL }; struct bch_replicas_padded r; - unsigned i; + unsigned i, nr_blocks; int ret = 0; if (old.k->type == KEY_TYPE_stripe) @@ -1732,18 +1769,17 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans, new_s.v->nr_blocks * sizeof(struct bch_extent_ptr))) return 0; + BUG_ON(new_s.k && old_s.k && + (new_s.v->nr_blocks != old_s.v->nr_blocks || + new_s.v->nr_redundant != old_s.v->nr_redundant)); + + nr_blocks = new_s.k ? new_s.v->nr_blocks : old_s.v->nr_blocks; + if (new_s.k) { s64 sectors = le16_to_cpu(new_s.v->sectors); bch2_bkey_to_replicas(&r.e, new); update_replicas_list(trans, &r.e, sectors * new_s.v->nr_redundant); - - for (i = 0; i < new_s.v->nr_blocks; i++) { - ret = bch2_trans_mark_stripe_alloc_ref(trans, new_s, - i, false); - if (ret) - return ret; - } } if (old_s.k) { @@ -1751,12 +1787,25 @@ static int bch2_trans_mark_stripe(struct btree_trans *trans, bch2_bkey_to_replicas(&r.e, old); update_replicas_list(trans, &r.e, sectors * old_s.v->nr_redundant); + } + + for (i = 0; i < nr_blocks; i++) { + if (new_s.k && old_s.k && + !memcmp(&new_s.v->ptrs[i], + &old_s.v->ptrs[i], + sizeof(new_s.v->ptrs[i]))) + continue; - for (i = 0; i < old_s.v->nr_blocks; i++) { - ret = bch2_trans_mark_stripe_alloc_ref(trans, old_s, - i, true); + if (new_s.k) { + ret = bch2_trans_mark_stripe_bucket(trans, new_s, i, false); if (ret) - return ret; + break; + } + + if (old_s.k) { + ret = bch2_trans_mark_stripe_bucket(trans, old_s, i, true); + if (ret) + break; } } diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c index b8e9bc91bf0b4..689602d185896 100644 --- a/fs/bcachefs/ec.c +++ b/fs/bcachefs/ec.c @@ -15,6 +15,7 @@ #include "io.h" #include "keylist.h" #include "recovery.h" +#include "replicas.h" #include "super-io.h" #include "util.h" @@ -1635,17 +1636,41 @@ int bch2_stripes_write(struct bch_fs *c, unsigned flags) static int bch2_stripes_read_fn(struct btree_trans *trans, struct bkey_s_c k) { - struct bkey deleted = KEY(0, 0, 0); - struct bkey_s_c old = (struct bkey_s_c) { &deleted, NULL }; + const struct bch_stripe *s; struct bch_fs *c = trans->c; + struct stripe *m; + unsigned i; int ret = 0; - deleted.p = k.k->p; + if (k.k->type != KEY_TYPE_stripe) + return 0; + + ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL); + if (ret) + return ret; + + s = bkey_s_c_to_stripe(k).v; + + m = genradix_ptr(&c->stripes[0], k.k->p.offset); + m->alive = true; + m->sectors = le16_to_cpu(s->sectors); + m->algorithm = s->algorithm; + m->nr_blocks = s->nr_blocks; + m->nr_redundant = s->nr_redundant; + m->blocks_nonempty = 0; + + for (i = 0; i < s->nr_blocks; i++) { + m->block_sectors[i] = + stripe_blockcount_get(s, i); + m->blocks_nonempty += !!m->block_sectors[i]; + m->ptrs[i] = s->ptrs[i]; + } + + bch2_bkey_to_replicas(&m->r.e, k); - if (k.k->type == KEY_TYPE_stripe) - ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?: - bch2_mark_key(trans, old, k, - BTREE_TRIGGER_NOATOMIC); + spin_lock(&c->ec_stripes_heap_lock); + bch2_stripes_heap_update(c, m, k.k->p.offset); + spin_unlock(&c->ec_stripes_heap_lock); return ret; } -- 2.30.2