bcachefs: Rework allocating buckets for stripes
authorKent Overstreet <kent.overstreet@gmail.com>
Tue, 19 Jan 2021 04:26:42 +0000 (23:26 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:51 +0000 (17:08 -0400)
Allocating buckets for existing stripes was busted, in part because the
data structures were too contorted. This reworks new stripes so that we
have an array of open buckets that matches blocks in the stripe, and
it's sparse if we're reusing an existing stripe.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_foreground.c
fs/bcachefs/ec.c
fs/bcachefs/ec.h

index 476c46f596ccbd4e706b80137935bd3ed494680b..07aabae379c836921d8e5a3dbba68b084902662c 100644 (file)
@@ -489,16 +489,20 @@ bucket_alloc_from_stripe(struct bch_fs *c,
        devs_sorted = bch2_dev_alloc_list(c, &wp->stripe, devs_may_alloc);
 
        for (i = 0; i < devs_sorted.nr; i++)
-               open_bucket_for_each(c, &h->s->blocks, ob, ec_idx)
+               for (ec_idx = 0; ec_idx < h->s->nr_data; ec_idx++) {
+                       if (!h->s->blocks[ec_idx])
+                               continue;
+
+                       ob = c->open_buckets + h->s->blocks[ec_idx];
                        if (ob->ptr.dev == devs_sorted.devs[i] &&
-                           !test_and_set_bit(h->s->data_block_idx[ec_idx],
-                                             h->s->blocks_allocated))
+                           !test_and_set_bit(ec_idx, h->s->blocks_allocated))
                                goto got_bucket;
+               }
        goto out_put_head;
 got_bucket:
        ca = bch_dev_bkey_exists(c, ob->ptr.dev);
 
-       ob->ec_idx      = h->s->data_block_idx[ec_idx];
+       ob->ec_idx      = ec_idx;
        ob->ec          = h->s;
 
        add_new_bucket(c, ptrs, devs_may_alloc,
@@ -636,10 +640,13 @@ void bch2_open_buckets_stop_dev(struct bch_fs *c, struct bch_dev *ca,
 
                if (!drop && ob->ec) {
                        mutex_lock(&ob->ec->lock);
-                       open_bucket_for_each(c, &ob->ec->blocks, ob2, j)
-                               drop |= ob2->ptr.dev == ca->dev_idx;
-                       open_bucket_for_each(c, &ob->ec->parity, ob2, j)
+                       for (j = 0; j < ob->ec->new_stripe.key.v.nr_blocks; j++) {
+                               if (!ob->ec->blocks[j])
+                                       continue;
+
+                               ob2 = c->open_buckets + ob->ec->blocks[j];
                                drop |= ob2->ptr.dev == ca->dev_idx;
+                       }
                        mutex_unlock(&ob->ec->lock);
                }
 
index d48df42d41e9bc6dc347ec16d67419c514efe658..43296763148cc8ef2c3343010ac0cfb875389713 100644 (file)
@@ -907,9 +907,6 @@ static void ec_stripe_create(struct ec_stripe_new *s)
        if (!percpu_ref_tryget(&c->writes))
                goto err;
 
-       BUG_ON(bitmap_weight(s->blocks_allocated,
-                            s->blocks.nr) != s->blocks.nr);
-
        ec_generate_ec(&s->new_stripe);
 
        ec_generate_checksums(&s->new_stripe);
@@ -952,12 +949,17 @@ err_put_writes:
 err:
        bch2_disk_reservation_put(c, &s->res);
 
-       open_bucket_for_each(c, &s->blocks, ob, i) {
-               ob->ec = NULL;
-               __bch2_open_bucket_put(c, ob);
-       }
-
-       bch2_open_buckets_put(c, &s->parity);
+       for (i = 0; i < v->nr_blocks; i++)
+               if (s->blocks[i]) {
+                       ob = c->open_buckets + s->blocks[i];
+
+                       if (i < nr_data) {
+                               ob->ec = NULL;
+                               __bch2_open_bucket_put(c, ob);
+                       } else {
+                               bch2_open_bucket_put(c, ob);
+                       }
+               }
 
        bch2_keylist_free(&s->keys, s->inline_keys);
 
@@ -1216,7 +1218,7 @@ void bch2_ec_stripe_head_put(struct bch_fs *c, struct ec_stripe_head *h)
        if (h->s &&
            h->s->allocated &&
            bitmap_weight(h->s->blocks_allocated,
-                         h->s->blocks.nr) == h->s->blocks.nr)
+                         h->s->nr_data) == h->s->nr_data)
                ec_stripe_set_pending(c, h);
 
        mutex_unlock(&h->lock);
@@ -1253,64 +1255,82 @@ static enum bucket_alloc_ret
 new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h,
                         struct closure *cl)
 {
-       struct bch_devs_mask devs;
+       struct bch_devs_mask devs = h->devs;
        struct open_bucket *ob;
-       unsigned i, nr_have, nr_data =
-               min_t(unsigned, h->nr_active_devs,
-                     BCH_BKEY_PTRS_MAX) - h->redundancy;
+       struct open_buckets buckets;
+       unsigned i, j, nr_have_parity = 0, nr_have_data = 0;
        bool have_cache = true;
        enum bucket_alloc_ret ret = ALLOC_SUCCESS;
 
-       devs = h->devs;
-
-       for_each_set_bit(i, h->s->blocks_allocated, BCH_BKEY_PTRS_MAX) {
-               __clear_bit(h->s->new_stripe.key.v.ptrs[i].dev, devs.d);
-               --nr_data;
+       for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++) {
+               if (test_bit(i, h->s->blocks_gotten)) {
+                       __clear_bit(h->s->new_stripe.key.v.ptrs[i].dev, devs.d);
+                       if (i < h->s->nr_data)
+                               nr_have_data++;
+                       else
+                               nr_have_parity++;
+               }
        }
 
-       BUG_ON(h->s->blocks.nr > nr_data);
-       BUG_ON(h->s->parity.nr > h->redundancy);
-
-       open_bucket_for_each(c, &h->s->parity, ob, i)
-               __clear_bit(ob->ptr.dev, devs.d);
-       open_bucket_for_each(c, &h->s->blocks, ob, i)
-               __clear_bit(ob->ptr.dev, devs.d);
+       BUG_ON(nr_have_data     > h->s->nr_data);
+       BUG_ON(nr_have_parity   > h->s->nr_parity);
 
        percpu_down_read(&c->mark_lock);
        rcu_read_lock();
 
-       if (h->s->parity.nr < h->redundancy) {
-               nr_have = h->s->parity.nr;
-
-               ret = bch2_bucket_alloc_set(c, &h->s->parity,
+       buckets.nr = 0;
+       if (nr_have_parity < h->s->nr_parity) {
+               ret = bch2_bucket_alloc_set(c, &buckets,
                                            &h->parity_stripe,
                                            &devs,
-                                           h->redundancy,
-                                           &nr_have,
+                                           h->s->nr_parity,
+                                           &nr_have_parity,
                                            &have_cache,
                                            h->copygc
                                            ? RESERVE_MOVINGGC
                                            : RESERVE_NONE,
                                            0,
                                            cl);
+
+               open_bucket_for_each(c, &buckets, ob, i) {
+                       j = find_next_zero_bit(h->s->blocks_gotten,
+                                              h->s->nr_data + h->s->nr_parity,
+                                              h->s->nr_data);
+                       BUG_ON(j >= h->s->nr_data + h->s->nr_parity);
+
+                       h->s->blocks[j] = buckets.v[i];
+                       h->s->new_stripe.key.v.ptrs[j] = ob->ptr;
+                       __set_bit(j, h->s->blocks_gotten);
+               }
+
                if (ret)
                        goto err;
        }
 
-       if (h->s->blocks.nr < nr_data) {
-               nr_have = h->s->blocks.nr;
-
-               ret = bch2_bucket_alloc_set(c, &h->s->blocks,
+       buckets.nr = 0;
+       if (nr_have_data < h->s->nr_data) {
+               ret = bch2_bucket_alloc_set(c, &buckets,
                                            &h->block_stripe,
                                            &devs,
-                                           nr_data,
-                                           &nr_have,
+                                           h->s->nr_data,
+                                           &nr_have_data,
                                            &have_cache,
                                            h->copygc
                                            ? RESERVE_MOVINGGC
                                            : RESERVE_NONE,
                                            0,
                                            cl);
+
+               open_bucket_for_each(c, &buckets, ob, i) {
+                       j = find_next_zero_bit(h->s->blocks_gotten,
+                                              h->s->nr_data, 0);
+                       BUG_ON(j >= h->s->nr_data);
+
+                       h->s->blocks[j] = buckets.v[i];
+                       h->s->new_stripe.key.v.ptrs[j] = ob->ptr;
+                       __set_bit(j, h->s->blocks_gotten);
+               }
+
                if (ret)
                        goto err;
        }
@@ -1362,8 +1382,7 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
                                               struct closure *cl)
 {
        struct ec_stripe_head *h;
-       struct open_bucket *ob;
-       unsigned i, data_idx = 0;
+       unsigned i;
        s64 idx;
        int ret;
 
@@ -1398,9 +1417,14 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
                                BUG();
                        }
 
+                       BUG_ON(h->s->existing_stripe.size != h->blocksize);
+                       BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
+
                        for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
-                               if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i))
+                               if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) {
+                                       __set_bit(i, h->s->blocks_gotten);
                                        __set_bit(i, h->s->blocks_allocated);
+                               }
 
                                ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
                        }
@@ -1438,20 +1462,6 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
                        goto out;
                }
 
-               open_bucket_for_each(c, &h->s->blocks, ob, i) {
-                       data_idx = find_next_zero_bit(h->s->blocks_allocated,
-                                                     h->s->nr_data, data_idx);
-                       BUG_ON(data_idx >= h->s->nr_data);
-
-                       h->s->new_stripe.key.v.ptrs[data_idx] = ob->ptr;
-                       h->s->data_block_idx[i] = data_idx;
-                       data_idx++;
-               }
-
-               open_bucket_for_each(c, &h->s->parity, ob, i)
-                       h->s->new_stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr;
-
-               //pr_info("new stripe, blocks_allocated %lx", h->s->blocks_allocated[0]);
                h->s->allocated = true;
        }
 out:
@@ -1471,12 +1481,14 @@ void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)
                if (!h->s)
                        goto unlock;
 
-               open_bucket_for_each(c, &h->s->blocks, ob, i)
-                       if (ob->ptr.dev == ca->dev_idx)
-                               goto found;
-               open_bucket_for_each(c, &h->s->parity, ob, i)
+               for (i = 0; i < h->s->new_stripe.key.v.nr_blocks; i++) {
+                       if (!h->s->blocks[i])
+                               continue;
+
+                       ob = c->open_buckets + h->s->blocks[i];
                        if (ob->ptr.dev == ca->dev_idx)
                                goto found;
+               }
                goto unlock;
 found:
                h->s->err = -EROFS;
@@ -1662,19 +1674,17 @@ void bch2_new_stripes_to_text(struct printbuf *out, struct bch_fs *c)
                       h->target, h->algo, h->redundancy);
 
                if (h->s)
-                       pr_buf(out, "\tpending: blocks %u allocated %u\n",
-                              h->s->blocks.nr,
+                       pr_buf(out, "\tpending: blocks %u+%u allocated %u\n",
+                              h->s->nr_data, h->s->nr_parity,
                               bitmap_weight(h->s->blocks_allocated,
-                                            h->s->blocks.nr));
+                                            h->s->nr_data));
        }
        mutex_unlock(&c->ec_stripe_head_lock);
 
        mutex_lock(&c->ec_stripe_new_lock);
        list_for_each_entry(s, &c->ec_stripe_new_list, list) {
-               pr_buf(out, "\tin flight: blocks %u allocated %u pin %u\n",
-                      s->blocks.nr,
-                      bitmap_weight(s->blocks_allocated,
-                                    s->blocks.nr),
+               pr_buf(out, "\tin flight: blocks %u+%u pin %u\n",
+                      s->nr_data, s->nr_parity,
                       atomic_read(&s->pin));
        }
        mutex_unlock(&c->ec_stripe_new_lock);
index f124582fdc5fddf51004c01ba35afe07a7e2ef12..765baa9d926426431e92791267d47b86fd14b48f 100644 (file)
@@ -143,11 +143,9 @@ struct ec_stripe_new {
        bool                    pending;
        bool                    have_existing_stripe;
 
+       unsigned long           blocks_gotten[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)];
        unsigned long           blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)];
-
-       struct open_buckets     blocks;
-       u8                      data_block_idx[BCH_BKEY_PTRS_MAX];
-       struct open_buckets     parity;
+       open_bucket_idx_t       blocks[BCH_BKEY_PTRS_MAX];
        struct disk_reservation res;
 
        struct keylist          keys;