bcachefs: Fix unnecessary read amplificaiton when allocating ec stripes
authorRobbie Litchfield <blam.kiwi@gmail.com>
Wed, 10 Feb 2021 00:18:13 +0000 (13:18 +1300)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:53 +0000 (17:08 -0400)
When allocating an erasure coding stripe, bcachefs will always reuse any
partial stripes before reserving a new stripe. This causes unnecessary
read amplification when preparing a stripe for writing. This patch changes
bcachefs to always reserve new stripes first, only relying on stripe reuse
when copygc needs more time to empty buckets from existing stripes.

Signed-off-by: Robbie Litchfield <blam.kiwi@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/ec.c

index a32d399e5b6f9b31d9a0fe6eb3b4c18579b7f0e1..a70b859363f01c66fa17925a02866a1eb278670c 100644 (file)
@@ -1389,6 +1389,72 @@ static s64 get_existing_stripe(struct bch_fs *c,
        return ret;
 }
 
+static int __bch2_ec_stripe_head_reuse(struct bch_fs *c,
+                                                  struct ec_stripe_head *h)
+{
+       unsigned i;
+       s64 idx;
+       int ret;
+
+       idx = get_existing_stripe(c, h);
+       if (idx < 0) {
+               bch_err(c, "failed to find an existing stripe");
+               return -ENOSPC;
+       }
+
+       h->s->have_existing_stripe = true;
+       ret = get_stripe_key(c, idx, &h->s->existing_stripe);
+       if (ret) {
+               bch2_fs_fatal_error(c, "error reading stripe key: %i", ret);
+               return ret;
+       }
+
+       if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) {
+               /*
+                * this is a problem: we have deleted from the
+                * stripes heap already
+                */
+               BUG();
+       }
+
+       BUG_ON(h->s->existing_stripe.size != h->blocksize);
+       BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
+
+       for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
+               if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) {
+                       __set_bit(i, h->s->blocks_gotten);
+                       __set_bit(i, h->s->blocks_allocated);
+               }
+
+               ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
+       }
+
+       bkey_copy(&h->s->new_stripe.key.k_i,
+                       &h->s->existing_stripe.key.k_i);
+
+       return 0;
+}
+
+static int __bch2_ec_stripe_head_reserve(struct bch_fs *c,
+                                                       struct ec_stripe_head *h)
+{
+       int ret;
+
+       ret = bch2_disk_reservation_get(c, &h->s->res,
+                       h->blocksize,
+                       h->s->nr_parity, 0);
+
+       if (ret) {
+               /*
+                * This means we need to wait for copygc to
+                * empty out buckets from existing stripes:
+                */
+               bch_err(c, "failed to reserve stripe");
+       }
+
+       return ret;
+}
+
 struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
                                               unsigned target,
                                               unsigned algo,
@@ -1397,9 +1463,8 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
                                               struct closure *cl)
 {
        struct ec_stripe_head *h;
-       unsigned i;
-       s64 idx;
        int ret;
+       bool needs_stripe_new;
 
        h = __bch2_ec_stripe_head_get(c, target, algo, redundancy, copygc);
        if (!h) {
@@ -1407,80 +1472,44 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
                return NULL;
        }
 
-       if (!h->s) {
+       needs_stripe_new = !h->s;
+       if (needs_stripe_new) {
                if (ec_new_stripe_alloc(c, h)) {
-                       bch2_ec_stripe_head_put(c, h);
+                       ret = -ENOMEM;
                        bch_err(c, "failed to allocate new stripe");
-                       return NULL;
-               }
-
-               idx = get_existing_stripe(c, h);
-               if (idx >= 0) {
-                       h->s->have_existing_stripe = true;
-                       ret = get_stripe_key(c, idx, &h->s->existing_stripe);
-                       if (ret) {
-                               bch2_fs_fatal_error(c, "error reading stripe key: %i", ret);
-                               bch2_ec_stripe_head_put(c, h);
-                               return NULL;
-                       }
-
-                       if (ec_stripe_buf_init(&h->s->existing_stripe, 0, h->blocksize)) {
-                               /*
-                                * this is a problem: we have deleted from the
-                                * stripes heap already
-                                */
-                               BUG();
-                       }
-
-                       BUG_ON(h->s->existing_stripe.size != h->blocksize);
-                       BUG_ON(h->s->existing_stripe.size != h->s->existing_stripe.key.v.sectors);
-
-                       for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
-                               if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i)) {
-                                       __set_bit(i, h->s->blocks_gotten);
-                                       __set_bit(i, h->s->blocks_allocated);
-                               }
-
-                               ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
-                       }
-
-                       bkey_copy(&h->s->new_stripe.key.k_i,
-                                 &h->s->existing_stripe.key.k_i);
+                       goto err;
                }
 
-               if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize)) {
+               if (ec_stripe_buf_init(&h->s->new_stripe, 0, h->blocksize))
                        BUG();
-               }
        }
 
-       if (!h->s->allocated) {
-               if (!h->s->have_existing_stripe &&
-                   !h->s->res.sectors) {
-                       ret = bch2_disk_reservation_get(c, &h->s->res,
-                                       h->blocksize,
-                                       h->s->nr_parity, 0);
-                       if (ret) {
-                               /*
-                                * This means we need to wait for copygc to
-                                * empty out buckets from existing stripes:
-                                */
-                               bch2_ec_stripe_head_put(c, h);
-                               h = NULL;
-                               goto out;
-                       }
-               }
+       /*
+        * Try reserve a new stripe before reusing an
+        * existing stripe. This will prevent unnecessary
+        * read amplification during write oriented workloads.
+        */
+       ret = 0;
+       if (!h->s->allocated && !h->s->res.sectors && !h->s->have_existing_stripe)
+               ret = __bch2_ec_stripe_head_reserve(c, h);
+       if (ret && needs_stripe_new)
+               ret = __bch2_ec_stripe_head_reuse(c, h);
+       if (ret)
+               goto err;
 
+       if (!h->s->allocated) {
                ret = new_stripe_alloc_buckets(c, h, cl);
-               if (ret) {
-                       bch2_ec_stripe_head_put(c, h);
-                       h = ERR_PTR(-ret);
-                       goto out;
-               }
+               if (ret)
+                       goto err;
 
                h->s->allocated = true;
        }
-out:
+
        return h;
+
+err:
+       bch2_ec_stripe_head_put(c, h);
+       return ERR_PTR(-ret);
 }
 
 void bch2_ec_stop_dev(struct bch_fs *c, struct bch_dev *ca)