bcachefs: Don't read existing stripes synchronously in write path
authorKent Overstreet <kent.overstreet@gmail.com>
Tue, 15 Dec 2020 00:41:03 +0000 (19:41 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:50 +0000 (17:08 -0400)
Previously, in the stripe creation path, when reusing an existing stripe
we'd read the existing stripe synchronously - ouch.

Now, we allocate two stripe bufs if we're using an existing stripe, so
that we can do the read asynchronously - and, we read the full stripe so
that we can run recovery, if necessary.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs_format.h
fs/bcachefs/ec.c
fs/bcachefs/ec.h

index 3f8281b5db4177f72627d6f6f790bf5501393bf0..397099514418fe4aa417d75532e4111583b9c895 100644 (file)
@@ -871,7 +871,7 @@ struct bch_stripe {
        __u8                    csum_type;
        __u8                    pad;
 
-       struct bch_extent_ptr   ptrs[0];
+       struct bch_extent_ptr   ptrs[];
 } __attribute__((packed, aligned(8)));
 
 /* Reflink: */
index 76509c5970d201b29e829aa1546880cb9c617566..72ee53dc95d0fcafdf2750428619858960345258 100644 (file)
@@ -200,6 +200,36 @@ static bool extent_has_stripe_ptr(struct bkey_s_c k, u64 idx)
        return false;
 }
 
+/* Stripe bufs: */
+
+static void ec_stripe_buf_free(struct ec_stripe_buf *stripe)
+{
+       unsigned i;
+
+       for (i = 0; i < stripe->key.v.nr_blocks; i++) {
+               kvpfree(stripe->data[i], stripe->size << 9);
+               stripe->data[i] = NULL;
+       }
+}
+
+static int ec_stripe_buf_alloc(struct ec_stripe_buf *stripe)
+{
+       unsigned i;
+
+       memset(stripe->valid, 0xFF, sizeof(stripe->valid));
+
+       for (i = 0; i < stripe->key.v.nr_blocks; i++) {
+               stripe->data[i] = kvpmalloc(stripe->size << 9, GFP_KERNEL);
+               if (!stripe->data[i])
+                       goto err;
+       }
+
+       return 0;
+err:
+       ec_stripe_buf_free(stripe);
+       return -ENOMEM;
+}
+
 /* Checksumming: */
 
 static void ec_generate_checksums(struct ec_stripe_buf *buf)
@@ -287,14 +317,10 @@ static void ec_generate_ec(struct ec_stripe_buf *buf)
        raid_gen(nr_data, v->nr_redundant, bytes, buf->data);
 }
 
-static unsigned __ec_nr_failed(struct ec_stripe_buf *buf, unsigned nr)
-{
-       return nr - bitmap_weight(buf->valid, nr);
-}
-
 static unsigned ec_nr_failed(struct ec_stripe_buf *buf)
 {
-       return __ec_nr_failed(buf, buf->key.v.nr_blocks);
+       return buf->key.v.nr_blocks -
+               bitmap_weight(buf->valid, buf->key.v.nr_blocks);
 }
 
 static int ec_do_recov(struct bch_fs *c, struct ec_stripe_buf *buf)
@@ -822,14 +848,13 @@ static void ec_stripe_create(struct ec_stripe_new *s)
        struct open_bucket *ob;
        struct bkey_i *k;
        struct stripe *m;
-       struct bch_stripe *v = &s->stripe.key.v;
+       struct bch_stripe *v = &s->new_stripe.key.v;
        unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
-       struct closure cl;
        int ret;
 
        BUG_ON(s->h->s == s);
 
-       closure_init_stack(&cl);
+       closure_sync(&s->iodone);
 
        if (s->err) {
                if (s->err != -EROFS)
@@ -837,6 +862,22 @@ static void ec_stripe_create(struct ec_stripe_new *s)
                goto err;
        }
 
+       if (s->have_existing_stripe) {
+               ec_validate_checksums(c, &s->existing_stripe);
+
+               if (ec_do_recov(c, &s->existing_stripe)) {
+                       bch_err(c, "error creating stripe: error reading existing stripe");
+                       goto err;
+               }
+
+               for (i = 0; i < nr_data; i++)
+                       if (stripe_blockcount_get(&s->existing_stripe.key.v, i))
+                               swap(s->new_stripe.data[i],
+                                    s->existing_stripe.data[i]);
+
+               ec_stripe_buf_free(&s->existing_stripe);
+       }
+
        BUG_ON(!s->allocated);
 
        if (!percpu_ref_tryget(&c->writes))
@@ -845,33 +886,31 @@ static void ec_stripe_create(struct ec_stripe_new *s)
        BUG_ON(bitmap_weight(s->blocks_allocated,
                             s->blocks.nr) != s->blocks.nr);
 
-       ec_generate_ec(&s->stripe);
+       ec_generate_ec(&s->new_stripe);
 
-       ec_generate_checksums(&s->stripe);
+       ec_generate_checksums(&s->new_stripe);
 
        /* write p/q: */
        for (i = nr_data; i < v->nr_blocks; i++)
-               ec_block_io(c, &s->stripe, REQ_OP_WRITE, i, &cl);
-
-       closure_sync(&cl);
+               ec_block_io(c, &s->new_stripe, REQ_OP_WRITE, i, &s->iodone);
+       closure_sync(&s->iodone);
 
-       for (i = nr_data; i < v->nr_blocks; i++)
-               if (!test_bit(i, s->stripe.valid)) {
-                       bch_err(c, "error creating stripe: error writing redundancy buckets");
-                       goto err_put_writes;
-               }
+       if (ec_nr_failed(&s->new_stripe)) {
+               bch_err(c, "error creating stripe: error writing redundancy buckets");
+               goto err_put_writes;
+       }
 
-       ret = s->existing_stripe
-               ? bch2_btree_insert(c, BTREE_ID_EC, &s->stripe.key.k_i,
+       ret = s->have_existing_stripe
+               ? bch2_btree_insert(c, BTREE_ID_EC, &s->new_stripe.key.k_i,
                                    &s->res, NULL, BTREE_INSERT_NOFAIL)
-               : ec_stripe_bkey_insert(c, s, &s->stripe.key);
+               : ec_stripe_bkey_insert(c, s, &s->new_stripe.key);
        if (ret) {
                bch_err(c, "error creating stripe: error creating stripe key");
                goto err_put_writes;
        }
 
        for_each_keylist_key(&s->keys, k) {
-               ret = ec_stripe_update_ptrs(c, &s->stripe, &k->k);
+               ret = ec_stripe_update_ptrs(c, &s->new_stripe, &k->k);
                if (ret) {
                        bch_err(c, "error creating stripe: error %i updating pointers", ret);
                        break;
@@ -879,14 +918,14 @@ static void ec_stripe_create(struct ec_stripe_new *s)
        }
 
        spin_lock(&c->ec_stripes_heap_lock);
-       m = genradix_ptr(&c->stripes[0], s->stripe.key.k.p.offset);
+       m = genradix_ptr(&c->stripes[0], s->new_stripe.key.k.p.offset);
 #if 0
        pr_info("created a %s stripe %llu",
-               s->existing_stripe ? "existing" : "new",
+               s->have_existing_stripe ? "existing" : "new",
                s->stripe.key.k.p.offset);
 #endif
        BUG_ON(m->on_heap);
-       bch2_stripes_heap_insert(c, m, s->stripe.key.k.p.offset);
+       bch2_stripes_heap_insert(c, m, s->new_stripe.key.k.p.offset);
        spin_unlock(&c->ec_stripes_heap_lock);
 err_put_writes:
        percpu_ref_put(&c->writes);
@@ -902,8 +941,9 @@ err:
 
        bch2_keylist_free(&s->keys, s->inline_keys);
 
-       for (i = 0; i < s->stripe.key.v.nr_blocks; i++)
-               kvpfree(s->stripe.data[i], s->stripe.size << 9);
+       ec_stripe_buf_free(&s->existing_stripe);
+       ec_stripe_buf_free(&s->new_stripe);
+       closure_debug_destroy(&s->iodone);
        kfree(s);
 }
 
@@ -980,7 +1020,7 @@ void *bch2_writepoint_ec_buf(struct bch_fs *c, struct write_point *wp)
        ca      = bch_dev_bkey_exists(c, ob->ptr.dev);
        offset  = ca->mi.bucket_size - ob->sectors_free;
 
-       return ob->ec->stripe.data[ob->ec_idx] + (offset << 9);
+       return ob->ec->new_stripe.data[ob->ec_idx] + (offset << 9);
 }
 
 void bch2_ec_add_backpointer(struct bch_fs *c, struct write_point *wp,
@@ -1087,7 +1127,6 @@ static void ec_stripe_key_init(struct bch_fs *c,
 static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
 {
        struct ec_stripe_new *s;
-       unsigned i;
 
        lockdep_assert_held(&h->lock);
 
@@ -1096,6 +1135,7 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
                return -ENOMEM;
 
        mutex_init(&s->lock);
+       closure_init(&s->iodone, NULL);
        atomic_set(&s->pin, 1);
        s->c            = c;
        s->h            = h;
@@ -1105,27 +1145,14 @@ static int ec_new_stripe_alloc(struct bch_fs *c, struct ec_stripe_head *h)
 
        bch2_keylist_init(&s->keys, s->inline_keys);
 
-       s->stripe.offset        = 0;
-       s->stripe.size          = h->blocksize;
-       memset(s->stripe.valid, 0xFF, sizeof(s->stripe.valid));
+       s->new_stripe.offset    = 0;
+       s->new_stripe.size      = h->blocksize;
 
-       ec_stripe_key_init(c, &s->stripe.key, s->nr_data,
+       ec_stripe_key_init(c, &s->new_stripe.key, s->nr_data,
                           s->nr_parity, h->blocksize);
 
-       for (i = 0; i < s->stripe.key.v.nr_blocks; i++) {
-               s->stripe.data[i] = kvpmalloc(s->stripe.size << 9, GFP_KERNEL);
-               if (!s->stripe.data[i])
-                       goto err;
-       }
-
        h->s = s;
-
        return 0;
-err:
-       for (i = 0; i < s->stripe.key.v.nr_blocks; i++)
-               kvpfree(s->stripe.data[i], s->stripe.size << 9);
-       kfree(s);
-       return -ENOMEM;
 }
 
 static struct ec_stripe_head *
@@ -1217,7 +1244,7 @@ static int new_stripe_alloc_buckets(struct bch_fs *c, struct ec_stripe_head *h)
        devs = h->devs;
 
        for_each_set_bit(i, h->s->blocks_allocated, BCH_BKEY_PTRS_MAX) {
-               __clear_bit(h->s->stripe.key.v.ptrs[i].dev, devs.d);
+               __clear_bit(h->s->new_stripe.key.v.ptrs[i].dev, devs.d);
                --nr_data;
        }
 
@@ -1327,51 +1354,70 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
                                               unsigned algo,
                                               unsigned redundancy)
 {
-       struct closure cl;
        struct ec_stripe_head *h;
        struct open_bucket *ob;
        unsigned i, data_idx = 0;
        s64 idx;
        int ret;
 
-       closure_init_stack(&cl);
-
        h = __bch2_ec_stripe_head_get(c, target, algo, redundancy);
-       if (!h)
+       if (!h) {
+               bch_err(c, "no stripe head");
                return NULL;
+       }
 
        if (!h->s) {
                if (ec_new_stripe_alloc(c, h)) {
                        bch2_ec_stripe_head_put(c, h);
+                       bch_err(c, "failed to allocate new stripe");
                        return NULL;
                }
 
                idx = get_existing_stripe(c, target, algo, redundancy);
                if (idx >= 0) {
-                       h->s->existing_stripe = true;
-                       h->s->existing_stripe_idx = idx;
-                       if (get_stripe_key(c, idx, &h->s->stripe)) {
-                               /* btree error */
+                       h->s->have_existing_stripe = true;
+                       ret = get_stripe_key(c, idx, &h->s->existing_stripe);
+                       if (ret) {
+                               bch2_fs_fatal_error(c, "error reading stripe key: %i", ret);
+                               bch2_ec_stripe_head_put(c, h);
+                               return NULL;
+                       }
+
+                       if (ec_stripe_buf_alloc(&h->s->existing_stripe)) {
+                               /*
+                                * this is a problem: we have deleted from the
+                                * stripes heap already
+                                */
                                BUG();
                        }
 
-                       for (i = 0; i < h->s->stripe.key.v.nr_blocks; i++)
-                               if (stripe_blockcount_get(&h->s->stripe.key.v, i)) {
+                       for (i = 0; i < h->s->existing_stripe.key.v.nr_blocks; i++) {
+                               if (stripe_blockcount_get(&h->s->existing_stripe.key.v, i))
                                        __set_bit(i, h->s->blocks_allocated);
-                                       ec_block_io(c, &h->s->stripe, READ, i, &cl);
-                               }
+
+                               ec_block_io(c, &h->s->existing_stripe, READ, i, &h->s->iodone);
+                       }
+
+                       bkey_copy(&h->s->new_stripe.key.k_i,
+                                 &h->s->existing_stripe.key.k_i);
+               }
+
+               if (ec_stripe_buf_alloc(&h->s->new_stripe)) {
+                       BUG();
                }
        }
 
        if (!h->s->allocated) {
-               if (!h->s->existing_stripe &&
+               if (!h->s->have_existing_stripe &&
                    !h->s->res.sectors) {
                        ret = bch2_disk_reservation_get(c, &h->s->res,
-                                                       h->blocksize,
-                                                       h->s->nr_parity, 0);
+                                       h->blocksize,
+                                       h->s->nr_parity, 0);
                        if (ret) {
-                               /* What should we do here? */
-                               bch_err(c, "unable to create new stripe: %i", ret);
+                               /*
+                                * This means we need to wait for copygc to
+                                * empty out buckets from existing stripes:
+                                */
                                bch2_ec_stripe_head_put(c, h);
                                h = NULL;
                                goto out;
@@ -1391,19 +1437,18 @@ struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *c,
                                                      h->s->nr_data, data_idx);
                        BUG_ON(data_idx >= h->s->nr_data);
 
-                       h->s->stripe.key.v.ptrs[data_idx] = ob->ptr;
+                       h->s->new_stripe.key.v.ptrs[data_idx] = ob->ptr;
                        h->s->data_block_idx[i] = data_idx;
                        data_idx++;
                }
 
                open_bucket_for_each(c, &h->s->parity, ob, i)
-                       h->s->stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr;
+                       h->s->new_stripe.key.v.ptrs[h->s->nr_data + i] = ob->ptr;
 
                //pr_info("new stripe, blocks_allocated %lx", h->s->blocks_allocated[0]);
                h->s->allocated = true;
        }
 out:
-       closure_sync(&cl);
        return h;
 }
 
index 450bb1a113a30c200db219525076a7522d34b275..1d4aad50db4d5e7b3fc07e8b8f35e8510221aa67 100644 (file)
@@ -88,6 +88,7 @@ struct ec_stripe_new {
        struct ec_stripe_head   *h;
        struct mutex            lock;
        struct list_head        list;
+       struct closure          iodone;
 
        /* counts in flight writes, stripe is created when pin == 0 */
        atomic_t                pin;
@@ -98,8 +99,7 @@ struct ec_stripe_new {
        u8                      nr_parity;
        bool                    allocated;
        bool                    pending;
-       bool                    existing_stripe;
-       u64                     existing_stripe_idx;
+       bool                    have_existing_stripe;
 
        unsigned long           blocks_allocated[BITS_TO_LONGS(BCH_BKEY_PTRS_MAX)];
 
@@ -111,7 +111,8 @@ struct ec_stripe_new {
        struct keylist          keys;
        u64                     inline_keys[BKEY_U64s * 8];
 
-       struct ec_stripe_buf    stripe;
+       struct ec_stripe_buf    new_stripe;
+       struct ec_stripe_buf    existing_stripe;
 };
 
 struct ec_stripe_head {