bcachefs: Improve stripe triggers/heap code
authorKent Overstreet <kent.overstreet@gmail.com>
Tue, 7 Jul 2020 00:18:13 +0000 (20:18 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:42 +0000 (17:08 -0400)
Soon we'll be able to modify existing stripes - replacing empty blocks
with new blocks and new p/q blocks. This patch updates the trigger code
to handle pointers changing in an existing stripe; also, it
significantly improves how the stripes heap works, which means we can
get rid of the stripe creation/deletion lock.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_gc.c
fs/bcachefs/buckets.c
fs/bcachefs/ec.c
fs/bcachefs/ec.h
fs/bcachefs/ec_types.h
fs/bcachefs/super.c
fs/bcachefs/sysfs.c

index 42e3395884c142d66e96c8a6294904a8ef3f4a49..27c5d9da70bfcaf7df1adda6c8040a13a9ac21b3 100644 (file)
@@ -755,7 +755,6 @@ struct bch_fs {
 
        /* STRIPES: */
        GENRADIX(struct stripe) stripes[2];
-       struct mutex            ec_stripe_create_lock;
 
        ec_stripes_heap         ec_stripes_heap;
        spinlock_t              ec_stripes_heap_lock;
index cdd4bc334530f359acd0287af03c7763e5bf3e2f..f32e8009e444cc0bf96624b7e161fe5ef4b4529c 100644 (file)
@@ -619,8 +619,11 @@ static int bch2_gc_done(struct bch_fs *c,
                                copy_stripe_field(block_sectors[i],
                                                  "block_sectors[%u]", i);
 
-                       if (dst->alive)
+                       if (dst->alive) {
+                               spin_lock(&c->ec_stripes_heap_lock);
                                bch2_stripes_heap_insert(c, dst, dst_iter.pos);
+                               spin_unlock(&c->ec_stripes_heap_lock);
+                       }
 
                        genradix_iter_advance(&dst_iter, &c->stripes[0]);
                        genradix_iter_advance(&src_iter, &c->stripes[1]);
index c02dee3e31642c35f7a7fc956d1644360706ba5d..aff1ace3778f2df52543581b6db1697e3a38f632 100644 (file)
@@ -884,51 +884,46 @@ static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
 }
 
 static void bucket_set_stripe(struct bch_fs *c,
-                             const struct bch_stripe *v,
+                             const struct bch_extent_ptr *ptr,
                              struct bch_fs_usage *fs_usage,
                              u64 journal_seq,
                              unsigned flags,
                              bool enabled)
 {
        bool gc = flags & BTREE_TRIGGER_GC;
-       unsigned i;
+       struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+       struct bucket *g = PTR_BUCKET(ca, ptr, gc);
+       struct bucket_mark new, old;
 
-       for (i = 0; i < v->nr_blocks; i++) {
-               const struct bch_extent_ptr *ptr = v->ptrs + i;
-               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-               struct bucket *g = PTR_BUCKET(ca, ptr, gc);
-               struct bucket_mark new, old;
-
-               old = bucket_cmpxchg(g, new, ({
-                       new.stripe                      = enabled;
-                       if (journal_seq) {
-                               new.journal_seq_valid   = 1;
-                               new.journal_seq         = journal_seq;
-                       }
-               }));
+       old = bucket_cmpxchg(g, new, ({
+               new.stripe                      = enabled;
+               if (journal_seq) {
+                       new.journal_seq_valid   = 1;
+                       new.journal_seq         = journal_seq;
+               }
+       }));
 
-               bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
+       bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
 
-               /*
-                * XXX write repair code for these, flag stripe as possibly bad
-                */
-               if (old.gen != ptr->gen)
-                       bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-                                     "stripe with stale pointer");
+       /*
+        * XXX write repair code for these, flag stripe as possibly bad
+        */
+       if (old.gen != ptr->gen)
+               bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+                             "stripe with stale pointer");
 #if 0
-               /*
-                * We'd like to check for these, but these checks don't work
-                * yet:
-                */
-               if (old.stripe && enabled)
-                       bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-                                     "multiple stripes using same bucket");
-
-               if (!old.stripe && !enabled)
-                       bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
-                                     "deleting stripe but bucket not marked as stripe bucket");
+       /*
+        * We'd like to check for these, but these checks don't work
+        * yet:
+        */
+       if (old.stripe && enabled)
+               bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+                             "multiple stripes using same bucket");
+
+       if (!old.stripe && !enabled)
+               bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+                             "deleting stripe but bucket not marked as stripe bucket");
 #endif
-       }
 }
 
 static int __mark_pointer(struct bch_fs *c, struct bkey_s_c k,
@@ -1070,8 +1065,7 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
 {
        bool gc = flags & BTREE_TRIGGER_GC;
        struct stripe *m;
-       unsigned old, new;
-       int blocks_nonempty_delta;
+       unsigned i, blocks_nonempty = 0;
 
        m = genradix_ptr(&c->stripes[gc], p.idx);
 
@@ -1090,20 +1084,17 @@ static int bch2_mark_stripe_ptr(struct bch_fs *c,
        *nr_parity      = m->nr_redundant;
        *r = m->r;
 
-       old = m->block_sectors[p.block];
        m->block_sectors[p.block] += sectors;
-       new = m->block_sectors[p.block];
 
-       blocks_nonempty_delta = (int) !!new - (int) !!old;
-       if (blocks_nonempty_delta) {
-               m->blocks_nonempty += blocks_nonempty_delta;
+       for (i = 0; i < m->nr_blocks; i++)
+               blocks_nonempty += m->block_sectors[i] != 0;
 
+       if (m->blocks_nonempty != blocks_nonempty) {
+               m->blocks_nonempty = blocks_nonempty;
                if (!gc)
                        bch2_stripes_heap_update(c, m, p.idx);
        }
 
-       m->dirty = true;
-
        spin_unlock(&c->ec_stripes_heap_lock);
 
        return 0;
@@ -1207,10 +1198,11 @@ static int bch2_mark_stripe(struct bch_fs *c,
 
        if (!new_s) {
                /* Deleting: */
-               bucket_set_stripe(c, old_s, fs_usage,
-                                 journal_seq, flags, false);
+               for (i = 0; i < old_s->nr_blocks; i++)
+                       bucket_set_stripe(c, old_s->ptrs + i, fs_usage,
+                                         journal_seq, flags, false);
 
-               if (!gc) {
+               if (!gc && m->on_heap) {
                        spin_lock(&c->ec_stripes_heap_lock);
                        bch2_stripes_heap_del(c, m, idx);
                        spin_unlock(&c->ec_stripes_heap_lock);
@@ -1221,10 +1213,21 @@ static int bch2_mark_stripe(struct bch_fs *c,
                BUG_ON(old_s && new_s->nr_blocks != old_s->nr_blocks);
                BUG_ON(old_s && new_s->nr_redundant != old_s->nr_redundant);
 
-               if (!old_s)
-                       bucket_set_stripe(c, new_s, fs_usage,
-                                         journal_seq, flags, true);
+               for (i = 0; i < new_s->nr_blocks; i++) {
+                       if (!old_s ||
+                           memcmp(new_s->ptrs + i,
+                                  old_s->ptrs + i,
+                                  sizeof(struct bch_extent_ptr))) {
+
+                               if (old_s)
+                                       bucket_set_stripe(c, old_s->ptrs + i, fs_usage,
+                                                         journal_seq, flags, false);
+                               bucket_set_stripe(c, new_s->ptrs + i, fs_usage,
+                                                 journal_seq, flags, true);
+                       }
+               }
 
+               m->alive        = true;
                m->sectors      = le16_to_cpu(new_s->sectors);
                m->algorithm    = new_s->algorithm;
                m->nr_blocks    = new_s->nr_blocks;
@@ -1248,8 +1251,6 @@ static int bch2_mark_stripe(struct bch_fs *c,
                        bch2_stripes_heap_update(c, m, idx);
                        spin_unlock(&c->ec_stripes_heap_lock);
                }
-
-               m->alive        = true;
        }
 
        return 0;
index d35fa016cf0acee61d8937638ea5788ed8648b24..516a5268f462b3603b470cfdef0e57b8958b19c7 100644 (file)
@@ -607,39 +607,16 @@ static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
        BUG_ON(h->data[m->heap_idx].idx != idx);
 }
 
-void bch2_stripes_heap_update(struct bch_fs *c,
-                             struct stripe *m, size_t idx)
-{
-       ec_stripes_heap *h = &c->ec_stripes_heap;
-       size_t i;
-
-       if (m->alive) {
-               heap_verify_backpointer(c, idx);
-
-               h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
-
-               i = m->heap_idx;
-               heap_sift_up(h,   i, ec_stripes_heap_cmp,
-                            ec_stripes_heap_set_backpointer);
-               heap_sift_down(h, i, ec_stripes_heap_cmp,
-                              ec_stripes_heap_set_backpointer);
-
-               heap_verify_backpointer(c, idx);
-       } else {
-               bch2_stripes_heap_insert(c, m, idx);
-       }
-
-       if (stripe_idx_to_delete(c) >= 0 &&
-           !percpu_ref_is_dying(&c->writes))
-               schedule_work(&c->ec_stripe_delete_work);
-}
-
 void bch2_stripes_heap_del(struct bch_fs *c,
                           struct stripe *m, size_t idx)
 {
+       if (!m->on_heap)
+               return;
+
+       m->on_heap = false;
+
        heap_verify_backpointer(c, idx);
 
-       m->alive = false;
        heap_del(&c->ec_stripes_heap, m->heap_idx,
                 ec_stripes_heap_cmp,
                 ec_stripes_heap_set_backpointer);
@@ -648,19 +625,49 @@ void bch2_stripes_heap_del(struct bch_fs *c,
 void bch2_stripes_heap_insert(struct bch_fs *c,
                              struct stripe *m, size_t idx)
 {
+       if (m->on_heap)
+               return;
+
        BUG_ON(heap_full(&c->ec_stripes_heap));
 
+       m->on_heap = true;
+
        heap_add(&c->ec_stripes_heap, ((struct ec_stripe_heap_entry) {
                        .idx = idx,
                        .blocks_nonempty = m->blocks_nonempty,
                }),
                 ec_stripes_heap_cmp,
                 ec_stripes_heap_set_backpointer);
-       m->alive = true;
 
        heap_verify_backpointer(c, idx);
 }
 
+void bch2_stripes_heap_update(struct bch_fs *c,
+                             struct stripe *m, size_t idx)
+{
+       ec_stripes_heap *h = &c->ec_stripes_heap;
+       size_t i;
+
+       if (!m->on_heap)
+               return;
+
+       heap_verify_backpointer(c, idx);
+
+       h->data[m->heap_idx].blocks_nonempty = m->blocks_nonempty;
+
+       i = m->heap_idx;
+       heap_sift_up(h,   i, ec_stripes_heap_cmp,
+                    ec_stripes_heap_set_backpointer);
+       heap_sift_down(h, i, ec_stripes_heap_cmp,
+                      ec_stripes_heap_set_backpointer);
+
+       heap_verify_backpointer(c, idx);
+
+       if (stripe_idx_to_delete(c) >= 0 &&
+           !percpu_ref_is_dying(&c->writes))
+               schedule_work(&c->ec_stripe_delete_work);
+}
+
 /* stripe deletion */
 
 static int ec_stripe_delete(struct bch_fs *c, size_t idx)
@@ -677,23 +684,20 @@ static void ec_stripe_delete_work(struct work_struct *work)
                container_of(work, struct bch_fs, ec_stripe_delete_work);
        ssize_t idx;
 
-       down_read(&c->gc_lock);
-       mutex_lock(&c->ec_stripe_create_lock);
-
        while (1) {
                spin_lock(&c->ec_stripes_heap_lock);
                idx = stripe_idx_to_delete(c);
-               spin_unlock(&c->ec_stripes_heap_lock);
-
-               if (idx < 0)
+               if (idx < 0) {
+                       spin_unlock(&c->ec_stripes_heap_lock);
                        break;
+               }
+
+               bch2_stripes_heap_del(c, genradix_ptr(&c->stripes[0], idx), idx);
+               spin_unlock(&c->ec_stripes_heap_lock);
 
                if (ec_stripe_delete(c, idx))
                        break;
        }
-
-       mutex_unlock(&c->ec_stripe_create_lock);
-       up_read(&c->gc_lock);
 }
 
 /* stripe creation: */
@@ -846,6 +850,7 @@ static void ec_stripe_create(struct ec_stripe_new *s)
        struct bch_fs *c = s->c;
        struct open_bucket *ob;
        struct bkey_i *k;
+       struct stripe *m;
        struct bch_stripe *v = &s->stripe.key.v;
        unsigned i, nr_data = v->nr_blocks - v->nr_redundant;
        struct closure cl;
@@ -882,12 +887,10 @@ static void ec_stripe_create(struct ec_stripe_new *s)
                        goto err_put_writes;
                }
 
-       mutex_lock(&c->ec_stripe_create_lock);
-
        ret = ec_stripe_bkey_insert(c, &s->stripe.key);
        if (ret) {
                bch_err(c, "error creating stripe: error creating stripe key");
-               goto err_unlock;
+               goto err_put_writes;
        }
 
        for_each_keylist_key(&s->keys, k) {
@@ -896,8 +899,11 @@ static void ec_stripe_create(struct ec_stripe_new *s)
                        break;
        }
 
-err_unlock:
-       mutex_unlock(&c->ec_stripe_create_lock);
+       spin_lock(&c->ec_stripes_heap_lock);
+       m = genradix_ptr(&c->stripes[0], s->stripe.key.k.p.offset);
+       BUG_ON(m->on_heap);
+       bch2_stripes_heap_insert(c, m, s->stripe.key.k.p.offset);
+       spin_unlock(&c->ec_stripes_heap_lock);
 err_put_writes:
        percpu_ref_put(&c->writes);
 err:
@@ -1280,11 +1286,21 @@ static int bch2_stripes_read_fn(struct bch_fs *c, enum btree_id id,
 {
        int ret = 0;
 
-       if (k.k->type == KEY_TYPE_stripe)
+       if (k.k->type == KEY_TYPE_stripe) {
+               struct stripe *m;
+
                ret = __ec_stripe_mem_alloc(c, k.k->p.offset, GFP_KERNEL) ?:
                        bch2_mark_key(c, k, 0, 0, NULL, 0,
                                      BTREE_TRIGGER_ALLOC_READ|
                                      BTREE_TRIGGER_NOATOMIC);
+               if (ret)
+                       return ret;
+
+               spin_lock(&c->ec_stripes_heap_lock);
+               m = genradix_ptr(&c->stripes[0], k.k->p.offset);
+               bch2_stripes_heap_insert(c, m, k.k->p.offset);
+               spin_unlock(&c->ec_stripes_heap_lock);
+       }
 
        return ret;
 }
@@ -1335,6 +1351,24 @@ int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
        return 0;
 }
 
+void bch2_stripes_heap_to_text(struct printbuf *out, struct bch_fs *c)
+{
+       ec_stripes_heap *h = &c->ec_stripes_heap;
+       struct stripe *m;
+       size_t i;
+
+       spin_lock(&c->ec_stripes_heap_lock);
+       for (i = 0; i < min(h->used, 20UL); i++) {
+               m = genradix_ptr(&c->stripes[0], h->data[i].idx);
+
+               pr_buf(out, "%zu %u/%u+%u\n", h->data[i].idx,
+                      h->data[i].blocks_nonempty,
+                      m->nr_blocks - m->nr_redundant,
+                      m->nr_redundant);
+       }
+       spin_unlock(&c->ec_stripes_heap_lock);
+}
+
 void bch2_fs_ec_exit(struct bch_fs *c)
 {
        struct ec_stripe_head *h;
index 4dfaac0348869eb82640650c03d6709307d9fe1e..36444cb14190307ad7656b5fc83317890dbb10a8 100644 (file)
@@ -157,6 +157,8 @@ int bch2_stripes_write(struct bch_fs *, unsigned, bool *);
 
 int bch2_ec_mem_alloc(struct bch_fs *, bool);
 
+void bch2_stripes_heap_to_text(struct printbuf *, struct bch_fs *);
+
 void bch2_fs_ec_exit(struct bch_fs *);
 int bch2_fs_ec_init(struct bch_fs *);
 
index 5c3f77c8aac71b9f269031f9deb2271e7e59c519..e4d633fca5bf913a78a4d78168141dc1458fdaf1 100644 (file)
@@ -22,6 +22,7 @@ struct stripe {
 
        unsigned                alive:1;
        unsigned                dirty:1;
+       unsigned                on_heap:1;
        u8                      blocks_nonempty;
        u16                     block_sectors[EC_STRIPE_MAX];
 
index 9bc470e68cc9737370c594761b6c2e9c02613bc5..4b21db5811bdf45d5608d75f50825f9555e809e5 100644 (file)
@@ -678,7 +678,6 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 
        INIT_LIST_HEAD(&c->ec_new_stripe_list);
        mutex_init(&c->ec_new_stripe_lock);
-       mutex_init(&c->ec_stripe_create_lock);
        spin_lock_init(&c->ec_stripes_heap_lock);
 
        seqcount_init(&c->gc_pos_lock);
index 30be49eb5da6eba6653f7c448b39a42138a8cbfc..d7ac26b8f9f371465172f0c5784fed2c5f3571bb 100644 (file)
@@ -168,6 +168,7 @@ read_attribute(btree_updates);
 read_attribute(dirty_btree_nodes);
 read_attribute(btree_key_cache);
 read_attribute(btree_transactions);
+read_attribute(stripes_heap);
 
 read_attribute(internal_uuid);
 
@@ -418,6 +419,13 @@ SHOW(bch2_fs)
                return out.pos - buf;
        }
 
+       if (attr == &sysfs_stripes_heap) {
+               struct printbuf out = _PBUF(buf, PAGE_SIZE);
+
+               bch2_stripes_heap_to_text(&out, c);
+               return out.pos - buf;
+       }
+
        if (attr == &sysfs_compression_stats)
                return bch2_compression_stats(c, buf);
 
@@ -583,6 +591,7 @@ struct attribute *bch2_fs_internal_files[] = {
        &sysfs_dirty_btree_nodes,
        &sysfs_btree_key_cache,
        &sysfs_btree_transactions,
+       &sysfs_stripes_heap,
 
        &sysfs_read_realloc_races,
        &sysfs_extent_migrate_done,