bcachefs: Refactor bch2_alloc_write()
Author:     Kent Overstreet <kent.overstreet@gmail.com>
AuthorDate: Tue, 27 Aug 2019 21:45:42 +0000 (17:45 -0400)
Commit:     Kent Overstreet <kent.overstreet@linux.dev>
CommitDate: Sun, 22 Oct 2023 21:08:26 +0000 (17:08 -0400)
Major simplification: this gets rid of the need to mark buckets as
dirty; instead, we write out a bucket if its in-memory mark differs
from what's in the btree.
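
As a minimal sketch of the new decision logic (alloc_key_needs_write()
is a hypothetical wrapper for illustration only; the real flow lives in
bch2_alloc_write_key() below, with locking and error handling elided
here):

  static bool alloc_key_needs_write(struct btree_iter *iter,
                                    struct bucket *g)
  {
          struct bkey_alloc_unpacked old_u, new_u;

          /* what's currently in the btree: */
          old_u = bch2_alloc_unpack(bch2_btree_iter_peek_slot(iter));

          /* what the in-memory bucket mark says it should be: */
          new_u = alloc_mem_to_key(g, READ_ONCE(g->mark));

          /* bkey_alloc_unpacked_cmp() returns true if not equal */
          return bkey_alloc_unpacked_cmp(old_u, new_u);
  }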

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_background.h
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_gc.c
fs/bcachefs/buckets.c
fs/bcachefs/buckets_types.h

index c1158ce154c59ac2e8e92e6a889457b28abd7c5b..54051161eba7624998b3bc0c1e1643984e62aaf2 100644 (file)
@@ -258,46 +258,68 @@ int bch2_alloc_read(struct bch_fs *c, struct journal_keys *journal_keys)
        return 0;
 }
 
-int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
+enum alloc_write_ret {
+       ALLOC_WROTE,
+       ALLOC_NOWROTE,
+       ALLOC_END,
+};
+
+static int bch2_alloc_write_key(struct btree_trans *trans,
+                               struct btree_iter *iter,
+                               unsigned flags)
 {
-       struct btree_trans trans;
-       struct btree_iter *iter;
+       struct bch_fs *c = trans->c;
+       struct bkey_s_c k;
        struct bch_dev *ca;
+       struct bucket_array *ba;
+       struct bucket *g;
+       struct bucket_mark m;
+       struct bkey_alloc_unpacked old_u, new_u;
+       __BKEY_PADDED(k, 8) alloc_key; /* hack: */
+       struct bkey_i_alloc *a;
        int ret;
+retry:
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (ret)
+               goto err;
 
-       if (k->k.p.inode >= c->sb.nr_devices ||
-           !c->devs[k->k.p.inode])
-               return 0;
-
-       ca = bch_dev_bkey_exists(c, k->k.p.inode);
+       old_u = bch2_alloc_unpack(k);
 
-       if (k->k.p.offset >= ca->mi.nbuckets)
-               return 0;
+       if (iter->pos.inode >= c->sb.nr_devices ||
+           !c->devs[iter->pos.inode])
+               return ALLOC_END;
 
-       bch2_trans_init(&trans, c, 0, 0);
+       percpu_down_read(&c->mark_lock);
+       ca      = bch_dev_bkey_exists(c, iter->pos.inode);
+       ba      = bucket_array(ca);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, k->k.p,
-                                  BTREE_ITER_INTENT);
+       if (iter->pos.offset >= ba->nbuckets) {
+               percpu_up_read(&c->mark_lock);
+               return ALLOC_END;
+       }
 
-       ret = bch2_btree_iter_traverse(iter);
-       if (ret)
-               goto err;
+       g       = &ba->b[iter->pos.offset];
+       m       = READ_ONCE(g->mark);
+       new_u   = alloc_mem_to_key(g, m);
+       percpu_up_read(&c->mark_lock);
 
-       /* check buckets_written with btree node locked: */
-       if (test_bit(k->k.p.offset, ca->buckets_written)) {
-               ret = 0;
-               goto err;
-       }
+       if (!bkey_alloc_unpacked_cmp(old_u, new_u))
+               return ALLOC_NOWROTE;
 
-       bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, k));
+       a = bkey_alloc_init(&alloc_key.k);
+       a->k.p = iter->pos;
+       bch2_alloc_pack(a, new_u);
 
-       ret = bch2_trans_commit(&trans, NULL, NULL,
+       bch2_trans_update(trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
+       ret = bch2_trans_commit(trans, NULL, NULL,
+                               BTREE_INSERT_ATOMIC|
                                BTREE_INSERT_NOFAIL|
-                               BTREE_INSERT_LAZY_RW|
-                               BTREE_INSERT_JOURNAL_REPLAY|
-                               BTREE_INSERT_NOMARK);
+                               BTREE_INSERT_NOMARK|
+                               flags);
 err:
-       bch2_trans_exit(&trans);
+       if (ret == -EINTR)
+               goto retry;
        return ret;
 }
 
@@ -305,16 +327,8 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote)
 {
        struct btree_trans trans;
        struct btree_iter *iter;
-       struct bucket_array *buckets;
        struct bch_dev *ca;
-       struct bucket *g;
-       struct bucket_mark m, new;
-       struct bkey_alloc_unpacked old_u, new_u;
-       __BKEY_PADDED(k, 8) alloc_key; /* hack: */
-       struct bkey_i_alloc *a;
-       struct bkey_s_c k;
        unsigned i;
-       size_t b;
        int ret = 0;
 
        BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
@@ -325,81 +339,24 @@ int bch2_alloc_write(struct bch_fs *c, unsigned flags, bool *wrote)
                                   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
        for_each_rw_member(ca, c, i) {
-               down_read(&ca->bucket_lock);
-restart:
-               buckets = bucket_array(ca);
-
-               for (b = buckets->first_bucket;
-                    b < buckets->nbuckets;
-                    b++) {
-                       if (!buckets->b[b].mark.dirty)
-                               continue;
-
-                       bch2_btree_iter_set_pos(iter, POS(i, b));
-                       k = bch2_btree_iter_peek_slot(iter);
-                       ret = bkey_err(k);
-                       if (ret)
-                               goto err;
-
-                       old_u = bch2_alloc_unpack(k);
-
-                       percpu_down_read(&c->mark_lock);
-                       g       = bucket(ca, b);
-                       m       = READ_ONCE(g->mark);
-                       new_u   = alloc_mem_to_key(g, m);
-                       percpu_up_read(&c->mark_lock);
+               unsigned first_bucket;
 
-                       if (!m.dirty)
-                               continue;
-
-                       if ((flags & BTREE_INSERT_LAZY_RW) &&
-                           percpu_ref_is_zero(&c->writes)) {
-                               up_read(&ca->bucket_lock);
-                               bch2_trans_unlock(&trans);
-
-                               ret = bch2_fs_read_write_early(c);
-                               down_read(&ca->bucket_lock);
-
-                               if (ret)
-                                       goto err;
-                               goto restart;
-                       }
+               percpu_down_read(&c->mark_lock);
+               first_bucket = bucket_array(ca)->first_bucket;
+               percpu_up_read(&c->mark_lock);
 
-                       a = bkey_alloc_init(&alloc_key.k);
-                       a->k.p = iter->pos;
-                       bch2_alloc_pack(a, new_u);
+               bch2_btree_iter_set_pos(iter, POS(i, first_bucket));
 
-                       bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &a->k_i));
-                       ret = bch2_trans_commit(&trans, NULL, NULL,
-                                               BTREE_INSERT_NOFAIL|
-                                               BTREE_INSERT_NOMARK|
-                                               flags);
-err:
-                       if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) {
-                               bch_err(c, "error %i writing alloc info", ret);
-                               printk(KERN_CONT "dev %llu bucket %llu\n",
-                                      iter->pos.inode, iter->pos.offset);
-                               printk(KERN_CONT "gen %u -> %u\n", old_u.gen, new_u.gen);
-#define x(_name, _bits)                printk(KERN_CONT #_name " %u -> %u\n", old_u._name, new_u._name);
-                               BCH_ALLOC_FIELDS()
-#undef  x
-                       }
-                       if (ret)
+               while (1) {
+                       ret = bch2_alloc_write_key(&trans, iter, flags);
+                       if (ret < 0 || ret == ALLOC_END)
                                break;
-
-                       new = m;
-                       new.dirty = false;
-                       atomic64_cmpxchg(&g->_mark.v, m.v.counter, new.v.counter);
-
-                       if (ca->buckets_written)
-                               set_bit(b, ca->buckets_written);
-
-                       bch2_trans_cond_resched(&trans);
-                       *wrote = true;
+                       if (ret == ALLOC_WROTE)
+                               *wrote = true;
+                       bch2_btree_iter_next_slot(iter);
                }
-               up_read(&ca->bucket_lock);
 
-               if (ret) {
+               if (ret < 0) {
                        percpu_ref_put(&ca->io_ref);
                        break;
                }
@@ -407,7 +364,27 @@ err:
 
        bch2_trans_exit(&trans);
 
-       return ret;
+       return ret < 0 ? ret : 0;
+}
+
+int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
+{
+       struct btree_trans trans;
+       struct btree_iter *iter;
+       int ret;
+
+       bch2_trans_init(&trans, c, 0, 0);
+
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, k->k.p,
+                                  BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
+
+       ret = bch2_alloc_write_key(&trans, iter,
+                                  BTREE_INSERT_NOFAIL|
+                                  BTREE_INSERT_LAZY_RW|
+                                  BTREE_INSERT_JOURNAL_REPLAY|
+                                  BTREE_INSERT_NOMARK);
+       bch2_trans_exit(&trans);
+       return ret < 0 ? ret : 0;
 }
 
 /* Bucket IO clocks: */
@@ -954,10 +931,6 @@ retry:
                if (!top->nr)
                        heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
 
-               /* with btree still locked: */
-               if (ca->buckets_written)
-                       set_bit(b, ca->buckets_written);
-
                /*
                 * Make sure we flush the last journal entry that updated this
                 * bucket (i.e. deleting the last reference) before writing to
index 134c6d81397c24a040a53ca4bf2772b32bc5db41..501c444353fbf0e4a3fe6e38eaf1216c066c2a3a 100644 (file)
@@ -13,6 +13,17 @@ struct bkey_alloc_unpacked {
 #undef  x
 };
 
+/* returns true if not equal */
+static inline bool bkey_alloc_unpacked_cmp(struct bkey_alloc_unpacked l,
+                                          struct bkey_alloc_unpacked r)
+{
+       return l.gen != r.gen
+#define x(_name, _bits)        || l._name != r._name
+       BCH_ALLOC_FIELDS()
+#undef  x
+       ;
+}
+
 struct bkey_alloc_unpacked bch2_alloc_unpack(struct bkey_s_c);
 void bch2_alloc_pack(struct bkey_i_alloc *,
                     const struct bkey_alloc_unpacked);
index 9bee837dedcfe7ffc7566a024ebe2f24303a0cd1..eb4079e5717866dd451862ea130e28c78507f081 100644 (file)
@@ -412,7 +412,6 @@ struct bch_dev {
         */
        struct bucket_array __rcu *buckets[2];
        unsigned long           *buckets_nouse;
-       unsigned long           *buckets_written;
        struct rw_semaphore     bucket_lock;
 
        struct bch_dev_usage __percpu *usage[2];
index 24cf28bf665b7a7c30c763baa45cf188b49d4797..68ae08f86f3392a1d132ef1e6d11ee359184d9fb 100644 (file)
@@ -148,7 +148,6 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
                                        "type %u gen %u",
                                        k.k->type, ptr->gen)) {
                                g2->_mark.gen   = g->_mark.gen          = ptr->gen;
-                               g2->_mark.dirty = g->_mark.dirty        = true;
                                g2->gen_valid   = g->gen_valid          = true;
                        }
 
@@ -156,7 +155,6 @@ static int bch2_gc_mark_key(struct bch_fs *c, struct bkey_s_c k,
                                        "%u ptr gen in the future: %u > %u",
                                        k.k->type, ptr->gen, g->mark.gen)) {
                                g2->_mark.gen   = g->_mark.gen          = ptr->gen;
-                               g2->_mark.dirty = g->_mark.dirty        = true;
                                g2->gen_valid   = g->gen_valid          = true;
                                set_bit(BCH_FS_FIXED_GENS, &c->flags);
                        }
@@ -528,7 +526,6 @@ static int bch2_gc_done(struct bch_fs *c,
                                ": got %u, should be %u", i, b,         \
                                dst->b[b].mark._f, src->b[b].mark._f);  \
                dst->b[b]._mark._f = src->b[b].mark._f;                 \
-               dst->b[b]._mark.dirty = true;                           \
        }
 #define copy_dev_field(_f, _msg, ...)                                  \
        copy_field(_f, "dev %u has wrong " _msg, i, ##__VA_ARGS__)
@@ -580,10 +577,7 @@ static int bch2_gc_done(struct bch_fs *c,
                        copy_bucket_field(dirty_sectors);
                        copy_bucket_field(cached_sectors);
 
-                       if (dst->b[b].oldest_gen != src->b[b].oldest_gen) {
-                               dst->b[b].oldest_gen = src->b[b].oldest_gen;
-                               dst->b[b]._mark.dirty = true;
-                       }
+                       dst->b[b].oldest_gen = src->b[b].oldest_gen;
                }
        };
 
index 4ab3b834948b630a0413ab0e9ce2c9c72c46cebf..625c6c5f79639eedd75e9684f04ecca90e89420a 100644 (file)
@@ -634,7 +634,6 @@ static int __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
                BUG_ON(!is_available_bucket(new));
 
                new.owned_by_allocator  = true;
-               new.dirty               = true;
                new.data_type           = 0;
                new.cached_sectors      = 0;
                new.dirty_sectors       = 0;
@@ -774,7 +773,6 @@ static int __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
               type != BCH_DATA_JOURNAL);
 
        old = bucket_cmpxchg(g, new, ({
-               new.dirty       = true;
                new.data_type   = type;
                overflow = checked_add(new.dirty_sectors, sectors);
        }));
@@ -849,7 +847,6 @@ static void bucket_set_stripe(struct bch_fs *c,
                struct bucket_mark new, old;
 
                old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
-                       new.dirty                       = true;
                        new.stripe                      = enabled;
                        if (journal_seq) {
                                new.journal_seq_valid   = 1;
@@ -896,8 +893,6 @@ static bool bch2_mark_pointer(struct bch_fs *c,
        do {
                new.v.counter = old.v.counter = v;
 
-               new.dirty = true;
-
                /*
                 * Check this after reading bucket mark to guard against
                 * the allocator invalidating a bucket after we've already
@@ -1882,7 +1877,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
 {
        struct bucket_array *buckets = NULL, *old_buckets = NULL;
        unsigned long *buckets_nouse = NULL;
-       unsigned long *buckets_written = NULL;
        alloc_fifo      free[RESERVE_NR];
        alloc_fifo      free_inc;
        alloc_heap      alloc_heap;
@@ -1911,9 +1905,6 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
            !(buckets_nouse     = kvpmalloc(BITS_TO_LONGS(nbuckets) *
                                            sizeof(unsigned long),
                                            GFP_KERNEL|__GFP_ZERO)) ||
-           !(buckets_written   = kvpmalloc(BITS_TO_LONGS(nbuckets) *
-                                           sizeof(unsigned long),
-                                           GFP_KERNEL|__GFP_ZERO)) ||
            !init_fifo(&free[RESERVE_BTREE], btree_reserve, GFP_KERNEL) ||
            !init_fifo(&free[RESERVE_MOVINGGC],
                       copygc_reserve, GFP_KERNEL) ||
@@ -1945,16 +1936,12 @@ int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets)
                memcpy(buckets_nouse,
                       ca->buckets_nouse,
                       BITS_TO_LONGS(n) * sizeof(unsigned long));
-               memcpy(buckets_written,
-                      ca->buckets_written,
-                      BITS_TO_LONGS(n) * sizeof(unsigned long));
        }
 
        rcu_assign_pointer(ca->buckets[0], buckets);
        buckets = old_buckets;
 
        swap(ca->buckets_nouse, buckets_nouse);
-       swap(ca->buckets_written, buckets_written);
 
        if (resize)
                percpu_up_write(&c->mark_lock);
@@ -1994,8 +1981,6 @@ err:
                free_fifo(&free[i]);
        kvpfree(buckets_nouse,
                BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
-       kvpfree(buckets_written,
-               BITS_TO_LONGS(nbuckets) * sizeof(unsigned long));
        if (buckets)
                call_rcu(&old_buckets->rcu, buckets_free_rcu);
 
@@ -2011,8 +1996,6 @@ void bch2_dev_buckets_free(struct bch_dev *ca)
        free_fifo(&ca->free_inc);
        for (i = 0; i < RESERVE_NR; i++)
                free_fifo(&ca->free[i]);
-       kvpfree(ca->buckets_written,
-               BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
        kvpfree(ca->buckets_nouse,
                BITS_TO_LONGS(ca->mi.nbuckets) * sizeof(unsigned long));
        kvpfree(rcu_dereference_protected(ca->buckets[0], 1),
index efed658abc6aac717fdf1dbf3963ca402c458eb2..7ab9aa641c95c688cd2aac7b20db918c2f7966eb 100644 (file)
@@ -15,7 +15,6 @@ struct bucket_mark {
        u8              gen;
        u8              data_type:3,
                        owned_by_allocator:1,
-                       dirty:1,
                        journal_seq_valid:1,
                        stripe:1;
        u16             dirty_sectors;