bcachefs: New data structure for buckets waiting on journal commit
author Kent Overstreet <kent.overstreet@gmail.com>
Wed, 5 Jan 2022 03:32:09 +0000 (22:32 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:22 +0000 (17:09 -0400)
Implement a hash table, using cuckoo hashing, for empty buckets that are
waiting on a journal commit before they can be reused.

This replaces the journal_seq field of bucket_mark, and is part of
eventually getting rid of the in memory bucket array.

We may need to make bch2_bucket_needs_journal_commit() lockless, pending
profiling and testing.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
12 files changed:
fs/bcachefs/Makefile
fs/bcachefs/alloc_background.c
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/buckets_types.h
fs/bcachefs/buckets_waiting_for_journal.c [new file with mode: 0644]
fs/bcachefs/buckets_waiting_for_journal.h [new file with mode: 0644]
fs/bcachefs/buckets_waiting_for_journal_types.h [new file with mode: 0644]
fs/bcachefs/journal_io.c
fs/bcachefs/super.c

index a2769a85b029713a41e6a1e988c059ffea597a9f..65eeab56cb4be342e3c118f6edbe6252cc480752 100644 (file)
@@ -16,6 +16,7 @@ bcachefs-y            :=      \
        btree_update_interior.o \
        btree_update_leaf.o     \
        buckets.o               \
+       buckets_waiting_for_journal.o   \
        chardev.o               \
        checksum.o              \
        clock.o                 \
index ab7d972aac3a3a5bd2b492ddfffe3152c81ad250..bc5053ebe18fff3c5ae4a503216426d37fec7d99 100644 (file)
@@ -9,6 +9,7 @@
 #include "btree_update_interior.h"
 #include "btree_gc.h"
 #include "buckets.h"
+#include "buckets_waiting_for_journal.h"
 #include "clock.h"
 #include "debug.h"
 #include "ec.h"
@@ -561,8 +562,7 @@ static unsigned bucket_sort_key(struct bucket *g, struct bucket_mark m,
                 * keys when there's only a small difference, so that we can
                 * keep sequential buckets together:
                 */
-               return  (bucket_needs_journal_commit(m, last_seq_ondisk) << 4)|
-                       (bucket_gc_gen(g) >> 4);
+               return bucket_gc_gen(g) >> 4;
        }
 }
 
@@ -611,6 +611,14 @@ static void find_reclaimable_buckets_lru(struct bch_fs *c, struct bch_dev *ca)
                if (!bch2_can_invalidate_bucket(ca, b, m))
                        continue;
 
+               if (!m.data_type &&
+                   bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
+                                                    last_seq_ondisk,
+                                                    ca->dev_idx, b)) {
+                       ca->buckets_waiting_on_journal++;
+                       continue;
+               }
+
                if (e.nr && e.bucket + e.nr == b && e.key == key) {
                        e.nr++;
                } else {
@@ -647,6 +655,7 @@ static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
 
        ca->inc_gen_needs_gc                    = 0;
        ca->inc_gen_really_needs_gc             = 0;
+       ca->buckets_waiting_on_journal          = 0;
 
        find_reclaimable_buckets_lru(c, ca);
 
@@ -658,28 +667,6 @@ static size_t find_reclaimable_buckets(struct bch_fs *c, struct bch_dev *ca)
        return nr;
 }
 
-/*
- * returns sequence number of most recent journal entry that updated this
- * bucket:
- */
-static u64 bucket_journal_seq(struct bch_fs *c, struct bucket_mark m)
-{
-       if (m.journal_seq_valid) {
-               u64 journal_seq = atomic64_read(&c->journal.seq);
-               u64 bucket_seq  = journal_seq;
-
-               bucket_seq &= ~((u64) U16_MAX);
-               bucket_seq |= m.journal_seq;
-
-               if (bucket_seq > journal_seq)
-                       bucket_seq -= 1 << 16;
-
-               return bucket_seq;
-       } else {
-               return 0;
-       }
-}
-
 static int bucket_invalidate_btree(struct btree_trans *trans,
                                   struct bch_dev *ca, u64 b)
 {
@@ -745,9 +732,10 @@ static int bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca,
         * gen in memory here, the incremented gen will be updated in the btree
         * by bch2_trans_mark_pointer():
         */
-       if (!m.cached_sectors &&
-           !bucket_needs_journal_commit(m, c->journal.last_seq_ondisk)) {
-               BUG_ON(m.data_type);
+       if (!m.data_type &&
+           !bch2_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
+                                             c->journal.flushed_seq_ondisk,
+                                             ca->dev_idx, b)) {
                bucket_cmpxchg(g, m, m.gen++);
                *bucket_gen(ca, b) = m.gen;
                percpu_up_read(&c->mark_lock);
@@ -781,13 +769,6 @@ out:
 
                if (!top->nr)
                        heap_pop(&ca->alloc_heap, e, bucket_alloc_cmp, NULL);
-
-               /*
-                * Make sure we flush the last journal entry that updated this
-                * bucket (i.e. deleting the last reference) before writing to
-                * this bucket again:
-                */
-               *journal_seq = max(*journal_seq, bucket_journal_seq(c, m));
        } else {
                size_t b2;
 
@@ -954,8 +935,14 @@ static int bch2_allocator_thread(void *arg)
                        gc_count = c->gc_count;
                        nr = find_reclaimable_buckets(c, ca);
 
-                       trace_alloc_scan(ca, nr, ca->inc_gen_needs_gc,
-                                        ca->inc_gen_really_needs_gc);
+                       if (!nr && ca->buckets_waiting_on_journal) {
+                               ret = bch2_journal_flush(&c->journal);
+                               if (ret)
+                                       goto stop;
+                       } else if (nr < (ca->mi.nbuckets >> 6) &&
+                                  ca->buckets_waiting_on_journal >= nr / 2) {
+                               bch2_journal_flush_async(&c->journal, NULL);
+                       }
 
                        if ((ca->inc_gen_needs_gc >= ALLOC_SCAN_BATCH(ca) ||
                             ca->inc_gen_really_needs_gc) &&
@@ -963,6 +950,9 @@ static int bch2_allocator_thread(void *arg)
                                atomic_inc(&c->kick_gc);
                                wake_up_process(c->gc_thread);
                        }
+
+                       trace_alloc_scan(ca, nr, ca->inc_gen_needs_gc,
+                                        ca->inc_gen_really_needs_gc);
                }
 
                ret = bch2_invalidate_buckets(c, ca);
index 4ebaefd408a4777b3781e5b21bfbe1ba61c1aacf..3d1a6773393cdf293f84aefe1ec9c6c74297a7aa 100644 (file)
@@ -355,6 +355,7 @@ enum bch_time_stats {
 #include "alloc_types.h"
 #include "btree_types.h"
 #include "buckets_types.h"
+#include "buckets_waiting_for_journal_types.h"
 #include "clock_types.h"
 #include "ec_types.h"
 #include "journal_types.h"
@@ -482,6 +483,7 @@ struct bch_dev {
 
        size_t                  inc_gen_needs_gc;
        size_t                  inc_gen_really_needs_gc;
+       size_t                  buckets_waiting_on_journal;
 
        enum allocator_states   allocator_state;
 
@@ -777,6 +779,8 @@ struct bch_fs {
        struct mutex            write_points_hash_lock;
        unsigned                write_points_nr;
 
+       struct buckets_waiting_for_journal buckets_waiting_for_journal;
+
        /* GARBAGE COLLECTION */
        struct task_struct      *gc_thread;
        atomic_t                kick_gc;
index de33491f25356386840138961f5c37de2692f8f4..24de8604740ce68acf8518f3ff945c2947d4d459 100644 (file)
@@ -396,10 +396,11 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
        }
 }
 
-static noinline void bch2_trans_mark_gc(struct btree_trans *trans)
+static noinline int bch2_trans_mark_gc(struct btree_trans *trans)
 {
        struct bch_fs *c = trans->c;
        struct btree_insert_entry *i;
+       int ret = 0;
 
        trans_for_each_update(trans, i) {
                /*
@@ -408,10 +409,15 @@ static noinline void bch2_trans_mark_gc(struct btree_trans *trans)
                 */
                BUG_ON(i->cached || i->level);
 
-               if (gc_visited(c, gc_pos_btree_node(insert_l(i)->b)))
-                       bch2_mark_update(trans, i->path, i->k,
-                                        i->flags|BTREE_TRIGGER_GC);
+               if (gc_visited(c, gc_pos_btree_node(insert_l(i)->b))) {
+                       ret = bch2_mark_update(trans, i->path, i->k,
+                                              i->flags|BTREE_TRIGGER_GC);
+                       if (ret)
+                               break;
+               }
        }
+
+       return ret;
 }
 
 static inline int
@@ -510,11 +516,17 @@ bch2_trans_commit_write_locked(struct btree_trans *trans,
                return BTREE_INSERT_NEED_MARK_REPLICAS;
 
        trans_for_each_update(trans, i)
-               if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type))
-                       bch2_mark_update(trans, i->path, i->k, i->flags);
+               if (BTREE_NODE_TYPE_HAS_MEM_TRIGGERS & (1U << i->bkey_type)) {
+                       ret = bch2_mark_update(trans, i->path, i->k, i->flags);
+                       if (ret)
+                               return ret;
+               }
 
-       if (unlikely(c->gc_pos.phase))
-               bch2_trans_mark_gc(trans);
+       if (unlikely(c->gc_pos.phase)) {
+               ret = bch2_trans_mark_gc(trans);
+               if  (ret)
+                       return ret;
+       }
 
        trans_for_each_update(trans, i)
                do_btree_insert_one(trans, i);
index b80ab1ed22f7869c7c0a3b14a9e08f5ad2a68ea5..f7a750aff03fdd18e02f16e37ff17baa5b5fb9ec 100644 (file)
@@ -11,6 +11,7 @@
 #include "btree_gc.h"
 #include "btree_update.h"
 #include "buckets.h"
+#include "buckets_waiting_for_journal.h"
 #include "ec.h"
 #include "error.h"
 #include "inode.h"
@@ -43,43 +44,6 @@ static inline void fs_usage_data_type_to_base(struct bch_fs_usage *fs_usage,
        }
 }
 
-/*
- * Clear journal_seq_valid for buckets for which it's not needed, to prevent
- * wraparound:
- */
-void bch2_bucket_seq_cleanup(struct bch_fs *c)
-{
-       u64 journal_seq = atomic64_read(&c->journal.seq);
-       u16 last_seq_ondisk = c->journal.flushed_seq_ondisk;
-       struct bch_dev *ca;
-       struct bucket_array *buckets;
-       struct bucket *g;
-       struct bucket_mark m;
-       unsigned i;
-
-       if (journal_seq - c->last_bucket_seq_cleanup <
-           (1U << (BUCKET_JOURNAL_SEQ_BITS - 2)))
-               return;
-
-       c->last_bucket_seq_cleanup = journal_seq;
-
-       for_each_member_device(ca, c, i) {
-               down_read(&ca->bucket_lock);
-               buckets = bucket_array(ca);
-
-               for_each_bucket(g, buckets) {
-                       bucket_cmpxchg(g, m, ({
-                               if (!m.journal_seq_valid ||
-                                   bucket_needs_journal_commit(m, last_seq_ondisk))
-                                       break;
-
-                               m.journal_seq_valid = 0;
-                       }));
-               }
-               up_read(&ca->bucket_lock);
-       }
-}
-
 void bch2_fs_usage_initialize(struct bch_fs *c)
 {
        struct bch_fs_usage *usage;
@@ -576,16 +540,28 @@ static int bch2_mark_alloc(struct btree_trans *trans,
                v->journal_seq = cpu_to_le64(new_u.journal_seq);
        }
 
-       ca = bch_dev_bkey_exists(c, new.k->p.inode);
+       if (old_u.data_type && !new_u.data_type && new_u.journal_seq) {
+               ret = bch2_set_bucket_needs_journal_commit(&c->buckets_waiting_for_journal,
+                               c->journal.flushed_seq_ondisk,
+                               new_u.dev, new_u.bucket,
+                               new_u.journal_seq);
+               if (ret) {
+                       bch2_fs_fatal_error(c,
+                               "error setting bucket_needs_journal_commit: %i", ret);
+                       return ret;
+               }
+       }
+
+       ca = bch_dev_bkey_exists(c, new_u.dev);
 
-       if (new.k->p.offset >= ca->mi.nbuckets)
+       if (new_u.bucket >= ca->mi.nbuckets)
                return 0;
 
        percpu_down_read(&c->mark_lock);
        if (!gc && new_u.gen != old_u.gen)
-               *bucket_gen(ca, new.k->p.offset) = new_u.gen;
+               *bucket_gen(ca, new_u.bucket) = new_u.gen;
 
-       g = __bucket(ca, new.k->p.offset, gc);
+       g = __bucket(ca, new_u.bucket, gc);
 
        old_m = bucket_cmpxchg(g, m, ({
                m.gen                   = new_u.gen;
@@ -593,11 +569,6 @@ static int bch2_mark_alloc(struct btree_trans *trans,
                m.dirty_sectors         = new_u.dirty_sectors;
                m.cached_sectors        = new_u.cached_sectors;
                m.stripe                = new_u.stripe != 0;
-
-               if (journal_seq) {
-                       m.journal_seq_valid     = 1;
-                       m.journal_seq           = journal_seq;
-               }
        }));
 
        bch2_dev_usage_update(c, ca, old_m, m, journal_seq, gc);
@@ -625,7 +596,7 @@ static int bch2_mark_alloc(struct btree_trans *trans,
                        return ret;
                }
 
-               trace_invalidate(ca, bucket_to_sector(ca, new.k->p.offset),
+               trace_invalidate(ca, bucket_to_sector(ca, new_u.bucket),
                                 old_m.cached_sectors);
        }
 
@@ -775,9 +746,10 @@ static int check_bucket_ref(struct bch_fs *c,
 static int mark_stripe_bucket(struct btree_trans *trans,
                              struct bkey_s_c k,
                              unsigned ptr_idx,
-                             u64 journal_seq, unsigned flags)
+                             unsigned flags)
 {
        struct bch_fs *c = trans->c;
+       u64 journal_seq = trans->journal_res.seq;
        const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
        unsigned nr_data = s->nr_blocks - s->nr_redundant;
        bool parity = ptr_idx >= nr_data;
@@ -818,11 +790,6 @@ static int mark_stripe_bucket(struct btree_trans *trans,
                if (data_type)
                        new.data_type           = data_type;
 
-               if (journal_seq) {
-                       new.journal_seq_valid   = 1;
-                       new.journal_seq         = journal_seq;
-               }
-
                new.stripe = true;
        }));
 
@@ -894,11 +861,6 @@ static int bch2_mark_pointer(struct btree_trans *trans,
 
                new.data_type = bucket_data_type;
 
-               if (journal_seq) {
-                       new.journal_seq_valid = 1;
-                       new.journal_seq = journal_seq;
-               }
-
                if (flags & BTREE_TRIGGER_NOATOMIC) {
                        g->_mark = new;
                        break;
@@ -1119,7 +1081,7 @@ static int bch2_mark_stripe(struct btree_trans *trans,
                memset(m->block_sectors, 0, sizeof(m->block_sectors));
 
                for (i = 0; i < new_s->nr_blocks; i++) {
-                       ret = mark_stripe_bucket(trans, new, i, journal_seq, flags);
+                       ret = mark_stripe_bucket(trans, new, i, flags);
                        if (ret)
                                return ret;
                }
index 6eeb95068b3b22d473a73a4c8c68fccc5719f08a..4b5376684d2c0c95b47160b0ebd9ed263b7f69a2 100644 (file)
@@ -159,13 +159,6 @@ static inline bool is_available_bucket(struct bucket_mark mark)
        return !mark.dirty_sectors && !mark.stripe;
 }
 
-static inline bool bucket_needs_journal_commit(struct bucket_mark m,
-                                              u16 last_seq_ondisk)
-{
-       return m.journal_seq_valid &&
-               ((s16) m.journal_seq - (s16) last_seq_ondisk > 0);
-}
-
 /* Device usage: */
 
 struct bch_dev_usage bch2_dev_usage_read(struct bch_dev *);
@@ -240,7 +233,6 @@ bch2_fs_usage_read_short(struct bch_fs *);
 
 /* key/bucket marking: */
 
-void bch2_bucket_seq_cleanup(struct bch_fs *);
 void bch2_fs_usage_initialize(struct bch_fs *);
 
 void bch2_mark_alloc_bucket(struct bch_fs *, struct bch_dev *, size_t, bool);
index 18bca269b7503f2a7d589d6842aee118d31a7e7e..24139831226d49f7912854a5fcf90f1550423231 100644 (file)
@@ -15,18 +15,9 @@ struct bucket_mark {
        u8              gen;
        u8              data_type:3,
                        owned_by_allocator:1,
-                       journal_seq_valid:1,
                        stripe:1;
        u16             dirty_sectors;
        u16             cached_sectors;
-
-       /*
-        * low bits of journal sequence number when this bucket was most
-        * recently modified: if journal_seq_valid is set, this bucket can't be
-        * reused until the journal sequence number written to disk is >= the
-        * bucket's journal sequence number:
-        */
-       u16             journal_seq;
        };
        };
 };
diff --git a/fs/bcachefs/buckets_waiting_for_journal.c b/fs/bcachefs/buckets_waiting_for_journal.c
new file mode 100644 (file)
index 0000000..f3774e3
--- /dev/null
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bcachefs.h"
+#include "buckets_waiting_for_journal.h"
+#include <linux/hash.h>
+#include <linux/random.h>
+
+static inline struct bucket_hashed *
+bucket_hash(struct buckets_waiting_for_journal_table *t,
+           unsigned hash_seed_idx, u64 dev_bucket)
+{
+       return t->d + hash_64(dev_bucket ^ t->hash_seeds[hash_seed_idx], t->bits);
+}
+
+static void bucket_table_init(struct buckets_waiting_for_journal_table *t, size_t bits)
+{
+       unsigned i;
+
+       t->bits = bits;
+       for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++)
+               get_random_bytes(&t->hash_seeds[i], sizeof(t->hash_seeds[i]));
+       memset(t->d, 0, sizeof(t->d[0]) << t->bits);
+}
+
+bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
+                                     u64 flushed_seq,
+                                     unsigned dev, u64 bucket)
+{
+       struct buckets_waiting_for_journal_table *t;
+       u64 dev_bucket = (u64) dev << 56 | bucket;
+       bool ret = false;
+       unsigned i;
+
+       mutex_lock(&b->lock);
+       t = b->t;
+
+       for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) {
+               struct bucket_hashed *h = bucket_hash(t, i, dev_bucket);
+
+               if (h->dev_bucket == dev_bucket) {
+                       ret = h->journal_seq > flushed_seq;
+                       break;
+               }
+       }
+
+       mutex_unlock(&b->lock);
+
+       return ret;
+}
+
+static bool bucket_table_insert(struct buckets_waiting_for_journal_table *t,
+                               struct bucket_hashed *new,
+                               u64 flushed_seq)
+{
+       struct bucket_hashed *last_evicted = NULL;
+       unsigned tries, i;
+
+       for (tries = 0; tries < 10; tries++) {
+               struct bucket_hashed *old, *victim = NULL;
+
+               for (i = 0; i < ARRAY_SIZE(t->hash_seeds); i++) {
+                       old = bucket_hash(t, i, new->dev_bucket);
+
+                       if (old->dev_bucket == new->dev_bucket ||
+                           old->journal_seq <= flushed_seq) {
+                               *old = *new;
+                               return true;
+                       }
+
+                       if (last_evicted != old)
+                               victim = old;
+               }
+
+               /* hashed to same slot 3 times: */
+               if (!victim)
+                       break;
+
+               /* Failed to find an empty slot: */
+               swap(*new, *victim);
+               last_evicted = victim;
+       }
+
+       return false;
+}
+
+int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *b,
+                                        u64 flushed_seq,
+                                        unsigned dev, u64 bucket,
+                                        u64 journal_seq)
+{
+       struct buckets_waiting_for_journal_table *t, *n;
+       struct bucket_hashed tmp, new = {
+               .dev_bucket     = (u64) dev << 56 | bucket,
+               .journal_seq    = journal_seq,
+       };
+       size_t i, size, new_bits, nr_elements = 1, nr_rehashes = 0;
+       int ret = 0;
+
+       mutex_lock(&b->lock);
+
+       if (likely(bucket_table_insert(b->t, &new, flushed_seq)))
+               goto out;
+
+       t = b->t;
+       size = 1UL << t->bits;
+       for (i = 0; i < size; i++)
+               nr_elements += t->d[i].journal_seq > flushed_seq;
+
+       new_bits = t->bits + (nr_elements * 3 > size);
+
+       n = kvmalloc(sizeof(*n) + (sizeof(n->d[0]) << new_bits), GFP_KERNEL);
+       if (!n) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+retry_rehash:
+       nr_rehashes++;
+       bucket_table_init(n, new_bits);
+
+       tmp = new;
+       BUG_ON(!bucket_table_insert(n, &tmp, flushed_seq));
+
+       for (i = 0; i < 1UL << t->bits; i++) {
+               if (t->d[i].journal_seq <= flushed_seq)
+                       continue;
+
+               tmp = t->d[i];
+               if (!bucket_table_insert(n, &tmp, flushed_seq))
+                       goto retry_rehash;
+       }
+
+       b->t = n;
+       kvfree(t);
+
+       pr_debug("took %zu rehashes, table at %zu/%lu elements",
+                nr_rehashes, nr_elements, 1UL << b->t->bits);
+out:
+       mutex_unlock(&b->lock);
+
+       return ret;
+}
+
+void bch2_fs_buckets_waiting_for_journal_exit(struct bch_fs *c)
+{
+       struct buckets_waiting_for_journal *b = &c->buckets_waiting_for_journal;
+
+       kvfree(b->t);
+}
+
+#define INITIAL_TABLE_BITS             3
+
+int bch2_fs_buckets_waiting_for_journal_init(struct bch_fs *c)
+{
+       struct buckets_waiting_for_journal *b = &c->buckets_waiting_for_journal;
+
+       mutex_init(&b->lock);
+
+       b->t = kvmalloc(sizeof(*b->t) +
+                       (sizeof(b->t->d[0]) << INITIAL_TABLE_BITS), GFP_KERNEL);
+       if (!b->t)
+               return -ENOMEM;
+
+       bucket_table_init(b->t, INITIAL_TABLE_BITS);
+       return 0;
+}
diff --git a/fs/bcachefs/buckets_waiting_for_journal.h b/fs/bcachefs/buckets_waiting_for_journal.h
new file mode 100644 (file)
index 0000000..d2ae19c
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BUCKETS_WAITING_FOR_JOURNAL_H
+#define _BUCKETS_WAITING_FOR_JOURNAL_H
+
+#include "buckets_waiting_for_journal_types.h"
+
+bool bch2_bucket_needs_journal_commit(struct buckets_waiting_for_journal *,
+                                     u64, unsigned, u64);
+int bch2_set_bucket_needs_journal_commit(struct buckets_waiting_for_journal *,
+                                        u64, unsigned, u64, u64);
+
+void bch2_fs_buckets_waiting_for_journal_exit(struct bch_fs *);
+int bch2_fs_buckets_waiting_for_journal_init(struct bch_fs *);
+
+#endif /* _BUCKETS_WAITING_FOR_JOURNAL_H */
diff --git a/fs/bcachefs/buckets_waiting_for_journal_types.h b/fs/bcachefs/buckets_waiting_for_journal_types.h
new file mode 100644 (file)
index 0000000..e593db0
--- /dev/null
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BUCKETS_WAITING_FOR_JOURNAL_TYPES_H
+#define _BUCKETS_WAITING_FOR_JOURNAL_TYPES_H
+
+#include <linux/siphash.h>
+
+struct bucket_hashed {
+       u64                     dev_bucket;
+       u64                     journal_seq;
+};
+
+struct buckets_waiting_for_journal_table {
+       unsigned                bits;
+       u64                     hash_seeds[3];
+       struct bucket_hashed    d[];
+};
+
+struct buckets_waiting_for_journal {
+       struct mutex            lock;
+       struct buckets_waiting_for_journal_table *t;
+};
+
+#endif /* _BUCKETS_WAITING_FOR_JOURNAL_TYPES_H */
index 4602f581198eadf28be03830c7c6551fb9c286f6..815310e2426f8db67b46a167cd09f7ce1a3267d5 100644 (file)
@@ -1666,13 +1666,9 @@ retry_alloc:
                }
        }
 
-       bch2_bucket_seq_cleanup(c);
-
        continue_at(cl, do_journal_write, c->io_complete_wq);
        return;
 no_io:
-       bch2_bucket_seq_cleanup(c);
-
        continue_at(cl, journal_write_done, c->io_complete_wq);
        return;
 err:
index 55bb263a0906ef74afc74bfd68ef627aeb04d586..3094eb1e340617424385d8ef654edc147c358504 100644 (file)
@@ -16,6 +16,7 @@
 #include "btree_key_cache.h"
 #include "btree_update_interior.h"
 #include "btree_io.h"
+#include "buckets_waiting_for_journal.h"
 #include "chardev.h"
 #include "checksum.h"
 #include "clock.h"
@@ -475,6 +476,7 @@ static void __bch2_fs_free(struct bch_fs *c)
        bch2_fs_ec_exit(c);
        bch2_fs_encryption_exit(c);
        bch2_fs_io_exit(c);
+       bch2_fs_buckets_waiting_for_journal_exit(c);
        bch2_fs_btree_interior_update_exit(c);
        bch2_fs_btree_iter_exit(c);
        bch2_fs_btree_key_cache_exit(&c->btree_key_cache);
@@ -818,6 +820,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
            bch2_fs_btree_key_cache_init(&c->btree_key_cache) ?:
            bch2_fs_btree_iter_init(c) ?:
            bch2_fs_btree_interior_update_init(c) ?:
+           bch2_fs_buckets_waiting_for_journal_init(c);
            bch2_fs_subvolumes_init(c) ?:
            bch2_fs_io_init(c) ?:
            bch2_fs_encryption_init(c) ?: