bcachefs: Rewrite journal_seq_blacklist machinery
authorKent Overstreet <kent.overstreet@gmail.com>
Fri, 5 Apr 2019 01:53:12 +0000 (21:53 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:20 +0000 (17:08 -0400)
Now, we store blacklisted journal sequence numbers in the superblock,
not the journal: this helps to greatly simplify the code, and more
importantly it's now implemented in a way that doesn't require all btree
nodes to be visited before starting the journal - instead, we
unconditionally blacklist the next 4 journal sequence numbers after an
unclean shutdown.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
15 files changed:
fs/bcachefs/bcachefs.h
fs/bcachefs/bcachefs_format.h
fs/bcachefs/btree_io.c
fs/bcachefs/btree_iter.c
fs/bcachefs/inode.h
fs/bcachefs/journal.c
fs/bcachefs/journal.h
fs/bcachefs/journal_io.c
fs/bcachefs/journal_io.h
fs/bcachefs/journal_seq_blacklist.c
fs/bcachefs/journal_seq_blacklist.h
fs/bcachefs/journal_types.h
fs/bcachefs/recovery.c
fs/bcachefs/super-io.c
fs/bcachefs/super.c

index d8c487e3359253f3f77a95165056f8299121d0be..8acdc7ffeca346fdffb5bb09ab54b48407e4d6a7 100644 (file)
 #include <linux/closure.h>
 #include <linux/kobject.h>
 #include <linux/list.h>
+#include <linux/math64.h>
 #include <linux/mutex.h>
 #include <linux/percpu-refcount.h>
 #include <linux/percpu-rwsem.h>
@@ -486,6 +487,7 @@ enum {
        BCH_FS_RW,
 
        /* shutdown: */
+       BCH_FS_STOPPING,
        BCH_FS_EMERGENCY_RO,
        BCH_FS_WRITE_DISABLE_COMPLETE,
 
@@ -511,6 +513,15 @@ struct bch_fs_pcpu {
        u64                     sectors_available;
 };
 
+struct journal_seq_blacklist_table {
+       size_t                  nr;
+       struct journal_seq_blacklist_table_entry {
+               u64             start;
+               u64             end;
+               bool            dirty;
+       }                       entries[0];
+};
+
 struct bch_fs {
        struct closure          cl;
 
@@ -646,6 +657,11 @@ struct bch_fs {
 
        struct io_clock         io_clock[2];
 
+       /* JOURNAL SEQ BLACKLIST */
+       struct journal_seq_blacklist_table *
+                               journal_seq_blacklist_table;
+       struct work_struct      journal_seq_blacklist_gc_work;
+
        /* ALLOCATOR */
        spinlock_t              freelist_lock;
        struct closure_waitlist freelist_wait;
index 646910a6a4bb248fa4bbe65291fb179646aa53d9..7edc410c53910d6a31241d6e077513ccdbcf5f97 100644 (file)
@@ -909,7 +909,8 @@ struct bch_sb_field {
        x(quota,        4)      \
        x(disk_groups,  5)      \
        x(clean,        6)      \
-       x(replicas,     7)
+       x(replicas,     7)      \
+       x(journal_seq_blacklist, 8)
 
 enum bch_sb_field_type {
 #define x(f, nr)       BCH_SB_FIELD_##f = nr,
@@ -1124,6 +1125,20 @@ struct bch_sb_field_clean {
        };
 };
 
+struct journal_seq_blacklist_entry {
+       __le64                  start;
+       __le64                  end;
+};
+
+struct bch_sb_field_journal_seq_blacklist {
+       struct bch_sb_field     field;
+
+       union {
+               struct journal_seq_blacklist_entry start[0];
+               __u64           _data[0];
+       };
+};
+
 /* Superblock: */
 
 /*
@@ -1279,6 +1294,7 @@ enum bch_sb_features {
        BCH_FEATURE_ZSTD                = 2,
        BCH_FEATURE_ATOMIC_NLINK        = 3, /* should have gone under compat */
        BCH_FEATURE_EC                  = 4,
+       BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3 = 5,
        BCH_FEATURE_NR,
 };
 
index 10b3d53b6ebb336e32aaa9c50543718867c89872..fa261a175f5ee11fa29b64151b80f7034b29554a 100644 (file)
@@ -770,7 +770,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
        struct btree_node *sorted;
        struct bkey_packed *k;
        struct bset *i;
-       bool used_mempool;
+       bool used_mempool, blacklisted;
        unsigned u64s;
        int ret, retry_read = 0, write = READ;
 
@@ -844,20 +844,15 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 
                b->written += sectors;
 
-               ret = bch2_journal_seq_should_ignore(c, le64_to_cpu(i->journal_seq), b);
-               if (ret < 0) {
-                       btree_err(BTREE_ERR_FATAL, c, b, i,
-                                 "insufficient memory");
-                       goto err;
-               }
+               blacklisted = bch2_journal_seq_is_blacklisted(c,
+                                       le64_to_cpu(i->journal_seq),
+                                       true);
 
-               if (ret) {
-                       btree_err_on(first,
-                                    BTREE_ERR_FIXABLE, c, b, i,
-                                    "first btree node bset has blacklisted journal seq");
-                       if (!first)
-                               continue;
-               }
+               btree_err_on(blacklisted && first,
+                            BTREE_ERR_FIXABLE, c, b, i,
+                            "first btree node bset has blacklisted journal seq");
+               if (blacklisted && !first)
+                       continue;
 
                bch2_btree_node_iter_large_push(iter, b,
                                           i->start,
@@ -930,7 +925,6 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct btree *b, bool have_retry
 out:
        mempool_free(iter, &c->fill_iter);
        return retry_read;
-err:
 fsck_err:
        if (ret == BTREE_RETRY_READ) {
                retry_read = 1;
index 02eb28bfe9b97142bb42af38960fbe1817106901..6b9af53a3e775faecf6ebfcb3fdbfe33499e6947 100644 (file)
@@ -1156,6 +1156,8 @@ struct btree *bch2_btree_iter_next_node(struct btree_iter *iter, unsigned depth)
        if (!btree_iter_node(iter, iter->level))
                return NULL;
 
+       bch2_trans_cond_resched(iter->trans);
+
        btree_iter_up(iter);
 
        if (!bch2_btree_node_relock(iter, iter->level))
index ada639c066198dfe7978d98e8186460d3dfc9555..af0c355f2f04e4511826c2023117cc0057a8de76 100644 (file)
@@ -4,8 +4,6 @@
 
 #include "opts.h"
 
-#include <linux/math64.h>
-
 extern const char * const bch2_inode_opts[];
 
 const char *bch2_inode_invalid(const struct bch_fs *, struct bkey_s_c);
index dbecb4072af00fa245eb7c0b800b3b621f97fec5..2e84af8a044c5050fde750cb163930e120574951 100644 (file)
@@ -988,27 +988,57 @@ void bch2_fs_journal_stop(struct journal *j)
        cancel_delayed_work_sync(&j->reclaim_work);
 }
 
-void bch2_fs_journal_start(struct journal *j)
+int bch2_fs_journal_start(struct journal *j, u64 cur_seq,
+                         struct list_head *journal_entries)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
-       struct journal_seq_blacklist *bl;
-       u64 blacklist = 0;
+       struct journal_entry_pin_list *p;
+       struct journal_replay *i;
+       u64 last_seq = cur_seq, nr, seq;
+
+       if (!list_empty(journal_entries))
+               last_seq = le64_to_cpu(list_last_entry(journal_entries,
+                                                      struct journal_replay,
+                                                      list)->j.last_seq);
+
+       nr = cur_seq - last_seq;
+
+       if (nr + 1 > j->pin.size) {
+               free_fifo(&j->pin);
+               init_fifo(&j->pin, roundup_pow_of_two(nr + 1), GFP_KERNEL);
+               if (!j->pin.data) {
+                       bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
+                       return -ENOMEM;
+               }
+       }
+
+       j->last_seq_ondisk      = last_seq;
+       j->pin.front            = last_seq;
+       j->pin.back             = cur_seq;
+       atomic64_set(&j->seq, cur_seq - 1);
+
+       fifo_for_each_entry_ptr(p, &j->pin, seq) {
+               INIT_LIST_HEAD(&p->list);
+               INIT_LIST_HEAD(&p->flushed);
+               atomic_set(&p->count, 0);
+               p->devs.nr = 0;
+       }
+
+       list_for_each_entry(i, journal_entries, list) {
+               seq = le64_to_cpu(i->j.seq);
+
+               BUG_ON(seq < last_seq || seq >= cur_seq);
 
-       list_for_each_entry(bl, &j->seq_blacklist, list)
-               blacklist = max(blacklist, bl->end);
+               p = journal_seq_pin(j, seq);
+
+               atomic_set(&p->count, 1);
+               p->devs = i->devs;
+       }
 
        spin_lock(&j->lock);
 
        set_bit(JOURNAL_STARTED, &j->flags);
 
-       while (journal_cur_seq(j) < blacklist)
-               journal_pin_new_entry(j, 0);
-
-       /*
-        * __journal_entry_close() only inits the next journal entry when it
-        * closes an open journal entry - the very first journal entry gets
-        * initialized here:
-        */
        journal_pin_new_entry(j, 1);
        bch2_journal_buf_init(j);
 
@@ -1017,12 +1047,7 @@ void bch2_fs_journal_start(struct journal *j)
        bch2_journal_space_available(j);
        spin_unlock(&j->lock);
 
-       /*
-        * Adding entries to the next journal entry before allocating space on
-        * disk for the next journal entry - this is ok, because these entries
-        * only have to go down with the next journal entry we write:
-        */
-       bch2_journal_seq_blacklist_write(j);
+       return 0;
 }
 
 /* init/exit: */
@@ -1090,8 +1115,6 @@ int bch2_fs_journal_init(struct journal *j)
        INIT_DELAYED_WORK(&j->write_work, journal_write_work);
        INIT_DELAYED_WORK(&j->reclaim_work, bch2_journal_reclaim_work);
        init_waitqueue_head(&j->pin_flush_wait);
-       mutex_init(&j->blacklist_lock);
-       INIT_LIST_HEAD(&j->seq_blacklist);
        mutex_init(&j->reclaim_lock);
        mutex_init(&j->discard_lock);
 
index 809cf25f5a03cc18341a5bf05f5f4e6108446467..3447b4ad462dd297e4236024a2f377cb63cdaeec 100644 (file)
@@ -472,8 +472,10 @@ int bch2_set_nr_journal_buckets(struct bch_fs *, struct bch_dev *,
 int bch2_dev_journal_alloc(struct bch_dev *);
 
 void bch2_dev_journal_stop(struct journal *, struct bch_dev *);
+
 void bch2_fs_journal_stop(struct journal *);
-void bch2_fs_journal_start(struct journal *);
+int bch2_fs_journal_start(struct journal *, u64, struct list_head *);
+
 void bch2_dev_journal_exit(struct bch_dev *);
 int bch2_dev_journal_init(struct bch_dev *, struct bch_sb *);
 void bch2_fs_journal_exit(struct journal *);
index 1293bb66e62cc769d2e2a07942c02331f029d857..8010b38114ac1789869105defbd7d5f0e26bc712 100644 (file)
@@ -10,7 +10,6 @@
 #include "journal.h"
 #include "journal_io.h"
 #include "journal_reclaim.h"
-#include "journal_seq_blacklist.h"
 #include "replicas.h"
 #include "trace.h"
 
@@ -655,45 +654,11 @@ void bch2_journal_entries_free(struct list_head *list)
        }
 }
 
-int bch2_journal_set_seq(struct bch_fs *c, u64 last_seq, u64 end_seq)
-{
-       struct journal *j = &c->journal;
-       struct journal_entry_pin_list *p;
-       u64 seq, nr = end_seq - last_seq + 1;
-
-       if (nr > j->pin.size) {
-               free_fifo(&j->pin);
-               init_fifo(&j->pin, roundup_pow_of_two(nr), GFP_KERNEL);
-               if (!j->pin.data) {
-                       bch_err(c, "error reallocating journal fifo (%llu open entries)", nr);
-                       return -ENOMEM;
-               }
-       }
-
-       atomic64_set(&j->seq, end_seq);
-       j->last_seq_ondisk = last_seq;
-
-       j->pin.front    = last_seq;
-       j->pin.back     = end_seq + 1;
-
-       fifo_for_each_entry_ptr(p, &j->pin, seq) {
-               INIT_LIST_HEAD(&p->list);
-               INIT_LIST_HEAD(&p->flushed);
-               atomic_set(&p->count, 0);
-               p->devs.nr = 0;
-       }
-
-       return 0;
-}
-
 int bch2_journal_read(struct bch_fs *c, struct list_head *list)
 {
-       struct journal *j = &c->journal;
        struct journal_list jlist;
        struct journal_replay *i;
-       struct journal_entry_pin_list *p;
        struct bch_dev *ca;
-       u64 cur_seq, end_seq;
        unsigned iter;
        size_t keys = 0, entries = 0;
        bool degraded = false;
@@ -725,17 +690,12 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
        if (jlist.ret)
                return jlist.ret;
 
-       if (list_empty(list)){
-               bch_err(c, "no journal entries found");
-               return BCH_FSCK_REPAIR_IMPOSSIBLE;
-       }
-
        list_for_each_entry(i, list, list) {
+               struct jset_entry *entry;
+               struct bkey_i *k, *_n;
                struct bch_replicas_padded replicas;
                char buf[80];
 
-               bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);
-
                ret = jset_validate_entries(c, &i->j, READ);
                if (ret)
                        goto fsck_err;
@@ -745,6 +705,8 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
                 * the devices - this is wrong:
                 */
 
+               bch2_devlist_to_replicas(&replicas.e, BCH_DATA_JOURNAL, i->devs);
+
                if (!degraded &&
                    (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
                     fsck_err_on(!bch2_replicas_marked(c, &replicas.e, false), c,
@@ -755,68 +717,18 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
                        if (ret)
                                return ret;
                }
-       }
-
-       i = list_last_entry(list, struct journal_replay, list);
-
-       ret = bch2_journal_set_seq(c,
-                                  le64_to_cpu(i->j.last_seq),
-                                  le64_to_cpu(i->j.seq));
-       if (ret)
-               return ret;
-
-       mutex_lock(&j->blacklist_lock);
-
-       list_for_each_entry(i, list, list) {
-               p = journal_seq_pin(j, le64_to_cpu(i->j.seq));
-
-               atomic_set(&p->count, 1);
-               p->devs = i->devs;
-
-               if (bch2_journal_seq_blacklist_read(j, i)) {
-                       mutex_unlock(&j->blacklist_lock);
-                       return -ENOMEM;
-               }
-       }
-
-       mutex_unlock(&j->blacklist_lock);
-
-       cur_seq = journal_last_seq(j);
-       end_seq = le64_to_cpu(list_last_entry(list,
-                               struct journal_replay, list)->j.seq);
-
-       list_for_each_entry(i, list, list) {
-               struct jset_entry *entry;
-               struct bkey_i *k, *_n;
-               bool blacklisted;
-
-               mutex_lock(&j->blacklist_lock);
-               while (cur_seq < le64_to_cpu(i->j.seq) &&
-                      bch2_journal_seq_blacklist_find(j, cur_seq))
-                       cur_seq++;
-
-               blacklisted = bch2_journal_seq_blacklist_find(j,
-                                                        le64_to_cpu(i->j.seq));
-               mutex_unlock(&j->blacklist_lock);
-
-               fsck_err_on(blacklisted, c,
-                           "found blacklisted journal entry %llu",
-                           le64_to_cpu(i->j.seq));
-
-               fsck_err_on(le64_to_cpu(i->j.seq) != cur_seq, c,
-                       "journal entries %llu-%llu missing! (replaying %llu-%llu)",
-                       cur_seq, le64_to_cpu(i->j.seq) - 1,
-                       journal_last_seq(j), end_seq);
-
-               cur_seq = le64_to_cpu(i->j.seq) + 1;
 
                for_each_jset_key(k, _n, entry, &i->j)
                        keys++;
                entries++;
        }
 
-       bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu",
-                keys, entries, journal_cur_seq(j));
+       if (!list_empty(list)) {
+               i = list_last_entry(list, struct journal_replay, list);
+
+               bch_info(c, "journal read done, %zu keys in %zu entries, seq %llu",
+                        keys, entries, le64_to_cpu(i->j.seq));
+       }
 fsck_err:
        return ret;
 }
index a79c396903f0bbb5a043bf0c3559c023f0e5c2f2..4bb174839956ecb8f3b479a5294500e56c585e7c 100644 (file)
@@ -35,7 +35,6 @@ static inline struct jset_entry *__jset_entry_type_next(struct jset *jset,
        for_each_jset_entry_type(entry, jset, BCH_JSET_ENTRY_btree_keys)        \
                vstruct_for_each_safe(entry, k, _n)
 
-int bch2_journal_set_seq(struct bch_fs *c, u64, u64);
 int bch2_journal_read(struct bch_fs *, struct list_head *);
 void bch2_journal_entries_free(struct list_head *);
 int bch2_journal_replay(struct bch_fs *, struct list_head *);
index 45c8d38d12deb2155a9a2e883822295d470d147b..0df8dfccd5b5339be3f38ad07e0b8f242b240b09 100644 (file)
@@ -1,13 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 
 #include "bcachefs.h"
-#include "btree_update.h"
-#include "btree_update_interior.h"
-#include "error.h"
-#include "journal.h"
-#include "journal_io.h"
-#include "journal_reclaim.h"
+#include "btree_iter.h"
+#include "eytzinger.h"
 #include "journal_seq_blacklist.h"
+#include "super-io.h"
 
 /*
  * journal_seq_blacklist machinery:
  * record that it was blacklisted so that a) on recovery we don't think we have
  * missing journal entries and b) so that the btree code continues to ignore
  * that bset, until that btree node is rewritten.
- *
- * Blacklisted journal sequence numbers are themselves recorded as entries in
- * the journal.
  */
 
-/*
- * Called when journal needs to evict a blacklist entry to reclaim space: find
- * any btree nodes that refer to the blacklist journal sequence numbers, and
- * rewrite them:
- */
-static void journal_seq_blacklist_flush(struct journal *j,
-                                       struct journal_entry_pin *pin, u64 seq)
+static unsigned
+blacklist_nr_entries(struct bch_sb_field_journal_seq_blacklist *bl)
 {
-       struct bch_fs *c =
-               container_of(j, struct bch_fs, journal);
-       struct journal_seq_blacklist *bl =
-               container_of(pin, struct journal_seq_blacklist, pin);
-       struct blacklisted_node n;
-       struct closure cl;
-       unsigned i;
-       int ret;
+       return bl
+               ? ((vstruct_end(&bl->field) - (void *) &bl->start[0]) /
+                  sizeof(struct journal_seq_blacklist_entry))
+               : 0;
+}
 
-       closure_init_stack(&cl);
+static unsigned sb_blacklist_u64s(unsigned nr)
+{
+       struct bch_sb_field_journal_seq_blacklist *bl;
 
-       for (i = 0;; i++) {
-               struct btree_trans trans;
-               struct btree_iter *iter;
-               struct btree *b;
+       return (sizeof(*bl) + sizeof(bl->start[0]) * nr) / sizeof(u64);
+}
 
-               bch2_trans_init(&trans, c);
+static struct bch_sb_field_journal_seq_blacklist *
+blacklist_entry_try_merge(struct bch_fs *c,
+                         struct bch_sb_field_journal_seq_blacklist *bl,
+                         unsigned i)
+{
+       unsigned nr = blacklist_nr_entries(bl);
+
+       if (le64_to_cpu(bl->start[i].end) >=
+           le64_to_cpu(bl->start[i + 1].start)) {
+               bl->start[i].end = bl->start[i + 1].end;
+               --nr;
+               memmove(&bl->start[i],
+                       &bl->start[i + 1],
+                       sizeof(bl->start[0]) * (nr - i));
+
+               bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+                                                       sb_blacklist_u64s(nr));
+               BUG_ON(!bl);
+       }
 
-               mutex_lock(&j->blacklist_lock);
-               if (i >= bl->nr_entries) {
-                       mutex_unlock(&j->blacklist_lock);
-                       break;
-               }
-               n = bl->entries[i];
-               mutex_unlock(&j->blacklist_lock);
+       return bl;
+}
 
-               iter = bch2_trans_get_node_iter(&trans, n.btree_id, n.pos,
-                                               0, 0, 0);
+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
+{
+       struct bch_sb_field_journal_seq_blacklist *bl;
+       unsigned i, nr;
+       int ret = 0;
 
-               b = bch2_btree_iter_peek_node(iter);
+       mutex_lock(&c->sb_lock);
+       bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+       nr = blacklist_nr_entries(bl);
 
-               /* The node might have already been rewritten: */
+       if (bl) {
+               for (i = 0; i < nr; i++) {
+                       struct journal_seq_blacklist_entry *e =
+                               bl->start + i;
 
-               if (b->data->keys.seq == n.seq) {
-                       ret = bch2_btree_node_rewrite(c, iter, n.seq, 0);
-                       if (ret) {
-                               bch2_trans_exit(&trans);
-                               bch2_fs_fatal_error(c,
-                                       "error %i rewriting btree node with blacklisted journal seq",
-                                       ret);
-                               bch2_journal_halt(j);
-                               return;
+                       if (start == le64_to_cpu(e->start) &&
+                           end   == le64_to_cpu(e->end))
+                               goto out;
+
+                       if (start <= le64_to_cpu(e->start) &&
+                           end   >= le64_to_cpu(e->end)) {
+                               e->start = cpu_to_le64(start);
+                               e->end  = cpu_to_le64(end);
+
+                               if (i + 1 < nr)
+                                       bl = blacklist_entry_try_merge(c,
+                                                               bl, i);
+                               if (i)
+                                       bl = blacklist_entry_try_merge(c,
+                                                               bl, i - 1);
+                               goto out_write_sb;
                        }
                }
-
-               bch2_trans_exit(&trans);
        }
 
-       for (i = 0;; i++) {
-               struct btree_update *as;
-               struct pending_btree_node_free *d;
-
-               mutex_lock(&j->blacklist_lock);
-               if (i >= bl->nr_entries) {
-                       mutex_unlock(&j->blacklist_lock);
-                       break;
-               }
-               n = bl->entries[i];
-               mutex_unlock(&j->blacklist_lock);
-redo_wait:
-               mutex_lock(&c->btree_interior_update_lock);
-
-               /*
-                * Is the node on the list of pending interior node updates -
-                * being freed? If so, wait for that to finish:
-                */
-               for_each_pending_btree_node_free(c, as, d)
-                       if (n.seq       == d->seq &&
-                           n.btree_id  == d->btree_id &&
-                           !d->level &&
-                           !bkey_cmp(n.pos, d->key.k.p)) {
-                               closure_wait(&as->wait, &cl);
-                               mutex_unlock(&c->btree_interior_update_lock);
-                               closure_sync(&cl);
-                               goto redo_wait;
-                       }
-
-               mutex_unlock(&c->btree_interior_update_lock);
+       bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+                                       sb_blacklist_u64s(nr + 1));
+       if (!bl) {
+               ret = -ENOMEM;
+               goto out;
        }
 
-       mutex_lock(&j->blacklist_lock);
+       bl->start[nr].start     = cpu_to_le64(start);
+       bl->start[nr].end       = cpu_to_le64(end);
+out_write_sb:
+       c->disk_sb.sb->features[0] |=
+               1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3;
 
-       bch2_journal_pin_drop(j, &bl->pin);
-       list_del(&bl->list);
-       kfree(bl->entries);
-       kfree(bl);
+       ret = bch2_write_super(c);
+out:
+       mutex_unlock(&c->sb_lock);
 
-       mutex_unlock(&j->blacklist_lock);
+       return ret;
 }
 
-/*
- * Determine if a particular sequence number is blacklisted - if so, return
- * blacklist entry:
- */
-struct journal_seq_blacklist *
-bch2_journal_seq_blacklist_find(struct journal *j, u64 seq)
+static int journal_seq_blacklist_table_cmp(const void *_l,
+                                          const void *_r, size_t size)
 {
-       struct journal_seq_blacklist *bl;
+       const struct journal_seq_blacklist_table_entry *l = _l;
+       const struct journal_seq_blacklist_table_entry *r = _r;
 
-       lockdep_assert_held(&j->blacklist_lock);
-
-       list_for_each_entry(bl, &j->seq_blacklist, list)
-               if (seq >= bl->start && seq <= bl->end)
-                       return bl;
-
-       return NULL;
+       return (l->start > r->start) - (l->start < r->start);
 }
 
-/*
- * Allocate a new, in memory blacklist entry:
- */
-static struct journal_seq_blacklist *
-bch2_journal_seq_blacklisted_new(struct journal *j, u64 start, u64 end)
+bool bch2_journal_seq_is_blacklisted(struct bch_fs *c, u64 seq,
+                                    bool dirty)
 {
-       struct journal_seq_blacklist *bl;
+       struct journal_seq_blacklist_table *t = c->journal_seq_blacklist_table;
+       struct journal_seq_blacklist_table_entry search = { .start = seq };
+       int idx;
 
-       lockdep_assert_held(&j->blacklist_lock);
+       if (!t)
+               return false;
 
-       /*
-        * When we start the journal, bch2_journal_start() will skip over @seq:
-        */
+       idx = eytzinger0_find_le(t->entries, t->nr,
+                                sizeof(t->entries[0]),
+                                journal_seq_blacklist_table_cmp,
+                                &search);
+       if (idx < 0)
+               return false;
 
-       bl = kzalloc(sizeof(*bl), GFP_KERNEL);
-       if (!bl)
-               return NULL;
+       BUG_ON(t->entries[idx].start > seq);
 
-       bl->start       = start;
-       bl->end         = end;
+       if (seq >= t->entries[idx].end)
+               return false;
 
-       list_add_tail(&bl->list, &j->seq_blacklist);
-       return bl;
+       if (dirty)
+               t->entries[idx].dirty = true;
+       return true;
 }
 
-/*
- * Returns true if @seq is newer than the most recent journal entry that got
- * written, and data corresponding to @seq should be ignored - also marks @seq
- * as blacklisted so that on future restarts the corresponding data will still
- * be ignored:
- */
-int bch2_journal_seq_should_ignore(struct bch_fs *c, u64 seq, struct btree *b)
+int bch2_blacklist_table_initialize(struct bch_fs *c)
 {
-       struct journal *j = &c->journal;
-       struct journal_seq_blacklist *bl = NULL;
-       struct blacklisted_node *n;
-       u64 journal_seq;
-       int ret = 0;
-
-       if (!seq)
-               return 0;
+       struct bch_sb_field_journal_seq_blacklist *bl =
+               bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
+       struct journal_seq_blacklist_table *t;
+       unsigned i, nr = blacklist_nr_entries(bl);
 
-       spin_lock(&j->lock);
-       journal_seq = journal_cur_seq(j);
-       spin_unlock(&j->lock);
+       BUG_ON(c->journal_seq_blacklist_table);
 
-       /* Interier updates aren't journalled: */
-       BUG_ON(b->level);
-       BUG_ON(seq > journal_seq && test_bit(BCH_FS_INITIAL_GC_DONE, &c->flags));
+       if (!bl)
+               return 0;
 
-       /*
-        * Decrease this back to j->seq + 2 when we next rev the on disk format:
-        * increasing it temporarily to work around bug in old kernels
-        */
-       fsck_err_on(seq > journal_seq + 4, c,
-                   "bset journal seq too far in the future: %llu > %llu",
-                   seq, journal_seq);
+       t = kzalloc(sizeof(*t) + sizeof(t->entries[0]) * nr,
+                   GFP_KERNEL);
+       if (!t)
+               return -ENOMEM;
 
-       if (seq <= journal_seq &&
-           list_empty_careful(&j->seq_blacklist))
-               return 0;
+       t->nr = nr;
 
-       mutex_lock(&j->blacklist_lock);
-
-       if (seq <= journal_seq) {
-               bl = bch2_journal_seq_blacklist_find(j, seq);
-               if (!bl)
-                       goto out;
-       } else {
-               bch_verbose(c, "btree node %u:%llu:%llu has future journal sequence number %llu, blacklisting",
-                           b->btree_id, b->key.k.p.inode, b->key.k.p.offset, seq);
-
-               if (!j->new_blacklist) {
-                       j->new_blacklist = bch2_journal_seq_blacklisted_new(j,
-                                               journal_seq + 1,
-                                               journal_seq + 1);
-                       if (!j->new_blacklist) {
-                               ret = -ENOMEM;
-                               goto out;
-                       }
-               }
-               bl = j->new_blacklist;
-               bl->end = max(bl->end, seq);
+       for (i = 0; i < nr; i++) {
+               t->entries[i].start     = le64_to_cpu(bl->start[i].start);
+               t->entries[i].end       = le64_to_cpu(bl->start[i].end);
        }
 
-       for (n = bl->entries; n < bl->entries + bl->nr_entries; n++)
-               if (b->data->keys.seq   == n->seq &&
-                   b->btree_id         == n->btree_id &&
-                   !bkey_cmp(b->key.k.p, n->pos))
-                       goto found_entry;
-
-       if (!bl->nr_entries ||
-           is_power_of_2(bl->nr_entries)) {
-               n = krealloc(bl->entries,
-                            max_t(size_t, bl->nr_entries * 2, 8) * sizeof(*n),
-                            GFP_KERNEL);
-               if (!n) {
-                       ret = -ENOMEM;
-                       goto out;
-               }
-               bl->entries = n;
-       }
+       eytzinger0_sort(t->entries,
+                       t->nr,
+                       sizeof(t->entries[0]),
+                       journal_seq_blacklist_table_cmp,
+                       NULL);
 
-       bl->entries[bl->nr_entries++] = (struct blacklisted_node) {
-               .seq            = b->data->keys.seq,
-               .btree_id       = b->btree_id,
-               .pos            = b->key.k.p,
-       };
-found_entry:
-       ret = 1;
-out:
-fsck_err:
-       mutex_unlock(&j->blacklist_lock);
-       return ret;
+       c->journal_seq_blacklist_table = t;
+       return 0;
 }
 
-static int __bch2_journal_seq_blacklist_read(struct journal *j,
-                                            struct journal_replay *i,
-                                            u64 start, u64 end)
+static const char *
+bch2_sb_journal_seq_blacklist_validate(struct bch_sb *sb,
+                                      struct bch_sb_field *f)
 {
-       struct bch_fs *c = container_of(j, struct bch_fs, journal);
-       struct journal_seq_blacklist *bl;
-
-       bch_verbose(c, "blacklisting existing journal seq %llu-%llu",
-                   start, end);
+       struct bch_sb_field_journal_seq_blacklist *bl =
+               field_to_type(f, journal_seq_blacklist);
+       struct journal_seq_blacklist_entry *i;
+       unsigned nr = blacklist_nr_entries(bl);
+
+       for (i = bl->start; i < bl->start + nr; i++) {
+               if (le64_to_cpu(i->start) >=
+                   le64_to_cpu(i->end))
+                       return "entry start >= end";
+
+               if (i + 1 < bl->start + nr &&
+                   le64_to_cpu(i[0].end) >
+                   le64_to_cpu(i[1].start))
+                       return "entries out of order";
+       }
 
-       bl = bch2_journal_seq_blacklisted_new(j, start, end);
-       if (!bl)
-               return -ENOMEM;
+       return NULL;
+}
 
-       bch2_journal_pin_add(j, le64_to_cpu(i->j.seq), &bl->pin,
-                            journal_seq_blacklist_flush);
-       return 0;
+static void bch2_sb_journal_seq_blacklist_to_text(struct printbuf *out,
+                                                 struct bch_sb *sb,
+                                                 struct bch_sb_field *f)
+{
+       struct bch_sb_field_journal_seq_blacklist *bl =
+               field_to_type(f, journal_seq_blacklist);
+       struct journal_seq_blacklist_entry *i;
+       unsigned nr = blacklist_nr_entries(bl);
+
+       for (i = bl->start; i < bl->start + nr; i++) {
+               if (i != bl->start)
+                       pr_buf(out, " ");
+
+               pr_buf(out, "%llu-%llu",
+                      le64_to_cpu(i->start),
+                      le64_to_cpu(i->end));
+       }
 }
 
-/*
- * After reading the journal, find existing journal seq blacklist entries and
- * read them into memory:
- */
-int bch2_journal_seq_blacklist_read(struct journal *j,
-                                   struct journal_replay *i)
+const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist = {
+       .validate       = bch2_sb_journal_seq_blacklist_validate,
+       .to_text        = bch2_sb_journal_seq_blacklist_to_text
+};
+
+void bch2_blacklist_entries_gc(struct work_struct *work)
 {
-       struct jset_entry *entry;
-       int ret = 0;
+       struct bch_fs *c = container_of(work, struct bch_fs,
+                                       journal_seq_blacklist_gc_work);
+       struct journal_seq_blacklist_table *t;
+       struct bch_sb_field_journal_seq_blacklist *bl;
+       struct journal_seq_blacklist_entry *src, *dst;
+       struct btree_trans trans;
+       unsigned i, nr, new_nr;
+       int ret;
 
-       vstruct_for_each(&i->j, entry) {
-               switch (entry->type) {
-               case BCH_JSET_ENTRY_blacklist: {
-                       struct jset_entry_blacklist *bl_entry =
-                               container_of(entry, struct jset_entry_blacklist, entry);
+       bch2_trans_init(&trans, c);
 
-                       ret = __bch2_journal_seq_blacklist_read(j, i,
-                                       le64_to_cpu(bl_entry->seq),
-                                       le64_to_cpu(bl_entry->seq));
-                       break;
-               }
-               case BCH_JSET_ENTRY_blacklist_v2: {
-                       struct jset_entry_blacklist_v2 *bl_entry =
-                               container_of(entry, struct jset_entry_blacklist_v2, entry);
-
-                       ret = __bch2_journal_seq_blacklist_read(j, i,
-                                       le64_to_cpu(bl_entry->start),
-                                       le64_to_cpu(bl_entry->end));
-                       break;
-               }
-               }
+       for (i = 0; i < BTREE_ID_NR; i++) {
+               struct btree_iter *iter;
+               struct btree *b;
 
-               if (ret)
-                       break;
+               for_each_btree_node(&trans, iter, i, POS_MIN,
+                                   BTREE_ITER_PREFETCH, b)
+                       if (test_bit(BCH_FS_STOPPING, &c->flags)) {
+                               bch2_trans_exit(&trans);
+                               return;
+                       }
+               bch2_trans_iter_free(&trans, iter);
        }
 
-       return ret;
-}
-
-/*
- * After reading the journal and walking the btree, we might have new journal
- * sequence numbers to blacklist - add entries to the next journal entry to be
- * written:
- */
-void bch2_journal_seq_blacklist_write(struct journal *j)
-{
-       struct journal_seq_blacklist *bl = j->new_blacklist;
-       struct jset_entry_blacklist_v2 *bl_entry;
-       struct jset_entry *entry;
+       ret = bch2_trans_exit(&trans);
+       if (ret)
+               return;
 
+       mutex_lock(&c->sb_lock);
+       bl = bch2_sb_get_journal_seq_blacklist(c->disk_sb.sb);
        if (!bl)
-               return;
+               goto out;
 
-       entry = bch2_journal_add_entry_noreservation(journal_cur_buf(j),
-                       (sizeof(*bl_entry) - sizeof(*entry)) / sizeof(u64));
+       nr = blacklist_nr_entries(bl);
+       dst = bl->start;
 
-       bl_entry = container_of(entry, struct jset_entry_blacklist_v2, entry);
-       bl_entry->entry.type    = BCH_JSET_ENTRY_blacklist_v2;
-       bl_entry->start         = cpu_to_le64(bl->start);
-       bl_entry->end           = cpu_to_le64(bl->end);
+       t = c->journal_seq_blacklist_table;
+       BUG_ON(nr != t->nr);
+
+       for (src = bl->start, i = eytzinger0_first(t->nr);
+            src < bl->start + nr;
+            src++, i = eytzinger0_next(i, nr)) {
+               BUG_ON(t->entries[i].start      != le64_to_cpu(src->start));
+               BUG_ON(t->entries[i].end        != le64_to_cpu(src->end));
+
+               if (t->entries[i].dirty)
+                       *dst++ = *src;
+       }
 
-       bch2_journal_pin_add(j,
-                            journal_cur_seq(j),
-                            &bl->pin,
-                            journal_seq_blacklist_flush);
+       new_nr = dst - bl->start;
 
-       j->new_blacklist = NULL;
+       bch_info(c, "nr blacklist entries was %u, now %u", nr, new_nr);
+
+       if (new_nr != nr) {
+               bl = bch2_sb_resize_journal_seq_blacklist(&c->disk_sb,
+                               new_nr ? sb_blacklist_u64s(new_nr) : 0);
+               BUG_ON(new_nr && !bl);
+
+               if (!new_nr)
+                       c->disk_sb.sb->features[0] &=
+                               ~(1ULL << BCH_FEATURE_JOURNAL_SEQ_BLACKLIST_V3);
+
+               bch2_write_super(c);
+       }
+out:
+       mutex_unlock(&c->sb_lock);
 }
index b4a3b270e9d28f114b95c3d2009cb866d1335331..03f4b97247fd63be8226751296d411dacebdfcf9 100644 (file)
@@ -2,13 +2,12 @@
 #ifndef _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
 #define _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H
 
-struct journal_replay;
-
-struct journal_seq_blacklist *
-bch2_journal_seq_blacklist_find(struct journal *, u64);
-int bch2_journal_seq_should_ignore(struct bch_fs *, u64, struct btree *);
-int bch2_journal_seq_blacklist_read(struct journal *,
-                                   struct journal_replay *);
-void bch2_journal_seq_blacklist_write(struct journal *);
+bool bch2_journal_seq_is_blacklisted(struct bch_fs *, u64, bool);
+int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64, u64);
+int bch2_blacklist_table_initialize(struct bch_fs *);
+
+extern const struct bch_sb_field_ops bch_sb_field_ops_journal_seq_blacklist;
+
+void bch2_blacklist_entries_gc(struct work_struct *);
 
 #endif /* _BCACHEFS_JOURNAL_SEQ_BLACKLIST_H */
index 85bf5e2706f7be6ca2e9a32d007826fba78d6b65..7349b50bc5e77cc022abc2f6200c39d29db41a20 100644 (file)
@@ -54,24 +54,6 @@ struct journal_entry_pin {
        u64                             seq;
 };
 
-/* corresponds to a btree node with a blacklisted bset: */
-struct blacklisted_node {
-       __le64                  seq;
-       enum btree_id           btree_id;
-       struct bpos             pos;
-};
-
-struct journal_seq_blacklist {
-       struct list_head        list;
-       u64                     start;
-       u64                     end;
-
-       struct journal_entry_pin pin;
-
-       struct blacklisted_node *entries;
-       size_t                  nr_entries;
-};
-
 struct journal_res {
        bool                    ref;
        u8                      idx;
@@ -222,10 +204,6 @@ struct journal {
 
        u64                     replay_journal_seq;
 
-       struct mutex            blacklist_lock;
-       struct list_head        seq_blacklist;
-       struct journal_seq_blacklist *new_blacklist;
-
        struct write_point      wp;
        spinlock_t              err_lock;
 
index 67b4dda9cfeb7c692e8a7a85bb5294b0654c4382..9411a1f550f3af05a226a740b1a1ac8da4943d9d 100644 (file)
@@ -12,6 +12,7 @@
 #include "error.h"
 #include "fsck.h"
 #include "journal_io.h"
+#include "journal_seq_blacklist.h"
 #include "quota.h"
 #include "recovery.h"
 #include "replicas.h"
@@ -99,18 +100,49 @@ fsck_err:
        return ret;
 }
 
+static int
+verify_journal_entries_not_blacklisted_or_missing(struct bch_fs *c,
+                                                 struct list_head *journal)
+{
+       struct journal_replay *i =
+               list_last_entry(journal, struct journal_replay, list);
+       u64 start_seq   = le64_to_cpu(i->j.last_seq);
+       u64 end_seq     = le64_to_cpu(i->j.seq);
+       u64 seq         = start_seq;
+       int ret = 0;
+
+       list_for_each_entry(i, journal, list) {
+               fsck_err_on(seq != le64_to_cpu(i->j.seq), c,
+                       "journal entries %llu-%llu missing! (replaying %llu-%llu)",
+                       seq, le64_to_cpu(i->j.seq) - 1,
+                       start_seq, end_seq);
+
+               seq = le64_to_cpu(i->j.seq);
+
+               fsck_err_on(bch2_journal_seq_is_blacklisted(c, seq, false), c,
+                           "found blacklisted journal entry %llu", seq);
+
+               do {
+                       seq++;
+               } while (bch2_journal_seq_is_blacklisted(c, seq, false));
+       }
+fsck_err:
+       return ret;
+}
+
 static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
 {
        struct bch_sb_field_clean *clean, *sb_clean;
-
-       if (!c->sb.clean)
-               return NULL;
+       int ret;
 
        mutex_lock(&c->sb_lock);
        sb_clean = bch2_sb_get_clean(c->disk_sb.sb);
-       if (!sb_clean) {
+
+       if (fsck_err_on(!sb_clean, c,
+                       "superblock marked clean but clean section not present")) {
+               SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+               c->sb.clean = false;
                mutex_unlock(&c->sb_lock);
-               bch_err(c, "superblock marked clean but clean section not present");
                return NULL;
        }
 
@@ -128,6 +160,9 @@ static struct bch_sb_field_clean *read_superblock_clean(struct bch_fs *c)
        mutex_unlock(&c->sb_lock);
 
        return clean;
+fsck_err:
+       mutex_unlock(&c->sb_lock);
+       return ERR_PTR(ret);
 }
 
 static int journal_replay_entry_early(struct bch_fs *c,
@@ -179,14 +214,32 @@ static int journal_replay_entry_early(struct bch_fs *c,
                                              le64_to_cpu(u->v));
                break;
        }
+       case BCH_JSET_ENTRY_blacklist: {
+               struct jset_entry_blacklist *bl_entry =
+                       container_of(entry, struct jset_entry_blacklist, entry);
+
+               ret = bch2_journal_seq_blacklist_add(c,
+                               le64_to_cpu(bl_entry->seq),
+                               le64_to_cpu(bl_entry->seq) + 1);
+               break;
+       }
+       case BCH_JSET_ENTRY_blacklist_v2: {
+               struct jset_entry_blacklist_v2 *bl_entry =
+                       container_of(entry, struct jset_entry_blacklist_v2, entry);
+
+               ret = bch2_journal_seq_blacklist_add(c,
+                               le64_to_cpu(bl_entry->start),
+                               le64_to_cpu(bl_entry->end) + 1);
+               break;
+       }
        }
 
        return ret;
 }
 
-static int load_journal_metadata(struct bch_fs *c,
-                                struct bch_sb_field_clean *clean,
-                                struct list_head *journal)
+static int journal_replay_early(struct bch_fs *c,
+                               struct bch_sb_field_clean *clean,
+                               struct list_head *journal)
 {
        struct jset_entry *entry;
        int ret;
@@ -300,37 +353,76 @@ static bool journal_empty(struct list_head *journal)
 int bch2_fs_recovery(struct bch_fs *c)
 {
        const char *err = "cannot allocate memory";
-       struct bch_sb_field_clean *clean;
+       struct bch_sb_field_clean *clean = NULL;
+       u64 journal_seq;
        LIST_HEAD(journal);
        int ret;
 
-       clean = read_superblock_clean(c);
-       if (clean)
+       if (c->sb.clean)
+               clean = read_superblock_clean(c);
+       ret = PTR_ERR_OR_ZERO(clean);
+       if (ret)
+               goto err;
+
+       if (c->sb.clean)
                bch_info(c, "recovering from clean shutdown, journal seq %llu",
                         le64_to_cpu(clean->journal_seq));
 
-       if (!clean || c->opts.fsck) {
+       if (!c->replicas.entries) {
+               bch_info(c, "building replicas info");
+               set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
+       }
+
+       if (!c->sb.clean || c->opts.fsck) {
+               struct jset *j;
+
                ret = bch2_journal_read(c, &journal);
                if (ret)
                        goto err;
 
-               ret = verify_superblock_clean(c, &clean,
-                       &list_last_entry(&journal, struct journal_replay,
-                                        list)->j);
+               fsck_err_on(c->sb.clean && !journal_empty(&journal), c,
+                           "filesystem marked clean but journal not empty");
+
+               if (!c->sb.clean && list_empty(&journal)) {
+                       bch_err(c, "no journal entries found");
+                       ret = BCH_FSCK_REPAIR_IMPOSSIBLE;
+                       goto err;
+               }
+
+               j = &list_last_entry(&journal, struct journal_replay, list)->j;
+
+               ret = verify_superblock_clean(c, &clean, j);
                if (ret)
                        goto err;
+
+               journal_seq = le64_to_cpu(j->seq) + 1;
        } else {
-               ret = bch2_journal_set_seq(c,
-                                          le64_to_cpu(clean->journal_seq),
-                                          le64_to_cpu(clean->journal_seq));
-               if (ret)
+               journal_seq = le64_to_cpu(clean->journal_seq) + 1;
+       }
+
+       ret = journal_replay_early(c, clean, &journal);
+       if (ret)
+               goto err;
+
+       if (!c->sb.clean) {
+               ret = bch2_journal_seq_blacklist_add(c,
+                               journal_seq,
+                               journal_seq + 4);
+               if (ret) {
+                       bch_err(c, "error creating new journal seq blacklist entry");
                        goto err;
+               }
+
+               journal_seq += 4;
        }
 
-       fsck_err_on(clean && !journal_empty(&journal), c,
-                   "filesystem marked clean but journal not empty");
+       ret = bch2_blacklist_table_initialize(c);
+       if (!ret)
+               ret = verify_journal_entries_not_blacklisted_or_missing(c, &journal);
+       if (ret)
+               goto err;
 
-       ret = load_journal_metadata(c, clean, &journal);
+       ret = bch2_fs_journal_start(&c->journal, journal_seq, &journal);
        if (ret)
                goto err;
 
@@ -351,11 +443,6 @@ int bch2_fs_recovery(struct bch_fs *c)
 
        set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
 
-       if (!c->replicas.entries) {
-               bch_info(c, "building replicas info");
-               set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
-       }
-
        if (c->opts.fsck ||
            !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
            test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
@@ -377,13 +464,6 @@ int bch2_fs_recovery(struct bch_fs *c)
        if (c->sb.encryption_type && !c->sb.clean)
                atomic64_add(1 << 16, &c->key_version);
 
-       /*
-        * bch2_fs_journal_start() can't happen sooner, or btree_gc_finish()
-        * will give spurious errors about oldest_gen > bucket_gen -
-        * this is a hack but oh well.
-        */
-       bch2_fs_journal_start(&c->journal);
-
        if (c->opts.noreplay)
                goto out;
 
@@ -424,6 +504,10 @@ int bch2_fs_recovery(struct bch_fs *c)
                SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
        }
        mutex_unlock(&c->sb_lock);
+
+       if (c->journal_seq_blacklist_table &&
+           c->journal_seq_blacklist_table->nr > 128)
+               queue_work(system_long_wq, &c->journal_seq_blacklist_gc_work);
 out:
        bch2_journal_entries_free(&journal);
        kfree(clean);
@@ -472,7 +556,7 @@ int bch2_fs_initialize(struct bch_fs *c)
         * journal_res_get() will crash if called before this has
         * set up the journal.pin FIFO and journal.cur pointer:
         */
-       bch2_fs_journal_start(&c->journal);
+       bch2_fs_journal_start(&c->journal, 1, &journal);
        bch2_journal_set_replay_done(&c->journal);
 
        err = "error going read write";
index 9fd77e57cafe0ed6b681ec286ccb54e03942c267..7aaa8b785d5716dc489b8369cb7dd25c6b44e0cc 100644 (file)
@@ -7,6 +7,7 @@
 #include "error.h"
 #include "io.h"
 #include "journal.h"
+#include "journal_seq_blacklist.h"
 #include "replicas.h"
 #include "quota.h"
 #include "super-io.h"
index 8c31a9a67eee6811a7a6a352071df2c365c9be52..27eacb1cd14481306ae3f59f3333f567bd941b79 100644 (file)
@@ -30,6 +30,7 @@
 #include "io.h"
 #include "journal.h"
 #include "journal_reclaim.h"
+#include "journal_seq_blacklist.h"
 #include "move.h"
 #include "migrate.h"
 #include "movinggc.h"
@@ -468,6 +469,7 @@ static void bch2_fs_free(struct bch_fs *c)
        kfree(c->replicas.entries);
        kfree(c->replicas_gc.entries);
        kfree(rcu_dereference_protected(c->disk_groups, 1));
+       kfree(c->journal_seq_blacklist_table);
 
        if (c->journal_reclaim_wq)
                destroy_workqueue(c->journal_reclaim_wq);
@@ -496,6 +498,10 @@ void bch2_fs_stop(struct bch_fs *c)
 
        bch_verbose(c, "shutting down");
 
+       set_bit(BCH_FS_STOPPING, &c->flags);
+
+       cancel_work_sync(&c->journal_seq_blacklist_gc_work);
+
        for_each_member_device(ca, c, i)
                if (ca->kobj.state_in_sysfs &&
                    ca->disk_sb.bdev)
@@ -631,6 +637,9 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
        spin_lock_init(&c->btree_write_error_lock);
        INIT_WORK(&c->btree_write_error_work, bch2_btree_write_error_work);
 
+       INIT_WORK(&c->journal_seq_blacklist_gc_work,
+                 bch2_blacklist_entries_gc);
+
        INIT_LIST_HEAD(&c->fsck_errors);
        mutex_init(&c->fsck_error_lock);