From: Kent Overstreet Date: Thu, 28 May 2020 20:06:13 +0000 (-0400) Subject: bcachefs: Fixes for going RO X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=039fc4c5221f7433d8383e25a7c70b30793b4916;p=linux.git bcachefs: Fixes for going RO Now that interior btree updates are fully transactional, we don't need to write out alloc info in a loop. However, interior btree updates do put more things in the journal, so we still need a loop in the RO sequence. Signed-off-by: Kent Overstreet Signed-off-by: Kent Overstreet --- diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 38173f662d1ef..09a719b256b3d 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -869,6 +869,15 @@ static int bch2_invalidate_one_bucket2(struct btree_trans *trans, if (!invalidating_cached_data) goto out; + /* + * If the read-only path is trying to shut down, we can't be generating + * new btree updates: + */ + if (test_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags)) { + ret = 1; + goto out; + } + BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8); bch2_btree_iter_set_pos(iter, POS(ca->dev_idx, b)); @@ -956,7 +965,7 @@ out: percpu_up_read(&c->mark_lock); } - return ret; + return ret < 0 ? ret : 0; } static bool bch2_invalidate_one_bucket(struct bch_fs *c, struct bch_dev *ca, diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h index e12946d686dde..a900725088196 100644 --- a/fs/bcachefs/bcachefs.h +++ b/fs/bcachefs/bcachefs.h @@ -482,6 +482,7 @@ enum { BCH_FS_ALLOC_CLEAN, BCH_FS_ALLOCATOR_STARTED, BCH_FS_ALLOCATOR_RUNNING, + BCH_FS_ALLOCATOR_STOPPING, BCH_FS_INITIAL_GC_DONE, BCH_FS_FSCK_DONE, BCH_FS_STARTED, diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c index 6cb37045cf685..556f12602fcfa 100644 --- a/fs/bcachefs/journal_reclaim.c +++ b/fs/bcachefs/journal_reclaim.c @@ -413,10 +413,12 @@ journal_get_next_pin(struct journal *j, u64 max_seq, u64 *seq) return ret; } -static void journal_flush_pins(struct journal *j, u64 seq_to_flush, +/* returns true if we did work */ +static bool journal_flush_pins(struct journal *j, u64 seq_to_flush, unsigned min_nr) { struct journal_entry_pin *pin; + bool ret = false; u64 seq; lockdep_assert_held(&j->reclaim_lock); @@ -431,7 +433,10 @@ static void journal_flush_pins(struct journal *j, u64 seq_to_flush, BUG_ON(j->flush_in_progress != pin); j->flush_in_progress = NULL; wake_up(&j->pin_flush_wait); + ret = true; } + + return ret; } /** @@ -523,7 +528,8 @@ void bch2_journal_reclaim_work(struct work_struct *work) mutex_unlock(&j->reclaim_lock); } -static int journal_flush_done(struct journal *j, u64 seq_to_flush) +static int journal_flush_done(struct journal *j, u64 seq_to_flush, + bool *did_work) { int ret; @@ -533,7 +539,7 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush) mutex_lock(&j->reclaim_lock); - journal_flush_pins(j, seq_to_flush, 0); + *did_work = journal_flush_pins(j, seq_to_flush, 0); spin_lock(&j->lock); /* @@ -551,12 +557,17 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush) return ret; } -void bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush) +bool bch2_journal_flush_pins(struct journal *j, u64 seq_to_flush) { + bool did_work = false; + if (!test_bit(JOURNAL_STARTED, &j->flags)) - return; + return false; + + closure_wait_event(&j->async_wait, + journal_flush_done(j, seq_to_flush, &did_work)); - closure_wait_event(&j->async_wait, journal_flush_done(j, seq_to_flush)); + return did_work; } int bch2_journal_flush_device_pins(struct journal *j, int dev_idx) diff --git a/fs/bcachefs/journal_reclaim.h b/fs/bcachefs/journal_reclaim.h index 3ef641f7ce303..272ba8a37967c 100644 --- a/fs/bcachefs/journal_reclaim.h +++ b/fs/bcachefs/journal_reclaim.h @@ -53,11 +53,11 @@ void bch2_journal_do_discards(struct journal *); void bch2_journal_reclaim(struct journal *); void bch2_journal_reclaim_work(struct work_struct *); -void bch2_journal_flush_pins(struct journal *, u64); +bool bch2_journal_flush_pins(struct journal *, u64); -static inline void bch2_journal_flush_all_pins(struct journal *j) +static inline bool bch2_journal_flush_all_pins(struct journal *j) { - bch2_journal_flush_pins(j, U64_MAX); + return bch2_journal_flush_pins(j, U64_MAX); } int bch2_journal_flush_device_pins(struct journal *, int); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 3cf75ac1b8047..9da64d9d52e57 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -175,7 +175,7 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid) static void __bch2_fs_read_only(struct bch_fs *c) { struct bch_dev *ca; - bool wrote; + bool wrote = false; unsigned i, clean_passes = 0; int ret; @@ -200,39 +200,46 @@ static void __bch2_fs_read_only(struct bch_fs *c) goto nowrote_alloc; bch_verbose(c, "writing alloc info"); + /* + * This should normally just be writing the bucket read/write clocks: + */ + ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?: + bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote); + bch_verbose(c, "writing alloc info complete"); - do { - wrote = false; + if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) + bch2_fs_inconsistent(c, "error writing out alloc info %i", ret); - ret = bch2_stripes_write(c, BTREE_INSERT_NOCHECK_RW, &wrote) ?: - bch2_alloc_write(c, BTREE_INSERT_NOCHECK_RW, &wrote); + if (ret) + goto nowrote_alloc; - if (ret && !test_bit(BCH_FS_EMERGENCY_RO, &c->flags)) - bch2_fs_inconsistent(c, "error writing out alloc info %i", ret); + bch_verbose(c, "flushing journal and stopping allocators"); - if (ret) - goto nowrote_alloc; + bch2_journal_flush_all_pins(&c->journal); + set_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags); - for_each_member_device(ca, c, i) - bch2_dev_allocator_quiesce(c, ca); + do { + clean_passes++; - bch2_journal_flush_all_pins(&c->journal); + if (bch2_journal_flush_all_pins(&c->journal)) + clean_passes = 0; /* - * We need to explicitly wait on btree interior updates to complete - * before stopping the journal, flushing all journal pins isn't - * sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree - * interior updates have to drop their journal pin before they're - * fully complete: + * In flight interior btree updates will generate more journal + * updates and btree updates (alloc btree): */ - closure_wait_event(&c->btree_interior_update_wait, - !bch2_btree_interior_updates_nr_pending(c)); + if (bch2_btree_interior_updates_nr_pending(c)) { + closure_wait_event(&c->btree_interior_update_wait, + !bch2_btree_interior_updates_nr_pending(c)); + clean_passes = 0; + } flush_work(&c->btree_interior_update_work); - clean_passes = wrote ? 0 : clean_passes + 1; + if (bch2_journal_flush_all_pins(&c->journal)) + clean_passes = 0; } while (clean_passes < 2); + bch_verbose(c, "flushing journal and stopping allocators complete"); - bch_verbose(c, "writing alloc info complete"); set_bit(BCH_FS_ALLOC_CLEAN, &c->flags); nowrote_alloc: closure_wait_event(&c->btree_interior_update_wait, @@ -243,11 +250,10 @@ nowrote_alloc: bch2_dev_allocator_stop(ca); clear_bit(BCH_FS_ALLOCATOR_RUNNING, &c->flags); + clear_bit(BCH_FS_ALLOCATOR_STOPPING, &c->flags); bch2_fs_journal_stop(&c->journal); - /* XXX: mark super that alloc info is persistent */ - /* * the journal kicks off btree writes via reclaim - wait for in flight * writes after stopping journal: