bcachefs: Persist alloc info on clean shutdown
author     Kent Overstreet <kent.overstreet@gmail.com>
           Mon, 19 Nov 2018 06:31:41 +0000 (01:31 -0500)
committer  Kent Overstreet <kent.overstreet@linux.dev>
           Sun, 22 Oct 2023 21:08:14 +0000 (17:08 -0400)
 - Does not persist alloc info for stripes yet
 - Also does not yet include filesystem block/sector counts from
struct fs_usage
 - Not made use of just yet
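
In short, the read-only path now loops: write out alloc keys for buckets
whose marks are dirty, wait for each device's allocator thread to fill its
freelists and go idle, flush journal pins, and repeat until a pass writes
nothing. A condensed sketch of that loop (the wrapper function name is made
up for illustration; the helpers are the ones added or used in the diff
below):

	static void write_alloc_info_until_clean(struct bch_fs *c)
	{
		struct bch_dev *ca;
		bool wrote;
		unsigned i;

		do {
			/* write out buckets whose alloc info is still dirty: */
			if (bch2_alloc_write(c, false, &wrote))
				break;

			/* wait for allocator threads to fill freelists and block: */
			for_each_member_device(ca, c, i)
				bch2_dev_allocator_quiesce(c, ca);

			/* flushing pins can dirty more buckets, hence the loop: */
			bch2_journal_flush_all_pins(&c->journal);
		} while (wrote);
	}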

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_background.h
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_update.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/super.c

diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index b79d5b05962159ac4cba1fa39dde55c7e9911ea5..686287d12d14c92c914459df64cc02b29c651d3b 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -250,6 +250,9 @@ int bch2_alloc_read(struct bch_fs *c, struct list_head *journal_replay_list)
                                bch2_alloc_read_key(c, bkey_i_to_s_c(k));
        }
 
+       for_each_member_device(ca, c, i)
+               bch2_dev_usage_from_buckets(c, ca);
+
        mutex_lock(&c->bucket_clock[READ].lock);
        for_each_member_device(ca, c, i) {
                down_read(&ca->bucket_lock);
@@ -281,35 +284,51 @@ static int __bch2_alloc_write_key(struct bch_fs *c, struct bch_dev *ca,
 #endif
        struct bkey_i_alloc *a = bkey_alloc_init(&alloc_key.k);
        struct bucket *g;
-       struct bucket_mark m;
+       struct bucket_mark m, new;
        int ret;
 
        BUG_ON(BKEY_ALLOC_VAL_U64s_MAX > 8);
 
        a->k.p = POS(ca->dev_idx, b);
 
+       bch2_btree_iter_set_pos(iter, a->k.p);
+
+       ret = bch2_btree_iter_traverse(iter);
+       if (ret)
+               return ret;
+
        percpu_down_read(&c->mark_lock);
        g = bucket(ca, b);
-       m = bucket_cmpxchg(g, m, m.dirty = false);
+       m = READ_ONCE(g->mark);
+
+       if (!m.dirty) {
+               percpu_up_read(&c->mark_lock);
+               return 0;
+       }
 
        __alloc_write_key(a, g, m);
        percpu_up_read(&c->mark_lock);
 
        bch2_btree_iter_cond_resched(iter);
 
-       bch2_btree_iter_set_pos(iter, a->k.p);
-
        ret = bch2_btree_insert_at(c, NULL, journal_seq,
+                                  BTREE_INSERT_NOCHECK_RW|
                                   BTREE_INSERT_NOFAIL|
                                   BTREE_INSERT_USE_RESERVE|
                                   BTREE_INSERT_USE_ALLOC_RESERVE|
                                   flags,
                                   BTREE_INSERT_ENTRY(iter, &a->k_i));
+       if (ret)
+               return ret;
 
-       if (!ret && ca->buckets_written)
+       new = m;
+       new.dirty = false;
+       atomic64_cmpxchg(&g->_mark.v, m.v.counter, new.v.counter);
+
+       if (ca->buckets_written)
                set_bit(b, ca->buckets_written);
 
-       return ret;
+       return 0;
 }
 
 int bch2_alloc_replay_key(struct bch_fs *c, struct bkey_i *k)
@@ -899,10 +918,19 @@ static int push_invalidated_bucket(struct bch_fs *c, struct bch_dev *ca, size_t
                for (i = 0; i < RESERVE_NR; i++)
                        if (fifo_push(&ca->free[i], bucket)) {
                                fifo_pop(&ca->free_inc, bucket);
+
                                closure_wake_up(&c->freelist_wait);
+                               ca->allocator_blocked_full = false;
+
                                spin_unlock(&c->freelist_lock);
                                goto out;
                        }
+
+               if (!ca->allocator_blocked_full) {
+                       ca->allocator_blocked_full = true;
+                       closure_wake_up(&c->freelist_wait);
+               }
+
                spin_unlock(&c->freelist_lock);
 
                if ((current->flags & PF_KTHREAD) &&
@@ -1227,6 +1255,11 @@ void bch2_dev_allocator_add(struct bch_fs *c, struct bch_dev *ca)
                        set_bit(ca->dev_idx, c->rw_devs[i].d);
 }
 
+void bch2_dev_allocator_quiesce(struct bch_fs *c, struct bch_dev *ca)
+{
+       closure_wait_event(&c->freelist_wait, ca->allocator_blocked_full);
+}
+
 /* stop allocator thread: */
 void bch2_dev_allocator_stop(struct bch_dev *ca)
 {
diff --git a/fs/bcachefs/alloc_background.h b/fs/bcachefs/alloc_background.h
index ef5ec659b05dc53183f3b6bee28b7962e73e9826..04f1e9152494a0f19ee2a301938a61ad8f0cd914 100644
--- a/fs/bcachefs/alloc_background.h
+++ b/fs/bcachefs/alloc_background.h
@@ -52,6 +52,7 @@ void bch2_recalc_capacity(struct bch_fs *);
 void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
 
+void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_stop(struct bch_dev *);
 int bch2_dev_allocator_start(struct bch_dev *);
 
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 17eb0dd657a8724dfafd04ed68d51b4e14977b51..2d67c9911fbba72f7609e022df2a389f344520c9 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -431,7 +431,13 @@ struct bch_dev {
 
        size_t                  inc_gen_needs_gc;
        size_t                  inc_gen_really_needs_gc;
+
+       /*
+        * XXX: this should be an enum for allocator state, so as to include
+        * error state
+        */
        bool                    allocator_blocked;
+       bool                    allocator_blocked_full;
 
        alloc_heap              alloc_heap;
 
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 824fb0d1b7f0eb925aaeda91e2e8c25302f4e7d6..9bcab29bd03340ef4d88493d1ea04bdddb9325fa 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -78,6 +78,7 @@ enum {
        __BTREE_INSERT_ATOMIC,
        __BTREE_INSERT_NOUNLOCK,
        __BTREE_INSERT_NOFAIL,
+       __BTREE_INSERT_NOCHECK_RW,
        __BTREE_INSERT_USE_RESERVE,
        __BTREE_INSERT_USE_ALLOC_RESERVE,
        __BTREE_INSERT_JOURNAL_REPLAY,
@@ -101,6 +102,8 @@ enum {
 /* Don't check for -ENOSPC: */
 #define BTREE_INSERT_NOFAIL            (1 << __BTREE_INSERT_NOFAIL)
 
+#define BTREE_INSERT_NOCHECK_RW                (1 << __BTREE_INSERT_NOCHECK_RW)
+
 /* for copygc, or when merging btree nodes */
 #define BTREE_INSERT_USE_RESERVE       (1 << __BTREE_INSERT_USE_RESERVE)
 #define BTREE_INSERT_USE_ALLOC_RESERVE (1 << __BTREE_INSERT_USE_ALLOC_RESERVE)
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index d55778696bcdd85f268d2ac90ffa5f884bc7b2a2..4bc7be9b5298113cd476093ca7828f49fa113b55 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -1172,6 +1172,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
 
        mutex_lock(&c->btree_interior_update_lock);
        percpu_down_read(&c->mark_lock);
+       preempt_disable();
        fs_usage = bch2_fs_usage_get_scratch(c);
 
        bch2_mark_key_locked(c, bkey_i_to_s_c(insert),
@@ -1194,6 +1195,7 @@ static void bch2_insert_fixup_btree_ptr(struct btree_update *as, struct btree *b
        bch2_fs_usage_apply(c, fs_usage, &as->reserve->disk_res,
                            gc_pos_btree_node(b));
 
+       preempt_enable();
        percpu_up_read(&c->mark_lock);
        mutex_unlock(&c->btree_interior_update_lock);
 
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index 12fd7fba3e9ae8b61b7e8ebacb40416842da4a58..e052a3debadb4961f7744c157ab1dac93cb53ad7 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -629,7 +629,8 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
        trans_for_each_entry(trans, i)
                btree_insert_entry_checks(c, i);
 
-       if (unlikely(!percpu_ref_tryget(&c->writes)))
+       if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
+                    !percpu_ref_tryget(&c->writes)))
                return -EROFS;
 retry:
        trans_for_each_iter(trans, i) {
@@ -659,7 +660,8 @@ retry:
        trans_for_each_iter(trans, i)
                bch2_btree_iter_downgrade(i->iter);
 out:
-       percpu_ref_put(&c->writes);
+       if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
+               percpu_ref_put(&c->writes);
 
        /* make sure we didn't drop or screw up locks: */
        trans_for_each_iter(trans, i) {
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index cbebc712a1da42cc2df078aa5d546cc23b77f55d..3e92a1f6d7fc212f1d07b2fb0696a5921d12ad49 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -387,7 +387,8 @@ static void __bch2_invalidate_bucket(struct bch_fs *c, struct bch_dev *ca,
        *old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
                BUG_ON(!is_available_bucket(new));
 
-               new.owned_by_allocator  = 1;
+               new.owned_by_allocator  = true;
+               new.dirty               = true;
                new.data_type           = 0;
                new.cached_sectors      = 0;
                new.dirty_sectors       = 0;
@@ -460,6 +461,7 @@ static void __bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
               type != BCH_DATA_JOURNAL);
 
        bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
+               new.dirty       = true;
                new.data_type   = type;
                checked_add(new.dirty_sectors, sectors);
        }));
@@ -487,13 +489,14 @@ void bch2_mark_metadata_bucket(struct bch_fs *c, struct bch_dev *ca,
                                                    true);
        } else {
                struct bucket *g;
-               struct bucket_mark old, new;
+               struct bucket_mark new;
 
                rcu_read_lock();
 
                g = bucket(ca, b);
-               old = bucket_cmpxchg(g, new, ({
-                       new.data_type = type;
+               bucket_cmpxchg(g, new, ({
+                       new.dirty       = true;
+                       new.data_type   = type;
                        checked_add(new.dirty_sectors, sectors);
                }));
 
@@ -546,6 +549,8 @@ static void bch2_mark_pointer(struct bch_fs *c,
        do {
                new.v.counter = old.v.counter = v;
 
+               new.dirty = true;
+
                /*
                 * Check this after reading bucket mark to guard against
                 * the allocator invalidating a bucket after we've already
@@ -709,6 +714,7 @@ static void bucket_set_stripe(struct bch_fs *c,
                BUG_ON(ptr_stale(ca, ptr));
 
                old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
+                       new.dirty                       = true;
                        new.stripe                      = enabled;
                        if (journal_seq) {
                                new.journal_seq_valid   = 1;
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index 107cb48e3929d84b1c548c767078c60646830e25..ee8c9e9a1f23c80074dd261c1adaa598c30c9691 100644
--- a/fs/bcachefs/buckets.h
+++ b/fs/bcachefs/buckets.h
@@ -182,6 +182,8 @@ static inline bool bucket_needs_journal_commit(struct bucket_mark m,
 
 struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *, struct bch_dev *);
 
+void bch2_dev_usage_from_buckets(struct bch_fs *, struct bch_dev *);
+
 static inline u64 __dev_buckets_available(struct bch_dev *ca,
                                          struct bch_dev_usage stats)
 {
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 9a862b19ce2265582cc49cc99a4d744836035bd3..0ad62429405296ac0edb3236fc1f598ad3a17f63 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -174,7 +174,9 @@ struct bch_fs *bch2_uuid_to_fs(__uuid_t uuid)
 static void __bch2_fs_read_only(struct bch_fs *c)
 {
        struct bch_dev *ca;
+       bool wrote;
        unsigned i;
+       int ret;
 
        bch2_rebalance_stop(c);
 
@@ -189,23 +191,36 @@ static void __bch2_fs_read_only(struct bch_fs *c)
         */
        bch2_journal_flush_all_pins(&c->journal);
 
-       for_each_member_device(ca, c, i)
-               bch2_dev_allocator_stop(ca);
+       do {
+               ret = bch2_alloc_write(c, false, &wrote);
+               if (ret) {
+                       bch2_fs_inconsistent(c, "error writing out alloc info %i", ret);
+                       break;
+               }
 
-       bch2_journal_flush_all_pins(&c->journal);
+               for_each_member_device(ca, c, i)
+                       bch2_dev_allocator_quiesce(c, ca);
 
-       /*
-        * We need to explicitly wait on btree interior updates to complete
-        * before stopping the journal, flushing all journal pins isn't
-        * sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree
-        * interior updates have to drop their journal pin before they're
-        * fully complete:
-        */
-       closure_wait_event(&c->btree_interior_update_wait,
-                          !bch2_btree_interior_updates_nr_pending(c));
+               bch2_journal_flush_all_pins(&c->journal);
+
+               /*
+                * We need to explicitly wait on btree interior updates to complete
+                * before stopping the journal, flushing all journal pins isn't
+                * sufficient, because in the BTREE_INTERIOR_UPDATING_ROOT case btree
+                * interior updates have to drop their journal pin before they're
+                * fully complete:
+                */
+               closure_wait_event(&c->btree_interior_update_wait,
+                                  !bch2_btree_interior_updates_nr_pending(c));
+       } while (wrote);
+
+       for_each_member_device(ca, c, i)
+               bch2_dev_allocator_stop(ca);
 
        bch2_fs_journal_stop(&c->journal);
 
+       /* XXX: mark super that alloc info is persistent */
+
        /*
         * the journal kicks off btree writes via reclaim - wait for in flight
         * writes after stopping journal: