bcachefs: Mark superblocks transactionally
authorKent Overstreet <kent.overstreet@gmail.com>
Fri, 22 Jan 2021 22:56:34 +0000 (17:56 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:52 +0000 (17:08 -0400)
More work towards getting rid of the in-memory struct bucket: this patch
adds code for marking superblock and journal buckets via the btree, and
uses it in the device add and journal resize paths.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_background.h
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/journal.c
fs/bcachefs/super.c

index b306eed02a6d9fdec794a69d23a88415ffc38624..206134fec32099d9e27593374e4a0e0f22502ae5 100644 (file)
@@ -323,48 +323,36 @@ err:
        return ret;
 }
 
-int bch2_dev_alloc_write(struct bch_fs *c, struct bch_dev *ca, unsigned flags)
+int bch2_alloc_write(struct bch_fs *c, unsigned flags)
 {
        struct btree_trans trans;
        struct btree_iter *iter;
-       u64 first_bucket        = ca->mi.first_bucket;
-       u64 nbuckets            = ca->mi.nbuckets;
+       struct bch_dev *ca;
+       unsigned i;
        int ret = 0;
 
        bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
 
-       iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC,
-                                  POS(ca->dev_idx, first_bucket),
+       iter = bch2_trans_get_iter(&trans, BTREE_ID_ALLOC, POS_MIN,
                                   BTREE_ITER_SLOTS|BTREE_ITER_INTENT);
 
-       while (iter->pos.offset < nbuckets) {
-               bch2_trans_cond_resched(&trans);
-
-               ret = bch2_alloc_write_key(&trans, iter, flags);
-               if (ret)
-                       break;
-               bch2_btree_iter_next_slot(iter);
-       }
-
-       bch2_trans_exit(&trans);
-
-       return ret;
-}
+       for_each_member_device(ca, c, i) {
+               bch2_btree_iter_set_pos(iter,
+                       POS(ca->dev_idx, ca->mi.first_bucket));
 
-int bch2_alloc_write(struct bch_fs *c, unsigned flags)
-{
-       struct bch_dev *ca;
-       unsigned i;
-       int ret = 0;
+               while (iter->pos.offset < ca->mi.nbuckets) {
+                       bch2_trans_cond_resched(&trans);
 
-       for_each_member_device(ca, c, i) {
-               bch2_dev_alloc_write(c, ca, flags);
-               if (ret) {
-                       percpu_ref_put(&ca->io_ref);
-                       break;
+                       ret = bch2_alloc_write_key(&trans, iter, flags);
+                       if (ret) {
+                               percpu_ref_put(&ca->io_ref);
+                               goto err;
+                       }
+                       bch2_btree_iter_next_slot(iter);
                }
        }
-
+err:
+       bch2_trans_exit(&trans);
        return ret;
 }
 
index d10ff56e4de169fc5bbb63823f4a5c49cfa8fe1d..f60fcebff2cec9114b3984f96667ac67942335e2 100644 (file)
@@ -98,7 +98,6 @@ void bch2_dev_allocator_quiesce(struct bch_fs *, struct bch_dev *);
 void bch2_dev_allocator_stop(struct bch_dev *);
 int bch2_dev_allocator_start(struct bch_dev *);
 
-int bch2_dev_alloc_write(struct bch_fs *, struct bch_dev *, unsigned);
 int bch2_alloc_write(struct bch_fs *, unsigned);
 void bch2_fs_allocator_background_init(struct bch_fs *);
 
index c3d63a190154f31eba68460c238d796812b47851..1be527ab1416cdd941c3d4dbf6b7dbb46ca657eb 100644 (file)
@@ -2060,6 +2060,168 @@ int bch2_trans_mark_update(struct btree_trans *trans,
        return ret;
 }
 
+/*
+ * Queue, within @trans, an alloc key update that marks bucket @b of @ca as
+ * holding @sectors sectors of metadata of type @type.
+ *
+ * The bucket's dirty sector count is overwritten, not accumulated, so marking
+ * a bucket that already carries the same type and count is a no-op.
+ *
+ * Returns 0 on success (including the no-op case), the error from
+ * bch2_trans_kmalloc() or bch2_trans_start_alloc_update(), or -EIO when the
+ * bucket already holds a different data type or @sectors does not fit in a
+ * bucket.
+ */
+static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
+                                   struct bch_dev *ca, size_t b,
+                                   enum bch_data_type type,
+                                   unsigned sectors)
+{
+       struct bch_fs *c = trans->c;
+       struct btree_iter *iter;
+       struct bkey_alloc_unpacked u;
+       struct bkey_i_alloc *a;
+       struct bch_extent_ptr ptr = {
+               .dev = ca->dev_idx,
+               .offset = bucket_to_sector(ca, b),
+       };
+       int ret = 0;
+
+       /* Buffer for the new alloc key; allocated from the transaction. */
+       a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
+       ret = PTR_ERR_OR_ZERO(a);
+       if (ret)
+               return ret;
+
+       ret = bch2_trans_start_alloc_update(trans, &iter, &ptr, &u);
+       if (ret)
+               return ret;
+
+       if (u.data_type && u.data_type != type) {
+               bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+                       "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n"
+                       "while marking %s",
+                       iter->pos.inode, iter->pos.offset, u.gen,
+                       bch2_data_types[u.data_type],
+                       bch2_data_types[type],
+                       bch2_data_types[type]);
+               ret = -EIO;
+               goto out;
+       }
+
+       /*
+        * dirty_sectors is replaced below rather than added to, so only the
+        * new count has to fit in the bucket.  Checking the sum of the old
+        * and new counts would reject re-marking a bucket that is already
+        * marked (e.g. a full journal bucket).
+        */
+       if (sectors > ca->mi.bucket_size) {
+               bch2_fsck_err(c, FSCK_CAN_IGNORE|FSCK_NEED_FSCK,
+                       "bucket %llu:%llu gen %u data type %s sector count overflow: %u > %u\n"
+                       "while marking %s",
+                       iter->pos.inode, iter->pos.offset, u.gen,
+                       bch2_data_types[u.data_type ?: type],
+                       sectors, ca->mi.bucket_size,
+                       bch2_data_types[type]);
+               ret = -EIO;
+               goto out;
+       }
+
+       /* Already marked exactly as requested: nothing to write. */
+       if (u.data_type         == type &&
+           u.dirty_sectors     == sectors)
+               goto out;
+
+       u.data_type     = type;
+       u.dirty_sectors = sectors;
+
+       bkey_alloc_init(&a->k_i);
+       a->k.p = iter->pos;
+       bch2_alloc_pack(a, u);
+       bch2_trans_update(trans, iter, &a->k_i, 0);
+out:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+/*
+ * Mark bucket @b of @ca as containing @sectors sectors of @type metadata.
+ *
+ * Runs __bch2_trans_mark_metadata_bucket() through __bch2_trans_do() on the
+ * caller's transaction, passing @res as the disk reservation.  Returns
+ * whatever __bch2_trans_do() returns.
+ */
+int bch2_trans_mark_metadata_bucket(struct btree_trans *trans,
+                                   struct disk_reservation *res,
+                                   struct bch_dev *ca, size_t b,
+                                   enum bch_data_type type,
+                                   unsigned sectors)
+{
+       /*
+        * Forward the caller's type and sector count: superblock buckets
+        * must not be recorded as full journal buckets.
+        */
+       return __bch2_trans_do(trans, res, NULL, 0,
+                       __bch2_trans_mark_metadata_bucket(trans, ca, b, type,
+                                                       sectors));
+}
+
+/*
+ * Account the sector range [@start, @end) on @ca as @type metadata, one
+ * bucket at a time.
+ *
+ * @bucket and @bucket_sectors hold the bucket currently being accumulated and
+ * its running sector count; they persist across calls.  When the range moves
+ * into a different bucket, the previous bucket is marked (if it has any
+ * sectors) and the accumulator restarts at the new bucket.  The last bucket
+ * touched is left pending in @bucket / @bucket_sectors and is not marked by
+ * this function.
+ *
+ * Returns 0, or the first error from bch2_trans_mark_metadata_bucket().
+ */
+static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans,
+                                           struct disk_reservation *res,
+                                           struct bch_dev *ca,
+                                           u64 start, u64 end,
+                                           enum bch_data_type type,
+                                           u64 *bucket, unsigned *bucket_sectors)
+{
+       do {
+               u64 b = sector_to_bucket(ca, start);
+               /* Portion of the range that lies inside bucket b. */
+               unsigned sectors =
+                       min_t(u64, bucket_to_sector(ca, b + 1), end) - start;
+
+               if (b != *bucket) {
+                       if (*bucket_sectors) {
+                               /*
+                                * Scoped to the only place it is assigned, so
+                                * it can never be read uninitialized.
+                                */
+                               int ret = bch2_trans_mark_metadata_bucket(trans, res, ca,
+                                               *bucket, type, *bucket_sectors);
+
+                               if (ret)
+                                       return ret;
+                       }
+
+                       *bucket         = b;
+                       *bucket_sectors = 0;
+               }
+
+               *bucket_sectors += sectors;
+               start += sectors;
+       } while (start < end);
+
+       return 0;
+}
+
+/*
+ * Mark every superblock copy and every journal bucket of @ca.
+ *
+ * Superblock ranges are fed through bch2_trans_mark_metadata_sectors(), which
+ * accumulates per-bucket sector counts in cur_bucket / cur_sectors; the last
+ * pending bucket is marked here once all layout entries have been walked.
+ * Journal buckets are then each marked with ca->mi.bucket_size sectors.
+ *
+ * Returns 0, or the first error from a marking call.
+ */
+static int __bch2_trans_mark_dev_sb(struct btree_trans *trans,
+                            struct disk_reservation *res,
+                            struct bch_dev *ca)
+{
+       struct bch_sb_layout *layout = &ca->disk_sb.sb->layout;
+       u64 cur_bucket = 0;
+       unsigned cur_sectors = 0;
+       unsigned idx;
+       int ret = 0;
+
+       for (idx = 0; idx < layout->nr_superblocks; idx++) {
+               u64 sb_start = le64_to_cpu(layout->sb_offset[idx]);
+               u64 sb_end = sb_start + (1 << layout->sb_max_size_bits);
+
+               /* The sectors ahead of the default superblock are marked too. */
+               if (sb_start == BCH_SB_SECTOR)
+                       ret = bch2_trans_mark_metadata_sectors(trans, res, ca,
+                                               0, BCH_SB_SECTOR,
+                                               BCH_DATA_sb, &cur_bucket, &cur_sectors);
+               if (ret)
+                       goto out;
+
+               ret = bch2_trans_mark_metadata_sectors(trans, res, ca,
+                                     sb_start, sb_end,
+                                     BCH_DATA_sb, &cur_bucket, &cur_sectors);
+               if (ret)
+                       goto out;
+       }
+
+       /* Mark whatever is still pending in the accumulator. */
+       if (cur_sectors) {
+               ret = bch2_trans_mark_metadata_bucket(trans, res, ca,
+                               cur_bucket, BCH_DATA_sb, cur_sectors);
+               if (ret)
+                       goto out;
+       }
+
+       for (idx = 0; idx < ca->journal.nr; idx++) {
+               ret = bch2_trans_mark_metadata_bucket(trans, res, ca,
+                               ca->journal.buckets[idx],
+                               BCH_DATA_journal, ca->mi.bucket_size);
+               if (ret)
+                       goto out;
+       }
+out:
+       return ret;
+}
+
+/*
+ * Mark the superblock and journal buckets of @ca via the btree.
+ *
+ * Wraps __bch2_trans_mark_dev_sb() in bch2_trans_do(); "trans" is not
+ * declared in this function and is expected to be supplied by that macro.
+ * Returns the result of bch2_trans_do().
+ */
+int bch2_trans_mark_dev_sb(struct bch_fs *c,
+                          struct disk_reservation *res,
+                          struct bch_dev *ca)
+{
+       int ret;
+
+       ret = bch2_trans_do(c, res, NULL, 0,
+                       __bch2_trans_mark_dev_sb(&trans, res, ca));
+       return ret;
+}
+
 /* Disk reservations: */
 
 #define SECTORS_CACHE  1024
index 7eebae7c439df0682432793db98088fb08e79b8a..4103ea7e769a4c10823639ee32cc826d72c75383 100644 (file)
@@ -259,6 +259,12 @@ int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter,
                           struct bkey_i *insert, unsigned);
 void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage_online *);
 
+int bch2_trans_mark_metadata_bucket(struct btree_trans *,
+                       struct disk_reservation *, struct bch_dev *,
+                       size_t, enum bch_data_type, unsigned);
+int bch2_trans_mark_dev_sb(struct bch_fs *, struct disk_reservation *,
+                          struct bch_dev *);
+
 /* disk reservations: */
 
 static inline void bch2_disk_reservation_put(struct bch_fs *c,
index ecc3629bcd4c8dcd7dcd9196b6e05a87ccaf99b5..d4c5c6306928fac10b18656ec7266aa9466dd969 100644 (file)
@@ -9,6 +9,7 @@
 #include "alloc_foreground.h"
 #include "bkey_methods.h"
 #include "btree_gc.h"
+#include "btree_update.h"
 #include "buckets.h"
 #include "journal.h"
 #include "journal_io.h"
@@ -823,18 +824,28 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
                if (pos <= ja->cur_idx)
                        ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
 
-               bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_journal,
-                                         ca->mi.bucket_size,
-                                         gc_phase(GC_PHASE_SB),
-                                         0);
+               if (!c || new_fs)
+                       bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_journal,
+                                                 ca->mi.bucket_size,
+                                                 gc_phase(GC_PHASE_SB),
+                                                 0);
 
                if (c) {
                        spin_unlock(&c->journal.lock);
                        percpu_up_read(&c->mark_lock);
                }
 
+               if (c && !new_fs)
+                       ret = bch2_trans_do(c, NULL, NULL, BTREE_INSERT_NOFAIL,
+                               bch2_trans_mark_metadata_bucket(&trans, NULL, ca,
+                                               bucket, BCH_DATA_journal,
+                                               ca->mi.bucket_size));
+
                if (!new_fs)
                        bch2_open_bucket_put(c, ob);
+
+               if (ret)
+                       goto err;
        }
 err:
        bch2_sb_resize_journal(&ca->disk_sb,
index e04d68ceb55b80264c8bd1601d98092533d03d78..bdaea336be85f8ee5a04087c558fcc9ae5ff9301 100644 (file)
@@ -1220,13 +1220,6 @@ static int bch2_dev_attach_bdev(struct bch_fs *c, struct bch_sb_handle *sb)
        if (ret)
                return ret;
 
-       if (test_bit(BCH_FS_ALLOC_READ_DONE, &c->flags) &&
-           !percpu_u64_get(&ca->usage[0]->d[BCH_DATA_sb].buckets)) {
-               mutex_lock(&c->sb_lock);
-               bch2_mark_dev_superblock(ca->fs, ca, 0);
-               mutex_unlock(&c->sb_lock);
-       }
-
        bch2_dev_sysfs_online(c, ca);
 
        if (c->sb.nr_devices == 1)
@@ -1600,7 +1593,7 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
         * allocate the journal, reset all the marks, then remark after we
         * attach...
         */
-       bch2_mark_dev_superblock(ca->fs, ca, 0);
+       bch2_mark_dev_superblock(NULL, ca, 0);
 
        err = "journal alloc failed";
        ret = bch2_dev_journal_alloc(ca);
@@ -1659,15 +1652,13 @@ have_slot:
        ca->disk_sb.sb->dev_idx = dev_idx;
        bch2_dev_attach(c, ca, dev_idx);
 
-       bch2_mark_dev_superblock(c, ca, 0);
-
        bch2_write_super(c);
        mutex_unlock(&c->sb_lock);
 
-       err = "alloc write failed";
-       ret = bch2_dev_alloc_write(c, ca, 0);
+       err = "error marking superblock";
+       ret = bch2_trans_mark_dev_sb(c, NULL, ca);
        if (ret)
-               goto err;
+               goto err_late;
 
        if (ca->mi.state == BCH_MEMBER_STATE_RW) {
                err = __bch2_dev_read_write(c, ca);
@@ -1688,6 +1679,7 @@ err:
        bch_err(c, "Unable to add device: %s", err);
        return ret;
 err_late:
+       up_write(&c->state_lock);
        bch_err(c, "Error going rw after adding device: %s", err);
        return -EINVAL;
 }
@@ -1723,6 +1715,12 @@ int bch2_dev_online(struct bch_fs *c, const char *path)
        }
 
        ca = bch_dev_locked(c, dev_idx);
+
+       if (bch2_trans_mark_dev_sb(c, NULL, ca)) {
+               err = "bch2_trans_mark_dev_sb() error";
+               goto err;
+       }
+
        if (ca->mi.state == BCH_MEMBER_STATE_RW) {
                err = __bch2_dev_read_write(c, ca);
                if (err)