bcachefs: Add code to scan for/rewrite old btree nodes
author Kent Overstreet <kent.overstreet@gmail.com>
Sun, 14 Mar 2021 23:01:14 +0000 (19:01 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:54 +0000 (17:08 -0400)
This adds a new data job type to scan for btree nodes in the old extent
format, and rewrite them.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs_ioctl.h
fs/bcachefs/btree_io.c
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update_interior.c
fs/bcachefs/move.c
fs/bcachefs/move.h
fs/bcachefs/movinggc.c
fs/bcachefs/rebalance.c

index 38c6ac96e12fbcd32ec99eac02e5c9a942fa2f97..1ef9907e07ad1ced838f641d234a234c9fbec51e 100644 (file)
@@ -171,10 +171,11 @@ struct bch_ioctl_disk_set_state {
 };
 
 enum bch_data_ops {
-       BCH_DATA_OP_SCRUB       = 0,
-       BCH_DATA_OP_REREPLICATE = 1,
-       BCH_DATA_OP_MIGRATE     = 2,
-       BCH_DATA_OP_NR          = 3,
+       BCH_DATA_OP_SCRUB               = 0,
+       BCH_DATA_OP_REREPLICATE         = 1,
+       BCH_DATA_OP_MIGRATE             = 2,
+       BCH_DATA_OP_REWRITE_OLD_NODES   = 3,
+       BCH_DATA_OP_NR                  = 4,
 };
 
 /*
@@ -187,11 +188,13 @@ enum bch_data_ops {
  * job. The file descriptor is O_CLOEXEC.
  */
 struct bch_ioctl_data {
-       __u32                   op;
+       __u16                   op;
+       __u8                    start_btree;
+       __u8                    end_btree;
        __u32                   flags;
 
-       struct bpos             start;
-       struct bpos             end;
+       struct bpos             start_pos;
+       struct bpos             end_pos;
 
        union {
        struct {
index 6e656ed6b32a90c1e247592ea74a1106c2135e90..eac51c39fc6ca3779f66bdba4178e6284c67da04 100644 (file)
@@ -920,6 +920,8 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
        unsigned u64s;
        int ret, retry_read = 0, write = READ;
 
+       b->version_ondisk = U16_MAX;
+
        iter = mempool_alloc(&c->fill_iter, GFP_NOIO);
        sort_iter_init(iter, b);
        iter->size = (btree_blocks(c) + 1) * 2;
@@ -1000,6 +1002,9 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
                        sectors = vstruct_sectors(bne, c->block_bits);
                }
 
+               b->version_ondisk = min(b->version_ondisk,
+                                       le16_to_cpu(i->version));
+
                ret = validate_bset(c, ca, b, i, sectors,
                                    READ, have_retry);
                if (ret)
index 80bb31a53339505b409ae7811c0792132191b2bb..55d8d815a04ad2d95034c9f581541273485924fe 100644 (file)
@@ -76,6 +76,7 @@ struct btree {
        u16                     written;
        u8                      nsets;
        u8                      nr_key_bits;
+       u16                     version_ondisk;
 
        struct bkey_format      format;
 
index 285365ba70122b0b41d4e49e6651490d793b3c17..989ba81207c908b483e2d53c62f2ee3716205e30 100644 (file)
@@ -286,6 +286,7 @@ static struct btree *bch2_btree_node_alloc(struct btree_update *as, unsigned lev
        bch2_bset_init_first(b, &b->data->keys);
        b->c.level      = level;
        b->c.btree_id   = as->btree_id;
+       b->version_ondisk = c->sb.version;
 
        memset(&b->nr, 0, sizeof(b->nr));
        b->data->magic = cpu_to_le64(bset_magic(c));
index 28e2125c12ed1c7efbfde7d747ce6bb8808296ef..72958b8670141ad4dfab4a8dcdea6ee955fee006 100644 (file)
@@ -531,7 +531,7 @@ static int __bch2_move_data(struct bch_fs *c,
 
        stats->data_type = BCH_DATA_user;
        stats->btree_id = btree_id;
-       stats->pos      = POS_MIN;
+       stats->pos      = start;
 
        iter = bch2_trans_get_iter(&trans, btree_id, start,
                                   BTREE_ITER_PREFETCH);
@@ -646,14 +646,15 @@ out:
 }
 
 int bch2_move_data(struct bch_fs *c,
+                  enum btree_id start_btree_id, struct bpos start_pos,
+                  enum btree_id end_btree_id,   struct bpos end_pos,
                   struct bch_ratelimit *rate,
                   struct write_point_specifier wp,
-                  struct bpos start,
-                  struct bpos end,
                   move_pred_fn pred, void *arg,
                   struct bch_move_stats *stats)
 {
        struct moving_context ctxt = { .stats = stats };
+       enum btree_id id;
        int ret;
 
        closure_init_stack(&ctxt.cl);
@@ -662,10 +663,23 @@ int bch2_move_data(struct bch_fs *c,
 
        stats->data_type = BCH_DATA_user;
 
-       ret =   __bch2_move_data(c, &ctxt, rate, wp, start, end,
-                                pred, arg, stats, BTREE_ID_EXTENTS) ?:
-               __bch2_move_data(c, &ctxt, rate, wp, start, end,
-                                pred, arg, stats, BTREE_ID_REFLINK);
+       for (id = start_btree_id;
+            id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
+            id++) {
+               stats->btree_id = id;
+
+               if (id != BTREE_ID_EXTENTS &&
+                   id != BTREE_ID_REFLINK)
+                       continue;
+
+               ret = __bch2_move_data(c, &ctxt, rate, wp,
+                                      id == start_btree_id ? start_pos : POS_MIN,
+                                      id == end_btree_id   ? end_pos   : POS_MAX,
+                                      pred, arg, stats, id);
+               if (ret)
+                       break;
+       }
+
 
        move_ctxt_wait_event(&ctxt, list_empty(&ctxt.reads));
        closure_sync(&ctxt.cl);
@@ -679,16 +693,22 @@ int bch2_move_data(struct bch_fs *c,
        return ret;
 }
 
+typedef enum data_cmd (*move_btree_pred)(struct bch_fs *, void *,
+                                        struct btree *, struct bch_io_opts *,
+                                        struct data_opts *);
+
 static int bch2_move_btree(struct bch_fs *c,
-                          move_pred_fn pred,
-                          void *arg,
+                          enum btree_id start_btree_id, struct bpos start_pos,
+                          enum btree_id end_btree_id,   struct bpos end_pos,
+                          move_btree_pred pred, void *arg,
                           struct bch_move_stats *stats)
 {
+       bool kthread = (current->flags & PF_KTHREAD) != 0;
        struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
        struct btree_trans trans;
        struct btree_iter *iter;
        struct btree *b;
-       unsigned id;
+       enum btree_id id;
        struct data_opts data_opts;
        enum data_cmd cmd;
        int ret = 0;
@@ -697,16 +717,24 @@ static int bch2_move_btree(struct bch_fs *c,
 
        stats->data_type = BCH_DATA_btree;
 
-       for (id = 0; id < BTREE_ID_NR; id++) {
+       for (id = start_btree_id;
+            id <= min_t(unsigned, end_btree_id, BTREE_ID_NR - 1);
+            id++) {
                stats->btree_id = id;
 
-               for_each_btree_node(&trans, iter, id, POS_MIN,
+               for_each_btree_node(&trans, iter, id,
+                                   id == start_btree_id ? start_pos : POS_MIN,
                                    BTREE_ITER_PREFETCH, b) {
+                       if (kthread && kthread_should_stop())
+                               goto out;
+
+                       if ((cmp_int(id, end_btree_id) ?:
+                            bkey_cmp(b->key.k.p, end_pos)) > 0)
+                               break;
+
                        stats->pos = iter->pos;
 
-                       switch ((cmd = pred(c, arg,
-                                           bkey_i_to_s_c(&b->key),
-                                           &io_opts, &data_opts))) {
+                       switch ((cmd = pred(c, arg, b, &io_opts, &data_opts))) {
                        case DATA_SKIP:
                                goto next;
                        case DATA_SCRUB:
@@ -726,7 +754,7 @@ next:
 
                ret = bch2_trans_iter_free(&trans, iter) ?: ret;
        }
-
+out:
        bch2_trans_exit(&trans);
 
        return ret;
@@ -785,6 +813,38 @@ static enum data_cmd migrate_pred(struct bch_fs *c, void *arg,
        return DATA_REWRITE;
 }
 
+static enum data_cmd rereplicate_btree_pred(struct bch_fs *c, void *arg,
+                                           struct btree *b,
+                                           struct bch_io_opts *io_opts,
+                                           struct data_opts *data_opts)
+{
+       return rereplicate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
+}
+
+static enum data_cmd migrate_btree_pred(struct bch_fs *c, void *arg,
+                                       struct btree *b,
+                                       struct bch_io_opts *io_opts,
+                                       struct data_opts *data_opts)
+{
+       return migrate_pred(c, arg, bkey_i_to_s_c(&b->key), io_opts, data_opts);
+}
+
+static enum data_cmd rewrite_old_nodes_pred(struct bch_fs *c, void *arg,
+                                           struct btree *b,
+                                           struct bch_io_opts *io_opts,
+                                           struct data_opts *data_opts)
+{
+       if (b->version_ondisk != c->sb.version ||
+           btree_node_need_rewrite(b)) {
+               data_opts->target               = 0;
+               data_opts->nr_replicas          = 1;
+               data_opts->btree_insert_flags   = 0;
+               return DATA_REWRITE;
+       }
+
+       return DATA_SKIP;
+}
+
 int bch2_data_job(struct bch_fs *c,
                  struct bch_move_stats *stats,
                  struct bch_ioctl_data op)
@@ -796,17 +856,20 @@ int bch2_data_job(struct bch_fs *c,
                stats->data_type = BCH_DATA_journal;
                ret = bch2_journal_flush_device_pins(&c->journal, -1);
 
-               ret = bch2_move_btree(c, rereplicate_pred, c, stats) ?: ret;
+               ret = bch2_move_btree(c,
+                                     op.start_btree,   op.start_pos,
+                                     op.end_btree,     op.end_pos,
+                                     rereplicate_btree_pred, c, stats) ?: ret;
 
                closure_wait_event(&c->btree_interior_update_wait,
                                   !bch2_btree_interior_updates_nr_pending(c));
 
                ret = bch2_replicas_gc2(c) ?: ret;
 
-               ret = bch2_move_data(c, NULL,
-                                    writepoint_hashed((unsigned long) current),
-                                    op.start,
-                                    op.end,
+               ret = bch2_move_data(c,
+                                    op.start_btree,    op.start_pos,
+                                    op.end_btree,      op.end_pos,
+                                    NULL, writepoint_hashed((unsigned long) current),
                                     rereplicate_pred, c, stats) ?: ret;
                ret = bch2_replicas_gc2(c) ?: ret;
                break;
@@ -817,16 +880,32 @@ int bch2_data_job(struct bch_fs *c,
                stats->data_type = BCH_DATA_journal;
                ret = bch2_journal_flush_device_pins(&c->journal, op.migrate.dev);
 
-               ret = bch2_move_btree(c, migrate_pred, &op, stats) ?: ret;
+               ret = bch2_move_btree(c,
+                                     op.start_btree,   op.start_pos,
+                                     op.end_btree,     op.end_pos,
+                                     migrate_btree_pred, &op, stats) ?: ret;
                ret = bch2_replicas_gc2(c) ?: ret;
 
-               ret = bch2_move_data(c, NULL,
-                                    writepoint_hashed((unsigned long) current),
-                                    op.start,
-                                    op.end,
+               ret = bch2_move_data(c,
+                                    op.start_btree,    op.start_pos,
+                                    op.end_btree,      op.end_pos,
+                                    NULL, writepoint_hashed((unsigned long) current),
                                     migrate_pred, &op, stats) ?: ret;
                ret = bch2_replicas_gc2(c) ?: ret;
                break;
+       case BCH_DATA_OP_REWRITE_OLD_NODES:
+               ret = bch2_move_btree(c,
+                                     op.start_btree,   op.start_pos,
+                                     op.end_btree,     op.end_pos,
+                                     rewrite_old_nodes_pred, &op, stats) ?: ret;
+
+               if (!ret) {
+                       mutex_lock(&c->sb_lock);
+                       c->disk_sb.sb->version_min = c->disk_sb.sb->version;
+                       bch2_write_super(c);
+                       mutex_unlock(&c->sb_lock);
+               }
+               break;
        default:
                ret = -EINVAL;
        }
index b04bc669226de68582c59bdd8cb13ac035b219b2..403ca695c8751306fac786e493ab8626042aa0ea 100644 (file)
@@ -52,9 +52,11 @@ typedef enum data_cmd (*move_pred_fn)(struct bch_fs *, void *,
                                struct bkey_s_c,
                                struct bch_io_opts *, struct data_opts *);
 
-int bch2_move_data(struct bch_fs *, struct bch_ratelimit *,
+int bch2_move_data(struct bch_fs *,
+                  enum btree_id, struct bpos,
+                  enum btree_id, struct bpos,
+                  struct bch_ratelimit *,
                   struct write_point_specifier,
-                  struct bpos, struct bpos,
                   move_pred_fn, void *,
                   struct bch_move_stats *);
 
index b61bbc18a0aae324348bd524ba8838fe0f266e78..65a8cd14ee75dd5d4a7c0495af379af550fb53c8 100644 (file)
@@ -219,9 +219,11 @@ static int bch2_copygc(struct bch_fs *c)
                        sizeof(h->data[0]),
                        bucket_offset_cmp, NULL);
 
-       ret = bch2_move_data(c, &c->copygc_pd.rate,
+       ret = bch2_move_data(c,
+                            0,                 POS_MIN,
+                            BTREE_ID_NR,       POS_MAX,
+                            &c->copygc_pd.rate,
                             writepoint_ptr(&c->copygc_write_point),
-                            POS_MIN, POS_MAX,
                             copygc_pred, NULL,
                             &move_stats);
 
index c75411af4622fbec8bacad9a158c7949573a4800..c83c12dbb0d2a02f0b91da5deeb90b8701759f42 100644 (file)
@@ -239,10 +239,11 @@ static int bch2_rebalance_thread(void *arg)
                rebalance_work_reset(c);
 
                bch2_move_data(c,
+                              0,               POS_MIN,
+                              BTREE_ID_NR,     POS_MAX,
                               /* ratelimiting disabled for now */
                               NULL, /*  &r->pd.rate, */
                               writepoint_ptr(&c->rebalance_write_point),
-                              POS_MIN, POS_MAX,
                               rebalance_pred, NULL,
                               &r->move_stats);
        }