bcachefs: Improve io option handling in data move path
author    Kent Overstreet <kent.overstreet@linux.dev>
          Sat, 21 Oct 2023 19:03:05 +0000 (15:03 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
          Tue, 31 Oct 2023 16:18:37 +0000 (12:18 -0400)
The data move path now correctly picks IO options when inodes in
different snapshots have different options applied.
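
This works by filling a per_snapshot_io_opts cache: bch2_move_get_io_opts()
populates it with the unpacked options of every inode version at the
extent's inode number (refilled only when the inode number changes) and
returns the entry whose snapshot is an ancestor of the extent's snapshot,
falling back to the filesystem-wide defaults when nothing matches or the
key has no snapshot (e.g. reflink btree keys). __bch2_evacuate_bucket()
switches to the simpler bch2_move_get_io_opts_one(), which looks up only
the inode at the extent's own snapshot.

Illustrative sketch only, not part of the patch: the lifecycle a move-path
caller is expected to follow with the new interface (transaction-restart
handling and the actual data move are elided; trans, iter, k, ret, btree_id
and start are assumed to be declared as in __bch2_move_data()):

	struct per_snapshot_io_opts snapshot_io_opts;
	struct bch_io_opts *io_opts;

	per_snapshot_io_opts_init(&snapshot_io_opts, c);

	for_each_btree_key(trans, iter, btree_id, start,
			   BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
		if (!bkey_extent_is_direct_data(k.k))
			continue;

		/* cached per inode number; refilled when the inum changes */
		io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k);
		ret = PTR_ERR_OR_ZERO(io_opts);
		if (ret)
			break;

		/* *io_opts is the inode version visible in k's snapshot */
		/* ... move the extent using *io_opts ... */
	}
	bch2_trans_iter_exit(trans, &iter);

	per_snapshot_io_opts_exit(&snapshot_io_opts);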

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/move.c
fs/bcachefs/move.h

diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 82f60c7883ba6b08396e763c4eeaea0328d07b44..38b076ff190631877b8ea86a1176c3afda151de1 100644
@@ -20,6 +20,7 @@
 #include "keylist.h"
 #include "move.h"
 #include "replicas.h"
+#include "snapshot.h"
 #include "super-io.h"
 #include "trace.h"
 
@@ -413,35 +414,87 @@ err:
        return ret;
 }
 
-static int lookup_inode(struct btree_trans *trans, struct bpos pos,
-                       struct bch_inode_unpacked *inode)
+struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *trans,
+                         struct per_snapshot_io_opts *io_opts,
+                         struct bkey_s_c extent_k)
+{
+       struct bch_fs *c = trans->c;
+       u32 restart_count = trans->restart_count;
+       int ret = 0;
+
+       if (io_opts->cur_inum != extent_k.k->p.inode) {
+               struct btree_iter iter;
+               struct bkey_s_c k;
+
+               io_opts->d.nr = 0;
+
+               for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, extent_k.k->p.inode),
+                                  BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
+                       if (k.k->p.offset != extent_k.k->p.inode)
+                               break;
+
+                       if (!bkey_is_inode(k.k))
+                               continue;
+
+                       struct bch_inode_unpacked inode;
+                       BUG_ON(bch2_inode_unpack(k, &inode));
+
+                       struct snapshot_io_opts_entry e = { .snapshot = k.k->p.snapshot };
+                       bch2_inode_opts_get(&e.io_opts, trans->c, &inode);
+
+                       ret = darray_push(&io_opts->d, e);
+                       if (ret)
+                               break;
+               }
+               bch2_trans_iter_exit(trans, &iter);
+               io_opts->cur_inum = extent_k.k->p.inode;
+       }
+
+       ret = ret ?: trans_was_restarted(trans, restart_count);
+       if (ret)
+               return ERR_PTR(ret);
+
+       if (extent_k.k->p.snapshot) {
+               struct snapshot_io_opts_entry *i;
+               darray_for_each(io_opts->d, i)
+                       if (bch2_snapshot_is_ancestor(c, extent_k.k->p.snapshot, i->snapshot))
+                               return &i->io_opts;
+       }
+
+       return &io_opts->fs_io_opts;
+}
+
+static int bch2_move_get_io_opts_one(struct btree_trans *trans,
+                                    struct bch_io_opts *io_opts,
+                                    struct bkey_s_c extent_k)
 {
        struct btree_iter iter;
        struct bkey_s_c k;
        int ret;
 
-       bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, pos,
-                            BTREE_ITER_ALL_SNAPSHOTS);
-       k = bch2_btree_iter_peek(&iter);
+       /* reflink btree? */
+       if (!extent_k.k->p.inode) {
+               *io_opts = bch2_opts_to_inode_opts(trans->c->opts);
+               return 0;
+       }
+
+       k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
+                              SPOS(0, extent_k.k->p.inode, extent_k.k->p.snapshot),
+                              BTREE_ITER_CACHED);
        ret = bkey_err(k);
-       if (ret)
-               goto err;
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               return ret;
 
-       if (!k.k || !bkey_eq(k.k->p, pos)) {
-               ret = -BCH_ERR_ENOENT_inode;
-               goto err;
+       if (!ret && bkey_is_inode(k.k)) {
+               struct bch_inode_unpacked inode;
+               bch2_inode_unpack(k, &inode);
+               bch2_inode_opts_get(io_opts, trans->c, &inode);
+       } else {
+               *io_opts = bch2_opts_to_inode_opts(trans->c->opts);
        }
 
-       ret = bkey_is_inode(k.k) ? 0 : -EIO;
-       if (ret)
-               goto err;
-
-       ret = bch2_inode_unpack(k, inode);
-       if (ret)
-               goto err;
-err:
        bch2_trans_iter_exit(trans, &iter);
-       return ret;
+       return 0;
 }
 
 static int move_ratelimit(struct btree_trans *trans,
@@ -492,30 +545,6 @@ static int move_ratelimit(struct btree_trans *trans,
        return 0;
 }
 
-static int move_get_io_opts(struct btree_trans *trans,
-                           struct bch_io_opts *io_opts,
-                           struct bkey_s_c k, u64 *cur_inum)
-{
-       struct bch_inode_unpacked inode;
-       int ret;
-
-       if (*cur_inum == k.k->p.inode)
-               return 0;
-
-       ret = lookup_inode(trans,
-                          SPOS(0, k.k->p.inode, k.k->p.snapshot),
-                          &inode);
-       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
-               return ret;
-
-       if (!ret)
-               bch2_inode_opts_get(io_opts, trans->c, &inode);
-       else
-               *io_opts = bch2_opts_to_inode_opts(trans->c->opts);
-       *cur_inum = k.k->p.inode;
-       return 0;
-}
-
 static int __bch2_move_data(struct moving_context *ctxt,
                            struct bpos start,
                            struct bpos end,
@@ -523,15 +552,16 @@ static int __bch2_move_data(struct moving_context *ctxt,
                            enum btree_id btree_id)
 {
        struct bch_fs *c = ctxt->c;
-       struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
+       struct per_snapshot_io_opts snapshot_io_opts;
+       struct bch_io_opts *io_opts;
        struct bkey_buf sk;
        struct btree_trans *trans = bch2_trans_get(c);
        struct btree_iter iter;
        struct bkey_s_c k;
        struct data_update_opts data_opts;
-       u64 cur_inum = U64_MAX;
        int ret = 0, ret2;
 
+       per_snapshot_io_opts_init(&snapshot_io_opts, c);
        bch2_bkey_buf_init(&sk);
 
        if (ctxt->stats) {
@@ -569,12 +599,13 @@ static int __bch2_move_data(struct moving_context *ctxt,
                if (!bkey_extent_is_direct_data(k.k))
                        goto next_nondata;
 
-               ret = move_get_io_opts(trans, &io_opts, k, &cur_inum);
+               io_opts = bch2_move_get_io_opts(trans, &snapshot_io_opts, k);
+               ret = PTR_ERR_OR_ZERO(io_opts);
                if (ret)
                        continue;
 
                memset(&data_opts, 0, sizeof(data_opts));
-               if (!pred(c, arg, k, &io_opts, &data_opts))
+               if (!pred(c, arg, k, io_opts, &data_opts))
                        goto next;
 
                /*
@@ -585,7 +616,7 @@ static int __bch2_move_data(struct moving_context *ctxt,
                k = bkey_i_to_s_c(sk.k);
 
                ret2 = bch2_move_extent(trans, &iter, ctxt, NULL,
-                                       io_opts, btree_id, k, data_opts);
+                                       *io_opts, btree_id, k, data_opts);
                if (ret2) {
                        if (bch2_err_matches(ret2, BCH_ERR_transaction_restart))
                                continue;
@@ -612,6 +643,7 @@ next_nondata:
        bch2_trans_iter_exit(trans, &iter);
        bch2_trans_put(trans);
        bch2_bkey_buf_exit(&sk, c);
+       per_snapshot_io_opts_exit(&snapshot_io_opts);
 
        return ret;
 }
@@ -673,7 +705,6 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
        struct data_update_opts data_opts;
        unsigned dirty_sectors, bucket_size;
        u64 fragmentation;
-       u64 cur_inum = U64_MAX;
        struct bpos bp_pos = POS_MIN;
        int ret = 0;
 
@@ -737,7 +768,7 @@ int __bch2_evacuate_bucket(struct btree_trans *trans,
                        bch2_bkey_buf_reassemble(&sk, c, k);
                        k = bkey_i_to_s_c(sk.k);
 
-                       ret = move_get_io_opts(trans, &io_opts, k, &cur_inum);
+                       ret = bch2_move_get_io_opts_one(trans, &io_opts, k);
                        if (ret) {
                                bch2_trans_iter_exit(trans, &iter);
                                continue;
diff --git a/fs/bcachefs/move.h b/fs/bcachefs/move.h
index cbdd58db8782b24e06f03fbecd23efeaf8aaace7..aa4b65c4f960b7e5dc7f8839dedbebb0f09b8ff1 100644
@@ -62,6 +62,32 @@ struct moving_io *bch2_moving_ctxt_next_pending_write(struct moving_context *);
 void bch2_moving_ctxt_do_pending_writes(struct moving_context *,
                                        struct btree_trans *);
 
+/* Inodes in different snapshots may have different IO options: */
+struct snapshot_io_opts_entry {
+       u32                     snapshot;
+       struct bch_io_opts      io_opts;
+};
+
+struct per_snapshot_io_opts {
+       u64                     cur_inum;
+       struct bch_io_opts      fs_io_opts;
+       DARRAY(struct snapshot_io_opts_entry) d;
+};
+
+static inline void per_snapshot_io_opts_init(struct per_snapshot_io_opts *io_opts, struct bch_fs *c)
+{
+       memset(io_opts, 0, sizeof(*io_opts));
+       io_opts->fs_io_opts = bch2_opts_to_inode_opts(c->opts);
+}
+
+static inline void per_snapshot_io_opts_exit(struct per_snapshot_io_opts *io_opts)
+{
+       darray_exit(&io_opts->d);
+}
+
+struct bch_io_opts *bch2_move_get_io_opts(struct btree_trans *,
+                               struct per_snapshot_io_opts *, struct bkey_s_c);
+
 int bch2_scan_old_btree_nodes(struct bch_fs *, struct bch_move_stats *);
 
 int bch2_move_data(struct bch_fs *,