bcachefs: Change inode allocation code for snapshots
author: Kent Overstreet <kent.overstreet@gmail.com>
Mon, 15 Mar 2021 23:18:30 +0000 (19:18 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:58 +0000 (17:08 -0400)
For snapshots, when we allocate a new inode we want to allocate an inode
number that isn't in use in any other subvolume. We won't be able to use
BTREE_ITER_SLOTS for this; instead, inode allocation needs to change to
use BTREE_ITER_ALL_SNAPSHOTS.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/fs-common.c
fs/bcachefs/inode.c
fs/bcachefs/inode.h

index 83c2168ce480a92169b68a22c82ea9a64a8a02dd..281a6135e59977f7d057c8b7958a85ec87435dfd 100644 (file)
@@ -36,7 +36,7 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
        if (!name)
                new_inode->bi_flags |= BCH_INODE_UNLINKED;
 
-       inode_iter = bch2_inode_create(trans, new_inode);
+       inode_iter = bch2_inode_create(trans, new_inode, U32_MAX);
        ret = PTR_ERR_OR_ZERO(inode_iter);
        if (ret)
                goto err;
@@ -80,6 +80,10 @@ int bch2_create_trans(struct btree_trans *trans, u64 dir_inum,
                new_inode->bi_dir_offset        = dir_offset;
        }
 
+       /* XXX use bch2_btree_iter_set_snapshot() */
+       inode_iter->snapshot = U32_MAX;
+       bch2_btree_iter_set_pos(inode_iter, SPOS(0, new_inode->bi_inum, U32_MAX));
+
        ret = bch2_inode_write(trans, inode_iter, new_inode);
 err:
        bch2_trans_iter_put(trans, inode_iter);
index b72b3578bbe259bdd1ab8cd3e8c7c14e354c626f..e650c2a0d7d7507c3c210f7247779b105c69e234 100644 (file)
@@ -471,12 +471,13 @@ static inline u32 bkey_generation(struct bkey_s_c k)
 }
 
 struct btree_iter *bch2_inode_create(struct btree_trans *trans,
-                                    struct bch_inode_unpacked *inode_u)
+                                    struct bch_inode_unpacked *inode_u,
+                                    u32 snapshot)
 {
        struct bch_fs *c = trans->c;
        struct btree_iter *iter = NULL;
        struct bkey_s_c k;
-       u64 min, max, start, *hint;
+       u64 min, max, start, pos, *hint;
        int ret;
 
        u64 cpu = raw_smp_processor_id();
@@ -493,39 +494,70 @@ struct btree_iter *bch2_inode_create(struct btree_trans *trans,
 
        if (start >= max || start < min)
                start = min;
+
+       pos = start;
+       iter = bch2_trans_get_iter(trans, BTREE_ID_inodes, POS(0, pos),
+                                  BTREE_ITER_ALL_SNAPSHOTS|
+                                  BTREE_ITER_INTENT);
 again:
-       for_each_btree_key(trans, iter, BTREE_ID_inodes, POS(0, start),
-                          BTREE_ITER_SLOTS|BTREE_ITER_INTENT, k, ret) {
-               if (bkey_cmp(iter->pos, POS(0, max)) > 0)
-                       break;
+       while ((k = bch2_btree_iter_peek(iter)).k &&
+              !(ret = bkey_err(k)) &&
+              bkey_cmp(k.k->p, POS(0, max)) < 0) {
+               while (pos < iter->pos.offset) {
+                       if (!bch2_btree_key_cache_find(c, BTREE_ID_inodes, POS(0, pos)))
+                               goto found_slot;
+
+                       pos++;
+               }
+
+               if (k.k->p.snapshot == snapshot &&
+                   k.k->type != KEY_TYPE_inode &&
+                   !bch2_btree_key_cache_find(c, BTREE_ID_inodes, SPOS(0, pos, snapshot))) {
+                       bch2_btree_iter_next(iter);
+                       continue;
+               }
 
                /*
-                * There's a potential cache coherency issue with the btree key
-                * cache code here - we're iterating over the btree, skipping
-                * that cache. We should never see an empty slot that isn't
-                * actually empty due to a pending update in the key cache
-                * because the update that creates the inode isn't done with a
-                * cached iterator, but - better safe than sorry, check the
-                * cache before using a slot:
+                * We don't need to iterate over keys in every snapshot once
+                * we've found just one:
                 */
-               if (k.k->type != KEY_TYPE_inode &&
-                   !bch2_btree_key_cache_find(c, BTREE_ID_inodes, iter->pos))
+               pos = iter->pos.offset + 1;
+               bch2_btree_iter_set_pos(iter, POS(0, pos));
+       }
+
+       while (!ret && pos < max) {
+               if (!bch2_btree_key_cache_find(c, BTREE_ID_inodes, POS(0, pos)))
                        goto found_slot;
+
+               pos++;
        }
 
-       bch2_trans_iter_put(trans, iter);
+       if (!ret && start == min)
+               ret = -ENOSPC;
 
-       if (ret)
+       if (ret) {
+               bch2_trans_iter_put(trans, iter);
                return ERR_PTR(ret);
-
-       if (start != min) {
-               /* Retry from start */
-               start = min;
-               goto again;
        }
 
-       return ERR_PTR(-ENOSPC);
+       /* Retry from start */
+       pos = start = min;
+       bch2_btree_iter_set_pos(iter, POS(0, pos));
+       goto again;
 found_slot:
+       bch2_btree_iter_set_pos(iter, SPOS(0, pos, snapshot));
+       k = bch2_btree_iter_peek_slot(iter);
+       ret = bkey_err(k);
+       if (ret) {
+               bch2_trans_iter_put(trans, iter);
+               return ERR_PTR(ret);
+       }
+
+       /* We may have raced while the iterator wasn't pointing at pos: */
+       if (k.k->type == KEY_TYPE_inode ||
+           bch2_btree_key_cache_find(c, BTREE_ID_inodes, k.k->p))
+               goto again;
+
        *hint                   = k.k->p.offset;
        inode_u->bi_inum        = k.k->p.offset;
        inode_u->bi_generation  = bkey_generation(k);
index 6bad6dfb79891ccf941ded70f125bb4d33b61bfb..23c322d9a85b0e64a0fea7717a0a5618b2d47a98 100644 (file)
@@ -70,7 +70,7 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
                     struct bch_inode_unpacked *);
 
 struct btree_iter *bch2_inode_create(struct btree_trans *,
-                                    struct bch_inode_unpacked *);
+                                    struct bch_inode_unpacked *, u32);
 
 int bch2_inode_rm(struct bch_fs *, u64, bool);