bcachefs: New io_misc.c helpers
author Kent Overstreet <kent.overstreet@linux.dev>
Mon, 4 Sep 2023 09:38:30 +0000 (05:38 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:10:12 +0000 (17:10 -0400)
This pulls the non-VFS-specific parts of truncate and finsert/fcollapse
out of fs-io.c and moves them to io_misc.c.

This is prep work for logging these operations, to make them atomic in
the event of a crash.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/fs-io.c
fs/bcachefs/fs-io.h
fs/bcachefs/fs.c
fs/bcachefs/io_misc.c
fs/bcachefs/io_misc.h

index 0b0b3b0d6c7d6bea2344a192fcf12fb94a7494f4..b36513eb3d168ebae4cca17d94e786f9412e63cf 100644 (file)
@@ -391,33 +391,12 @@ static int bch2_extend(struct mnt_idmap *idmap,
        return bch2_setattr_nonsize(idmap, inode, iattr);
 }
 
-static int bch2_truncate_finish_fn(struct btree_trans *trans,
-                                  struct bch_inode_info *inode,
-                                  struct bch_inode_unpacked *bi,
-                                  void *p)
-{
-       bi->bi_flags &= ~BCH_INODE_I_SIZE_DIRTY;
-       return 0;
-}
-
-static int bch2_truncate_start_fn(struct btree_trans *trans,
-                                 struct bch_inode_info *inode,
-                                 struct bch_inode_unpacked *bi, void *p)
-{
-       u64 *new_i_size = p;
-
-       bi->bi_flags |= BCH_INODE_I_SIZE_DIRTY;
-       bi->bi_size = *new_i_size;
-       return 0;
-}
-
-int bch2_truncate(struct mnt_idmap *idmap,
+int bchfs_truncate(struct mnt_idmap *idmap,
                  struct bch_inode_info *inode, struct iattr *iattr)
 {
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct address_space *mapping = inode->v.i_mapping;
        struct bch_inode_unpacked inode_u;
-       u64 new_i_size = iattr->ia_size;
        s64 i_sectors_delta = 0;
        int ret = 0;
 
@@ -466,6 +445,8 @@ int bch2_truncate(struct mnt_idmap *idmap,
        if (unlikely(ret < 0))
                goto err;
 
+       truncate_setsize(&inode->v, iattr->ia_size);
+
        /*
         * When extending, we're going to write the new i_size to disk
         * immediately so we need to flush anything above the current on disk
@@ -487,32 +468,22 @@ int bch2_truncate(struct mnt_idmap *idmap,
        if (ret)
                goto err;
 
-       mutex_lock(&inode->ei_update_lock);
-       ret = bch2_write_inode(c, inode, bch2_truncate_start_fn,
-                              &new_i_size, 0);
-       mutex_unlock(&inode->ei_update_lock);
+       ret = bch2_truncate(c, inode_inum(inode), iattr->ia_size, &i_sectors_delta);
+       bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
 
-       if (unlikely(ret))
+       if (unlikely(ret)) {
+               /*
+                * If we error here, VFS caches are now inconsistent with btree
+                */
+               set_bit(EI_INODE_ERROR, &inode->ei_flags);
                goto err;
-
-       truncate_setsize(&inode->v, iattr->ia_size);
-
-       ret = bch2_fpunch(c, inode_inum(inode),
-                       round_up(iattr->ia_size, block_bytes(c)) >> 9,
-                       U64_MAX, &i_sectors_delta);
-       bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
+       }
 
        bch2_fs_inconsistent_on(!inode->v.i_size && inode->v.i_blocks &&
                                !bch2_journal_error(&c->journal), c,
                                "inode %lu truncated to 0 but i_blocks %llu (ondisk %lli)",
                                inode->v.i_ino, (u64) inode->v.i_blocks,
                                inode->ei_inode.bi_sectors);
-       if (unlikely(ret))
-               goto err;
-
-       mutex_lock(&inode->ei_update_lock);
-       ret = bch2_write_inode(c, inode, bch2_truncate_finish_fn, NULL, 0);
-       mutex_unlock(&inode->ei_update_lock);
 
        ret = bch2_setattr_nonsize(idmap, inode, iattr);
 err:
@@ -577,175 +548,33 @@ static long bchfs_fcollapse_finsert(struct bch_inode_info *inode,
 {
        struct bch_fs *c = inode->v.i_sb->s_fs_info;
        struct address_space *mapping = inode->v.i_mapping;
-       struct bkey_buf copy;
-       struct btree_trans trans;
-       struct btree_iter src, dst, del;
-       loff_t shift, new_size;
-       u64 src_start;
+       s64 i_sectors_delta = 0;
        int ret = 0;
 
        if ((offset | len) & (block_bytes(c) - 1))
                return -EINVAL;
 
        if (insert) {
-               if (inode->v.i_sb->s_maxbytes - inode->v.i_size < len)
-                       return -EFBIG;
-
                if (offset >= inode->v.i_size)
                        return -EINVAL;
-
-               src_start       = U64_MAX;
-               shift           = len;
        } else {
                if (offset + len >= inode->v.i_size)
                        return -EINVAL;
-
-               src_start       = offset + len;
-               shift           = -len;
        }
 
-       new_size = inode->v.i_size + shift;
-
        ret = bch2_write_invalidate_inode_pages_range(mapping, offset, LLONG_MAX);
        if (ret)
                return ret;
 
-       if (insert) {
-               i_size_write(&inode->v, new_size);
-               mutex_lock(&inode->ei_update_lock);
-               ret = bch2_write_inode_size(c, inode, new_size,
-                                           ATTR_MTIME|ATTR_CTIME);
-               mutex_unlock(&inode->ei_update_lock);
-       } else {
-               s64 i_sectors_delta = 0;
-
-               ret = bch2_fpunch(c, inode_inum(inode),
-                                 offset >> 9, (offset + len) >> 9,
-                                 &i_sectors_delta);
-               bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
+       if (insert)
+               i_size_write(&inode->v, inode->v.i_size + len);
 
-               if (ret)
-                       return ret;
-       }
-
-       bch2_bkey_buf_init(&copy);
-       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
-       bch2_trans_iter_init(&trans, &src, BTREE_ID_extents,
-                       POS(inode->v.i_ino, src_start >> 9),
-                       BTREE_ITER_INTENT);
-       bch2_trans_copy_iter(&dst, &src);
-       bch2_trans_copy_iter(&del, &src);
-
-       while (ret == 0 ||
-              bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
-               struct disk_reservation disk_res =
-                       bch2_disk_reservation_init(c, 0);
-               struct bkey_i delete;
-               struct bkey_s_c k;
-               struct bpos next_pos;
-               struct bpos move_pos = POS(inode->v.i_ino, offset >> 9);
-               struct bpos atomic_end;
-               unsigned trigger_flags = 0;
-               u32 snapshot;
-
-               bch2_trans_begin(&trans);
-
-               ret = bch2_subvolume_get_snapshot(&trans,
-                                       inode->ei_subvol, &snapshot);
-               if (ret)
-                       continue;
-
-               bch2_btree_iter_set_snapshot(&src, snapshot);
-               bch2_btree_iter_set_snapshot(&dst, snapshot);
-               bch2_btree_iter_set_snapshot(&del, snapshot);
-
-               bch2_trans_begin(&trans);
-
-               k = insert
-                       ? bch2_btree_iter_peek_prev(&src)
-                       : bch2_btree_iter_peek_upto(&src, POS(inode->v.i_ino, U64_MAX));
-               if ((ret = bkey_err(k)))
-                       continue;
-
-               if (!k.k || k.k->p.inode != inode->v.i_ino)
-                       break;
-
-               if (insert &&
-                   bkey_le(k.k->p, POS(inode->v.i_ino, offset >> 9)))
-                       break;
-reassemble:
-               bch2_bkey_buf_reassemble(&copy, c, k);
-
-               if (insert &&
-                   bkey_lt(bkey_start_pos(k.k), move_pos))
-                       bch2_cut_front(move_pos, copy.k);
-
-               copy.k->k.p.offset += shift >> 9;
-               bch2_btree_iter_set_pos(&dst, bkey_start_pos(&copy.k->k));
-
-               ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end);
-               if (ret)
-                       continue;
-
-               if (!bkey_eq(atomic_end, copy.k->k.p)) {
-                       if (insert) {
-                               move_pos = atomic_end;
-                               move_pos.offset -= shift >> 9;
-                               goto reassemble;
-                       } else {
-                               bch2_cut_back(atomic_end, copy.k);
-                       }
-               }
-
-               bkey_init(&delete.k);
-               delete.k.p = copy.k->k.p;
-               delete.k.size = copy.k->k.size;
-               delete.k.p.offset -= shift >> 9;
-               bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k));
-
-               next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p;
-
-               if (copy.k->k.size != k.k->size) {
-                       /* We might end up splitting compressed extents: */
-                       unsigned nr_ptrs =
-                               bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k));
-
-                       ret = bch2_disk_reservation_get(c, &disk_res,
-                                       copy.k->k.size, nr_ptrs,
-                                       BCH_DISK_RESERVATION_NOFAIL);
-                       BUG_ON(ret);
-               }
-
-               ret =   bch2_btree_iter_traverse(&del) ?:
-                       bch2_trans_update(&trans, &del, &delete, trigger_flags) ?:
-                       bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?:
-                       bch2_trans_commit(&trans, &disk_res, NULL,
-                                         BTREE_INSERT_NOFAIL);
-               bch2_disk_reservation_put(c, &disk_res);
-
-               if (!ret)
-                       bch2_btree_iter_set_pos(&src, next_pos);
-       }
-       bch2_trans_iter_exit(&trans, &del);
-       bch2_trans_iter_exit(&trans, &dst);
-       bch2_trans_iter_exit(&trans, &src);
-       bch2_trans_exit(&trans);
-       bch2_bkey_buf_exit(&copy, c);
-
-       if (ret)
-               return ret;
+       ret = bch2_fcollapse_finsert(c, inode_inum(inode), offset >> 9, len >> 9,
+                                    insert, &i_sectors_delta);
+       if (!ret && !insert)
+               i_size_write(&inode->v, inode->v.i_size - len);
+       bch2_i_sectors_acct(c, inode, NULL, i_sectors_delta);
 
-       mutex_lock(&inode->ei_update_lock);
-       if (!insert) {
-               i_size_write(&inode->v, new_size);
-               ret = bch2_write_inode_size(c, inode, new_size,
-                                           ATTR_MTIME|ATTR_CTIME);
-       } else {
-               /* We need an inode update to update bi_journal_seq for fsync: */
-               ret = bch2_write_inode(c, inode, inode_update_times_fn, NULL,
-                                      ATTR_MTIME|ATTR_CTIME);
-       }
-       mutex_unlock(&inode->ei_update_lock);
        return ret;
 }
 
index bc6e8439d40bedd4f0e7ff5e070361c50a2281fa..ca70346e68dc3d9196c85ce7768b1c9f53e6e792 100644 (file)
@@ -165,7 +165,7 @@ int __must_check bch2_write_inode_size(struct bch_fs *,
 
 int bch2_fsync(struct file *, loff_t, loff_t, int);
 
-int bch2_truncate(struct mnt_idmap *,
+int bchfs_truncate(struct mnt_idmap *,
                  struct bch_inode_info *, struct iattr *);
 long bch2_fallocate_dispatch(struct file *, int, loff_t, loff_t);
 
index 0648874d54f3e33b73e4314b6c0671ba7c969193..0def3a57bd6d68f0c67e856b3d1dd1dcae3eb11c 100644 (file)
@@ -798,7 +798,7 @@ static int bch2_setattr(struct mnt_idmap *idmap,
                return ret;
 
        return iattr->ia_valid & ATTR_SIZE
-               ? bch2_truncate(idmap, inode, iattr)
+               ? bchfs_truncate(idmap, inode, iattr)
                : bch2_setattr_nonsize(idmap, inode, iattr);
 }
 
index c04e5dacfc8d1579eebaa0147a546fb1860e8c76..1afea613df4a9e476a25b3e33111223ec5ab9153 100644 (file)
@@ -9,7 +9,10 @@
 #include "btree_update.h"
 #include "buckets.h"
 #include "clock.h"
+#include "error.h"
 #include "extents.h"
+#include "extent_update.h"
+#include "inode.h"
 #include "io_misc.h"
 #include "io_write.h"
 #include "subvolume.h"
@@ -213,3 +216,226 @@ int bch2_fpunch(struct bch_fs *c, subvol_inum inum, u64 start, u64 end,
 
        return ret;
 }
+/*
+ * truncate_set_isize - set an inode's bi_size within the caller's transaction.
+ *
+ * @trans:      btree transaction to operate in
+ * @inum:       subvolume/inode number pair identifying the inode
+ * @new_i_size: value stored into bi_size
+ *
+ * Peeks the inode with BTREE_ITER_INTENT, overwrites bi_size and calls
+ * bch2_inode_write().  Nothing is committed here; the visible caller wraps
+ * this helper in commit_do().
+ *
+ * Returns bch2_inode_peek()'s result if it is nonzero, otherwise the result
+ * of bch2_inode_write().
+ */
+static int truncate_set_isize(struct btree_trans *trans,
+                             subvol_inum inum,
+                             u64 new_i_size)
+{
+       struct btree_iter iter = { NULL }; /* initialised up front: the exit below runs even when the peek fails */
+       struct bch_inode_unpacked inode_u;
+       int ret;
+
+       ret   = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT) ?:
+               (inode_u.bi_size = new_i_size, 0) ?: /* comma expression: assign, then yield 0 so the chain continues */
+               bch2_inode_write(trans, &iter, &inode_u);
+
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+/*
+ * bch2_truncate - set an inode's size and punch everything past it.
+ *
+ * @c:               filesystem
+ * @inum:            subvolume/inode number pair
+ * @new_i_size:      new size; shifted right by 9 after rounding up to
+ *                   block_bytes(c) to form the starting extent offset
+ * @i_sectors_delta: forwarded unchanged to bch2_fpunch_at()
+ *
+ * First commits truncate_set_isize() via commit_do() with
+ * BTREE_INSERT_NOFAIL, then calls bch2_fpunch_at() from the rounded-up new
+ * size to U64_MAX.  A transaction-restart error from bch2_fpunch_at() is
+ * turned into 0.  Any other nonzero result is reported through
+ * bch2_fs_fatal_err_on() and returned.
+ *
+ * NOTE(review): @i_sectors_delta is declared u64 * here, but it is handed to
+ * bch2_fpunch_at(), whose prototype in io_misc.h takes s64 *, and the caller
+ * visible in fs-io.c passes the address of an s64.  Confirm whether the
+ * parameter should be s64 *.
+ */
+int bch2_truncate(struct bch_fs *c, subvol_inum inum, u64 new_i_size, u64 *i_sectors_delta)
+{
+       struct btree_trans trans;
+       struct btree_iter fpunch_iter;
+       int ret;
+
+       bch2_trans_init(&trans, c, BTREE_ITER_MAX, 1024);
+       bch2_trans_iter_init(&trans, &fpunch_iter, BTREE_ID_extents,
+                            POS(inum.inum, round_up(new_i_size, block_bytes(c)) >> 9),
+                            BTREE_ITER_INTENT);
+
+       ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL,
+                       truncate_set_isize(&trans, inum, new_i_size));
+       if (ret)
+               goto err;
+
+       ret = bch2_fpunch_at(&trans, &fpunch_iter, inum, U64_MAX, i_sectors_delta);
+       if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               ret = 0; /* a restart result is not reported to the caller */
+       if (ret)
+               goto err; /* NOTE(review): redundant, err: is the very next statement */
+err:
+       bch2_trans_iter_exit(&trans, &fpunch_iter);
+       bch2_trans_exit(&trans);
+
+       bch2_fs_fatal_err_on(ret, c, "%s: error truncating %u:%llu: %s",
+                           __func__, inum.subvol, inum.inum, bch2_err_str(ret));
+       return ret;
+}
+/*
+ * adjust_i_size - add a signed delta to an inode's bi_size and refresh times.
+ *
+ * @trans:  btree transaction to operate in
+ * @inum:   subvolume/inode number pair
+ * @offset: position of the operation; shifted left by 9 before use
+ *          (presumably sectors -> bytes; the visible VFS caller passes
+ *          byte values >> 9 down this path)
+ * @len:    signed size change, likewise shifted left by 9; the visible
+ *          callers pass +len (insert), -len (collapse) and 0
+ *
+ * For a positive @len the helper fails with -EFBIG when
+ * MAX_LFS_FILESIZE - bi_size < len, and with -EINVAL when
+ * offset >= bi_size.  Otherwise bi_size += len, bi_mtime and bi_ctime are set
+ * to bch2_current_time(), and the inode is written with bch2_inode_write().
+ * With @len == 0 the size is unchanged and only the times are refreshed.
+ *
+ * Returns 0 or a negative error.  A bch2_inode_peek() failure is returned
+ * directly, without calling bch2_trans_iter_exit().
+ *
+ * NOTE(review): `len <<= 9` is executed for negative @len as well; left
+ * shifting a negative signed value is undefined in ISO C - confirm the
+ * kernel build flags make this well defined.
+ */
+static int adjust_i_size(struct btree_trans *trans, subvol_inum inum, u64 offset, s64 len)
+{
+       struct btree_iter iter; /* NOTE(review): not initialised here, unlike truncate_set_isize(); relies on bch2_inode_peek() */
+       struct bch_inode_unpacked inode_u;
+       int ret;
+
+       offset  <<= 9;
+       len     <<= 9;
+
+       ret = bch2_inode_peek(trans, &iter, &inode_u, inum, BTREE_ITER_INTENT);
+       if (ret)
+               return ret;
+
+       if (len > 0) { /* the checks below apply only when the size grows */
+               if (MAX_LFS_FILESIZE - inode_u.bi_size < len) {
+                       ret = -EFBIG;
+                       goto err;
+               }
+
+               if (offset >= inode_u.bi_size) {
+                       ret = -EINVAL;
+                       goto err;
+               }
+       }
+
+       inode_u.bi_size += len;
+       inode_u.bi_mtime = inode_u.bi_ctime = bch2_current_time(trans->c);
+
+       ret = bch2_inode_write(trans, &iter, &inode_u);
+err:
+       bch2_trans_iter_exit(trans, &iter);
+       return ret;
+}
+/*
+ * bch2_fcollapse_finsert - shift a file's extents to insert or collapse a range.
+ *
+ * @c:               filesystem
+ * @inum:            subvolume/inode number pair
+ * @offset:          start of the range; used directly as an extent key offset
+ *                   (presumably 512-byte sectors - the visible VFS caller
+ *                   passes byte values >> 9)
+ * @len:             length of the range, same units as @offset
+ * @insert:          true to insert a range (shift extents up by @len), false
+ *                   to collapse one (shift extents down by @len)
+ * @i_sectors_delta: forwarded to bch2_fpunch_at() on the collapse path only
+ *
+ * Sequence, as written below:
+ *  - insert:   commit adjust_i_size(+len) first;
+ *    collapse: call bch2_fpunch_at() on [offset, offset + len), then position
+ *              the source iterator at offset + len.
+ *  - loop: look up the subvolume's snapshot, peek one extent (backwards with
+ *    peek_prev for insert, forwards with peek_upto for collapse), copy it,
+ *    move the copy's end position by the shift, trim it to the atomic end
+ *    reported by bch2_extent_atomic_end(), build a key named `delete`
+ *    (initialised with bkey_init()) covering the original range, and commit
+ *    both updates in one bch2_trans_commit().  On success the source
+ *    iterator advances to next_pos.  Transaction restarts retry the loop.
+ *  - afterwards: collapse commits adjust_i_size(-len); insert commits
+ *    adjust_i_size(0, 0), which only refreshes the inode.
+ *
+ * Returns 0 or the first non-restart error.  Iterators, the transaction and
+ * the key buffer are released on every path through the err label.
+ */
+int bch2_fcollapse_finsert(struct bch_fs *c, subvol_inum inum,
+                          u64 offset, u64 len, bool insert,
+                          s64 *i_sectors_delta)
+{
+       struct bkey_buf copy;
+       struct btree_trans trans;
+       struct btree_iter src = { NULL }, dst = { NULL }, del = { NULL };
+       s64 shift = insert ? len : -len; /* signed delta applied to key offsets: +len for insert, -len for collapse */
+       int ret = 0;
+
+       bch2_bkey_buf_init(&copy);
+       bch2_trans_init(&trans, c, 0, 1024);
+
+       bch2_trans_iter_init(&trans, &src, BTREE_ID_extents,
+                            POS(inum.inum, U64_MAX), /* insert starts here and peeks backwards; collapse repositions src below */
+                            BTREE_ITER_INTENT);
+       bch2_trans_copy_iter(&dst, &src);
+       bch2_trans_copy_iter(&del, &src);
+
+       if (insert) {
+               ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL,
+                               adjust_i_size(&trans, inum, offset, len));
+               if (ret)
+                       goto err;
+       } else {
+               bch2_btree_iter_set_pos(&src, POS(inum.inum, offset));
+
+               ret = bch2_fpunch_at(&trans, &src, inum, offset + len, i_sectors_delta);
+               if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+                       goto err;
+
+               bch2_btree_iter_set_pos(&src, POS(inum.inum, offset + len));
+       }
+
+       while (ret == 0 || bch2_err_matches(ret, BCH_ERR_transaction_restart)) { /* keep going on success or restart; any other error ends the loop */
+               struct disk_reservation disk_res =
+                       bch2_disk_reservation_init(c, 0);
+               struct bkey_i delete;
+               struct bkey_s_c k;
+               struct bpos next_pos;
+               struct bpos move_pos = POS(inum.inum, offset); /* insert only: lower bound at which the copied extent is cut */
+               struct bpos atomic_end;
+               unsigned trigger_flags = 0;
+               u32 snapshot;
+
+               bch2_trans_begin(&trans);
+
+               ret = bch2_subvolume_get_snapshot(&trans, inum.subvol, &snapshot);
+               if (ret)
+                       continue;
+
+               bch2_btree_iter_set_snapshot(&src, snapshot);
+               bch2_btree_iter_set_snapshot(&dst, snapshot);
+               bch2_btree_iter_set_snapshot(&del, snapshot);
+
+               bch2_trans_begin(&trans); /* NOTE(review): second bch2_trans_begin() in the same iteration - confirm intentional */
+
+               k = insert
+                       ? bch2_btree_iter_peek_prev(&src)
+                       : bch2_btree_iter_peek_upto(&src, POS(inum.inum, U64_MAX));
+               if ((ret = bkey_err(k)))
+                       continue;
+
+               if (!k.k || k.k->p.inode != inum.inum) /* no key, or the key belongs to a different inode: done */
+                       break;
+
+               if (insert &&
+                   bkey_le(k.k->p, POS(inum.inum, offset))) /* insert: extent ends at or before offset, nothing left to move */
+                       break;
+reassemble:
+               bch2_bkey_buf_reassemble(&copy, c, k);
+
+               if (insert &&
+                   bkey_lt(bkey_start_pos(k.k), move_pos))
+                       bch2_cut_front(move_pos, copy.k);
+
+               copy.k->k.p.offset += shift;
+               bch2_btree_iter_set_pos(&dst, bkey_start_pos(&copy.k->k));
+
+               ret = bch2_extent_atomic_end(&trans, &dst, copy.k, &atomic_end);
+               if (ret)
+                       continue;
+
+               if (!bkey_eq(atomic_end, copy.k->k.p)) {
+                       if (insert) {
+                               move_pos = atomic_end;
+                               move_pos.offset -= shift; /* map atomic_end back to pre-shift coordinates before re-cutting */
+                               goto reassemble;
+                       } else {
+                               bch2_cut_back(atomic_end, copy.k);
+                       }
+               }
+
+               bkey_init(&delete.k);
+               delete.k.p = copy.k->k.p;
+               delete.k.size = copy.k->k.size;
+               delete.k.p.offset -= shift; /* undo the shift: `delete` covers the range the copy was taken from */
+               bch2_btree_iter_set_pos(&del, bkey_start_pos(&delete.k));
+
+               next_pos = insert ? bkey_start_pos(&delete.k) : delete.k.p;
+
+               if (copy.k->k.size != k.k->size) {
+                       /* We might end up splitting compressed extents: */
+                       unsigned nr_ptrs =
+                               bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(copy.k));
+
+                       ret = bch2_disk_reservation_get(c, &disk_res,
+                                       copy.k->k.size, nr_ptrs,
+                                       BCH_DISK_RESERVATION_NOFAIL);
+                       BUG_ON(ret); /* requested with BCH_DISK_RESERVATION_NOFAIL, so a failure is treated as a bug */
+               }
+
+               ret =   bch2_btree_iter_traverse(&del) ?:
+                       bch2_trans_update(&trans, &del, &delete, trigger_flags) ?:
+                       bch2_trans_update(&trans, &dst, copy.k, trigger_flags) ?:
+                       bch2_trans_commit(&trans, &disk_res, NULL,
+                                         BTREE_INSERT_NOFAIL);
+               bch2_disk_reservation_put(c, &disk_res);
+
+               if (!ret)
+                       bch2_btree_iter_set_pos(&src, next_pos); /* advance only after a successful commit */
+       }
+
+       if (ret && !bch2_err_matches(ret, BCH_ERR_transaction_restart))
+               goto err;
+
+       if (!insert) {
+               ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL,
+                               adjust_i_size(&trans, inum, offset, -len)); /* -len is computed on a u64 and converted to the s64 parameter */
+       } else {
+               /* We need an inode update to update bi_journal_seq for fsync: */
+               ret = commit_do(&trans, NULL, NULL, BTREE_INSERT_NOFAIL,
+                               adjust_i_size(&trans, inum, 0, 0));
+       }
+err:
+       bch2_trans_iter_exit(&trans, &del);
+       bch2_trans_iter_exit(&trans, &dst);
+       bch2_trans_iter_exit(&trans, &src);
+       bch2_trans_exit(&trans);
+       bch2_bkey_buf_exit(&copy, c);
+       return ret;
+}
index 46e9ce3251d610dd983f6f734dc838f6a6968a4e..894a7a04ba4bf7ff3d38615af3781db011224ca4 100644 (file)
@@ -9,4 +9,7 @@ int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
                   subvol_inum, u64, s64 *);
 int bch2_fpunch(struct bch_fs *c, subvol_inum, u64, u64, s64 *);
 
+int bch2_truncate(struct bch_fs *, subvol_inum, u64, u64 *);
+int bch2_fcollapse_finsert(struct bch_fs *, subvol_inum, u64, u64, bool, s64 *);
+
 #endif /* _BCACHEFS_IO_MISC_H */