btrfs: qgroup: fix qgroup meta rsv leak for subvolume operations
author: Qu Wenruo <wqu@suse.com>
Fri, 24 Jul 2020 06:46:10 +0000 (14:46 +0800)
committer: David Sterba <dsterba@suse.com>
Wed, 7 Oct 2020 10:12:13 +0000 (12:12 +0200)
[BUG]
When quota is enabled for TEST_DEV, generic/013 sometimes fails like this:

  generic/013 14s ... _check_dmesg: something found in dmesg (see xfstests-dev/results//generic/013.dmesg)

And with the following metadata leak:

  BTRFS warning (device dm-3): qgroup 0/1370 has unreleased space, type 2 rsv 49152
  ------------[ cut here ]------------
  WARNING: CPU: 2 PID: 47912 at fs/btrfs/disk-io.c:4078 close_ctree+0x1dc/0x323 [btrfs]
  Call Trace:
   btrfs_put_super+0x15/0x17 [btrfs]
   generic_shutdown_super+0x72/0x110
   kill_anon_super+0x18/0x30
   btrfs_kill_super+0x17/0x30 [btrfs]
   deactivate_locked_super+0x3b/0xa0
   deactivate_super+0x40/0x50
   cleanup_mnt+0x135/0x190
   __cleanup_mnt+0x12/0x20
   task_work_run+0x64/0xb0
   __prepare_exit_to_usermode+0x1bc/0x1c0
   __syscall_return_slowpath+0x47/0x230
   do_syscall_64+0x64/0xb0
   entry_SYSCALL_64_after_hwframe+0x44/0xa9
  ---[ end trace a6cfd45ba80e4e06 ]---
  BTRFS error (device dm-3): qgroup reserved space leaked
  BTRFS info (device dm-3): disk space caching is enabled
  BTRFS info (device dm-3): has skinny extents

[CAUSE]
The qgroup preallocated meta rsv operations of that offending root are:

  btrfs_delayed_inode_reserve_metadata: rsv_meta_prealloc root=1370 num_bytes=131072
  btrfs_delayed_inode_reserve_metadata: rsv_meta_prealloc root=1370 num_bytes=131072
  btrfs_subvolume_reserve_metadata: rsv_meta_prealloc root=1370 num_bytes=49152
  btrfs_delayed_inode_release_metadata: convert_meta_prealloc root=1370 num_bytes=-131072
  btrfs_delayed_inode_release_metadata: convert_meta_prealloc root=1370 num_bytes=-131072

It's pretty obvious that we reserve qgroup meta rsv in
btrfs_subvolume_reserve_metadata(), but don't have the corresponding
release/convert calls in btrfs_subvolume_release_metadata().

This leads to the leakage.

[FIX]
To fix this bug, we should follow what we're doing in
btrfs_delalloc_reserve_metadata(), where we reserve qgroup space, and
add it to block_rsv->qgroup_rsv_reserved.

We should then free the qgroup reserved metadata space when releasing
the block_rsv.

To do this, we need to change btrfs_subvolume_release_metadata() to
accept a btrfs_root, record the qgroup_to_release number, and call
btrfs_qgroup_convert_reserved_meta() for it.

Fixes: 733e03a0b26a ("btrfs: qgroup: Split meta rsv type into meta_prealloc and meta_pertrans")
CC: stable@vger.kernel.org # 4.19+
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Qu Wenruo <wqu@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
fs/btrfs/ctree.h
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/root-tree.c

index eb7adc069926ebb87c782e2a969b8c9777f2a533..f9d4e0958e2e5394c172e80ff7297653201c94d7 100644 (file)
@@ -2622,7 +2622,7 @@ enum btrfs_flush_state {
 int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
                                     struct btrfs_block_rsv *rsv,
                                     int nitems, bool use_global_rsv);
-void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
+void btrfs_subvolume_release_metadata(struct btrfs_root *root,
                                      struct btrfs_block_rsv *rsv);
 void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes);
 
index a50a40f8bef204224a9c2d8f78ec031b63ea6f76..123521aa5595261c8a129a7c9238a6bac17d0fd1 100644 (file)
@@ -4051,7 +4051,7 @@ out_end_trans:
                err = ret;
        inode->i_flags |= S_DEAD;
 out_release:
-       btrfs_subvolume_release_metadata(fs_info, &block_rsv);
+       btrfs_subvolume_release_metadata(root, &block_rsv);
 out_up_write:
        up_write(&fs_info->subvol_sem);
        if (err) {
index a5355a16eabbb9af603d856d841812b6b840859e..3779a6c12184c4bd84a8db65ed2015cb0ebb0998 100644 (file)
@@ -618,7 +618,7 @@ static noinline int create_subvol(struct inode *dir,
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
-               btrfs_subvolume_release_metadata(fs_info, &block_rsv);
+               btrfs_subvolume_release_metadata(root, &block_rsv);
                goto fail_free;
        }
        trans->block_rsv = &block_rsv;
@@ -742,7 +742,7 @@ fail:
        kfree(root_item);
        trans->block_rsv = NULL;
        trans->bytes_reserved = 0;
-       btrfs_subvolume_release_metadata(fs_info, &block_rsv);
+       btrfs_subvolume_release_metadata(root, &block_rsv);
 
        err = btrfs_commit_transaction(trans);
        if (err && !ret)
@@ -856,7 +856,7 @@ fail:
        if (ret && pending_snapshot->snap)
                pending_snapshot->snap->anon_dev = 0;
        btrfs_put_root(pending_snapshot->snap);
-       btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
+       btrfs_subvolume_release_metadata(root, &pending_snapshot->block_rsv);
 free_pending:
        if (pending_snapshot->anon_dev)
                free_anon_bdev(pending_snapshot->anon_dev);
index c89697486366aca0e5b63aab04887e50f1afc429..702dc5441f039743b4ad1cb99d10624084898705 100644 (file)
@@ -512,11 +512,20 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
        if (ret && qgroup_num_bytes)
                btrfs_qgroup_free_meta_prealloc(root, qgroup_num_bytes);
 
+       if (!ret) {
+               spin_lock(&rsv->lock);
+               rsv->qgroup_rsv_reserved += qgroup_num_bytes;
+               spin_unlock(&rsv->lock);
+       }
        return ret;
 }
 
-void btrfs_subvolume_release_metadata(struct btrfs_fs_info *fs_info,
+void btrfs_subvolume_release_metadata(struct btrfs_root *root,
                                      struct btrfs_block_rsv *rsv)
 {
-       btrfs_block_rsv_release(fs_info, rsv, (u64)-1, NULL);
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       u64 qgroup_to_release;
+
+       btrfs_block_rsv_release(fs_info, rsv, (u64)-1, &qgroup_to_release);
+       btrfs_qgroup_convert_reserved_meta(root, qgroup_to_release);
 }