xfs: parallelize block preallocation garbage collection
author: Darrick J. Wong <djwong@kernel.org>
Sat, 23 Jan 2021 00:48:44 +0000 (16:48 -0800)
committer: Darrick J. Wong <djwong@kernel.org>
Wed, 3 Feb 2021 17:18:50 +0000 (09:18 -0800)
Split the block preallocation garbage collection work into per-AG work
items so that we can take advantage of parallelization.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
fs/xfs/xfs_icache.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_super.c

index 35c735514565a5b500d9fa3db84fde98bc174d24..460fa7b3a31c0496ae0ede3a905e28a59405295a 100644 (file)
@@ -1333,12 +1333,12 @@ xfs_inode_free_eofblocks(
  */
 static inline void
 xfs_blockgc_queue(
-       struct xfs_mount        *mp)
+       struct xfs_perag        *pag)
 {
        rcu_read_lock();
-       if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_BLOCKGC_TAG))
-               queue_delayed_work(mp->m_blockgc_workqueue,
-                                  &mp->m_blockgc_work,
+       if (radix_tree_tagged(&pag->pag_ici_root, XFS_ICI_BLOCKGC_TAG))
+               queue_delayed_work(pag->pag_mount->m_blockgc_workqueue,
+                                  &pag->pag_blockgc_work,
                                   msecs_to_jiffies(xfs_blockgc_secs * 1000));
        rcu_read_unlock();
 }
@@ -1380,7 +1380,7 @@ xfs_blockgc_set_iflag(
                spin_unlock(&ip->i_mount->m_perag_lock);
 
                /* kick off background trimming */
-               xfs_blockgc_queue(ip->i_mount);
+               xfs_blockgc_queue(pag);
 
                trace_xfs_perag_set_blockgc(ip->i_mount, pag->pag_agno, -1,
                                _RET_IP_);
@@ -1555,12 +1555,24 @@ xfs_inode_clear_cowblocks_tag(
        return xfs_blockgc_clear_iflag(ip, XFS_ICOWBLOCKS);
 }
 
+#define for_each_perag_tag(mp, next_agno, pag, tag) \
+       for ((next_agno) = 0, (pag) = xfs_perag_get_tag((mp), 0, (tag)); \
+               (pag) != NULL; \
+               (next_agno) = (pag)->pag_agno + 1, \
+               xfs_perag_put(pag), \
+               (pag) = xfs_perag_get_tag((mp), (next_agno), (tag)))
+
+
 /* Disable post-EOF and CoW block auto-reclamation. */
 void
 xfs_blockgc_stop(
        struct xfs_mount        *mp)
 {
-       cancel_delayed_work_sync(&mp->m_blockgc_work);
+       struct xfs_perag        *pag;
+       xfs_agnumber_t          agno;
+
+       for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
+               cancel_delayed_work_sync(&pag->pag_blockgc_work);
 }
 
 /* Enable post-EOF and CoW block auto-reclamation. */
@@ -1568,7 +1580,11 @@ void
 xfs_blockgc_start(
        struct xfs_mount        *mp)
 {
-       xfs_blockgc_queue(mp);
+       struct xfs_perag        *pag;
+       xfs_agnumber_t          agno;
+
+       for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
+               xfs_blockgc_queue(pag);
 }
 
 /* Scan one incore inode for block preallocations that we can remove. */
@@ -1595,18 +1611,20 @@ void
 xfs_blockgc_worker(
        struct work_struct      *work)
 {
-       struct xfs_mount        *mp = container_of(to_delayed_work(work),
-                                       struct xfs_mount, m_blockgc_work);
+       struct xfs_perag        *pag = container_of(to_delayed_work(work),
+                                       struct xfs_perag, pag_blockgc_work);
+       struct xfs_mount        *mp = pag->pag_mount;
        int                     error;
 
        if (!sb_start_write_trylock(mp->m_super))
                return;
-       error = xfs_inode_walk(mp, 0, xfs_blockgc_scan_inode, NULL,
+       error = xfs_inode_walk_ag(pag, 0, xfs_blockgc_scan_inode, NULL,
                        XFS_ICI_BLOCKGC_TAG);
        if (error)
-               xfs_info(mp, "preallocation gc worker failed, err=%d", error);
+               xfs_info(mp, "AG %u preallocation gc worker failed, err=%d",
+                               pag->pag_agno, error);
        sb_end_write(mp->m_super);
-       xfs_blockgc_queue(mp);
+       xfs_blockgc_queue(pag);
 }
 
 /*
index be9ce114527f56efb7236ed9501837f01facd92c..52370d0a3f4343a25795d571e4a17bd12118bd9c 100644 (file)
@@ -126,6 +126,7 @@ __xfs_free_perag(
 {
        struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
 
+       ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
        ASSERT(atomic_read(&pag->pag_ref) == 0);
        kmem_free(pag);
 }
@@ -146,6 +147,7 @@ xfs_free_perag(
                spin_unlock(&mp->m_perag_lock);
                ASSERT(pag);
                ASSERT(atomic_read(&pag->pag_ref) == 0);
+               cancel_delayed_work_sync(&pag->pag_blockgc_work);
                xfs_iunlink_destroy(pag);
                xfs_buf_hash_destroy(pag);
                call_rcu(&pag->rcu_head, __xfs_free_perag);
@@ -201,6 +203,7 @@ xfs_initialize_perag(
                pag->pag_agno = index;
                pag->pag_mount = mp;
                spin_lock_init(&pag->pag_ici_lock);
+               INIT_DELAYED_WORK(&pag->pag_blockgc_work, xfs_blockgc_worker);
                INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC);
 
                error = xfs_buf_hash_init(pag);
index 316e0d79cc40a9f0245950d9a622495019f07c39..659ad95fe3e0baa912d3fb0500ca5a31bc7906b6 100644 (file)
@@ -177,8 +177,6 @@ typedef struct xfs_mount {
        uint64_t                m_resblks_avail;/* available reserved blocks */
        uint64_t                m_resblks_save; /* reserved blks @ remount,ro */
        struct delayed_work     m_reclaim_work; /* background inode reclaim */
-       struct delayed_work     m_blockgc_work; /* background prealloc blocks
-                                                    trimming */
        struct xfs_kobj         m_kobj;
        struct xfs_kobj         m_error_kobj;
        struct xfs_kobj         m_error_meta_kobj;
@@ -367,6 +365,9 @@ typedef struct xfs_perag {
        /* Blocks reserved for the reverse mapping btree. */
        struct xfs_ag_resv      pag_rmapbt_resv;
 
+       /* background prealloc block trimming */
+       struct delayed_work     pag_blockgc_work;
+
        /* reference count */
        uint8_t                 pagf_refcount_level;
 
index ea942089d07419dd3d88d2d2e99a2241f45bd067..2b04818627e985ac247dc19372404767328352dd 100644 (file)
@@ -35,6 +35,7 @@
 #include "xfs_refcount_item.h"
 #include "xfs_bmap_item.h"
 #include "xfs_reflink.h"
+#include "xfs_pwork.h"
 
 #include <linux/magic.h>
 #include <linux/fs_context.h>
@@ -519,7 +520,7 @@ xfs_init_mount_workqueues(
                goto out_destroy_cil;
 
        mp->m_blockgc_workqueue = alloc_workqueue("xfs-blockgc/%s",
-                       XFS_WQFLAGS(WQ_FREEZABLE | WQ_MEM_RECLAIM),
+                       XFS_WQFLAGS(WQ_UNBOUND | WQ_FREEZABLE | WQ_MEM_RECLAIM),
                        0, mp->m_super->s_id);
        if (!mp->m_blockgc_workqueue)
                goto out_destroy_reclaim;
@@ -1842,7 +1843,6 @@ static int xfs_init_fs_context(
        mutex_init(&mp->m_growlock);
        INIT_WORK(&mp->m_flush_inodes_work, xfs_flush_inodes_worker);
        INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker);
-       INIT_DELAYED_WORK(&mp->m_blockgc_work, xfs_blockgc_worker);
        mp->m_kobj.kobject.kset = xfs_kset;
        /*
         * We don't create the finobt per-ag space reservation until after log