xfs: introduce per-cpu CIL tracking structure
author Dave Chinner <dchinner@redhat.com>
Fri, 1 Jul 2022 16:13:52 +0000 (02:13 +1000)
committer Dave Chinner <david@fromorbit.com>
Fri, 1 Jul 2022 16:13:52 +0000 (02:13 +1000)
The CIL push lock is highly contended on larger machines, becoming a
hard bottleneck at about 700,000 transaction commits/s on >16p
machines. To address this, start moving the CIL tracking
infrastructure to utilise per-CPU structures.

We need to track the space used, the amount of log reservation space
reserved to write the CIL, the log items in the CIL and the busy
extents that need to be completed by the CIL commit.  This requires
a couple of per-cpu counters, an unordered per-cpu list and a
globally ordered per-cpu list.

Create a per-cpu structure to hold these and all the management
interfaces needed, as well as the hooks to handle hotplug CPUs.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
fs/xfs/xfs_log_cil.c
fs/xfs/xfs_log_priv.h
fs/xfs/xfs_super.c

index 880ea9536f827b2224c02e3cfa4891641649539e..c6d6322aabaa99f97807bc62d2aa5e7ee14c48c1 100644 (file)
@@ -1617,6 +1617,26 @@ out_shutdown:
        return 0;
 }
 
+/*
+ * Move dead percpu state to the relevant CIL context structures.
+ *
+ * We have to hold the CIL context lock (xc_ctx_lock) exclusively so that
+ * nothing is adding to or removing from the percpu state while we drain the
+ * cilpcp of the dying CPU; both of those operations run under that lock.
+ * For now this is only a stub: @cpu is unused and nothing is moved yet.
+ */
+void
+xlog_cil_pcp_dead(
+       struct xlog             *log,
+       unsigned int            cpu)
+{
+       struct xfs_cil          *cil = log->l_cilp;
+
+       down_write(&cil->xc_ctx_lock);
+       /* placeholder: draining the dead CPU's percpu state goes here */
+       up_write(&cil->xc_ctx_lock);
+}
+
 /*
  * Perform initial CIL structure initialisation.
  */
@@ -1640,6 +1660,11 @@ xlog_cil_init(
        if (!cil->xc_push_wq)
                goto out_destroy_cil;
 
+       cil->xc_log = log;
+       cil->xc_pcp = alloc_percpu(struct xlog_cil_pcp);
+       if (!cil->xc_pcp)
+               goto out_destroy_wq;
+
        INIT_LIST_HEAD(&cil->xc_cil);
        INIT_LIST_HEAD(&cil->xc_committing);
        spin_lock_init(&cil->xc_cil_lock);
@@ -1648,14 +1673,14 @@ xlog_cil_init(
        init_rwsem(&cil->xc_ctx_lock);
        init_waitqueue_head(&cil->xc_start_wait);
        init_waitqueue_head(&cil->xc_commit_wait);
-       cil->xc_log = log;
        log->l_cilp = cil;
 
        ctx = xlog_cil_ctx_alloc();
        xlog_cil_ctx_switch(cil, ctx);
-
        return 0;
 
+out_destroy_wq:
+       destroy_workqueue(cil->xc_push_wq);
 out_destroy_cil:
        kmem_free(cil);
        return -ENOMEM;
@@ -1675,6 +1700,7 @@ xlog_cil_destroy(
 
        ASSERT(list_empty(&cil->xc_cil));
        ASSERT(test_bit(XLOG_CIL_EMPTY, &cil->xc_flags));
+       free_percpu(cil->xc_pcp);
        destroy_workqueue(cil->xc_push_wq);
        kmem_free(cil);
 }
index 74436482c28d4c988c51f2fd8fecfc14585fb2b7..70483c78953efcd206508242910444dde085aad4 100644 (file)
@@ -231,6 +231,14 @@ struct xfs_cil_ctx {
        struct work_struct      push_work;
 };
 
+/*
+ * Per-cpu CIL tracking items, allocated per CPU in xlog_cil_init().
+ */
+struct xlog_cil_pcp {
+       struct list_head        busy_extents;   /* busy extents for CIL commit */
+       struct list_head        log_items;      /* log items in the CIL */
+};
+
 /*
  * Committed Item List structure
  *
@@ -266,6 +274,11 @@ struct xfs_cil {
        wait_queue_head_t       xc_start_wait;
        xfs_csn_t               xc_current_sequence;
        wait_queue_head_t       xc_push_wait;   /* background push throttle */
+
+       void __percpu           *xc_pcp;        /* percpu CIL structures */
+#ifdef CONFIG_HOTPLUG_CPU
+       struct list_head        xc_pcp_list;
+#endif
 } ____cacheline_aligned_in_smp;
 
 /* xc_flags bit values */
@@ -688,4 +701,9 @@ xlog_kvmalloc(
        return p;
 }
 
+/*
+ * CIL CPU dead notifier, called by xfs_cpu_dead() for each mounted filesystem.
+ */
+void xlog_cil_pcp_dead(struct xlog *log, unsigned int cpu);
+
 #endif /* __XFS_LOG_PRIV_H__ */
index aa977c7ea370b0fba0140bf9d9be8fc129aa9bb0..1e02ec67c3a087c5b3f369342cd624dea82e0113 100644 (file)
@@ -2213,6 +2213,7 @@ xfs_cpu_dead(
        list_for_each_entry_safe(mp, n, &xfs_mount_list, m_mount_list) {
                spin_unlock(&xfs_mount_list_lock);
                xfs_inodegc_cpu_dead(mp, cpu);
+               xlog_cil_pcp_dead(mp->m_log, cpu);
                spin_lock(&xfs_mount_list_lock);
        }
        spin_unlock(&xfs_mount_list_lock);