xfs: reduce transaction reservations with reflink
author Darrick J. Wong <djwong@kernel.org>
Tue, 26 Apr 2022 01:38:14 +0000 (18:38 -0700)
committer Darrick J. Wong <djwong@kernel.org>
Thu, 28 Apr 2022 17:25:42 +0000 (10:25 -0700)
Prior to the introduction of deferred refcount operations, reflink
would try to cram refcount btree updates into the same transaction as an
allocation or a free event.  Mainline XFS has never actually done that,
but we never refactored the transaction reservations to reflect that we
now do all refcount updates in separate transactions.  Fix this to
reduce the transaction reservation size even further, so that between
this patch and the previous one, we reduce the tr_write and tr_itruncate
sizes by 66%.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
fs/xfs/libxfs/xfs_log_rlimit.c
fs/xfs/libxfs/xfs_refcount.c
fs/xfs/libxfs/xfs_trans_resv.c
fs/xfs/libxfs/xfs_trans_resv.h

index 60fff8c6716fcc790c043860aaeebb01a6193c89..9975b93a7412d8cd041d4fc8cbcc57f9e4c8be00 100644 (file)
@@ -80,6 +80,18 @@ xfs_log_calc_trans_resv_for_minlogblocks(
                resv->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
        }
 
+       /*
+        * In the early days of reflink, we did not use deferred refcount
+        * update log items, so log reservations must be recomputed using the
+        * old calculations.
+        */
+       resv->tr_write.tr_logres =
+                       xfs_calc_write_reservation_minlogsize(mp);
+       resv->tr_itruncate.tr_logres =
+                       xfs_calc_itruncate_reservation_minlogsize(mp);
+       resv->tr_qm_dqalloc.tr_logres =
+                       xfs_calc_qm_dqalloc_reservation_minlogsize(mp);
+
        /* Put everything back the way it was.  This goes at the end. */
        mp->m_rmap_maxlevels = rmap_maxlevels;
 }
index a07ebaecba73eb01a8bc2abe34a7a0bafa6d82de..e53544d52ee2ae978b9e68d0aaeab9c5801de002 100644 (file)
@@ -886,8 +886,13 @@ xfs_refcount_still_have_space(
 {
        unsigned long                   overhead;
 
-       overhead = cur->bc_ag.refc.shape_changes *
-                       xfs_allocfree_log_count(cur->bc_mp, 1);
+       /*
+        * Worst case estimate: full splits of the free space and rmap btrees
+        * to handle each of the shape changes to the refcount btree.
+        */
+       overhead = xfs_allocfree_log_count(cur->bc_mp,
+                               cur->bc_ag.refc.shape_changes);
+       overhead += cur->bc_mp->m_refc_maxlevels;
        overhead *= cur->bc_mp->m_sb.sb_blocksize;
 
        /*
index 60be82cd491b2f06007e60ae74e83c8031f680a1..ab688929d884adf2286729c4dccd62f45fa7948b 100644 (file)
@@ -56,8 +56,7 @@ xfs_calc_buf_res(
  * Per-extent log reservation for the btree changes involved in freeing or
  * allocating an extent.  In classic XFS there were two trees that will be
  * modified (bnobt + cntbt).  With rmap enabled, there are three trees
- * (rmapbt).  With reflink, there are four trees (refcountbt).  The number of
- * blocks reserved is based on the formula:
+ * (rmapbt).  The number of blocks reserved is based on the formula:
  *
  * num trees * ((2 blocks/level * max depth) - 1)
  *
@@ -73,12 +72,23 @@ xfs_allocfree_log_count(
        blocks = num_ops * 2 * (2 * mp->m_alloc_maxlevels - 1);
        if (xfs_has_rmapbt(mp))
                blocks += num_ops * (2 * mp->m_rmap_maxlevels - 1);
-       if (xfs_has_reflink(mp))
-               blocks += num_ops * (2 * mp->m_refc_maxlevels - 1);
 
        return blocks;
 }
 
+/*
+ * Per-extent log reservation for refcount btree changes.  These are never done
+ * in the same transaction as an allocation or a free, so we compute them
+ * separately.
+ */
+static unsigned int
+xfs_refcountbt_block_count(
+       struct xfs_mount        *mp,
+       unsigned int            num_ops)
+{
+       return num_ops * (2 * mp->m_refc_maxlevels - 1);
+}
+
 /*
  * Logging inodes is really tricksy. They are logged in memory format,
  * which means that what we write into the log doesn't directly translate into
@@ -233,6 +243,28 @@ xfs_rtalloc_log_count(
  * register overflow from temporaries in the calculations.
  */
 
+/*
+ * Compute the log reservation required to handle the refcount update
+ * transaction.  Refcount updates are always done via deferred log items.
+ *
+ * This is calculated as:
+ * Data device refcount updates (t1):
+ *    the agfs of the ags containing the blocks: nr_ops * sector size
+ *    the refcount btrees: nr_ops * 1 trees * (2 * max depth - 1) * block size
+ */
+static unsigned int
+xfs_calc_refcountbt_reservation(
+       struct xfs_mount        *mp,
+       unsigned int            nr_ops)
+{
+       unsigned int            blksz = XFS_FSB_TO_B(mp, 1);
+
+       if (!xfs_has_reflink(mp))
+               return 0;
+
+       return xfs_calc_buf_res(nr_ops, mp->m_sb.sb_sectsize) +
+              xfs_calc_buf_res(xfs_refcountbt_block_count(mp, nr_ops), blksz);
+}
 
 /*
  * In a write transaction we can allocate a maximum of 2
@@ -255,12 +287,14 @@ xfs_rtalloc_log_count(
  *    the agfls of the ags containing the blocks: 2 * sector size
  *    the super block free block counter: sector size
  *    the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size
+ * And any refcount updates that happen in a separate transaction (t4).
  */
 STATIC uint
 xfs_calc_write_reservation(
-       struct xfs_mount        *mp)
+       struct xfs_mount        *mp,
+       bool                    for_minlogsize)
 {
-       unsigned int            t1, t2, t3;
+       unsigned int            t1, t2, t3, t4;
        unsigned int            blksz = XFS_FSB_TO_B(mp, 1);
 
        t1 = xfs_calc_inode_res(mp, 1) +
@@ -282,7 +316,36 @@ xfs_calc_write_reservation(
        t3 = xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
             xfs_calc_buf_res(xfs_allocfree_log_count(mp, 2), blksz);
 
-       return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
+       /*
+        * In the early days of reflink, we included enough reservation to log
+        * two refcountbt splits for each transaction.  The codebase runs
+        * refcountbt updates in separate transactions now, so to compute the
+        * minimum log size, add the refcountbtree splits back to t1 and t3 and
+        * do not account them separately as t4.  Reflink did not support
+        * realtime when the reservations were established, so no adjustment to
+        * t2 is needed.
+        */
+       if (for_minlogsize) {
+               unsigned int    adj = 0;
+
+               if (xfs_has_reflink(mp))
+                       adj = xfs_calc_buf_res(
+                                       xfs_refcountbt_block_count(mp, 2),
+                                       blksz);
+               t1 += adj;
+               t3 += adj;
+               return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
+       }
+
+       t4 = xfs_calc_refcountbt_reservation(mp, 1);
+       return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
+}
+
+unsigned int
+xfs_calc_write_reservation_minlogsize(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_write_reservation(mp, true);
 }
 
 /*
@@ -304,12 +367,14 @@ xfs_calc_write_reservation(
  *    the realtime summary: 2 exts * 1 block
  *    worst case split in allocation btrees per extent assuming 2 extents:
  *             2 exts * 2 trees * (2 * max depth - 1) * block size
+ * And any refcount updates that happen in a separate transaction (t4).
  */
 STATIC uint
 xfs_calc_itruncate_reservation(
-       struct xfs_mount        *mp)
+       struct xfs_mount        *mp,
+       bool                    for_minlogsize)
 {
-       unsigned int            t1, t2, t3;
+       unsigned int            t1, t2, t3, t4;
        unsigned int            blksz = XFS_FSB_TO_B(mp, 1);
 
        t1 = xfs_calc_inode_res(mp, 1) +
@@ -326,7 +391,33 @@ xfs_calc_itruncate_reservation(
                t3 = 0;
        }
 
-       return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
+       /*
+        * In the early days of reflink, we included enough reservation to log
+        * four refcountbt splits in the same transaction as bnobt/cntbt
+        * updates.  The codebase runs refcountbt updates in separate
+        * transactions now, so to compute the minimum log size, add the
+        * refcount btree splits back here and do not compute them separately
+        * as t4.  Reflink did not support realtime when the reservations were
+        * established, so do not adjust t3.
+        */
+       if (for_minlogsize) {
+               if (xfs_has_reflink(mp))
+                       t2 += xfs_calc_buf_res(
+                                       xfs_refcountbt_block_count(mp, 4),
+                                       blksz);
+
+               return XFS_DQUOT_LOGRES(mp) + max3(t1, t2, t3);
+       }
+
+       t4 = xfs_calc_refcountbt_reservation(mp, 2);
+       return XFS_DQUOT_LOGRES(mp) + max(t4, max3(t1, t2, t3));
+}
+
+unsigned int
+xfs_calc_itruncate_reservation_minlogsize(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_itruncate_reservation(mp, true);
 }
 
 /*
@@ -792,13 +883,21 @@ xfs_calc_qm_setqlim_reservation(void)
  */
 STATIC uint
 xfs_calc_qm_dqalloc_reservation(
-       struct xfs_mount        *mp)
+       struct xfs_mount        *mp,
+       bool                    for_minlogsize)
 {
-       return xfs_calc_write_reservation(mp) +
+       return xfs_calc_write_reservation(mp, for_minlogsize) +
                xfs_calc_buf_res(1,
                        XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
 }
 
+unsigned int
+xfs_calc_qm_dqalloc_reservation_minlogsize(
+       struct xfs_mount        *mp)
+{
+       return xfs_calc_qm_dqalloc_reservation(mp, true);
+}
+
 /*
  * Syncing the incore super block changes to disk.
  *     the super block to reflect the changes: sector size
@@ -821,11 +920,11 @@ xfs_trans_resv_calc(
         * The following transactions are logged in physical format and
         * require a permanent reservation on space.
         */
-       resp->tr_write.tr_logres = xfs_calc_write_reservation(mp);
+       resp->tr_write.tr_logres = xfs_calc_write_reservation(mp, false);
        resp->tr_write.tr_logcount = XFS_WRITE_LOG_COUNT;
        resp->tr_write.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 
-       resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp);
+       resp->tr_itruncate.tr_logres = xfs_calc_itruncate_reservation(mp, false);
        resp->tr_itruncate.tr_logcount = XFS_ITRUNCATE_LOG_COUNT;
        resp->tr_itruncate.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 
@@ -882,7 +981,8 @@ xfs_trans_resv_calc(
        resp->tr_growrtalloc.tr_logcount = XFS_DEFAULT_PERM_LOG_COUNT;
        resp->tr_growrtalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 
-       resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp);
+       resp->tr_qm_dqalloc.tr_logres = xfs_calc_qm_dqalloc_reservation(mp,
+                       false);
        resp->tr_qm_dqalloc.tr_logcount = XFS_WRITE_LOG_COUNT;
        resp->tr_qm_dqalloc.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
 
index fa330e646dc55b4425c995dd17022e897457991f..22b99042127a03ba276824256deb24b193711e1f 100644 (file)
@@ -98,4 +98,8 @@ struct xfs_trans_resv {
 void xfs_trans_resv_calc(struct xfs_mount *mp, struct xfs_trans_resv *resp);
 uint xfs_allocfree_log_count(struct xfs_mount *mp, uint num_ops);
 
+unsigned int xfs_calc_itruncate_reservation_minlogsize(struct xfs_mount *mp);
+unsigned int xfs_calc_write_reservation_minlogsize(struct xfs_mount *mp);
+unsigned int xfs_calc_qm_dqalloc_reservation_minlogsize(struct xfs_mount *mp);
+
 #endif /* __XFS_TRANS_RESV_H__ */