xfs: Fix CIL throttle hang when CIL space used going backwards

author Dave Chinner <dchinner@redhat.com>

Fri, 18 Jun 2021 15:21:51 +0000 (08:21 -0700)

committer Darrick J. Wong <djwong@kernel.org>

Mon, 21 Jun 2021 17:06:14 +0000 (10:06 -0700)
author Dave Chinner <dchinner@redhat.com>
Fri, 18 Jun 2021 15:21:51 +0000 (08:21 -0700)
committer Darrick J. Wong <djwong@kernel.org>
Mon, 21 Jun 2021 17:06:14 +0000 (10:06 -0700)
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c

index fb69879e4b2ba7d03726b9b291a54b80251d24ed..14d1fefcbf4cda41da5fb993fb141d272b42ceb0 100644 (file)
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -74,14 +74,12 @@ xfs_buf_item_straddle(
  }
  
  /*
- * This returns the number of log iovecs needed to log the
- * given buf log item.
+ * Return the number of log iovecs and space needed to log the given buf log
+ * item segment.
   *
- * It calculates this as 1 iovec for the buf log format structure
- * and 1 for each stretch of non-contiguous chunks to be logged.
- * Contiguous chunks are logged in a single iovec.
- *
- * If the XFS_BLI_STALE flag has been set, then log nothing.
+ * It calculates this as 1 iovec for the buf log format structure and 1 for each
+ * stretch of non-contiguous chunks to be logged.  Contiguous chunks are logged
+ * in a single iovec.
   */
  STATIC void
  xfs_buf_item_size_segment(
@@ -168,11 +166,8 @@ slow_scan:
  }
  
  /*
- * This returns the number of log iovecs needed to log the given buf log item.
- *
- * It calculates this as 1 iovec for the buf log format structure and 1 for each
- * stretch of non-contiguous chunks to be logged.  Contiguous chunks are logged
- * in a single iovec.
+ * Return the number of log iovecs and space needed to log the given buf log
+ * item.
   *
   * Discontiguous buffers need a format structure per region that is being
   * logged. This makes the changes in the buffer appear to log recovery as though
@@ -182,7 +177,11 @@ slow_scan:
   * what ends up on disk.
   *
   * If the XFS_BLI_STALE flag has been set, then log nothing but the buf log
- * format structures.
+ * format structures. If the item has previously been logged and has dirty
+ * regions, we do not relog them in stale buffers. This has the effect of
+ * reducing the size of the relogged item by the amount of dirty data tracked
+ * by the log item. This can result in the committing transaction reducing the
+ * amount of space being consumed by the CIL.
   */
  STATIC void
  xfs_buf_item_size(
@@ -199,9 +198,9 @@ xfs_buf_item_size(
         ASSERT(atomic_read(&bip->bli_refcount) > 0);
         if (bip->bli_flags & XFS_BLI_STALE) {
                 /*
-                * The buffer is stale, so all we need to log
-                * is the buf log format structure with the
-                * cancel flag in it.
+                * The buffer is stale, so all we need to log is the buf log
+                * format structure with the cancel flag in it as we are never
+                * going to replay the changes tracked in the log item.
                  */
                 trace_xfs_buf_item_size_stale(bip);
                 ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
@@ -216,9 +215,9 @@ xfs_buf_item_size(
  
         if (bip->bli_flags & XFS_BLI_ORDERED) {
                 /*
-                * The buffer has been logged just to order it.
-                * It is not being included in the transaction
-                * commit, so no vectors are used at all.
+                * The buffer has been logged just to order it. It is not being
+                * included in the transaction commit, so no vectors are used at
+                * all.
                  */
                 trace_xfs_buf_item_size_ordered(bip);
                 *nvecs = XFS_LOG_VEC_ORDERED;
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c

index 6764d12342da3e0b6ee8570afdd2d9399107b405..5a2dd33020e2dc34efd51b57f59b38639bf3911b 100644 (file)
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -28,6 +28,20 @@ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)
         return container_of(lip, struct xfs_inode_log_item, ili_item);
  }
  
+/*
+ * The logged size of an inode fork is always the current size of the inode
+ * fork. This means that when an inode fork is relogged, the size of the logged
+ * region is determined by the current state, not the combination of the
+ * previously logged state + the current state. This is different relogging
+ * behaviour to most other log items which will retain the size of the
+ * previously logged changes when smaller regions are relogged.
+ *
+ * Hence for operations that remove data from the inode fork (e.g. shortform
+ * dir/attr remove, extent form extent removal, etc), the size of the relogged
+ * inode gets -smaller- rather than staying the same as the previously logged
+ * size, and this can result in the committing transaction reducing the amount
+ * of space being consumed by the CIL.
+ */
  STATIC void
  xfs_inode_item_data_fork_size(
         struct xfs_inode_log_item *iip,
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c

index 9d2fa84642894d5dd7fffa00af520da30887e746..903617e6d054c7402f6c013e52649499c10d1fcd 100644 (file)
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -670,9 +670,14 @@ xlog_cil_push_work(
         ASSERT(push_seq <= ctx->sequence);
  
         /*
-        * Wake up any background push waiters now this context is being pushed.
+        * As we are about to switch to a new, empty CIL context, we no longer
+        * need to throttle tasks on CIL space overruns. Wake any waiters that
+        * the hard push throttle may have caught so they can start committing
+        * to the new context. The cil->xc_push_lock provides the serialisation
+        * necessary for safely using the lockless waitqueue_active() check in
+        * this context.
          */
-       if (ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log))
+       if (waitqueue_active(&cil->xc_push_wait))
                 wake_up_all(&cil->xc_push_wait);
  
         /*
@@ -944,7 +949,7 @@ xlog_cil_push_background(
         ASSERT(!list_empty(&cil->xc_cil));
  
         /*
-        * don't do a background push if we haven't used up all the
+        * Don't do a background push if we haven't used up all the
          * space available yet.
          */
         if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) {
@@ -968,9 +973,16 @@ xlog_cil_push_background(
  
         /*
          * If we are well over the space limit, throttle the work that is being
-        * done until the push work on this context has begun.
+        * done until the push work on this context has begun. Enforce the hard
+        * throttle on all transaction commits once it has been activated, even
+        * if the committing transactions have resulted in the space usage
+        * dipping back down under the hard limit.
+        *
+        * The cil->xc_push_lock provides the serialisation necessary for safely
+        * using the lockless waitqueue_active() check in this context.
          */
-       if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log)) {
+       if (cil->xc_ctx->space_used >= XLOG_CIL_BLOCKING_SPACE_LIMIT(log) ||
+           waitqueue_active(&cil->xc_push_wait)) {
                 trace_xfs_log_cil_wait(log, cil->xc_ctx->ticket);
                 ASSERT(cil->xc_ctx->space_used < log->l_logsize);
                 xlog_wait(&cil->xc_push_wait, &cil->xc_push_lock);
author	Dave Chinner <dchinner@redhat.com>
	Fri, 18 Jun 2021 15:21:51 +0000 (08:21 -0700)
committer	Darrick J. Wong <djwong@kernel.org>
	Mon, 21 Jun 2021 17:06:14 +0000 (10:06 -0700)
fs/xfs/xfs_buf_item.c		patch \| blob \| history
fs/xfs/xfs_inode_item.c		patch \| blob \| history
fs/xfs/xfs_log_cil.c		patch \| blob \| history