xfs: use xfs_defer_pending objects to recover intent items
authorDarrick J. Wong <djwong@kernel.org>
Wed, 22 Nov 2023 18:23:23 +0000 (10:23 -0800)
committerDarrick J. Wong <djwong@kernel.org>
Thu, 7 Dec 2023 02:45:14 +0000 (18:45 -0800)
One thing I never quite got around to doing is porting the log intent
item recovery code to reconstruct the deferred pending work state.  As a
result, each intent item open codes xfs_defer_finish_one in its recovery
method, because that's what the EFI code did before xfs_defer.c even
existed.

This is a gross thing to have left unfixed -- if an EFI cannot proceed
due to busy extents, we end up creating separate new EFIs for each
unfinished work item, which is a change in behavior from what runtime
would have done.

Worse yet, Long Li pointed out that there's a UAF in the recovery code.
The ->commit_pass2 function adds the intent item to the AIL and drops
the refcount.  The one remaining refcount is now owned by the recovery
mechanism (aka the log intent items in the AIL) with the intent of
giving the refcount to the intent done item in the ->iop_recover
function.

However, if something fails later in recovery, xlog_recover_finish will
walk the recovered intent items in the AIL and release them.  If the CIL
hasn't been pushed before that point (which is possible since we don't
force the log until later) then the intent done release will try to free
its associated intent, which has already been freed.

This patch starts to address this mess by having the ->commit_pass2
functions recreate the xfs_defer_pending state.  The next few patches
will fix the recovery functions.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
fs/xfs/libxfs/xfs_defer.c
fs/xfs/libxfs/xfs_defer.h
fs/xfs/libxfs/xfs_log_recover.h
fs/xfs/xfs_attr_item.c
fs/xfs/xfs_bmap_item.c
fs/xfs/xfs_extfree_item.c
fs/xfs/xfs_log.c
fs/xfs/xfs_log_priv.h
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_refcount_item.c
fs/xfs/xfs_rmap_item.c

index f71679ce23b95da4d07536581d15711eafa7fd46..363da37a8e7fbe3207a5da4bfa8c35eede55ff50 100644 (file)
@@ -245,23 +245,53 @@ xfs_defer_create_intents(
        return ret;
 }
 
-STATIC void
+static inline void
 xfs_defer_pending_abort(
+       struct xfs_mount                *mp,
+       struct xfs_defer_pending        *dfp)
+{
+       const struct xfs_defer_op_type  *ops = defer_op_types[dfp->dfp_type];
+
+       trace_xfs_defer_pending_abort(mp, dfp);
+
+       if (dfp->dfp_intent && !dfp->dfp_done) {
+               ops->abort_intent(dfp->dfp_intent);
+               dfp->dfp_intent = NULL;
+       }
+}
+
+static inline void
+xfs_defer_pending_cancel_work(
+       struct xfs_mount                *mp,
+       struct xfs_defer_pending        *dfp)
+{
+       const struct xfs_defer_op_type  *ops = defer_op_types[dfp->dfp_type];
+       struct list_head                *pwi;
+       struct list_head                *n;
+
+       trace_xfs_defer_cancel_list(mp, dfp);
+
+       list_del(&dfp->dfp_list);
+       list_for_each_safe(pwi, n, &dfp->dfp_work) {
+               list_del(pwi);
+               dfp->dfp_count--;
+               trace_xfs_defer_cancel_item(mp, dfp, pwi);
+               ops->cancel_item(pwi);
+       }
+       ASSERT(dfp->dfp_count == 0);
+       kmem_cache_free(xfs_defer_pending_cache, dfp);
+}
+
+STATIC void
+xfs_defer_pending_abort_list(
        struct xfs_mount                *mp,
        struct list_head                *dop_list)
 {
        struct xfs_defer_pending        *dfp;
-       const struct xfs_defer_op_type  *ops;
 
        /* Abort intent items that don't have a done item. */
-       list_for_each_entry(dfp, dop_list, dfp_list) {
-               ops = defer_op_types[dfp->dfp_type];
-               trace_xfs_defer_pending_abort(mp, dfp);
-               if (dfp->dfp_intent && !dfp->dfp_done) {
-                       ops->abort_intent(dfp->dfp_intent);
-                       dfp->dfp_intent = NULL;
-               }
-       }
+       list_for_each_entry(dfp, dop_list, dfp_list)
+               xfs_defer_pending_abort(mp, dfp);
 }
 
 /* Abort all the intents that were committed. */
@@ -271,7 +301,7 @@ xfs_defer_trans_abort(
        struct list_head                *dop_pending)
 {
        trace_xfs_defer_trans_abort(tp, _RET_IP_);
-       xfs_defer_pending_abort(tp->t_mountp, dop_pending);
+       xfs_defer_pending_abort_list(tp->t_mountp, dop_pending);
 }
 
 /*
@@ -389,27 +419,13 @@ xfs_defer_cancel_list(
 {
        struct xfs_defer_pending        *dfp;
        struct xfs_defer_pending        *pli;
-       struct list_head                *pwi;
-       struct list_head                *n;
-       const struct xfs_defer_op_type  *ops;
 
        /*
         * Free the pending items.  Caller should already have arranged
         * for the intent items to be released.
         */
-       list_for_each_entry_safe(dfp, pli, dop_list, dfp_list) {
-               ops = defer_op_types[dfp->dfp_type];
-               trace_xfs_defer_cancel_list(mp, dfp);
-               list_del(&dfp->dfp_list);
-               list_for_each_safe(pwi, n, &dfp->dfp_work) {
-                       list_del(pwi);
-                       dfp->dfp_count--;
-                       trace_xfs_defer_cancel_item(mp, dfp, pwi);
-                       ops->cancel_item(pwi);
-               }
-               ASSERT(dfp->dfp_count == 0);
-               kmem_cache_free(xfs_defer_pending_cache, dfp);
-       }
+       list_for_each_entry_safe(dfp, pli, dop_list, dfp_list)
+               xfs_defer_pending_cancel_work(mp, dfp);
 }
 
 /*
@@ -665,6 +681,39 @@ xfs_defer_add(
        dfp->dfp_count++;
 }
 
+/*
+ * Create a pending deferred work item to replay the recovered intent item
+ * and add it to the list.
+ */
+void
+xfs_defer_start_recovery(
+       struct xfs_log_item             *lip,
+       enum xfs_defer_ops_type         dfp_type,
+       struct list_head                *r_dfops)
+{
+       struct xfs_defer_pending        *dfp;
+
+       dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
+                       GFP_NOFS | __GFP_NOFAIL);
+       dfp->dfp_type = dfp_type;
+       dfp->dfp_intent = lip;
+       INIT_LIST_HEAD(&dfp->dfp_work);
+       list_add_tail(&dfp->dfp_list, r_dfops);
+}
+
+/*
+ * Cancel a deferred work item created to recover a log intent item.  @dfp
+ * will be freed after this function returns.
+ */
+void
+xfs_defer_cancel_recovery(
+       struct xfs_mount                *mp,
+       struct xfs_defer_pending        *dfp)
+{
+       xfs_defer_pending_abort(mp, dfp);
+       xfs_defer_pending_cancel_work(mp, dfp);
+}
+
 /*
  * Move deferred ops from one transaction to another and reset the source to
  * initial state. This is primarily used to carry state forward across
@@ -769,7 +818,7 @@ xfs_defer_ops_capture_abort(
 {
        unsigned short                  i;
 
-       xfs_defer_pending_abort(mp, &dfc->dfc_dfops);
+       xfs_defer_pending_abort_list(mp, &dfc->dfc_dfops);
        xfs_defer_cancel_list(mp, &dfc->dfc_dfops);
 
        for (i = 0; i < dfc->dfc_held.dr_bufs; i++)
index 8788ad5f6a731fbe0fa20faa641eeaa8679a0a98..5dce938ba3d594c0cee79530430eddc463dc2d19 100644 (file)
@@ -125,6 +125,11 @@ void xfs_defer_ops_capture_abort(struct xfs_mount *mp,
                struct xfs_defer_capture *d);
 void xfs_defer_resources_rele(struct xfs_defer_resources *dres);
 
+void xfs_defer_start_recovery(struct xfs_log_item *lip,
+               enum xfs_defer_ops_type dfp_type, struct list_head *r_dfops);
+void xfs_defer_cancel_recovery(struct xfs_mount *mp,
+               struct xfs_defer_pending *dfp);
+
 int __init xfs_defer_init_item_caches(void);
 void xfs_defer_destroy_item_caches(void);
 
index a5100a11faf9cd26943a21410f6f7c0d9fb34e96..271a4ce7375cb576ef3b9069a389292c24f50f1d 100644 (file)
@@ -153,4 +153,7 @@ xlog_recover_resv(const struct xfs_trans_res *r)
        return ret;
 }
 
+void xlog_recover_intent_item(struct xlog *log, struct xfs_log_item *lip,
+               xfs_lsn_t lsn, unsigned int dfp_type);
+
 #endif /* __XFS_LOG_RECOVER_H__ */
index 11e88a76a33c00a99ac107fa4b1e63d534268737..a32716b8cbbd2412a3ba1d9795b94bda3c59a363 100644 (file)
@@ -772,14 +772,8 @@ xlog_recover_attri_commit_pass2(
        attrip = xfs_attri_init(mp, nv);
        memcpy(&attrip->attri_format, attri_formatp, len);
 
-       /*
-        * The ATTRI has two references. One for the ATTRD and one for ATTRI to
-        * ensure it makes it into the AIL. Insert the ATTRI into the AIL
-        * directly and drop the ATTRI reference. Note that
-        * xfs_trans_ail_update() drops the AIL lock.
-        */
-       xfs_trans_ail_insert(log->l_ailp, &attrip->attri_item, lsn);
-       xfs_attri_release(attrip);
+       xlog_recover_intent_item(log, &attrip->attri_item, lsn,
+                       XFS_DEFER_OPS_TYPE_ATTR);
        xfs_attri_log_nameval_put(nv);
        return 0;
 }
index e736a0844c89d655e717fec39f96b939e764dc73..6cbae4fdf43fe4c5883e83c562286efb702e248e 100644 (file)
@@ -681,12 +681,9 @@ xlog_recover_bui_commit_pass2(
        buip = xfs_bui_init(mp);
        xfs_bui_copy_format(&buip->bui_format, bui_formatp);
        atomic_set(&buip->bui_next_extent, bui_formatp->bui_nextents);
-       /*
-        * Insert the intent into the AIL directly and drop one reference so
-        * that finishing or canceling the work will drop the other.
-        */
-       xfs_trans_ail_insert(log->l_ailp, &buip->bui_item, lsn);
-       xfs_bui_release(buip);
+
+       xlog_recover_intent_item(log, &buip->bui_item, lsn,
+                       XFS_DEFER_OPS_TYPE_BMAP);
        return 0;
 }
 
index 3fa8789820ad9d4e5014aa4e00b1e4c3dc3ee874..cf0ddeb7058081a6b050ff133fc72f62b279e5f6 100644 (file)
@@ -820,12 +820,9 @@ xlog_recover_efi_commit_pass2(
                return error;
        }
        atomic_set(&efip->efi_next_extent, efi_formatp->efi_nextents);
-       /*
-        * Insert the intent into the AIL directly and drop one reference so
-        * that finishing or canceling the work will drop the other.
-        */
-       xfs_trans_ail_insert(log->l_ailp, &efip->efi_item, lsn);
-       xfs_efi_release(efip);
+
+       xlog_recover_intent_item(log, &efip->efi_item, lsn,
+                       XFS_DEFER_OPS_TYPE_FREE);
        return 0;
 }
 
index ee206facf0dc065d4328007f7f32c089989c9c11..a1650fc81382f953c515a0a302243cd7361ad03f 100644 (file)
@@ -1542,6 +1542,7 @@ xlog_alloc_log(
        log->l_covered_state = XLOG_STATE_COVER_IDLE;
        set_bit(XLOG_ACTIVE_RECOVERY, &log->l_opstate);
        INIT_DELAYED_WORK(&log->l_work, xfs_log_worker);
+       INIT_LIST_HEAD(&log->r_dfops);
 
        log->l_prev_block  = -1;
        /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
index fa3ad1d7b31c85b1196fc1b93ee416f21ab5e98b..e30c06ec20e33bdf0f86c94433be1e54d13964d0 100644 (file)
@@ -407,6 +407,7 @@ struct xlog {
        long                    l_opstate;      /* operational state */
        uint                    l_quotaoffs_flag; /* XFS_DQ_*, for QUOTAOFFs */
        struct list_head        *l_buf_cancel_table;
+       struct list_head        r_dfops;        /* recovered log intent items */
        int                     l_iclog_hsize;  /* size of iclog header */
        int                     l_iclog_heads;  /* # of iclog header sectors */
        uint                    l_sectBBsize;   /* sector size in BBs (2^n) */
index a1e18b24971a28eedcf79b4d43b16778b42133f8..b9d2152a2bad61f939b78c9d76f8419d4ad370bd 100644 (file)
@@ -1723,30 +1723,24 @@ xlog_clear_stale_blocks(
  */
 void
 xlog_recover_release_intent(
-       struct xlog             *log,
-       unsigned short          intent_type,
-       uint64_t                intent_id)
+       struct xlog                     *log,
+       unsigned short                  intent_type,
+       uint64_t                        intent_id)
 {
-       struct xfs_ail_cursor   cur;
-       struct xfs_log_item     *lip;
-       struct xfs_ail          *ailp = log->l_ailp;
+       struct xfs_defer_pending        *dfp, *n;
+
+       list_for_each_entry_safe(dfp, n, &log->r_dfops, dfp_list) {
+               struct xfs_log_item     *lip = dfp->dfp_intent;
 
-       spin_lock(&ailp->ail_lock);
-       for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0); lip != NULL;
-            lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
                if (lip->li_type != intent_type)
                        continue;
                if (!lip->li_ops->iop_match(lip, intent_id))
                        continue;
 
-               spin_unlock(&ailp->ail_lock);
-               lip->li_ops->iop_release(lip);
-               spin_lock(&ailp->ail_lock);
-               break;
-       }
+               ASSERT(xlog_item_is_intent(lip));
 
-       xfs_trans_ail_cursor_done(&cur);
-       spin_unlock(&ailp->ail_lock);
+               xfs_defer_cancel_recovery(log->l_mp, dfp);
+       }
 }
 
 int
@@ -1939,6 +1933,29 @@ xlog_buf_readahead(
                xfs_buf_readahead(log->l_mp->m_ddev_targp, blkno, len, ops);
 }
 
+/*
+ * Create a deferred work structure for resuming and tracking the progress of a
+ * log intent item that was found during recovery.
+ */
+void
+xlog_recover_intent_item(
+       struct xlog                     *log,
+       struct xfs_log_item             *lip,
+       xfs_lsn_t                       lsn,
+       unsigned int                    dfp_type)
+{
+       ASSERT(xlog_item_is_intent(lip));
+
+       xfs_defer_start_recovery(lip, dfp_type, &log->r_dfops);
+
+       /*
+        * Insert the intent into the AIL directly and drop one reference so
+        * that finishing or canceling the work will drop the other.
+        */
+       xfs_trans_ail_insert(log->l_ailp, lip, lsn);
+       lip->li_ops->iop_unpin(lip, 0);
+}
+
 STATIC int
 xlog_recover_items_pass2(
        struct xlog                     *log,
@@ -2533,29 +2550,22 @@ xlog_abort_defer_ops(
  */
 STATIC int
 xlog_recover_process_intents(
-       struct xlog             *log)
+       struct xlog                     *log)
 {
        LIST_HEAD(capture_list);
-       struct xfs_ail_cursor   cur;
-       struct xfs_log_item     *lip;
-       struct xfs_ail          *ailp;
-       int                     error = 0;
+       struct xfs_defer_pending        *dfp, *n;
+       int                             error = 0;
 #if defined(DEBUG) || defined(XFS_WARN)
-       xfs_lsn_t               last_lsn;
-#endif
+       xfs_lsn_t                       last_lsn;
 
-       ailp = log->l_ailp;
-       spin_lock(&ailp->ail_lock);
-#if defined(DEBUG) || defined(XFS_WARN)
        last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
 #endif
-       for (lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
-            lip != NULL;
-            lip = xfs_trans_ail_cursor_next(ailp, &cur)) {
-               const struct xfs_item_ops       *ops;
 
-               if (!xlog_item_is_intent(lip))
-                       break;
+       list_for_each_entry_safe(dfp, n, &log->r_dfops, dfp_list) {
+               struct xfs_log_item     *lip = dfp->dfp_intent;
+               const struct xfs_item_ops *ops = lip->li_ops;
+
+               ASSERT(xlog_item_is_intent(lip));
 
                /*
                 * We should never see a redo item with a LSN higher than
@@ -2573,19 +2583,22 @@ xlog_recover_process_intents(
                 * The recovery function can free the log item, so we must not
                 * access lip after it returns.
                 */
-               spin_unlock(&ailp->ail_lock);
-               ops = lip->li_ops;
                error = ops->iop_recover(lip, &capture_list);
-               spin_lock(&ailp->ail_lock);
                if (error) {
                        trace_xlog_intent_recovery_failed(log->l_mp, error,
                                        ops->iop_recover);
                        break;
                }
-       }
 
-       xfs_trans_ail_cursor_done(&cur);
-       spin_unlock(&ailp->ail_lock);
+               /*
+                * XXX: @lip could have been freed, so detach the log item from
+                * the pending item before freeing the pending item.  This does
+                * not fix the existing UAF bug that occurs if ->iop_recover
+                * fails after creating the intent done item.
+                */
+               dfp->dfp_intent = NULL;
+               xfs_defer_cancel_recovery(log->l_mp, dfp);
+       }
        if (error)
                goto err;
 
@@ -2606,27 +2619,15 @@ err:
  */
 STATIC void
 xlog_recover_cancel_intents(
-       struct xlog             *log)
+       struct xlog                     *log)
 {
-       struct xfs_log_item     *lip;
-       struct xfs_ail_cursor   cur;
-       struct xfs_ail          *ailp;
-
-       ailp = log->l_ailp;
-       spin_lock(&ailp->ail_lock);
-       lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
-       while (lip != NULL) {
-               if (!xlog_item_is_intent(lip))
-                       break;
+       struct xfs_defer_pending        *dfp, *n;
 
-               spin_unlock(&ailp->ail_lock);
-               lip->li_ops->iop_release(lip);
-               spin_lock(&ailp->ail_lock);
-               lip = xfs_trans_ail_cursor_next(ailp, &cur);
-       }
+       list_for_each_entry_safe(dfp, n, &log->r_dfops, dfp_list) {
+               ASSERT(xlog_item_is_intent(dfp->dfp_intent));
 
-       xfs_trans_ail_cursor_done(&cur);
-       spin_unlock(&ailp->ail_lock);
+               xfs_defer_cancel_recovery(log->l_mp, dfp);
+       }
 }
 
 /*
index 2d4444d61e98766e0b49f1187634b470f66334c9..b88cb2e982278562e5e72ba2c7c6ab78dc9d96e5 100644 (file)
@@ -696,12 +696,9 @@ xlog_recover_cui_commit_pass2(
        cuip = xfs_cui_init(mp, cui_formatp->cui_nextents);
        xfs_cui_copy_format(&cuip->cui_format, cui_formatp);
        atomic_set(&cuip->cui_next_extent, cui_formatp->cui_nextents);
-       /*
-        * Insert the intent into the AIL directly and drop one reference so
-        * that finishing or canceling the work will drop the other.
-        */
-       xfs_trans_ail_insert(log->l_ailp, &cuip->cui_item, lsn);
-       xfs_cui_release(cuip);
+
+       xlog_recover_intent_item(log, &cuip->cui_item, lsn,
+                       XFS_DEFER_OPS_TYPE_REFCOUNT);
        return 0;
 }
 
index 0e0e747028da2e8638114b3268ee87490eed5f97..c30d4a4a14b2a53723f34f91a3094c7e836bc323 100644 (file)
@@ -702,12 +702,9 @@ xlog_recover_rui_commit_pass2(
        ruip = xfs_rui_init(mp, rui_formatp->rui_nextents);
        xfs_rui_copy_format(&ruip->rui_format, rui_formatp);
        atomic_set(&ruip->rui_next_extent, rui_formatp->rui_nextents);
-       /*
-        * Insert the intent into the AIL directly and drop one reference so
-        * that finishing or canceling the work will drop the other.
-        */
-       xfs_trans_ail_insert(log->l_ailp, &ruip->rui_item, lsn);
-       xfs_rui_release(ruip);
+
+       xlog_recover_intent_item(log, &ruip->rui_item, lsn,
+                       XFS_DEFER_OPS_TYPE_RMAP);
        return 0;
 }