xfs: collect errors from inodegc for unlinked inode recovery
authorDave Chinner <dchinner@redhat.com>
Mon, 5 Jun 2023 04:48:15 +0000 (14:48 +1000)
committerDave Chinner <david@fromorbit.com>
Mon, 5 Jun 2023 04:48:15 +0000 (14:48 +1000)
Unlinked list recovery requires errors removing the inode the from
the unlinked list get fed back to the main recovery loop. Now that
we offload the unlinking to the inodegc work, we don't get errors
being fed back when we trip over a corruption that prevents the
inode from being removed from the unlinked list.

This means we never clear the corrupt unlinked list bucket,
resulting in runtime operations eventually tripping over it and
shutting down.

Fix this by collecting inodegc worker errors and feed them
back to the flush caller. This is largely best effort - the only
context that really cares is log recovery, and it only flushes a
single inode at a time so we don't need complex synchronised
handling. Essentially the inodegc workers will capture the first
error that occurs and the next flush will gather them and clear
them. The flush itself will only report the first gathered error.

In the cases where callers can return errors, propagate the
collected inodegc flush error up the error handling chain.

In the case of inode unlinked list recovery, there are several
superfluous calls to flush queued unlinked inodes -
xlog_recover_iunlink_bucket() guarantees that it has flushed the
inodegc and collected errors before it returns. Hence nothing in the
calling path needs to run a flush, even when an error is returned.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Dave Chinner <david@fromorbit.com>
fs/xfs/xfs_icache.c
fs/xfs/xfs_icache.h
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_super.c
fs/xfs/xfs_trans.c

index 0f60e301eb1fd8b72a9a94a2f062d26ab8e3d19a..453890942d9f556424cac0a394945f05aa057ac3 100644 (file)
@@ -454,6 +454,27 @@ xfs_inodegc_queue_all(
        return ret;
 }
 
+/* Wait for all queued work and collect errors */
+static int
+xfs_inodegc_wait_all(
+       struct xfs_mount        *mp)
+{
+       int                     cpu;
+       int                     error = 0;
+
+       flush_workqueue(mp->m_inodegc_wq);
+       for_each_online_cpu(cpu) {
+               struct xfs_inodegc      *gc;
+
+               gc = per_cpu_ptr(mp->m_inodegc, cpu);
+               if (gc->error && !error)
+                       error = gc->error;
+               gc->error = 0;
+       }
+
+       return error;
+}
+
 /*
  * Check the validity of the inode we just found it the cache
  */
@@ -1491,15 +1512,14 @@ xfs_blockgc_free_space(
        if (error)
                return error;
 
-       xfs_inodegc_flush(mp);
-       return 0;
+       return xfs_inodegc_flush(mp);
 }
 
 /*
  * Reclaim all the free space that we can by scheduling the background blockgc
  * and inodegc workers immediately and waiting for them all to clear.
  */
-void
+int
 xfs_blockgc_flush_all(
        struct xfs_mount        *mp)
 {
@@ -1520,7 +1540,7 @@ xfs_blockgc_flush_all(
        for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
                flush_delayed_work(&pag->pag_blockgc_work);
 
-       xfs_inodegc_flush(mp);
+       return xfs_inodegc_flush(mp);
 }
 
 /*
@@ -1842,13 +1862,17 @@ xfs_inodegc_set_reclaimable(
  * This is the last chance to make changes to an otherwise unreferenced file
  * before incore reclamation happens.
  */
-static void
+static int
 xfs_inodegc_inactivate(
        struct xfs_inode        *ip)
 {
+       int                     error;
+
        trace_xfs_inode_inactivating(ip);
-       xfs_inactive(ip);
+       error = xfs_inactive(ip);
        xfs_inodegc_set_reclaimable(ip);
+       return error;
+
 }
 
 void
@@ -1880,8 +1904,12 @@ xfs_inodegc_worker(
 
        WRITE_ONCE(gc->shrinker_hits, 0);
        llist_for_each_entry_safe(ip, n, node, i_gclist) {
+               int     error;
+
                xfs_iflags_set(ip, XFS_INACTIVATING);
-               xfs_inodegc_inactivate(ip);
+               error = xfs_inodegc_inactivate(ip);
+               if (error && !gc->error)
+                       gc->error = error;
        }
 
        memalloc_nofs_restore(nofs_flag);
@@ -1905,13 +1933,13 @@ xfs_inodegc_push(
  * Force all currently queued inode inactivation work to run immediately and
  * wait for the work to finish.
  */
-void
+int
 xfs_inodegc_flush(
        struct xfs_mount        *mp)
 {
        xfs_inodegc_push(mp);
        trace_xfs_inodegc_flush(mp, __return_address);
-       flush_workqueue(mp->m_inodegc_wq);
+       return xfs_inodegc_wait_all(mp);
 }
 
 /*
index 87910191a9ddc03a6a8e256824dc5349344fe410..1dcdcb23796edb848e2a4f444ec35f0998d37558 100644 (file)
@@ -62,7 +62,7 @@ int xfs_blockgc_free_dquots(struct xfs_mount *mp, struct xfs_dquot *udqp,
                unsigned int iwalk_flags);
 int xfs_blockgc_free_quota(struct xfs_inode *ip, unsigned int iwalk_flags);
 int xfs_blockgc_free_space(struct xfs_mount *mp, struct xfs_icwalk *icm);
-void xfs_blockgc_flush_all(struct xfs_mount *mp);
+int xfs_blockgc_flush_all(struct xfs_mount *mp);
 
 void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
 void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
@@ -80,7 +80,7 @@ void xfs_blockgc_start(struct xfs_mount *mp);
 
 void xfs_inodegc_worker(struct work_struct *work);
 void xfs_inodegc_push(struct xfs_mount *mp);
-void xfs_inodegc_flush(struct xfs_mount *mp);
+int xfs_inodegc_flush(struct xfs_mount *mp);
 void xfs_inodegc_stop(struct xfs_mount *mp);
 void xfs_inodegc_start(struct xfs_mount *mp);
 void xfs_inodegc_cpu_dead(struct xfs_mount *mp, unsigned int cpu);
index 5808abab786c790d204cf502a6dce1ec6bc309f3..9e62cc50014010df3837f321e0de3db17ee133dd 100644 (file)
@@ -1620,16 +1620,7 @@ xfs_inactive_ifree(
         */
        xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
 
-       /*
-        * Just ignore errors at this point.  There is nothing we can do except
-        * to try to keep going. Make sure it's not a silent error.
-        */
-       error = xfs_trans_commit(tp);
-       if (error)
-               xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
-                       __func__, error);
-
-       return 0;
+       return xfs_trans_commit(tp);
 }
 
 /*
@@ -1693,12 +1684,12 @@ xfs_inode_needs_inactive(
  * now be truncated.  Also, we clear all of the read-ahead state
  * kept for the inode here since the file is now closed.
  */
-void
+int
 xfs_inactive(
        xfs_inode_t     *ip)
 {
        struct xfs_mount        *mp;
-       int                     error;
+       int                     error = 0;
        int                     truncate = 0;
 
        /*
@@ -1736,7 +1727,7 @@ xfs_inactive(
                 * reference to the inode at this point anyways.
                 */
                if (xfs_can_free_eofblocks(ip, true))
-                       xfs_free_eofblocks(ip);
+                       error = xfs_free_eofblocks(ip);
 
                goto out;
        }
@@ -1773,7 +1764,7 @@ xfs_inactive(
        /*
         * Free the inode.
         */
-       xfs_inactive_ifree(ip);
+       error = xfs_inactive_ifree(ip);
 
 out:
        /*
@@ -1781,6 +1772,7 @@ out:
         * the attached dquots.
         */
        xfs_qm_dqdetach(ip);
+       return error;
 }
 
 /*
index 69d21e42c10a5f5d84f180d31397270bb83bf9b7..7547caf2f2abd525010aa046d8b1e93e29d599dd 100644 (file)
@@ -470,7 +470,7 @@ enum layout_break_reason {
        (xfs_has_grpid((pip)->i_mount) || (VFS_I(pip)->i_mode & S_ISGID))
 
 int            xfs_release(struct xfs_inode *ip);
-void           xfs_inactive(struct xfs_inode *ip);
+int            xfs_inactive(struct xfs_inode *ip);
 int            xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name,
                           struct xfs_inode **ipp, struct xfs_name *ci_name);
 int            xfs_create(struct mnt_idmap *idmap,
index 322eb2ee6c5506b6d048d548d11f1f7594b0c670..82c81d20459d147ce6f18574037b2850863d0176 100644 (file)
@@ -2711,7 +2711,9 @@ xlog_recover_iunlink_bucket(
                         * just to flush the inodegc queue and wait for it to
                         * complete.
                         */
-                       xfs_inodegc_flush(mp);
+                       error = xfs_inodegc_flush(mp);
+                       if (error)
+                               break;
                }
 
                prev_agino = agino;
@@ -2719,10 +2721,15 @@ xlog_recover_iunlink_bucket(
        }
 
        if (prev_ip) {
+               int     error2;
+
                ip->i_prev_unlinked = prev_agino;
                xfs_irele(prev_ip);
+
+               error2 = xfs_inodegc_flush(mp);
+               if (error2 && !error)
+                       return error2;
        }
-       xfs_inodegc_flush(mp);
        return error;
 }
 
@@ -2789,7 +2796,6 @@ xlog_recover_iunlink_ag(
                         * bucket and remaining inodes on it unreferenced and
                         * unfreeable.
                         */
-                       xfs_inodegc_flush(pag->pag_mount);
                        xlog_recover_clear_agi_bucket(pag, bucket);
                }
        }
@@ -2806,13 +2812,6 @@ xlog_recover_process_iunlinks(
 
        for_each_perag(log->l_mp, agno, pag)
                xlog_recover_iunlink_ag(pag);
-
-       /*
-        * Flush the pending unlinked inodes to ensure that the inactivations
-        * are fully completed on disk and the incore inodes can be reclaimed
-        * before we signal that recovery is complete.
-        */
-       xfs_inodegc_flush(log->l_mp);
 }
 
 STATIC void
index aaaf5ec13492d2f0b09ae9165be574ba20d80bfd..6c09f89534d385a47bbc85f8154fc1452e0e9ccb 100644 (file)
@@ -62,6 +62,7 @@ struct xfs_error_cfg {
 struct xfs_inodegc {
        struct llist_head       list;
        struct delayed_work     work;
+       int                     error;
 
        /* approximate count of inodes in the list */
        unsigned int            items;
index 7e706255f165027c2c6e161f2878673ac89dad23..4120bd1cba90e492c66ec85ed21d44412e127e8c 100644 (file)
@@ -1100,6 +1100,7 @@ xfs_inodegc_init_percpu(
 #endif
                init_llist_head(&gc->list);
                gc->items = 0;
+               gc->error = 0;
                INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
        }
        return 0;
index f81fbf081b01de3065003ed93e4bfc76cee4ffaf..8c0bfc9a33b116cd74606daa2d6de4fc8362f004 100644 (file)
@@ -290,7 +290,9 @@ retry:
                 * Do not perform a synchronous scan because callers can hold
                 * other locks.
                 */
-               xfs_blockgc_flush_all(mp);
+               error = xfs_blockgc_flush_all(mp);
+               if (error)
+                       return error;
                want_retry = false;
                goto retry;
        }