xfs: reload entire unlinked bucket lists
author: Darrick J. Wong <djwong@kernel.org>
Mon, 11 Sep 2023 15:39:07 +0000 (08:39 -0700)
committer: Darrick J. Wong <djwong@kernel.org>
Tue, 12 Sep 2023 17:31:07 +0000 (10:31 -0700)
The previous patch to reload unrecovered unlinked inodes when adding a
newly created inode to the unlinked list is missing a key piece of
functionality.  It doesn't handle the case that someone calls xfs_iget
on an inode that is not the last item in the incore list.  For example,
if at mount time the ondisk iunlink bucket looks like this:

AGI -> 7 -> 22 -> 3 -> NULL

None of these three inodes are cached in memory.  Now let's say that
someone tries to open inode 3 by handle.  We need to walk the list to
make sure that inodes 7 and 22 get loaded cold, and that the
i_prev_unlinked of inode 3 gets set to 22.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
fs/xfs/xfs_export.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_itable.c
fs/xfs/xfs_trace.h

index 1064c2342876807837bf400a87ac9d296bfec9fa..f71ea786a6d2256c75742c58998c47d2eb4067fd 100644 (file)
@@ -146,6 +146,12 @@ xfs_nfs_get_inode(
                return ERR_PTR(error);
        }
 
+       error = xfs_inode_reload_unlinked(ip);
+       if (error) {
+               xfs_irele(ip);
+               return ERR_PTR(error);
+       }
+
        if (VFS_I(ip)->i_generation != generation) {
                xfs_irele(ip);
                return ERR_PTR(-ESTALE);
index 475de8f919bee208553a89f07f5ef3d8e2a32c52..2fd22db528b1a8e1ea28d5b379b4359dc8635426 100644 (file)
@@ -3606,3 +3606,103 @@ xfs_iunlock2_io_mmap(
        if (ip1 != ip2)
                inode_unlock(VFS_I(ip1));
 }
+
+/*
+ * Reload the incore unlinked list for the AGI bucket that this inode hashes
+ * to.  Caller should ensure that the link count cannot change, either by
+ * taking ILOCK_SHARED or otherwise preventing other threads from executing.
+ *
+ * The walk starts at the ondisk AGI bucket head and follows i_next_unlinked
+ * from one incore inode to the next.  Any inode on the list that cannot be
+ * found in memory is reloaded with xfs_iunlink_reload_next, and ip gets its
+ * i_prev_unlinked backlink set once the walk reaches it.
+ *
+ * Returns 0 on success; the error from reading the AGI or from reloading an
+ * inode; or -EFSCORRUPTED if a reloaded inode still cannot be looked up or if
+ * ip was never encountered in the bucket.
+ */
+int
+xfs_inode_reload_unlinked_bucket(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip)
+{
+       struct xfs_mount        *mp = tp->t_mountp;
+       struct xfs_buf          *agibp;
+       struct xfs_agi          *agi;
+       struct xfs_perag        *pag;
+       xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+       xfs_agino_t             agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
+       xfs_agino_t             prev_agino, next_agino;
+       unsigned int            bucket;
+       bool                    foundit = false;
+       int                     error;
+
+       /*
+        * Grab the first inode in the list.  The perag reference is held for
+        * the entire walk because the incore lookups below still use pag; it
+        * is dropped at out_pag.
+        */
+       pag = xfs_perag_get(mp, agno);
+       error = xfs_ialloc_read_agi(pag, tp, &agibp);
+       if (error)
+               goto out_pag;
+
+       bucket = agino % XFS_AGI_UNLINKED_BUCKETS;
+       agi = agibp->b_addr;
+
+       trace_xfs_inode_reload_unlinked_bucket(ip);
+
+       xfs_info_ratelimited(mp,
+ "Found unrecovered unlinked inode 0x%x in AG 0x%x.  Initiating list recovery.",
+                       agino, agno);
+
+       prev_agino = NULLAGINO;
+       next_agino = be32_to_cpu(agi->agi_unlinked[bucket]);
+       while (next_agino != NULLAGINO) {
+               struct xfs_inode        *next_ip = NULL;
+
+               if (next_agino == agino) {
+                       /* Found this inode, set its backlink. */
+                       next_ip = ip;
+                       next_ip->i_prev_unlinked = prev_agino;
+                       foundit = true;
+               }
+               if (!next_ip) {
+                       /* Inode already in memory. */
+                       next_ip = xfs_iunlink_lookup(pag, next_agino);
+               }
+               if (!next_ip) {
+                       /* Inode not in memory, reload. */
+                       error = xfs_iunlink_reload_next(tp, agibp, prev_agino,
+                                       next_agino);
+                       if (error)
+                               break;
+
+                       next_ip = xfs_iunlink_lookup(pag, next_agino);
+               }
+               if (!next_ip) {
+                       /* No incore inode at all?  We reloaded it... */
+                       ASSERT(next_ip != NULL);
+                       error = -EFSCORRUPTED;
+                       break;
+               }
+
+               /* Advance using the incore forward pointer. */
+               prev_agino = next_agino;
+               next_agino = next_ip->i_next_unlinked;
+       }
+
+       xfs_trans_brelse(tp, agibp);
+       /* Should have found this inode somewhere in the iunlinked bucket. */
+       if (!error && !foundit)
+               error = -EFSCORRUPTED;
+out_pag:
+       xfs_perag_put(pag);
+       return error;
+}
+
+/*
+ * Decide if this inode is missing its unlinked list and reload it.
+ *
+ * An empty transaction is allocated for the reload and cancelled before
+ * returning.  The check and the reload both run with ILOCK_SHARED held on ip.
+ *
+ * Returns 0 if no reload was needed or the reload succeeded; otherwise the
+ * error from allocating the transaction or from
+ * xfs_inode_reload_unlinked_bucket.
+ */
+int
+xfs_inode_reload_unlinked(
+       struct xfs_inode        *ip)
+{
+       struct xfs_trans        *tp;
+       int                     error;
+
+       error = xfs_trans_alloc_empty(ip->i_mount, &tp);
+       if (error)
+               return error;
+
+       xfs_ilock(ip, XFS_ILOCK_SHARED);
+       if (xfs_inode_unlinked_incomplete(ip))
+               error = xfs_inode_reload_unlinked_bucket(tp, ip);
+       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+       xfs_trans_cancel(tp);
+
+       return error;
+}
index 65aae89255098c6f03a70401ea4fe210f65ee704..a111b5551ecd6f150b2197e600acf2f1f1065338 100644 (file)
@@ -593,4 +593,13 @@ void xfs_end_io(struct work_struct *work);
 int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
 void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
 
+/*
+ * Report whether this inode has a zero link count but
+ * xfs_inode_on_unlinked_list() says it is not on the unlinked list.
+ */
+static inline bool
+xfs_inode_unlinked_incomplete(
+       struct xfs_inode        *ip)
+{
+       /* An inode that still has links needs no unlinked list entry. */
+       if (VFS_I(ip)->i_nlink != 0)
+               return false;
+
+       return !xfs_inode_on_unlinked_list(ip);
+}
+int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip);
+int xfs_inode_reload_unlinked(struct xfs_inode *ip);
+
 #endif /* __XFS_INODE_H__ */
index c2093cb56092bedb2058e3fdd9df13cbd099068b..ccf0c4ff449014b7811ae2b4114cafa208fdf3f5 100644 (file)
@@ -80,6 +80,15 @@ xfs_bulkstat_one_int(
        if (error)
                goto out;
 
+       if (xfs_inode_unlinked_incomplete(ip)) {
+               error = xfs_inode_reload_unlinked_bucket(tp, ip);
+               if (error) {
+                       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+                       xfs_irele(ip);
+                       return error;
+               }
+       }
+
        ASSERT(ip != NULL);
        ASSERT(ip->i_imap.im_blkno != 0);
        inode = VFS_I(ip);
index 7b1cb5d59d8fb15d74358c92d1d03dc05c6bffc0..3926cf7f2a6ed189ccbf2d76f7b8e02db9e02294 100644 (file)
@@ -3849,6 +3849,26 @@ TRACE_EVENT(xfs_iunlink_reload_next,
                  __entry->next_agino)
 );
 
+/*
+ * Tracepoint for xfs_inode_reload_unlinked_bucket.  Records the device, AG
+ * number and AG inode number derived from ip; the bucket shown in the output
+ * is computed at print time as agino % XFS_AGI_UNLINKED_BUCKETS.
+ */
+TRACE_EVENT(xfs_inode_reload_unlinked_bucket,
+       TP_PROTO(struct xfs_inode *ip),
+       TP_ARGS(ip),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_agnumber_t, agno)
+               __field(xfs_agino_t, agino)
+       ),
+       TP_fast_assign(
+               __entry->dev = ip->i_mount->m_super->s_dev;
+               __entry->agno = XFS_INO_TO_AGNO(ip->i_mount, ip->i_ino);
+               __entry->agino = XFS_INO_TO_AGINO(ip->i_mount, ip->i_ino);
+       ),
+       TP_printk("dev %d:%d agno 0x%x agino 0x%x bucket %u",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+                 __entry->agno,
+                 __entry->agino,
+                 __entry->agino % XFS_AGI_UNLINKED_BUCKETS)
+);
+
 DECLARE_EVENT_CLASS(xfs_ag_inode_class,
        TP_PROTO(struct xfs_inode *ip),
        TP_ARGS(ip),