* locked. The function will walk across all the inodes on the cluster buffer it
  * can find and lock without blocking, and flush them to the cluster buffer.
  *
- * On success, the caller must write out the buffer returned in *bp and
- * release it. On failure, the filesystem will be shut down, the buffer will
- * have been unlocked and released, and EFSCORRUPTED will be returned.
+ * On successful flushing of at least one inode, the caller must write out the
+ * buffer and release it. If no inodes are flushed, -EAGAIN will be returned and
+ * the caller needs to release the buffer. On failure, the filesystem will be
+ * shut down, the buffer will have been unlocked and released, and EFSCORRUPTED
+ * will be returned.
  */
 int
 xfs_iflush_cluster(
-       struct xfs_inode        *ip,
        struct xfs_buf          *bp)
 {
-       struct xfs_mount        *mp = ip->i_mount;
-       struct xfs_perag        *pag;
-       unsigned long           first_index, mask;
-       int                     cilist_size;
-       struct xfs_inode        **cilist;
-       struct xfs_inode        *cip;
-       struct xfs_ino_geometry *igeo = M_IGEO(mp);
-       int                     error = 0;
-       int                     nr_found;
+       struct xfs_mount        *mp = bp->b_mount;
+       struct xfs_log_item     *lip, *n;
+       struct xfs_inode        *ip;
+       struct xfs_inode_log_item *iip;
        int                     clcount = 0;
-       int                     i;
-
-       pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
-
-       cilist_size = igeo->inodes_per_cluster * sizeof(struct xfs_inode *);
-       cilist = kmem_alloc(cilist_size, KM_MAYFAIL|KM_NOFS);
-       if (!cilist)
-               goto out_put;
-
-       mask = ~(igeo->inodes_per_cluster - 1);
-       first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;
-       rcu_read_lock();
-       /* really need a gang lookup range call here */
-       nr_found = radix_tree_gang_lookup(&pag->pag_ici_root, (void**)cilist,
-                                       first_index, igeo->inodes_per_cluster);
-       if (nr_found == 0)
-               goto out_free;
+       int                     error = 0;
 
-       for (i = 0; i < nr_found; i++) {
-               cip = cilist[i];
+       /*
+        * We must use the safe variant here as on shutdown xfs_iflush_abort()
+        * can remove itself from the list.
+        */
+       list_for_each_entry_safe(lip, n, &bp->b_li_list, li_bio_list) {
+               iip = (struct xfs_inode_log_item *)lip;
+               ip = iip->ili_inode;
 
                /*
-                * because this is an RCU protected lookup, we could find a
-                * recently freed or even reallocated inode during the lookup.
-                * We need to check under the i_flags_lock for a valid inode
-                * here. Skip it if it is not valid or the wrong inode.
+                * Quick and dirty check to avoid locks if possible.
                 */
-               spin_lock(&cip->i_flags_lock);
-               if (!cip->i_ino ||
-                   __xfs_iflags_test(cip, XFS_ISTALE)) {
-                       spin_unlock(&cip->i_flags_lock);
+               if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK))
+                       continue;
+               if (xfs_ipincount(ip))
                        continue;
-               }
 
                /*
-                * Once we fall off the end of the cluster, no point checking
-                * any more inodes in the list because they will also all be
-                * outside the cluster.
+                * The inode is still attached to the buffer, which means it is
+                * dirty but reclaim might try to grab it. Check carefully for
+                * that, and grab the ilock while still holding the i_flags_lock
+                * to guarantee reclaim will not be able to reclaim this inode
+                * once we drop the i_flags_lock.
                 */
-               if ((XFS_INO_TO_AGINO(mp, cip->i_ino) & mask) != first_index) {
-                       spin_unlock(&cip->i_flags_lock);
-                       break;
+               spin_lock(&ip->i_flags_lock);
+               ASSERT(!__xfs_iflags_test(ip, XFS_ISTALE));
+               if (__xfs_iflags_test(ip, XFS_IRECLAIM | XFS_IFLOCK)) {
+                       spin_unlock(&ip->i_flags_lock);
+                       continue;
                }
-               spin_unlock(&cip->i_flags_lock);
 
                /*
-                * Do an un-protected check to see if the inode is dirty and
-                * is a candidate for flushing.  These checks will be repeated
-                * later after the appropriate locks are acquired.
+                * ILOCK will pin the inode against reclaim and prevent
+                * concurrent transactions modifying the inode while we are
+                * flushing the inode.
                 */
-               if (xfs_inode_clean(cip) && xfs_ipincount(cip) == 0)
+               if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
+                       spin_unlock(&ip->i_flags_lock);
                        continue;
+               }
+               spin_unlock(&ip->i_flags_lock);
 
                /*
-                * Try to get locks.  If any are unavailable or it is pinned,
-                * then this inode cannot be flushed and is skipped.
+                * Skip inodes that are already flush locked as they have
+                * already been written to the buffer.
                 */
-
-               if (!xfs_ilock_nowait(cip, XFS_ILOCK_SHARED))
-                       continue;
-               if (!xfs_iflock_nowait(cip)) {
-                       xfs_iunlock(cip, XFS_ILOCK_SHARED);
+               if (!xfs_iflock_nowait(ip)) {
+                       xfs_iunlock(ip, XFS_ILOCK_SHARED);
                        continue;
                }
-               if (xfs_ipincount(cip)) {
-                       xfs_ifunlock(cip);
-                       xfs_iunlock(cip, XFS_ILOCK_SHARED);
-                       continue;
-               }
-
 
                /*
-                * Check the inode number again, just to be certain we are not
-                * racing with freeing in xfs_reclaim_inode(). See the comments
-                * in that function for more information as to why the initial
-                * check is not sufficient.
+                * Abort flushing this inode if we are shut down because the
+                * inode may not currently be in the AIL. This can occur when
+                * log I/O failure unpins the inode without inserting into the
+                * AIL, leaving a dirty/unpinned inode attached to the buffer
+                * that otherwise looks like it should be flushed.
                 */
-               if (!cip->i_ino) {
-                       xfs_ifunlock(cip);
-                       xfs_iunlock(cip, XFS_ILOCK_SHARED);
+               if (XFS_FORCED_SHUTDOWN(mp)) {
+                       xfs_iunpin_wait(ip);
+                       /* xfs_iflush_abort() drops the flush lock */
+                       xfs_iflush_abort(ip);
+                       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+                       error = -EIO;
                        continue;
                }
 
-               /*
-                * arriving here means that this inode can be flushed.  First
-                * re-check that it's dirty before flushing.
-                */
-               if (!xfs_inode_clean(cip)) {
-                       error = xfs_iflush(cip, bp);
-                       if (error) {
-                               xfs_iunlock(cip, XFS_ILOCK_SHARED);
-                               goto out_free;
-                       }
-                       clcount++;
-               } else {
-                       xfs_ifunlock(cip);
+               /* don't block waiting on a log force to unpin dirty inodes */
+               if (xfs_ipincount(ip)) {
+                       xfs_ifunlock(ip);
+                       xfs_iunlock(ip, XFS_ILOCK_SHARED);
+                       continue;
                }
-               xfs_iunlock(cip, XFS_ILOCK_SHARED);
-       }
 
-       if (clcount) {
-               XFS_STATS_INC(mp, xs_icluster_flushcnt);
-               XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount);
+               if (!xfs_inode_clean(ip))
+                       error = xfs_iflush(ip, bp);
+               else
+                       xfs_ifunlock(ip);
+               xfs_iunlock(ip, XFS_ILOCK_SHARED);
+               if (error)
+                       break;
+               clcount++;
        }
 
-out_free:
-       rcu_read_unlock();
-       kmem_free(cilist);
-out_put:
-       xfs_perag_put(pag);
        if (error) {
                bp->b_flags |= XBF_ASYNC;
                xfs_buf_ioend_fail(bp);
                xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+               return error;
        }
-       return error;
+
+       if (!clcount)
+               return -EAGAIN;
+
+       XFS_STATS_INC(mp, xs_icluster_flushcnt);
+       XFS_STATS_ADD(mp, xs_icluster_flushinode, clcount);
+       return 0;
+
 }
 
 /* Release an inode. */