xfs: speed up directory bestfree block scanning
authorDave Chinner <dchinner@redhat.com>
Thu, 29 Aug 2019 16:04:07 +0000 (09:04 -0700)
committerDarrick J. Wong <darrick.wong@oracle.com>
Sat, 31 Aug 2019 05:43:57 +0000 (22:43 -0700)
When running a "create millions inodes in a directory" test
recently, I noticed we were spending a huge amount of time
converting freespace block headers from disk format to in-memory
format:

 31.47%  [kernel]  [k] xfs_dir2_node_addname
 17.86%  [kernel]  [k] xfs_dir3_free_hdr_from_disk
  3.55%  [kernel]  [k] xfs_dir3_free_bests_p

We shouldn't be hitting the best free block scanning code so hard
when doing sequential directory creates, and it turns out there's
a highly suboptimal loop searching the the best free array in
the freespace block - it decodes the block header before checking
each entry inside a loop, instead of decoding the header once before
running the entry search loop.

This makes a massive difference to create rates. Profile now looks
like this:

  13.15%  [kernel]  [k] xfs_dir2_node_addname
   3.52%  [kernel]  [k] xfs_dir3_leaf_check_int
   3.11%  [kernel]  [k] xfs_log_commit_cil

And the wall time/average file create rate differences are
just as stark:

create time(sec) / rate (files/s)
File count      vanilla     patched
  10k    0.41 / 24.3k    0.42 / 23.8k
  20k    0.74 / 27.0k    0.76 / 26.3k
 100k    3.81 / 26.4k    3.47 / 28.8k
 200k    8.58 / 23.3k    7.19 / 27.8k
   1M   85.69 / 11.7k   48.53 / 20.6k
   2M  280.31 /  7.1k  130.14 / 15.3k

The larger the directory, the bigger the performance improvement.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <darrick.wong@oracle.com>
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
fs/xfs/libxfs/xfs_dir2_node.c

index 93254f45a5f95eb6d50febe7afd2859244c6fab2..a81f56d9e538db561da1f6015e126ae3d0d997b0 100644 (file)
@@ -1750,8 +1750,8 @@ xfs_dir2_node_find_freeblk(
        xfs_dir2_db_t           dbno = -1;
        xfs_dir2_db_t           fbno = -1;
        xfs_fileoff_t           fo;
-       __be16                  *bests;
-       int                     findex;
+       __be16                  *bests = NULL;
+       int                     findex = 0;
        int                     error;
 
        /*
@@ -1781,14 +1781,14 @@ xfs_dir2_node_find_freeblk(
                 */
                ifbno = fblk->blkno;
                fbno = ifbno;
+               xfs_trans_brelse(tp, fbp);
+               fbp = NULL;
+               fblk->bp = NULL;
        }
-       ASSERT(dbno == -1);
-       findex = 0;
 
        /*
         * If we don't have a data block yet, we're going to scan the freespace
-        * blocks looking for one.  Figure out what the highest freespace block
-        * number is.
+        * data for a data block with enough free space in it.
         */
        error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK);
        if (error)
@@ -1799,70 +1799,41 @@ xfs_dir2_node_find_freeblk(
        if (fbno == -1)
                fbno = xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET);
 
-       /*
-        * While we haven't identified a data block, search the freeblock
-        * data for a good data block.  If we find a null freeblock entry,
-        * indicating a hole in the data blocks, remember that.
-        */
-       while (dbno == -1) {
-               /*
-                * If we don't have a freeblock in hand, get the next one.
-                */
-               if (fbp == NULL) {
-                       /*
-                        * If it's ifbno we already looked at it.
-                        */
-                       if (++fbno == ifbno)
-                               fbno++;
-                       /*
-                        * If it's off the end we're done.
-                        */
-                       if (fbno >= lastfbno)
-                               break;
-                       /*
-                        * Read the block.  There can be holes in the
-                        * freespace blocks, so this might not succeed.
-                        * This should be really rare, so there's no reason
-                        * to avoid it.
-                        */
-                       error = xfs_dir2_free_try_read(tp, dp,
-                                       xfs_dir2_db_to_da(args->geo, fbno),
-                                       &fbp);
-                       if (error)
-                               return error;
-                       if (!fbp)
-                               continue;
-                       free = fbp->b_addr;
-                       findex = 0;
-               }
+       for ( ; fbno < lastfbno; fbno++) {
+               /* If it's ifbno we already looked at it. */
+               if (fbno == ifbno)
+                       continue;
+
                /*
-                * Look at the current free entry.  Is it good enough?
-                *
-                * The bests initialisation should be where the bufer is read in
-                * the above branch. But gcc is too stupid to realise that bests
-                * and the freehdr are actually initialised if they are placed
-                * there, so we have to do it here to avoid warnings. Blech.
+                * Read the block.  There can be holes in the freespace blocks,
+                * so this might not succeed.  This should be really rare, so
+                * there's no reason to avoid it.
                 */
+               error = xfs_dir2_free_try_read(tp, dp,
+                               xfs_dir2_db_to_da(args->geo, fbno),
+                               &fbp);
+               if (error)
+                       return error;
+               if (!fbp)
+                       continue;
+
+               free = fbp->b_addr;
                bests = dp->d_ops->free_bests_p(free);
                dp->d_ops->free_hdr_from_disk(&freehdr, free);
-               if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
-                   be16_to_cpu(bests[findex]) >= length)
-                       dbno = freehdr.firstdb + findex;
-               else {
-                       /*
-                        * Are we done with the freeblock?
-                        */
-                       if (++findex == freehdr.nvalid) {
-                               /*
-                                * Drop the block.
-                                */
-                               xfs_trans_brelse(tp, fbp);
-                               fbp = NULL;
-                               if (fblk && fblk->bp)
-                                       fblk->bp = NULL;
+
+               /* Scan the free entry array for a large enough free space. */
+               for (findex = 0; findex < freehdr.nvalid; findex++) {
+                       if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&
+                           be16_to_cpu(bests[findex]) >= length) {
+                               dbno = freehdr.firstdb + findex;
+                               goto found_block;
                        }
                }
+
+               /* Didn't find free space, go on to next free block */
+               xfs_trans_brelse(tp, fbp);
        }
+
 found_block:
        *dbnop = dbno;
        *fbpp = fbp;