ext4: fast commit may not fallback for ineligible commit
authorXin Yin <yinxin.x@bytedance.com>
Mon, 17 Jan 2022 09:36:54 +0000 (17:36 +0800)
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>
Tue, 8 Mar 2022 18:12:32 +0000 (19:12 +0100)
[ Upstream commit e85c81ba8859a4c839bcd69c5d83b32954133a5b ]

For the follow scenario:
1. jbd start commit transaction n
2. task A get new handle for transaction n+1
3. task A do some ineligible actions and mark FC_INELIGIBLE
4. jbd complete transaction n and clean FC_INELIGIBLE
5. task A call fsync

In this case fast commit will not fallback to full commit and
transaction n+1 also not handled by jbd.

Make ext4_fc_mark_ineligible() also record transaction tid for
latest ineligible case, when call ext4_fc_cleanup() check
current transaction tid, if small than latest ineligible tid
do not clear the EXT4_MF_FC_INELIGIBLE.

Reported-by: kernel test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Reported-by: Ritesh Harjani <riteshh@linux.ibm.com>
Suggested-by: Harshad Shirwadkar <harshadshirwadkar@gmail.com>
Signed-off-by: Xin Yin <yinxin.x@bytedance.com>
Link: https://lore.kernel.org/r/20220117093655.35160-2-yinxin.x@bytedance.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Sasha Levin <sashal@kernel.org>
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/fast_commit.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/namei.c
fs/ext4/super.c
fs/ext4/xattr.c
fs/jbd2/commit.c
fs/jbd2/journal.c
include/linux/jbd2.h

index 7448e24cd54591fe2747f696f5881eb575d19b0d..2b67c7d1677bbe9e9eac2a319d97364208be57c1 100644 (file)
@@ -1746,6 +1746,7 @@ struct ext4_sb_info {
        spinlock_t s_fc_lock;
        struct buffer_head *s_fc_bh;
        struct ext4_fc_stats s_fc_stats;
+       tid_t s_fc_ineligible_tid;
 #ifdef CONFIG_EXT4_DEBUG
        int s_fc_debug_max_replay;
 #endif
@@ -2923,7 +2924,7 @@ void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
                            struct dentry *dentry);
 void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
 void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
-void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
+void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle);
 void ext4_fc_start_update(struct inode *inode);
 void ext4_fc_stop_update(struct inode *inode);
 void ext4_fc_del(struct inode *inode);
index c19813402ce98159676b58e9a7da49fe7d82ea7e..b81c008e667557a0d5b841d5603c338d71da2554 100644 (file)
@@ -5340,7 +5340,7 @@ static int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
                ret = PTR_ERR(handle);
                goto out_mmap;
        }
-       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
 
        down_write(&EXT4_I(inode)->i_data_sem);
        ext4_discard_preallocations(inode, 0);
@@ -5480,7 +5480,7 @@ static int ext4_insert_range(struct inode *inode, loff_t offset, loff_t len)
                ret = PTR_ERR(handle);
                goto out_mmap;
        }
-       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE);
+       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_FALLOC_RANGE, handle);
 
        /* Expand file to avoid data loss if there is error while shifting */
        inode->i_size += len;
index f8915dff82eee0aef2a589e28949a7ea4e4e7acf..96fac6ac18f468b907ec4ad42fa229d2406c19b9 100644 (file)
@@ -302,18 +302,32 @@ restart:
 }
 
 /*
- * Mark file system as fast commit ineligible. This means that next commit
- * operation would result in a full jbd2 commit.
+ * Mark file system as fast commit ineligible, and record latest
+ * ineligible transaction tid. This means until the recorded
+ * transaction, commit operation would result in a full jbd2 commit.
  */
-void ext4_fc_mark_ineligible(struct super_block *sb, int reason)
+void ext4_fc_mark_ineligible(struct super_block *sb, int reason, handle_t *handle)
 {
        struct ext4_sb_info *sbi = EXT4_SB(sb);
+       tid_t tid;
 
        if (!test_opt2(sb, JOURNAL_FAST_COMMIT) ||
            (EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY))
                return;
 
        ext4_set_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+       if (handle && !IS_ERR(handle))
+               tid = handle->h_transaction->t_tid;
+       else {
+               read_lock(&sbi->s_journal->j_state_lock);
+               tid = sbi->s_journal->j_running_transaction ?
+                               sbi->s_journal->j_running_transaction->t_tid : 0;
+               read_unlock(&sbi->s_journal->j_state_lock);
+       }
+       spin_lock(&sbi->s_fc_lock);
+       if (sbi->s_fc_ineligible_tid < tid)
+               sbi->s_fc_ineligible_tid = tid;
+       spin_unlock(&sbi->s_fc_lock);
        WARN_ON(reason >= EXT4_FC_REASON_MAX);
        sbi->s_fc_stats.fc_ineligible_reason_count[reason]++;
 }
@@ -389,7 +403,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
        mutex_unlock(&ei->i_fc_lock);
        node = kmem_cache_alloc(ext4_fc_dentry_cachep, GFP_NOFS);
        if (!node) {
-               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
+               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
                mutex_lock(&ei->i_fc_lock);
                return -ENOMEM;
        }
@@ -402,7 +416,7 @@ static int __track_dentry_update(struct inode *inode, void *arg, bool update)
                if (!node->fcd_name.name) {
                        kmem_cache_free(ext4_fc_dentry_cachep, node);
                        ext4_fc_mark_ineligible(inode->i_sb,
-                               EXT4_FC_REASON_NOMEM);
+                               EXT4_FC_REASON_NOMEM, NULL);
                        mutex_lock(&ei->i_fc_lock);
                        return -ENOMEM;
                }
@@ -504,7 +518,7 @@ void ext4_fc_track_inode(handle_t *handle, struct inode *inode)
 
        if (ext4_should_journal_data(inode)) {
                ext4_fc_mark_ineligible(inode->i_sb,
-                                       EXT4_FC_REASON_INODE_JOURNAL_DATA);
+                                       EXT4_FC_REASON_INODE_JOURNAL_DATA, handle);
                return;
        }
 
@@ -1180,7 +1194,7 @@ fallback:
  * Fast commit cleanup routine. This is called after every fast commit and
  * full commit. full is true if we are called after a full commit.
  */
-static void ext4_fc_cleanup(journal_t *journal, int full)
+static void ext4_fc_cleanup(journal_t *journal, int full, tid_t tid)
 {
        struct super_block *sb = journal->j_private;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -1228,7 +1242,10 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
                                &sbi->s_fc_q[FC_Q_MAIN]);
 
        ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
-       ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+       if (tid >= sbi->s_fc_ineligible_tid) {
+               sbi->s_fc_ineligible_tid = 0;
+               ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
+       }
 
        if (full)
                sbi->s_fc_bytes = 0;
index b6746cc86cee3f7656c046943fd8cfaa9069fde0..22a5140546fb6e8a9b06ff2b806f8dfb4ef708fc 100644 (file)
@@ -337,7 +337,7 @@ stop_handle:
        return;
 no_delete:
        if (!list_empty(&EXT4_I(inode)->i_fc_list))
-               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM);
+               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_NOMEM, NULL);
        ext4_clear_inode(inode);        /* We must guarantee clearing of inode... */
 }
 
@@ -5969,7 +5969,7 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
                return PTR_ERR(handle);
 
        ext4_fc_mark_ineligible(inode->i_sb,
-               EXT4_FC_REASON_JOURNAL_FLAG_CHANGE);
+               EXT4_FC_REASON_JOURNAL_FLAG_CHANGE, handle);
        err = ext4_mark_inode_dirty(handle, inode);
        ext4_handle_sync(handle);
        ext4_journal_stop(handle);
index fd70bebb1437076ca35b03e788daa8ad7aec1229..f61b59045c6d3a6e59a1262b2b559defd26a7f58 100644 (file)
@@ -169,7 +169,7 @@ static long swap_inode_boot_loader(struct super_block *sb,
                err = -EINVAL;
                goto err_out;
        }
-       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT);
+       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_SWAP_BOOT, handle);
 
        /* Protect extent tree against block allocations via delalloc */
        ext4_double_down_write_data_sem(inode, inode_bl);
@@ -1075,7 +1075,7 @@ mext_out:
 
                err = ext4_resize_fs(sb, n_blocks_count);
                if (EXT4_SB(sb)->s_journal) {
-                       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE);
+                       ext4_fc_mark_ineligible(sb, EXT4_FC_REASON_RESIZE, NULL);
                        jbd2_journal_lock_updates(EXT4_SB(sb)->s_journal);
                        err2 = jbd2_journal_flush(EXT4_SB(sb)->s_journal, 0);
                        jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal);
index da7698341d7d3f98a93b379e939c6cdd5b0ffccb..192ea90e757ccc4982770db45b7c284f190d7e18 100644 (file)
@@ -3889,7 +3889,7 @@ static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
                 * dirents in directories.
                 */
                ext4_fc_mark_ineligible(old.inode->i_sb,
-                       EXT4_FC_REASON_RENAME_DIR);
+                       EXT4_FC_REASON_RENAME_DIR, handle);
        } else {
                if (new.inode)
                        ext4_fc_track_unlink(handle, new.dentry);
@@ -4049,7 +4049,7 @@ static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (unlikely(retval))
                goto end_rename;
        ext4_fc_mark_ineligible(new.inode->i_sb,
-                               EXT4_FC_REASON_CROSS_RENAME);
+                               EXT4_FC_REASON_CROSS_RENAME, handle);
        if (old.dir_bh) {
                retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
                if (retval)
index 538a33c3b11760c0500486e125809a899e250855..eb5d55cf277ce230f4eb727a4674664dd7954c82 100644 (file)
@@ -4620,6 +4620,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        sbi->s_fc_bytes = 0;
        ext4_clear_mount_flag(sb, EXT4_MF_FC_INELIGIBLE);
        ext4_clear_mount_flag(sb, EXT4_MF_FC_COMMITTING);
+       sbi->s_fc_ineligible_tid = 0;
        spin_lock_init(&sbi->s_fc_lock);
        memset(&sbi->s_fc_stats, 0, sizeof(sbi->s_fc_stats));
        sbi->s_fc_replay_state.fc_regions = NULL;
index 1e0fc1ed845bf6b04283844281b1bc056b66f5f9..0423253490986fab93430748c52b61a620610715 100644 (file)
@@ -2408,7 +2408,7 @@ retry_inode:
                if (IS_SYNC(inode))
                        ext4_handle_sync(handle);
        }
-       ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+       ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
 
 cleanup:
        brelse(is.iloc.bh);
@@ -2486,7 +2486,7 @@ retry:
                if (error == 0)
                        error = error2;
        }
-       ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+       ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, NULL);
 
        return error;
 }
@@ -2920,7 +2920,7 @@ int ext4_xattr_delete_inode(handle_t *handle, struct inode *inode,
                                         error);
                        goto cleanup;
                }
-               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR);
+               ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_XATTR, handle);
        }
        error = 0;
 cleanup:
index 3cc4ab2ba7f4f25a90ae1528a49cd6928455c937..d188fa913a0758c5341da94199795619cb69eb2c 100644 (file)
@@ -1170,7 +1170,7 @@ restart_loop:
        if (journal->j_commit_callback)
                journal->j_commit_callback(journal, commit_transaction);
        if (journal->j_fc_cleanup_callback)
-               journal->j_fc_cleanup_callback(journal, 1);
+               journal->j_fc_cleanup_callback(journal, 1, commit_transaction->t_tid);
 
        trace_jbd2_end_commit(journal, commit_transaction);
        jbd_debug(1, "JBD2: commit %d complete, head %d\n",
index bd9ac98916043effa1622f03ea98941ffb3c6fad..1f8493ef181d657b027c6eb23995a8791d1d3afd 100644 (file)
@@ -769,7 +769,7 @@ EXPORT_SYMBOL(jbd2_fc_begin_commit);
 static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
 {
        if (journal->j_fc_cleanup_callback)
-               journal->j_fc_cleanup_callback(journal, 0);
+               journal->j_fc_cleanup_callback(journal, 0, tid);
        write_lock(&journal->j_state_lock);
        journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
        if (fallback)
index fd933c45281af84c8391e3845021c70518c6b164..d63b8106796e2e419b5013f4a44478e951175c4f 100644 (file)
@@ -1295,7 +1295,7 @@ struct journal_s
         * Clean-up after fast commit or full commit. JBD2 calls this function
         * after every commit operation.
         */
-       void (*j_fc_cleanup_callback)(struct journal_s *journal, int);
+       void (*j_fc_cleanup_callback)(struct journal_s *journal, int full, tid_t tid);
 
        /**
         * @j_fc_replay_callback: