From: Linus Torvalds Date: Thu, 22 Oct 2020 17:31:08 +0000 (-0700) Subject: Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso... X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=96485e4462604744d66bf4301557d996d80b85eb;p=linux.git Merge tag 'ext4_for_linus' of git://git./linux/kernel/git/tytso/ext4 Pull ext4 updates from Ted Ts'o: "The siginificant new ext4 feature this time around is Harshad's new fast_commit mode. In addition, thanks to Mauricio for fixing a race where mmap'ed pages that are being changed in parallel with a data=journal transaction commit could result in bad checksums in the failure that could cause journal replays to fail. Also notable is Ritesh's buffered write optimization which can result in significant improvements on parallel write workloads. (The kernel test robot reported a 330.6% improvement on fio.write_iops on a 96 core system using DAX) Besides that, we have the usual miscellaneous cleanups and bug fixes" Link: https://lore.kernel.org/r/20200925071217.GO28663@shao2-debian * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (46 commits) ext4: fix invalid inode checksum ext4: add fast commit stats in procfs ext4: add a mount opt to forcefully turn fast commits on ext4: fast commit recovery path jbd2: fast commit recovery path ext4: main fast-commit commit path jbd2: add fast commit machinery ext4 / jbd2: add fast commit initialization ext4: add fast_commit feature and handling for extended mount options doc: update ext4 and journalling docs to include fast commit feature ext4: Detect already used quota file early jbd2: avoid transaction reuse after reformatting ext4: use the normal helper to get the actual inode ext4: fix bs < ps issue reported with dioread_nolock mount opt ext4: data=journal: write-protect pages on j_submit_inode_data_buffers() ext4: data=journal: fixes for ext4_page_mkwrite() jbd2, ext4, ocfs2: introduce/use journal callbacks j_submit|finish_inode_data_buffers() jbd2: introduce/export functions jbd2_journal_submit|finish_inode_data_buffers() ext4: introduce ext4_sb_bread_unmovable() to replace sb_bread_unmovable() ext4: use ext4_sb_bread() instead of sb_bread() ... --- 96485e4462604744d66bf4301557d996d80b85eb diff --cc fs/ext4/ialloc.c index 698ca4a4db5f7,2400a82004359..b215c564bc318 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@@ -742,53 -746,122 +746,169 @@@ not_found return 1; } + int ext4_mark_inode_used(struct super_block *sb, int ino) + { + unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); + struct buffer_head *inode_bitmap_bh = NULL, *group_desc_bh = NULL; + struct ext4_group_desc *gdp; + ext4_group_t group; + int bit; + int err = -EFSCORRUPTED; + + if (ino < EXT4_FIRST_INO(sb) || ino > max_ino) + goto out; + + group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); + bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); + inode_bitmap_bh = ext4_read_inode_bitmap(sb, group); + if (IS_ERR(inode_bitmap_bh)) + return PTR_ERR(inode_bitmap_bh); + + if (ext4_test_bit(bit, inode_bitmap_bh->b_data)) { + err = 0; + goto out; + } + + gdp = ext4_get_group_desc(sb, group, &group_desc_bh); + if (!gdp || !group_desc_bh) { + err = -EINVAL; + goto out; + } + + ext4_set_bit(bit, inode_bitmap_bh->b_data); + + BUFFER_TRACE(inode_bitmap_bh, "call ext4_handle_dirty_metadata"); + err = ext4_handle_dirty_metadata(NULL, NULL, inode_bitmap_bh); + if (err) { + ext4_std_error(sb, err); + goto out; + } + err = sync_dirty_buffer(inode_bitmap_bh); + if (err) { + ext4_std_error(sb, err); + goto out; + } + + /* We may have to initialize the block bitmap if it isn't already */ + if (ext4_has_group_desc_csum(sb) && + gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { + struct buffer_head *block_bitmap_bh; + + block_bitmap_bh = ext4_read_block_bitmap(sb, group); + if (IS_ERR(block_bitmap_bh)) { + err = PTR_ERR(block_bitmap_bh); + goto out; + } + + BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap"); + err = ext4_handle_dirty_metadata(NULL, NULL, block_bitmap_bh); + sync_dirty_buffer(block_bitmap_bh); + + /* recheck and clear flag under lock if we still need to */ + ext4_lock_group(sb, group); + if (ext4_has_group_desc_csum(sb) && + (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT); + ext4_free_group_clusters_set(sb, gdp, + ext4_free_clusters_after_init(sb, group, gdp)); + ext4_block_bitmap_csum_set(sb, group, gdp, + block_bitmap_bh); + ext4_group_desc_csum_set(sb, group, gdp); + } + ext4_unlock_group(sb, group); + brelse(block_bitmap_bh); + + if (err) { + ext4_std_error(sb, err); + goto out; + } + } + + /* Update the relevant bg descriptor fields */ + if (ext4_has_group_desc_csum(sb)) { + int free; + + ext4_lock_group(sb, group); /* while we modify the bg desc */ + free = EXT4_INODES_PER_GROUP(sb) - + ext4_itable_unused_count(sb, gdp); + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT); + free = 0; + } + + /* + * Check the relative inode number against the last used + * relative inode number in this group. if it is greater + * we need to update the bg_itable_unused count + */ + if (bit >= free) + ext4_itable_unused_set(sb, gdp, + (EXT4_INODES_PER_GROUP(sb) - bit - 1)); + } else { + ext4_lock_group(sb, group); + } + + ext4_free_inodes_set(sb, gdp, ext4_free_inodes_count(sb, gdp) - 1); + if (ext4_has_group_desc_csum(sb)) { + ext4_inode_bitmap_csum_set(sb, group, gdp, inode_bitmap_bh, + EXT4_INODES_PER_GROUP(sb) / 8); + ext4_group_desc_csum_set(sb, group, gdp); + } + + ext4_unlock_group(sb, group); + err = ext4_handle_dirty_metadata(NULL, NULL, group_desc_bh); + sync_dirty_buffer(group_desc_bh); + out: + return err; + } + +static int ext4_xattr_credits_for_new_inode(struct inode *dir, mode_t mode, + bool encrypt) +{ + struct super_block *sb = dir->i_sb; + int nblocks = 0; +#ifdef CONFIG_EXT4_FS_POSIX_ACL + struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT); + + if (IS_ERR(p)) + return PTR_ERR(p); + if (p) { + int acl_size = p->a_count * sizeof(ext4_acl_entry); + + nblocks += (S_ISDIR(mode) ? 2 : 1) * + __ext4_xattr_set_credits(sb, NULL /* inode */, + NULL /* block_bh */, acl_size, + true /* is_create */); + posix_acl_release(p); + } +#endif + +#ifdef CONFIG_SECURITY + { + int num_security_xattrs = 1; + +#ifdef CONFIG_INTEGRITY + num_security_xattrs++; +#endif + /* + * We assume that security xattrs are never more than 1k. + * In practice they are under 128 bytes. + */ + nblocks += num_security_xattrs * + __ext4_xattr_set_credits(sb, NULL /* inode */, + NULL /* block_bh */, 1024, + true /* is_create */); + } +#endif + if (encrypt) + nblocks += __ext4_xattr_set_credits(sb, + NULL /* inode */, + NULL /* block_bh */, + FSCRYPT_SET_CONTEXT_MAX_SIZE, + true /* is_create */); + return nblocks; +} + /* * There are two policies for allocating an inode. If the new inode is * a directory, then a forward search is made for a block group with both @@@ -818,8 -891,8 +938,8 @@@ struct inode *__ext4_new_inode(handle_ struct inode *ret; ext4_group_t i; ext4_group_t flex_group; - struct ext4_group_info *grp; + struct ext4_group_info *grp = NULL; - int encrypt = 0; + bool encrypt = false; /* Cannot create files in a deleted directory */ if (!dir || !dir->i_nlink)