block: fix locking for struct block_device size updates
authorChristoph Hellwig <hch@lst.de>
Sun, 23 Aug 2020 09:10:42 +0000 (11:10 +0200)
committerJens Axboe <axboe@kernel.dk>
Tue, 1 Sep 2020 22:49:25 +0000 (16:49 -0600)
Two different callers use two different mutexes for updating the
block device size, which obviously doesn't help to actually protect
against concurrent updates from the different callers.  In addition
one of the locks, bd_mutex is rather prone to deadlocks with other
parts of the block stack that use it for high level synchronization.

Switch to using a new spinlock protecting just the size updates, as
that is all we need, and make sure everyone does the update through
the proper helper.

This fixes a bug reported with the nvme revalidating disks during a
hot removal operation, which can currently deadlock on bd_mutex.

Reported-by: Xianting Tian <xianting_tian@126.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
block/partitions/core.c
drivers/block/aoe/aoecmd.c
drivers/md/dm.c
drivers/s390/block/dasd_ioctl.c
fs/block_dev.c
include/linux/blk_types.h

index 5b4869c08fb3800ce9ed0a4619b0e3721763e05e..b1c0b50ca92d374cd339a4be3a6c5f7c4b6f47a5 100644 (file)
@@ -592,8 +592,8 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
        if (partition_overlaps(bdev->bd_disk, start, length, partno))
                goto out_unlock;
 
-       part_nr_sects_write(part, (sector_t)length);
-       i_size_write(bdevp->bd_inode, length << SECTOR_SHIFT);
+       part_nr_sects_write(part, length);
+       bd_set_nr_sectors(bdevp, length);
 
        ret = 0;
 out_unlock:
index 6dba4139515517d5012f47591e1b740d15f44a86..313f0b946fe2b347475e98814c1784d96360d96e 100644 (file)
@@ -900,9 +900,7 @@ aoecmd_sleepwork(struct work_struct *work)
                ssize = get_capacity(d->gd);
                bd = bdget_disk(d->gd, 0);
                if (bd) {
-                       inode_lock(bd->bd_inode);
-                       i_size_write(bd->bd_inode, (loff_t)ssize<<9);
-                       inode_unlock(bd->bd_inode);
+                       bd_set_nr_sectors(bd, ssize);
                        bdput(bd);
                }
                spin_lock_irq(&d->lock);
index fb0255d25e4b2ea29e05c2727b067237326858ad..3dedd9cc4fb65287d4c46e020d2970a2230b049d 100644 (file)
@@ -2097,18 +2097,6 @@ static void event_callback(void *context)
        dm_issue_global_event();
 }
 
-/*
- * Protected by md->suspend_lock obtained by dm_swap_table().
- */
-static void __set_size(struct mapped_device *md, sector_t size)
-{
-       lockdep_assert_held(&md->suspend_lock);
-
-       set_capacity(md->disk, size);
-
-       i_size_write(md->bdev->bd_inode, (loff_t)size << SECTOR_SHIFT);
-}
-
 /*
  * Returns old map, which caller must destroy.
  */
@@ -2131,7 +2119,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
        if (size != dm_get_size(md))
                memset(&md->geometry, 0, sizeof(md->geometry));
 
-       __set_size(md, size);
+       set_capacity(md->disk, size);
+       bd_set_nr_sectors(md->bdev, size);
 
        dm_table_event_callback(t, event_callback, md);
 
index 777734d1b4e58cfac5da5b7c7ca3c39d0b5d12eb..faaf5596e31c12443d238ee96876e76c7df8bf9c 100644 (file)
@@ -55,10 +55,7 @@ dasd_ioctl_enable(struct block_device *bdev)
 
        dasd_enable_device(base);
        /* Formatting the dasd device can change the capacity. */
-       mutex_lock(&bdev->bd_mutex);
-       i_size_write(bdev->bd_inode,
-                    (loff_t)get_capacity(base->block->gdp) << 9);
-       mutex_unlock(&bdev->bd_mutex);
+       bd_set_nr_sectors(bdev, get_capacity(base->block->gdp));
        dasd_put_device(base);
        return 0;
 }
@@ -91,9 +88,7 @@ dasd_ioctl_disable(struct block_device *bdev)
         * Set i_size to zero, since read, write, etc. check against this
         * value.
         */
-       mutex_lock(&bdev->bd_mutex);
-       i_size_write(bdev->bd_inode, 0);
-       mutex_unlock(&bdev->bd_mutex);
+       bd_set_nr_sectors(bdev, 0);
        dasd_put_device(base);
        return 0;
 }
index f52597172c8b790cfe4c1019294013222ae3ffda..08158bb2e76c8591a664cc1408fdfbf3304902b1 100644 (file)
@@ -876,6 +876,7 @@ struct block_device *bdget(dev_t dev)
        bdev = &BDEV_I(inode)->bdev;
 
        if (inode->i_state & I_NEW) {
+               spin_lock_init(&bdev->bd_size_lock);
                bdev->bd_contains = NULL;
                bdev->bd_super = NULL;
                bdev->bd_inode = inode;
@@ -1290,6 +1291,7 @@ static void check_disk_size_change(struct gendisk *disk,
 {
        loff_t disk_size, bdev_size;
 
+       spin_lock(&bdev->bd_size_lock);
        disk_size = (loff_t)get_capacity(disk) << 9;
        bdev_size = i_size_read(bdev->bd_inode);
        if (disk_size != bdev_size) {
@@ -1299,11 +1301,15 @@ static void check_disk_size_change(struct gendisk *disk,
                               disk->disk_name, bdev_size, disk_size);
                }
                i_size_write(bdev->bd_inode, disk_size);
-               if (bdev_size > disk_size && __invalidate_device(bdev, false))
+       }
+       bdev->bd_invalidated = 0;
+       spin_unlock(&bdev->bd_size_lock);
+
+       if (bdev_size > disk_size) {
+               if (__invalidate_device(bdev, false))
                        pr_warn("VFS: busy inodes on resized disk %s\n",
                                disk->disk_name);
        }
-       bdev->bd_invalidated = 0;
 }
 
 /**
@@ -1328,13 +1334,10 @@ int revalidate_disk(struct gendisk *disk)
        if (!(disk->flags & GENHD_FL_HIDDEN)) {
                struct block_device *bdev = bdget_disk(disk, 0);
 
-               if (!bdev)
-                       return ret;
-
-               mutex_lock(&bdev->bd_mutex);
-               check_disk_size_change(disk, bdev, ret == 0);
-               mutex_unlock(&bdev->bd_mutex);
-               bdput(bdev);
+               if (bdev) {
+                       check_disk_size_change(disk, bdev, ret == 0);
+                       bdput(bdev);
+               }
        }
        return ret;
 }
@@ -1373,9 +1376,9 @@ EXPORT_SYMBOL(check_disk_change);
 
 void bd_set_nr_sectors(struct block_device *bdev, sector_t sectors)
 {
-       inode_lock(bdev->bd_inode);
+       spin_lock(&bdev->bd_size_lock);
        i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
-       inode_unlock(bdev->bd_inode);
+       spin_unlock(&bdev->bd_size_lock);
 }
 EXPORT_SYMBOL(bd_set_nr_sectors);
 
index 4ecf4fed171f0d5e97ec9d41abb9567a6c590c81..5accc2549d2259be67a642532ff945c65470d79f 100644 (file)
@@ -38,6 +38,7 @@ struct block_device {
        /* number of times partitions within this device have been opened. */
        unsigned                bd_part_count;
        int                     bd_invalidated;
+       spinlock_t              bd_size_lock; /* for bd_inode->i_size updates */
        struct gendisk *        bd_disk;
        struct backing_dev_info *bd_bdi;