md: remove flag RemoveSynchronized
authorYu Kuai <yukuai3@huawei.com>
Sat, 25 Nov 2023 08:16:00 +0000 (16:16 +0800)
committerSong Liu <song@kernel.org>
Mon, 27 Nov 2023 23:49:04 +0000 (15:49 -0800)
rcu is not used correctly here, because synchronize_rcu() is called
before replacing old value, for example:

remove_and_add_spares   // other path
 synchronize_rcu
 // called before replacing old value
 set_bit(RemoveSynchronized)
                        rcu_read_lock()
                        rdev = conf->mirros[].rdev
 pers->hot_remove_disk
  conf->mirros[].rdev = NULL;
  if (!test_bit(RemoveSynchronized))
   synchronize_rcu
   /*
    * won't be called, and won't wait
    * for concurrent readers to be done.
    */
                        // access rdev after remove_and_add_spares()
                        rcu_read_unlock()

Fortunately, there is a separate rcu protection to prevent such rdev
to be freed:

md_kick_rdev_from_array //other path
rcu_read_lock()
rdev = conf->mirros[].rdev
list_del_rcu(&rdev->same_set)

rcu_read_unlock()
/*
 * rdev can be removed from conf, but
 * rdev won't be freed.
 */
synchronize_rcu()
free rdev

Hence remove this useless flag and prepare to remove rcu protection to
access rdev from 'conf'.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Signed-off-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/20231125081604.3939938-2-yukuai1@huaweicloud.com
drivers/md/md-multipath.c
drivers/md/md.c
drivers/md/md.h
drivers/md/raid1.c
drivers/md/raid10.c
drivers/md/raid5.c

index d22276870283d85d6e6bc4c7642b850dabc4ce16..aa77133f31887968b2f8e3d4c9fa1d8c9cea20d9 100644 (file)
@@ -258,15 +258,6 @@ static int multipath_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                        goto abort;
                }
                p->rdev = NULL;
-               if (!test_bit(RemoveSynchronized, &rdev->flags)) {
-                       synchronize_rcu();
-                       if (atomic_read(&rdev->nr_pending)) {
-                               /* lost the race, try later */
-                               err = -EBUSY;
-                               p->rdev = rdev;
-                               goto abort;
-                       }
-               }
                err = md_integrity_register(mddev);
        }
 abort:
index 466bbcb4e230e7504cf8779e6a0c05d06f6b49a2..71b3397dea475f86cc7e9ea8a419085e9006f2df 100644 (file)
@@ -9244,44 +9244,19 @@ static int remove_and_add_spares(struct mddev *mddev,
        struct md_rdev *rdev;
        int spares = 0;
        int removed = 0;
-       bool remove_some = false;
 
        if (this && test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
                /* Mustn't remove devices when resync thread is running */
                return 0;
 
        rdev_for_each(rdev, mddev) {
-               if ((this == NULL || rdev == this) &&
-                   rdev->raid_disk >= 0 &&
-                   !test_bit(Blocked, &rdev->flags) &&
-                   test_bit(Faulty, &rdev->flags) &&
-                   atomic_read(&rdev->nr_pending)==0) {
-                       /* Faulty non-Blocked devices with nr_pending == 0
-                        * never get nr_pending incremented,
-                        * never get Faulty cleared, and never get Blocked set.
-                        * So we can synchronize_rcu now rather than once per device
-                        */
-                       remove_some = true;
-                       set_bit(RemoveSynchronized, &rdev->flags);
-               }
-       }
-
-       if (remove_some)
-               synchronize_rcu();
-       rdev_for_each(rdev, mddev) {
-               if ((this == NULL || rdev == this) &&
-                   (test_bit(RemoveSynchronized, &rdev->flags) ||
-                    rdev_removeable(rdev))) {
-                       if (mddev->pers->hot_remove_disk(
-                                   mddev, rdev) == 0) {
-                               sysfs_unlink_rdev(mddev, rdev);
-                               rdev->saved_raid_disk = rdev->raid_disk;
-                               rdev->raid_disk = -1;
-                               removed++;
-                       }
+               if ((this == NULL || rdev == this) && rdev_removeable(rdev) &&
+                   !mddev->pers->hot_remove_disk(mddev, rdev)) {
+                       sysfs_unlink_rdev(mddev, rdev);
+                       rdev->saved_raid_disk = rdev->raid_disk;
+                       rdev->raid_disk = -1;
+                       removed++;
                }
-               if (remove_some && test_bit(RemoveSynchronized, &rdev->flags))
-                       clear_bit(RemoveSynchronized, &rdev->flags);
        }
 
        if (removed && mddev->kobj.sd)
index ade83af123a2256c7c653e73cc44bb6dfde4af83..8d881cc597992f1a2af08cb05d07081182860ebd 100644 (file)
@@ -190,11 +190,6 @@ enum flag_bits {
                                 * than other devices in the array
                                 */
        ClusterRemove,
-       RemoveSynchronized,     /* synchronize_rcu() was called after
-                                * this device was known to be faulty,
-                                * so it is safe to remove without
-                                * another synchronize_rcu() call.
-                                */
        ExternalBbl,            /* External metadata provides bad
                                 * block management for a disk
                                 */
index 35d12948e0a963bb0786c19b0c4ad2a5757de876..a678e0e6e102fc8d2b6e42a5f567825811338b9d 100644 (file)
@@ -1863,15 +1863,6 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                        goto abort;
                }
                p->rdev = NULL;
-               if (!test_bit(RemoveSynchronized, &rdev->flags)) {
-                       synchronize_rcu();
-                       if (atomic_read(&rdev->nr_pending)) {
-                               /* lost the race, try later */
-                               err = -EBUSY;
-                               p->rdev = rdev;
-                               goto abort;
-                       }
-               }
                if (conf->mirrors[conf->raid_disks + number].rdev) {
                        /* We just removed a device that is being replaced.
                         * Move down the replacement.  We drain all IO before
index a5927e98dc678f87917152b0d3e8a96c2fe7140d..132a795233386ba8068024bb7631621c2480a0da 100644 (file)
@@ -2247,15 +2247,6 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                goto abort;
        }
        *rdevp = NULL;
-       if (!test_bit(RemoveSynchronized, &rdev->flags)) {
-               synchronize_rcu();
-               if (atomic_read(&rdev->nr_pending)) {
-                       /* lost the race, try later */
-                       err = -EBUSY;
-                       *rdevp = rdev;
-                       goto abort;
-               }
-       }
        if (p->replacement) {
                /* We must have just cleared 'rdev' */
                p->rdev = p->replacement;
index fcc8a44dd4fdc1b05ed3dd736a895da03efcf101..d431e4625cc592bc426b3bb8aaf273680683da38 100644 (file)
@@ -8233,15 +8233,6 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
                goto abort;
        }
        *rdevp = NULL;
-       if (!test_bit(RemoveSynchronized, &rdev->flags)) {
-               lockdep_assert_held(&mddev->reconfig_mutex);
-               synchronize_rcu();
-               if (atomic_read(&rdev->nr_pending)) {
-                       /* lost the race, try later */
-                       err = -EBUSY;
-                       rcu_assign_pointer(*rdevp, rdev);
-               }
-       }
        if (!err) {
                err = log_modify(conf, rdev, false);
                if (err)