md/raid10: convert resync_lock to use seqlock
author Yu Kuai <yukuai3@huawei.com>
Fri, 16 Sep 2022 11:34:28 +0000 (19:34 +0800)
committer Song Liu <song@kernel.org>
Thu, 22 Sep 2022 07:05:05 +0000 (00:05 -0700)
Currently, wait_barrier() holds 'resync_lock' to read 'conf->barrier',
and I/O can't be dispatched until the barrier is dropped.

Since raising the barrier is uncommon, convert 'resync_lock' to a
seqlock so that taking the lock can be avoided in the fast path.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
Reviewed-and-Tested-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Song Liu <song@kernel.org>
drivers/md/raid10.c
drivers/md/raid10.h
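
For context, the fast path this patch introduces (wait_barrier_nolock()
in the diff below) is the classic seqlock read-retry pattern. Here is a
self-contained userspace sketch of that pattern, modelled with C11
atomics rather than the kernel's seqlock_t; every name in it
(try_fast_path, raise_barrier_slow, seq, barrier_cnt, nr_pending) is
illustrative, not the kernel's, and the memory ordering (seq_cst
throughout) is deliberately stronger and simpler than a real seqlock's:

/*
 * Userspace sketch of the seqlock read-retry pattern used by
 * wait_barrier_nolock(). Illustrative only; not kernel code.
 */
#include <stdatomic.h>
#include <stdbool.h>

static atomic_uint seq;          /* even: stable, odd: writer active */
static atomic_int barrier_cnt;   /* models conf->barrier */
static atomic_int nr_pending;    /* models conf->nr_pending */

/* Writer side: models raise_barrier() taking the seqlock write side. */
static void raise_barrier_slow(void)
{
        atomic_fetch_add(&seq, 1);         /* seq becomes odd */
        atomic_fetch_add(&barrier_cnt, 1);
        atomic_fetch_add(&seq, 1);         /* seq even again */
}

/* Reader side: models wait_barrier_nolock(). */
static bool try_fast_path(void)
{
        unsigned int start = atomic_load(&seq);

        /* Writer active or barrier raised: give up, take slow path. */
        if ((start & 1) || atomic_load(&barrier_cnt))
                return false;

        atomic_fetch_add(&nr_pending, 1);  /* optimistic increment */

        if (atomic_load(&seq) == start)
                return true;               /* no writer raced us */

        /* Raced with a writer: undo the increment, fall back to lock. */
        atomic_fetch_sub(&nr_pending, 1);
        return false;
}

Two simplifications worth noting: the kernel's read_seqbegin() spins
until the sequence count is even, whereas bailing out to the slow path
is equivalent for a try-operation like this; and the real code wakes
waiters via wake_up_barrier() when the undone increment drops
nr_pending to zero. This pairing is also why the patch switches the
writers to WRITE_ONCE(conf->barrier, ...): the READ_ONCE() in
wait_barrier_nolock() inspects 'barrier' without holding any lock.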

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 2970a73d9f5ccafcbcb1b8c1486a4239a060bac4..58c711912875d366fc1d1b3781ddf19375c61297 100644
@@ -79,6 +79,21 @@ static void end_reshape(struct r10conf *conf);
 
 #include "raid1-10.c"
 
+#define NULL_CMD
+#define cmd_before(conf, cmd) \
+       do { \
+               write_sequnlock_irq(&(conf)->resync_lock); \
+               cmd; \
+       } while (0)
+#define cmd_after(conf) write_seqlock_irq(&(conf)->resync_lock)
+
+#define wait_event_barrier_cmd(conf, cond, cmd) \
+       wait_event_cmd((conf)->wait_barrier, cond, cmd_before(conf, cmd), \
+                      cmd_after(conf))
+
+#define wait_event_barrier(conf, cond) \
+       wait_event_barrier_cmd(conf, cond, NULL_CMD)
+
 /*
  * for resync bio, r10bio pointer can be retrieved from the per-bio
  * 'struct resync_pages'.
@@ -936,30 +951,29 @@ static void flush_pending_writes(struct r10conf *conf)
 
 static void raise_barrier(struct r10conf *conf, int force)
 {
-       spin_lock_irq(&conf->resync_lock);
+       write_seqlock_irq(&conf->resync_lock);
        BUG_ON(force && !conf->barrier);
 
        /* Wait until no block IO is waiting (unless 'force') */
-       wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
-                           conf->resync_lock);
+       wait_event_barrier(conf, force || !conf->nr_waiting);
 
        /* block any new IO from starting */
-       conf->barrier++;
+       WRITE_ONCE(conf->barrier, conf->barrier + 1);
 
        /* Now wait for all pending IO to complete */
-       wait_event_lock_irq(conf->wait_barrier,
-                           !atomic_read(&conf->nr_pending) && conf->barrier < RESYNC_DEPTH,
-                           conf->resync_lock);
+       wait_event_barrier(conf, !atomic_read(&conf->nr_pending) &&
+                                conf->barrier < RESYNC_DEPTH);
 
-       spin_unlock_irq(&conf->resync_lock);
+       write_sequnlock_irq(&conf->resync_lock);
 }
 
 static void lower_barrier(struct r10conf *conf)
 {
        unsigned long flags;
-       spin_lock_irqsave(&conf->resync_lock, flags);
-       conf->barrier--;
-       spin_unlock_irqrestore(&conf->resync_lock, flags);
+
+       write_seqlock_irqsave(&conf->resync_lock, flags);
+       WRITE_ONCE(conf->barrier, conf->barrier - 1);
+       write_sequnlock_irqrestore(&conf->resync_lock, flags);
        wake_up(&conf->wait_barrier);
 }
 
@@ -990,11 +1004,31 @@ static bool stop_waiting_barrier(struct r10conf *conf)
        return false;
 }
 
+static bool wait_barrier_nolock(struct r10conf *conf)
+{
+       unsigned int seq = read_seqbegin(&conf->resync_lock);
+
+       if (READ_ONCE(conf->barrier))
+               return false;
+
+       atomic_inc(&conf->nr_pending);
+       if (!read_seqretry(&conf->resync_lock, seq))
+               return true;
+
+       if (atomic_dec_and_test(&conf->nr_pending))
+               wake_up_barrier(conf);
+
+       return false;
+}
+
 static bool wait_barrier(struct r10conf *conf, bool nowait)
 {
        bool ret = true;
 
-       spin_lock_irq(&conf->resync_lock);
+       if (wait_barrier_nolock(conf))
+               return true;
+
+       write_seqlock_irq(&conf->resync_lock);
        if (conf->barrier) {
                /* Return false when nowait flag is set */
                if (nowait) {
@@ -1002,9 +1036,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
                } else {
                        conf->nr_waiting++;
                        raid10_log(conf->mddev, "wait barrier");
-                       wait_event_lock_irq(conf->wait_barrier,
-                                           stop_waiting_barrier(conf),
-                                           conf->resync_lock);
+                       wait_event_barrier(conf, stop_waiting_barrier(conf));
                        conf->nr_waiting--;
                }
                if (!conf->nr_waiting)
@@ -1013,7 +1045,7 @@ static bool wait_barrier(struct r10conf *conf, bool nowait)
        /* Only increment nr_pending when we wait */
        if (ret)
                atomic_inc(&conf->nr_pending);
-       spin_unlock_irq(&conf->resync_lock);
+       write_sequnlock_irq(&conf->resync_lock);
        return ret;
 }
 
@@ -1038,27 +1070,24 @@ static void freeze_array(struct r10conf *conf, int extra)
         * must match the number of pending IOs (nr_pending) before
         * we continue.
         */
-       spin_lock_irq(&conf->resync_lock);
+       write_seqlock_irq(&conf->resync_lock);
        conf->array_freeze_pending++;
-       conf->barrier++;
+       WRITE_ONCE(conf->barrier, conf->barrier + 1);
        conf->nr_waiting++;
-       wait_event_lock_irq_cmd(conf->wait_barrier,
-                               atomic_read(&conf->nr_pending) == conf->nr_queued+extra,
-                               conf->resync_lock,
-                               flush_pending_writes(conf));
-
+       wait_event_barrier_cmd(conf, atomic_read(&conf->nr_pending) ==
+                       conf->nr_queued + extra, flush_pending_writes(conf));
        conf->array_freeze_pending--;
-       spin_unlock_irq(&conf->resync_lock);
+       write_sequnlock_irq(&conf->resync_lock);
 }
 
 static void unfreeze_array(struct r10conf *conf)
 {
        /* reverse the effect of the freeze */
-       spin_lock_irq(&conf->resync_lock);
-       conf->barrier--;
+       write_seqlock_irq(&conf->resync_lock);
+       WRITE_ONCE(conf->barrier, conf->barrier - 1);
        conf->nr_waiting--;
        wake_up(&conf->wait_barrier);
-       spin_unlock_irq(&conf->resync_lock);
+       write_sequnlock_irq(&conf->resync_lock);
 }
 
 static sector_t choose_data_offset(struct r10bio *r10_bio,
@@ -4045,7 +4074,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
        INIT_LIST_HEAD(&conf->retry_list);
        INIT_LIST_HEAD(&conf->bio_end_io_list);
 
-       spin_lock_init(&conf->resync_lock);
+       seqlock_init(&conf->resync_lock);
        init_waitqueue_head(&conf->wait_barrier);
        atomic_set(&conf->nr_pending, 0);
 
@@ -4364,7 +4393,7 @@ static void *raid10_takeover_raid0(struct mddev *mddev, sector_t size, int devs)
                                rdev->new_raid_disk = rdev->raid_disk * 2;
                                rdev->sectors = size;
                        }
-               conf->barrier = 1;
+               WRITE_ONCE(conf->barrier, 1);
        }
 
        return conf;
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 5c0804d8bb1fe7e37b1e467ba05582ccfb625f17..8c072ce0bc542038837ed5640afe475db18b51ba 100644
@@ -76,7 +76,7 @@ struct r10conf {
        /* queue pending writes and submit them on unplug */
        struct bio_list         pending_bio_list;
 
-       spinlock_t              resync_lock;
+       seqlock_t               resync_lock;
        atomic_t                nr_pending;
        int                     nr_waiting;
        int                     nr_queued;
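
As a quick sanity check, the userspace sketch above can be exercised
single-threaded (append this to the same file; the names remain
hypothetical): before the barrier is raised the fast path succeeds,
and afterwards it refuses, forcing the caller onto the write-locked
slow path, which mirrors what wait_barrier() does after this patch.

#include <assert.h>
#include <stdio.h>

int main(void)
{
        assert(try_fast_path());           /* no barrier: fast path wins */
        atomic_fetch_sub(&nr_pending, 1);  /* models allow_barrier() */

        raise_barrier_slow();              /* resync raises the barrier */
        assert(!try_fast_path());          /* I/O must take the slow path */

        puts("fast path blocked while the barrier is held");
        return 0;
}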