btrfs: raid56: switch write path to rmw_rbio()

author Qu Wenruo <wqu@suse.com>

Tue, 1 Nov 2022 11:16:09 +0000 (19:16 +0800)

committer David Sterba <dsterba@suse.com>

Mon, 5 Dec 2022 17:00:49 +0000 (18:00 +0100)
author Qu Wenruo <wqu@suse.com>
Tue, 1 Nov 2022 11:16:09 +0000 (19:16 +0800)
committer David Sterba <dsterba@suse.com>
Mon, 5 Dec 2022 17:00:49 +0000 (18:00 +0100)
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c

index 8fd633f01d9eea728dd3b6bf5031afbcc03effa2..ffedbfde95e0d797276a30e13934a54449dd8791 100644 (file)
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -64,9 +64,9 @@ struct sector_ptr {
         unsigned int uptodate:8;
  };
  
-static int __raid56_parity_recover(struct btrfs_raid_bio *rbio);
  static noinline void finish_rmw(struct btrfs_raid_bio *rbio);
-static void rmw_work(struct work_struct *work);
+static void rmw_rbio_work(struct work_struct *work);
+static void rmw_rbio_work_locked(struct work_struct *work);
  static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio);
  static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed);
  static void index_rbio_pages(struct btrfs_raid_bio *rbio);
@@ -816,7 +816,7 @@ static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
                                 start_async_work(next, recover_rbio_work_locked);
                         } else if (next->operation == BTRFS_RBIO_WRITE) {
                                 steal_rbio(rbio, next);
-                               start_async_work(next, rmw_work);
+                               start_async_work(next, rmw_rbio_work_locked);
                         } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
                                 steal_rbio(rbio, next);
                                 start_async_work(next, scrub_parity_work);
@@ -1108,23 +1108,6 @@ static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
         return 0;
  }
  
-/*
- * while we're doing the read/modify/write cycle, we could
- * have errors in reading pages off the disk.  This checks
- * for errors and if we're not able to read the page it'll
- * trigger parity reconstruction.  The rmw will be finished
- * after we've reconstructed the failed stripes
- */
-static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio)
-{
-       if (rbio->faila >= 0 || rbio->failb >= 0) {
-               BUG_ON(rbio->faila == rbio->real_stripes - 1);
-               __raid56_parity_recover(rbio);
-       } else {
-               finish_rmw(rbio);
-       }
-}
-
  static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
  {
         const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
@@ -1601,31 +1584,6 @@ static void raid56_bio_end_io(struct bio *bio)
                            &rbio->end_io_work);
  }
  
-/*
- * End io handler for the read phase of the RMW cycle.  All the bios here are
- * physical stripe bios we've read from the disk so we can recalculate the
- * parity of the stripe.
- *
- * This will usually kick off finish_rmw once all the bios are read in, but it
- * may trigger parity reconstruction if we had any errors along the way
- */
-static void raid56_rmw_end_io_work(struct work_struct *work)
-{
-       struct btrfs_raid_bio *rbio =
-               container_of(work, struct btrfs_raid_bio, end_io_work);
-
-       if (atomic_read(&rbio->error) > rbio->bioc->max_errors) {
-               rbio_orig_end_io(rbio, BLK_STS_IOERR);
-               return;
-       }
-
-       /*
-        * This will normally call finish_rmw to start our write but if there
-        * are any failed stripes we'll reconstruct from parity first.
-        */
-       validate_rbio_for_rmw(rbio);
-}
-
  static int rmw_assemble_read_bios(struct btrfs_raid_bio *rbio,
                                   struct bio_list *bio_list)
  {
@@ -1686,122 +1644,6 @@ static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio)
         return 0;
  }
  
-/*
- * the stripe must be locked by the caller.  It will
- * unlock after all the writes are done
- */
-static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio)
-{
-       int bios_to_read = 0;
-       struct bio_list bio_list;
-       int ret;
-       struct bio *bio;
-
-       bio_list_init(&bio_list);
-
-       ret = alloc_rbio_pages(rbio);
-       if (ret)
-               goto cleanup;
-
-       index_rbio_pages(rbio);
-
-       atomic_set(&rbio->error, 0);
-
-       ret = rmw_assemble_read_bios(rbio, &bio_list);
-       if (ret < 0)
-               goto cleanup;
-
-       bios_to_read = bio_list_size(&bio_list);
-       if (!bios_to_read) {
-               /*
-                * this can happen if others have merged with
-                * us, it means there is nothing left to read.
-                * But if there are missing devices it may not be
-                * safe to do the full stripe write yet.
-                */
-               goto finish;
-       }
-
-       /*
-        * The bioc may be freed once we submit the last bio. Make sure not to
-        * touch it after that.
-        */
-       atomic_set(&rbio->stripes_pending, bios_to_read);
-       INIT_WORK(&rbio->end_io_work, raid56_rmw_end_io_work);
-       while ((bio = bio_list_pop(&bio_list))) {
-               bio->bi_end_io = raid56_bio_end_io;
-
-               if (trace_raid56_read_partial_enabled()) {
-                       struct raid56_bio_trace_info trace_info = { 0 };
-
-                       bio_get_trace_info(rbio, bio, &trace_info);
-                       trace_raid56_read_partial(rbio, bio, &trace_info);
-               }
-               submit_bio(bio);
-       }
-       /* the actual write will happen once the reads are done */
-       return 0;
-
-cleanup:
-       rbio_orig_end_io(rbio, BLK_STS_IOERR);
-
-       while ((bio = bio_list_pop(&bio_list)))
-               bio_put(bio);
-
-       return -EIO;
-
-finish:
-       validate_rbio_for_rmw(rbio);
-       return 0;
-}
-
-/*
- * if the upper layers pass in a full stripe, we thank them by only allocating
- * enough pages to hold the parity, and sending it all down quickly.
- */
-static int full_stripe_write(struct btrfs_raid_bio *rbio)
-{
-       int ret;
-
-       ret = alloc_rbio_parity_pages(rbio);
-       if (ret)
-               return ret;
-
-       ret = lock_stripe_add(rbio);
-       if (ret == 0)
-               finish_rmw(rbio);
-       return 0;
-}
-
-/*
- * partial stripe writes get handed over to async helpers.
- * We're really hoping to merge a few more writes into this
- * rbio before calculating new parity
- */
-static int partial_stripe_write(struct btrfs_raid_bio *rbio)
-{
-       int ret;
-
-       ret = lock_stripe_add(rbio);
-       if (ret == 0)
-               start_async_work(rbio, rmw_work);
-       return 0;
-}
-
-/*
- * sometimes while we were reading from the drive to
- * recalculate parity, enough new bios come into create
- * a full stripe.  So we do a check here to see if we can
- * go directly to finish_rmw
- */
-static int __raid56_parity_write(struct btrfs_raid_bio *rbio)
-{
-       /* head off into rmw land if we don't have a full stripe */
-       if (!rbio_is_full(rbio))
-               return partial_stripe_write(rbio);
-       return full_stripe_write(rbio);
-}
-
  /*
   * We use plugging call backs to collect full stripes.
   * Any time we get a partial stripe write while plugged
@@ -1836,28 +1678,22 @@ static int plug_cmp(void *priv, const struct list_head *a,
         return 0;
  }
  
-static void run_plug(struct btrfs_plug_cb *plug)
+static void raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
  {
+       struct btrfs_plug_cb *plug = container_of(cb, struct btrfs_plug_cb, cb);
         struct btrfs_raid_bio *cur;
         struct btrfs_raid_bio *last = NULL;
  
-       /*
-        * sort our plug list then try to merge
-        * everything we can in hopes of creating full
-        * stripes.
-        */
         list_sort(NULL, &plug->rbio_list, plug_cmp);
+
         while (!list_empty(&plug->rbio_list)) {
                 cur = list_entry(plug->rbio_list.next,
                                  struct btrfs_raid_bio, plug_list);
                 list_del_init(&cur->plug_list);
  
                 if (rbio_is_full(cur)) {
-                       int ret;
-
-                       /* we have a full stripe, send it down */
-                       ret = full_stripe_write(cur);
-                       BUG_ON(ret);
+                       /* We have a full stripe, queue it down. */
+                       start_async_work(cur, rmw_rbio_work);
                         continue;
                 }
                 if (last) {
@@ -1865,42 +1701,16 @@ static void run_plug(struct btrfs_plug_cb *plug)
                                 merge_rbio(last, cur);
                                 free_raid_bio(cur);
                                 continue;
-
                         }
-                       __raid56_parity_write(last);
+                       start_async_work(last, rmw_rbio_work);
                 }
                 last = cur;
         }
-       if (last) {
-               __raid56_parity_write(last);
-       }
+       if (last)
+               start_async_work(last, rmw_rbio_work);
         kfree(plug);
  }
  
-/*
- * if the unplug comes from schedule, we have to push the
- * work off to a helper thread
- */
-static void unplug_work(struct work_struct *work)
-{
-       struct btrfs_plug_cb *plug;
-       plug = container_of(work, struct btrfs_plug_cb, work);
-       run_plug(plug);
-}
-
-static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
-{
-       struct btrfs_plug_cb *plug;
-       plug = container_of(cb, struct btrfs_plug_cb, cb);
-
-       if (from_schedule) {
-               INIT_WORK(&plug->work, unplug_work);
-               queue_work(plug->info->rmw_workers, &plug->work);
-               return;
-       }
-       run_plug(plug);
-}
-
  /* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */
  static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
  {
@@ -1948,19 +1758,13 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
         rbio_add_bio(rbio, bio);
  
         /*
-        * don't plug on full rbios, just get them out the door
+        * Don't plug on full rbios, just get them out the door
          * as quickly as we can
          */
-       if (rbio_is_full(rbio)) {
-               ret = full_stripe_write(rbio);
-               if (ret) {
-                       free_raid_bio(rbio);
-                       goto fail;
-               }
-               return;
-       }
+       if (rbio_is_full(rbio))
+               goto queue_rbio;
  
-       cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug));
+       cb = blk_check_plugged(raid_unplug, fs_info, sizeof(*plug));
         if (cb) {
                 plug = container_of(cb, struct btrfs_plug_cb, cb);
                 if (!plug->info) {
@@ -1968,13 +1772,14 @@ void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
                         INIT_LIST_HEAD(&plug->rbio_list);
                 }
                 list_add_tail(&rbio->plug_list, &plug->rbio_list);
-       } else {
-               ret = __raid56_parity_write(rbio);
-               if (ret) {
-                       free_raid_bio(rbio);
-                       goto fail;
-               }
+               return;
         }
+queue_rbio:
+       /*
+        * Either we don't have any existing plug, or we're doing a full stripe,
+        * so we can queue the rmw work now.
+        */
+       start_async_work(rbio, rmw_rbio_work);
  
         return;
  
@@ -2217,21 +2022,6 @@ static void __raid_recover_end_io(struct btrfs_raid_bio *rbio)
         }
  }
  
-/*
- * This is called only for stripes we've read from disk to reconstruct the
- * parity.
- */
-static void raid_recover_end_io_work(struct work_struct *work)
-{
-       struct btrfs_raid_bio *rbio =
-               container_of(work, struct btrfs_raid_bio, end_io_work);
-
-       if (atomic_read(&rbio->error) > rbio->bioc->max_errors)
-               rbio_orig_end_io(rbio, BLK_STS_IOERR);
-       else
-               __raid_recover_end_io(rbio);
-}
-
  static int recover_assemble_read_bios(struct btrfs_raid_bio *rbio,
                                       struct bio_list *bio_list)
  {
@@ -2348,79 +2138,6 @@ static void recover_rbio_work_locked(struct work_struct *work)
         rbio_orig_end_io(rbio, errno_to_blk_status(ret));
  }
  
-/*
- * reads everything we need off the disk to reconstruct
- * the parity. endio handlers trigger final reconstruction
- * when the IO is done.
- *
- * This is used both for reads from the higher layers and for
- * parity construction required to finish a rmw cycle.
- */
-static int __raid56_parity_recover(struct btrfs_raid_bio *rbio)
-{
-       int bios_to_read = 0;
-       struct bio_list bio_list;
-       int ret;
-       struct bio *bio;
-
-       bio_list_init(&bio_list);
-
-       ret = alloc_rbio_pages(rbio);
-       if (ret)
-               goto cleanup;
-
-       atomic_set(&rbio->error, 0);
-
-       ret = recover_assemble_read_bios(rbio, &bio_list);
-       if (ret < 0)
-               goto cleanup;
-
-       bios_to_read = bio_list_size(&bio_list);
-       if (!bios_to_read) {
-               /*
-                * we might have no bios to read just because the pages
-                * were up to date, or we might have no bios to read because
-                * the devices were gone.
-                */
-               if (atomic_read(&rbio->error) <= rbio->bioc->max_errors) {
-                       __raid_recover_end_io(rbio);
-                       return 0;
-               } else {
-                       goto cleanup;
-               }
-       }
-
-       /*
-        * The bioc may be freed once we submit the last bio. Make sure not to
-        * touch it after that.
-        */
-       atomic_set(&rbio->stripes_pending, bios_to_read);
-       INIT_WORK(&rbio->end_io_work, raid_recover_end_io_work);
-       while ((bio = bio_list_pop(&bio_list))) {
-               bio->bi_end_io = raid56_bio_end_io;
-
-               if (trace_raid56_scrub_read_recover_enabled()) {
-                       struct raid56_bio_trace_info trace_info = { 0 };
-
-                       bio_get_trace_info(rbio, bio, &trace_info);
-                       trace_raid56_scrub_read_recover(rbio, bio, &trace_info);
-               }
-               submit_bio(bio);
-       }
-
-       return 0;
-
-cleanup:
-       if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
-           rbio->operation == BTRFS_RBIO_REBUILD_MISSING)
-               rbio_orig_end_io(rbio, BLK_STS_IOERR);
-
-       while ((bio = bio_list_pop(&bio_list)))
-               bio_put(bio);
-
-       return -EIO;
-}
-
  /*
   * the main entry point for reads from the higher layers.  This
   * is really only called when the normal read path had a failure,
@@ -2529,7 +2246,7 @@ static void submit_write_bios(struct btrfs_raid_bio *rbio,
         }
  }
  
-int rmw_rbio(struct btrfs_raid_bio *rbio)
+static int rmw_rbio(struct btrfs_raid_bio *rbio)
  {
         struct bio_list bio_list;
         int sectornr;
@@ -2615,12 +2332,29 @@ write:
         return ret;
  }
  
-static void rmw_work(struct work_struct *work)
+static void rmw_rbio_work(struct work_struct *work)
+{
+       struct btrfs_raid_bio *rbio;
+       int ret;
+
+       rbio = container_of(work, struct btrfs_raid_bio, work);
+
+       ret = lock_stripe_add(rbio);
+       if (ret == 0) {
+               ret = rmw_rbio(rbio);
+               rbio_orig_end_io(rbio, errno_to_blk_status(ret));
+       }
+}
+
+static void rmw_rbio_work_locked(struct work_struct *work)
  {
         struct btrfs_raid_bio *rbio;
+       int ret;
  
         rbio = container_of(work, struct btrfs_raid_bio, work);
-       raid56_rmw_stripe(rbio);
+
+       ret = rmw_rbio(rbio);
+       rbio_orig_end_io(rbio, errno_to_blk_status(ret));
  }
  
  /*
diff --git a/fs/btrfs/raid56.h b/fs/btrfs/raid56.h

index 0e77c77c5dba21a7ae7d3c1254d9d0d792b02685..445e833fcfcf83a482db4465ac1bdc3624960cb9 100644 (file)
--- a/fs/btrfs/raid56.h
+++ b/fs/btrfs/raid56.h
@@ -185,9 +185,4 @@ void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio);
  int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
  void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
  
-/*
- * Placeholder definition to avoid warning, will be removed when
- * the full write path is migrated.
- */
-int rmw_rbio(struct btrfs_raid_bio *rbio);
  #endif
author	Qu Wenruo <wqu@suse.com>
	Tue, 1 Nov 2022 11:16:09 +0000 (19:16 +0800)
committer	David Sterba <dsterba@suse.com>
	Mon, 5 Dec 2022 17:00:49 +0000 (18:00 +0100)
fs/btrfs/raid56.c		patch \| blob \| history
fs/btrfs/raid56.h		patch \| blob \| history