bcachefs: data jobs, including rebalance, wait for copygc.
author: Daniel Hill <daniel@gluo.nz>
Wed, 15 Jun 2022 14:06:43 +0000 (02:06 +1200)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:35 +0000 (17:09 -0400)
move_ratelimit() now has a bool that specifies whether we want to
wait for copygc to finish.

When copygc is running, we're probably low on free buckets. Instead
of consuming the remaining buckets, we want to wait for copygc to
finish.

This should help with performance and reduce runaway bucket fragmentation.

Signed-off-by: Daniel Hill <daniel@gluo.nz>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs.h
fs/bcachefs/move.c
fs/bcachefs/move.h
fs/bcachefs/movinggc.c
fs/bcachefs/rebalance.c

index 127323b677df4277893adc8bcaab44308b46af57..c07ea9af561d8cf84fbe55d620b787c2a9196903 100644 (file)
@@ -825,6 +825,8 @@ mempool_t           bio_bounce_pages;
        copygc_heap             copygc_heap;
        struct write_point      copygc_write_point;
        s64                     copygc_wait;
+       bool                    copygc_running;
+       wait_queue_head_t       copygc_running_wq;
 
        /* DATA PROGRESS STATS */
        struct list_head        data_progress_list;
index 4060678cf7168b866bb56287e4aba34be3b104c1..fad15ba7d239b6732443ca8481d314c24578bbca 100644 (file)
@@ -237,24 +237,72 @@ err:
        return ret;
 }
 
+static int move_ratelimit(struct btree_trans *trans,
+                         struct moving_context *ctxt,
+                         struct bch_ratelimit *rate,
+                         bool wait_on_copygc)
+{
+       struct bch_fs *c = trans->c;
+       u64 delay;
+
+       if (wait_on_copygc) {
+               bch2_trans_unlock(trans);
+               wait_event_killable(c->copygc_running_wq,
+                                   !c->copygc_running ||
+                                   kthread_should_stop());
+       }
+
+       do {
+               delay = rate ? bch2_ratelimit_delay(rate) : 0;
+
+               if (delay) {
+                       bch2_trans_unlock(trans);
+                       set_current_state(TASK_INTERRUPTIBLE);
+               }
+
+               if ((current->flags & PF_KTHREAD) && kthread_should_stop()) {
+                       __set_current_state(TASK_RUNNING);
+                       return 1;
+               }
+
+               if (delay)
+                       schedule_timeout(delay);
+
+               if (unlikely(freezing(current))) {
+                       move_ctxt_wait_event(ctxt, trans, list_empty(&ctxt->reads));
+                       try_to_freeze();
+               }
+       } while (delay);
+
+       move_ctxt_wait_event(ctxt, trans,
+               atomic_read(&ctxt->write_sectors) <
+               c->opts.move_bytes_in_flight >> 9);
+
+       move_ctxt_wait_event(ctxt, trans,
+               atomic_read(&ctxt->read_sectors) <
+               c->opts.move_bytes_in_flight >> 9);
+
+       return 0;
+}
+
 static int __bch2_move_data(struct bch_fs *c,
-               struct moving_context *ctxt,
-               struct bch_ratelimit *rate,
-               struct write_point_specifier wp,
-               struct bpos start,
-               struct bpos end,
-               move_pred_fn pred, void *arg,
-               struct bch_move_stats *stats,
-               enum btree_id btree_id)
+                           struct moving_context *ctxt,
+                           struct bch_ratelimit *rate,
+                           struct write_point_specifier wp,
+                           struct bpos start,
+                           struct bpos end,
+                           move_pred_fn pred, void *arg,
+                           struct bch_move_stats *stats,
+                           enum btree_id btree_id,
+                           bool wait_on_copygc)
 {
-       bool kthread = (current->flags & PF_KTHREAD) != 0;
        struct bch_io_opts io_opts = bch2_opts_to_inode_opts(c->opts);
        struct bkey_buf sk;
        struct btree_trans trans;
        struct btree_iter iter;
        struct bkey_s_c k;
        struct data_update_opts data_opts;
-       u64 delay, cur_inum = U64_MAX;
+       u64 cur_inum = U64_MAX;
        int ret = 0, ret2;
 
        bch2_bkey_buf_init(&sk);
@@ -271,37 +319,7 @@ static int __bch2_move_data(struct bch_fs *c,
        if (rate)
                bch2_ratelimit_reset(rate);
 
-       while (1) {
-               do {
-                       delay = rate ? bch2_ratelimit_delay(rate) : 0;
-
-                       if (delay) {
-                               bch2_trans_unlock(&trans);
-                               set_current_state(TASK_INTERRUPTIBLE);
-                       }
-
-                       if (kthread && (ret = kthread_should_stop())) {
-                               __set_current_state(TASK_RUNNING);
-                               goto out;
-                       }
-
-                       if (delay)
-                               schedule_timeout(delay);
-
-                       if (unlikely(freezing(current))) {
-                               move_ctxt_wait_event(ctxt, &trans, list_empty(&ctxt->reads));
-                               try_to_freeze();
-                       }
-               } while (delay);
-
-               move_ctxt_wait_event(ctxt, &trans,
-                       atomic_read(&ctxt->write_sectors) <
-                       c->opts.move_bytes_in_flight >> 9);
-
-               move_ctxt_wait_event(ctxt, &trans,
-                       atomic_read(&ctxt->read_sectors) <
-                       c->opts.move_bytes_in_flight >> 9);
-
+       while (!move_ratelimit(&trans, ctxt, rate, wait_on_copygc)) {
                bch2_trans_begin(&trans);
 
                k = bch2_btree_iter_peek(&iter);
@@ -374,7 +392,6 @@ next:
 next_nondata:
                bch2_btree_iter_advance(&iter);
        }
-out:
 
        bch2_trans_iter_exit(&trans, &iter);
        bch2_trans_exit(&trans);
@@ -413,7 +430,8 @@ int bch2_move_data(struct bch_fs *c,
                   struct bch_ratelimit *rate,
                   struct write_point_specifier wp,
                   move_pred_fn pred, void *arg,
-                  struct bch_move_stats *stats)
+                  struct bch_move_stats *stats,
+                  bool wait_on_copygc)
 {
        struct moving_context ctxt = { .stats = stats };
        enum btree_id id;
@@ -438,7 +456,7 @@ int bch2_move_data(struct bch_fs *c,
                ret = __bch2_move_data(c, &ctxt, rate, wp,
                                       id == start_btree_id ? start_pos : POS_MIN,
                                       id == end_btree_id   ? end_pos   : POS_MAX,
-                                      pred, arg, stats, id);
+                                      pred, arg, stats, id, wait_on_copygc);
                if (ret)
                        break;
        }
@@ -675,7 +693,7 @@ int bch2_data_job(struct bch_fs *c,
                                     op.start_btree,    op.start_pos,
                                     op.end_btree,      op.end_pos,
                                     NULL, writepoint_hashed((unsigned long) current),
-                                    rereplicate_pred, c, stats) ?: ret;
+                                    rereplicate_pred, c, stats, true) ?: ret;
                ret = bch2_replicas_gc2(c) ?: ret;
                break;
        case BCH_DATA_OP_MIGRATE:
@@ -696,7 +714,7 @@ int bch2_data_job(struct bch_fs *c,
                                     op.start_btree,    op.start_pos,
                                     op.end_btree,      op.end_pos,
                                     NULL, writepoint_hashed((unsigned long) current),
-                                    migrate_pred, &op, stats) ?: ret;
+                                    migrate_pred, &op, stats, true) ?: ret;
                ret = bch2_replicas_gc2(c) ?: ret;
                break;
        case BCH_DATA_OP_REWRITE_OLD_NODES:
index fd5562909382c76e296dad4b1941309732f90bca..d362cb545c0b357274fe80ec0b40701b9738b037 100644 (file)
@@ -35,7 +35,8 @@ int bch2_move_data(struct bch_fs *,
                   struct bch_ratelimit *,
                   struct write_point_specifier,
                   move_pred_fn, void *,
-                  struct bch_move_stats *);
+                  struct bch_move_stats *,
+                  bool);
 
 int bch2_data_job(struct bch_fs *,
                  struct bch_move_stats *,
index d63b9fea4f0536a4ed4dccdc5fd7aa7a089a08ce..8b6ad9ec72af404b12898f96ba8ee5b0ce14f460 100644 (file)
@@ -316,7 +316,8 @@ static int bch2_copygc(struct bch_fs *c)
                             NULL,
                             writepoint_ptr(&c->copygc_write_point),
                             copygc_pred, NULL,
-                            &move_stats);
+                            &move_stats,
+                            false);
        if (ret < 0)
                bch_err(c, "error %i from bch2_move_data() in copygc", ret);
        if (ret)
@@ -381,10 +382,11 @@ static int bch2_copygc_thread(void *arg)
        struct bch_fs *c = arg;
        struct io_clock *clock = &c->io_clock[WRITE];
        u64 last, wait;
+       int ret = 0;
 
        set_freezable();
 
-       while (!kthread_should_stop()) {
+       while (!ret && !kthread_should_stop()) {
                cond_resched();
 
                if (kthread_wait_freezable(c->copy_gc_enabled))
@@ -403,8 +405,11 @@ static int bch2_copygc_thread(void *arg)
 
                c->copygc_wait = 0;
 
-               if (bch2_copygc(c))
-                       break;
+               c->copygc_running = true;
+               ret = bch2_copygc(c);
+               c->copygc_running = false;
+
+               wake_up(&c->copygc_running_wq);
        }
 
        return 0;
@@ -448,4 +453,6 @@ int bch2_copygc_start(struct bch_fs *c)
 
 void bch2_fs_copygc_init(struct bch_fs *c)
 {
+       init_waitqueue_head(&c->copygc_running_wq);
+       c->copygc_running = false;
 }
index 63b24dc9c917f3164631dc2f86add9a1bbc66a03..57082260fc0023e7b9c6f726130927a737c1eeff 100644 (file)
@@ -255,7 +255,7 @@ static int bch2_rebalance_thread(void *arg)
                               NULL, /*  &r->pd.rate, */
                               writepoint_ptr(&c->rebalance_write_point),
                               rebalance_pred, NULL,
-                              &move_stats);
+                              &move_stats, true);
        }
 
        return 0;