bcachefs: Better calculation of copygc threshold
author Kent Overstreet <kent.overstreet@gmail.com>
Wed, 1 Aug 2018 18:26:55 +0000 (14:26 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:08 +0000 (17:08 -0400)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc.c
fs/bcachefs/bcachefs.h
fs/bcachefs/movinggc.c
fs/bcachefs/sysfs.c

diff --git a/fs/bcachefs/alloc.c b/fs/bcachefs/alloc.c
index 192ab655ca238e992498e3956bb77cd80b1f2bec..bde22df251349067aa2543a4e014ad2a72cfab4d 100644
--- a/fs/bcachefs/alloc.c
+++ b/fs/bcachefs/alloc.c
@@ -1711,7 +1711,7 @@ void bch2_alloc_sectors_done(struct bch_fs *c, struct write_point *wp)
 void bch2_recalc_capacity(struct bch_fs *c)
 {
        struct bch_dev *ca;
-       u64 total_capacity, capacity = 0, reserved_sectors = 0;
+       u64 capacity = 0, reserved_sectors = 0;
        unsigned long ra_pages = 0;
        unsigned i, j;
 
@@ -1726,7 +1726,7 @@ void bch2_recalc_capacity(struct bch_fs *c)
        bch2_set_ra_pages(c, ra_pages);
 
        for_each_rw_member(ca, c, i) {
-               size_t reserve = 0;
+               u64 dev_capacity, dev_reserve = 0;
 
                /*
                 * We need to reserve buckets (from the number
@@ -1745,30 +1745,40 @@ void bch2_recalc_capacity(struct bch_fs *c)
                 * not -ENOSPC calculations.
                 */
                for (j = 0; j < RESERVE_NONE; j++)
-                       reserve += ca->free[j].size;
+                       dev_reserve += ca->free[j].size;
 
-               reserve += ca->free_inc.size;
+               dev_reserve += ca->free_inc.size;
 
-               reserve += ARRAY_SIZE(c->write_points);
+               dev_reserve += ARRAY_SIZE(c->write_points);
 
-               reserve += 1;   /* btree write point */
+               dev_reserve += 1;       /* btree write point */
+               dev_reserve += 1;       /* copygc write point */
+               dev_reserve += 1;       /* rebalance write point */
+               dev_reserve += WRITE_POINT_COUNT;
 
-               reserved_sectors += bucket_to_sector(ca, reserve);
+               dev_reserve *= ca->mi.bucket_size;
 
-               capacity += bucket_to_sector(ca, ca->mi.nbuckets -
-                                            ca->mi.first_bucket);
-       }
+               dev_reserve *= 2;
+
+               dev_capacity = bucket_to_sector(ca, ca->mi.nbuckets -
+                                               ca->mi.first_bucket);
 
-       total_capacity = capacity;
+               ca->copygc_threshold =
+                       max(div64_u64(dev_capacity *
+                                     c->opts.gc_reserve_percent, 100),
+                           dev_reserve) / 2;
 
-       capacity *= (100 - c->opts.gc_reserve_percent);
-       capacity = div64_u64(capacity, 100);
+               capacity += dev_capacity;
+               reserved_sectors += dev_reserve;
+       }
 
-       BUG_ON(reserved_sectors > total_capacity);
+       reserved_sectors = max(div64_u64(capacity *
+                                        c->opts.gc_reserve_percent, 100),
+                              reserved_sectors);
 
-       capacity = min(capacity, total_capacity - reserved_sectors);
+       BUG_ON(reserved_sectors > capacity);
 
-       c->capacity = capacity;
+       c->capacity = capacity - reserved_sectors;
 
        if (c->capacity) {
                bch2_io_timer_add(&c->io_clock[READ],
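
To make the new per-device math concrete: the reserve is accumulated in buckets, converted to sectors, doubled, and the copygc threshold becomes half of the larger of the percentage-based reserve and that bucket-based reserve. Below is a standalone sketch of that arithmetic (not part of the patch; the device geometry, freelist sizes, and the gc_reserve_percent value of 8 are all assumptions for illustration):

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint64_t max_u64(uint64_t a, uint64_t b)
    {
            return a > b ? a : b;
    }

    int main(void)
    {
            /* assumed geometry: 512-byte sectors, 1024-sector buckets */
            uint64_t bucket_size            = 1024;
            uint64_t nbuckets               = 2000000;
            uint64_t first_bucket           = 16;
            uint64_t gc_reserve_percent     = 8;    /* assumed default */

            /* reserve in buckets: freelists, free_inc, write points (invented) */
            uint64_t dev_reserve_buckets    = 512 + 512 + 32 + 1 + 1 + 1 + 32;

            uint64_t dev_reserve    = dev_reserve_buckets * bucket_size * 2;
            uint64_t dev_capacity   = (nbuckets - first_bucket) * bucket_size;

            uint64_t copygc_threshold =
                    max_u64(dev_capacity * gc_reserve_percent / 100,
                            dev_reserve) / 2;

            printf("dev_capacity      %" PRIu64 " sectors\n", dev_capacity);
            printf("dev_reserve       %" PRIu64 " sectors\n", dev_reserve);
            printf("copygc_threshold  %" PRIu64 " sectors\n", copygc_threshold);
            return 0;
    }

With these numbers the percentage term dominates, so the threshold works out to half the gc reserve (roughly 4% of the device), matching the comment removed from movinggc.c below: copygc doesn't start until less than half the gc reserve is available.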
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index 8dd96a2de1a303b2da6e134a450dcbbdb16704ab..a9ac68c1753359c5508835f7f7d96f599e034f0e 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -427,6 +427,7 @@ struct bch_dev {
        copygc_heap             copygc_heap;
        struct bch_pd_controller copygc_pd;
        struct write_point      copygc_write_point;
+       u64                     copygc_threshold;
 
        atomic64_t              rebalance_work;
 
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 8b61b163faf5f8ecef3a8c304e43f7c7c2849194..26b8e95db1db72aacf17b547d46ad6f004d06218 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -228,16 +228,10 @@ static int bch2_copygc_thread(void *arg)
 
                last = atomic_long_read(&clock->now);
 
-               reserve = div64_u64((ca->mi.nbuckets - ca->mi.first_bucket) *
-                                ca->mi.bucket_size *
-                                c->opts.gc_reserve_percent, 200);
+               reserve = ca->copygc_threshold;
 
                usage = bch2_dev_usage_read(c, ca);
 
-               /*
-                * don't start copygc until less than half the gc reserve is
-                * available:
-                */
                available = __dev_buckets_available(ca, usage) *
                        ca->mi.bucket_size;
                if (available > reserve) {
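
The removed comment's intent is preserved by the precomputed ca->copygc_threshold, which already carries the "half the gc reserve" factor. The decision the thread makes reduces to the comparison sketched here (an illustrative restating, not kernel code; should_run_copygc and its parameters are invented names):

    #include <stdbool.h>
    #include <stdint.h>

    /*
     * Illustrative helper: decide whether copygc should run, given the
     * number of buckets currently available and the per-device threshold
     * (availability and threshold compared in sectors).
     */
    static bool should_run_copygc(uint64_t buckets_available,
                                  uint64_t bucket_size,
                                  uint64_t copygc_threshold)
    {
            uint64_t available_sectors = buckets_available * bucket_size;

            /* the thread sleeps while available > reserve; otherwise it runs */
            return available_sectors <= copygc_threshold;
    }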
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index 582e281694a9a250bc16d94c3250885ef8be1533..a472e454099b40e470ccea2e4b9476a631580f55 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -788,6 +788,8 @@ static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
                "    meta:               %llu\n"
                "    user:               %llu\n"
                "    cached:             %llu\n"
+               "    fragmented:         %llu\n"
+               "    copygc threshold:   %llu\n"
                "freelist_wait:          %s\n"
                "open buckets:           %u/%u (reserved %u)\n"
                "open_buckets_wait:      %s\n",
@@ -808,6 +810,8 @@ static ssize_t show_dev_alloc_debug(struct bch_dev *ca, char *buf)
                stats.sectors[BCH_DATA_BTREE],
                stats.sectors[BCH_DATA_USER],
                stats.sectors[BCH_DATA_CACHED],
+               stats.sectors_fragmented,
+               ca->copygc_threshold,
                c->freelist_wait.list.first             ? "waiting" : "empty",
                c->open_buckets_nr_free, OPEN_BUCKETS_COUNT, BTREE_NODE_RESERVE,
                c->open_buckets_wait.list.first         ? "waiting" : "empty");
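
For reference, with these two format-string additions the device's alloc_debug output gains "fragmented" and "copygc threshold" lines alongside the existing sector counts; a trimmed, made-up sample of that portion (values invented, only the field names come from the patch):

        user:               52428800
        cached:             1048576
        fragmented:         2097152
        copygc threshold:   81919344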