bcachefs: Improve journal free space calculations
author: Kent Overstreet <kent.overstreet@gmail.com>
Sat, 14 Nov 2020 17:29:21 +0000 (12:29 -0500)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:49 +0000 (17:08 -0400)
Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/journal.c
fs/bcachefs/journal_reclaim.c
fs/bcachefs/journal_reclaim.h
fs/bcachefs/journal_types.h

index ac2dddd90c3110955b14aef775ee87f356a3b58c..3bbb23d7739a78a89ef7cbf59f2ea2beffa55632 100644 (file)
@@ -1147,7 +1147,7 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
        }
 
        pr_buf(out,
-              "current entry:\tidx %u refcount %u\n",
+              "current entry:\t\tidx %u refcount %u\n",
               s.idx, journal_state_count(s, s.idx));
 
        i = s.idx;
@@ -1164,6 +1164,20 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
               test_bit(JOURNAL_NEED_WRITE,     &j->flags),
               test_bit(JOURNAL_REPLAY_DONE,    &j->flags));
 
+       pr_buf(out, "space:\n");
+       pr_buf(out, "\tdiscarded\t%u:%u\n",
+              j->space[journal_space_discarded].next_entry,
+              j->space[journal_space_discarded].total);
+       pr_buf(out, "\tclean ondisk\t%u:%u\n",
+              j->space[journal_space_clean_ondisk].next_entry,
+              j->space[journal_space_clean_ondisk].total);
+       pr_buf(out, "\tclean\t\t%u:%u\n",
+              j->space[journal_space_clean].next_entry,
+              j->space[journal_space_clean].total);
+       pr_buf(out, "\ttotal\t\t%u:%u\n",
+              j->space[journal_space_total].next_entry,
+              j->space[journal_space_total].total);
+
        for_each_member_device_rcu(ca, c, i,
                                   &c->rw_devs[BCH_DATA_journal]) {
                struct journal_device *ja = &ca->journal;
@@ -1174,12 +1188,13 @@ void bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
                pr_buf(out,
                       "dev %u:\n"
                       "\tnr\t\t%u\n"
+                      "\tbucket size\t%u\n"
                       "\tavailable\t%u:%u\n"
-                      "\tdiscard_idx\t\t%u\n"
-                      "\tdirty_idx_ondisk\t%u (seq %llu)\n"
-                      "\tdirty_idx\t\t%u (seq %llu)\n"
+                      "\tdiscard_idx\t%u\n"
+                      "\tdirty_ondisk\t%u (seq %llu)\n"
+                      "\tdirty_idx\t%u (seq %llu)\n"
                       "\tcur_idx\t\t%u (seq %llu)\n",
-                      i, ja->nr,
+                      i, ja->nr, ca->mi.bucket_size,
                       bch2_journal_dev_buckets_available(j, ja, journal_space_discarded),
                       ja->sectors_free,
                       ja->discard_idx,
index c50352385a474cf74a2b5cd344efeb09a5bd923c..c6267284a02807b24fd08acee8c5f8449e52f003 100644 (file)
@@ -71,84 +71,94 @@ static inline unsigned get_unwritten_sectors(struct journal *j, unsigned *idx)
        return sectors;
 }
 
-static struct journal_space {
-       unsigned        next_entry;
-       unsigned        remaining;
-} __journal_space_available(struct journal *j, unsigned nr_devs_want,
+static struct journal_space
+journal_dev_space_available(struct journal *j, struct bch_dev *ca,
                            enum journal_space_from from)
 {
-       struct bch_fs *c = container_of(j, struct bch_fs, journal);
-       struct bch_dev *ca;
-       unsigned sectors_next_entry     = UINT_MAX;
-       unsigned sectors_total          = UINT_MAX;
-       unsigned i, nr_devs = 0;
-       unsigned unwritten_sectors;
+       struct journal_device *ja = &ca->journal;
+       unsigned sectors, buckets, unwritten, idx = j->reservations.unwritten_idx;
 
-       rcu_read_lock();
-       for_each_member_device_rcu(ca, c, i,
-                                  &c->rw_devs[BCH_DATA_journal]) {
-               struct journal_device *ja = &ca->journal;
-               unsigned buckets_this_device, sectors_this_device;
-               unsigned idx = j->reservations.unwritten_idx;
+       if (from == journal_space_total)
+               return (struct journal_space) {
+                       .next_entry     = ca->mi.bucket_size,
+                       .total          = ca->mi.bucket_size * ja->nr,
+               };
 
-               if (!ja->nr)
-                       continue;
-
-               buckets_this_device = bch2_journal_dev_buckets_available(j, ja, from);
-               sectors_this_device = ja->sectors_free;
+       buckets = bch2_journal_dev_buckets_available(j, ja, from);
+       sectors = ja->sectors_free;
 
-               /*
-                * We that we don't allocate the space for a journal entry
-                * until we write it out - thus, account for it here:
-                */
-               while ((unwritten_sectors = get_unwritten_sectors(j, &idx))) {
-                       if (unwritten_sectors >= sectors_this_device) {
-                               if (!buckets_this_device) {
-                                       sectors_this_device = 0;
-                                       break;
-                               }
-
-                               buckets_this_device--;
-                               sectors_this_device = ca->mi.bucket_size;
+       /*
+        * Note that we don't allocate the space for a journal entry
+        * until we write it out - thus, account for it here:
+        */
+       while ((unwritten = get_unwritten_sectors(j, &idx))) {
+               if (unwritten >= sectors) {
+                       if (!buckets) {
+                               sectors = 0;
+                               break;
                        }
 
-                       sectors_this_device -= unwritten_sectors;
+                       buckets--;
+                       sectors = ca->mi.bucket_size;
                }
 
-               if (sectors_this_device < ca->mi.bucket_size &&
-                   buckets_this_device) {
-                       buckets_this_device--;
-                       sectors_this_device = ca->mi.bucket_size;
-               }
+               sectors -= unwritten;
+       }
+
+       if (sectors < ca->mi.bucket_size && buckets) {
+               buckets--;
+               sectors = ca->mi.bucket_size;
+       }
+
+       return (struct journal_space) {
+               .next_entry     = sectors,
+               .total          = sectors + buckets * ca->mi.bucket_size,
+       };
+}
 
-               if (!sectors_this_device)
+static struct journal_space __journal_space_available(struct journal *j, unsigned nr_devs_want,
+                           enum journal_space_from from)
+{
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct bch_dev *ca;
+       unsigned i, pos, nr_devs = 0;
+       struct journal_space space, dev_space[BCH_SB_MEMBERS_MAX];
+
+       BUG_ON(nr_devs_want > ARRAY_SIZE(dev_space));
+
+       rcu_read_lock();
+       for_each_member_device_rcu(ca, c, i,
+                                  &c->rw_devs[BCH_DATA_journal]) {
+               if (!ca->journal.nr)
                        continue;
 
-               sectors_next_entry = min(sectors_next_entry,
-                                        sectors_this_device);
+               space = journal_dev_space_available(j, ca, from);
+               if (!space.next_entry)
+                       continue;
 
-               sectors_total = min(sectors_total,
-                       buckets_this_device * ca->mi.bucket_size +
-                       sectors_this_device);
+               for (pos = 0; pos < nr_devs; pos++)
+                       if (space.total > dev_space[pos].total)
+                               break;
 
-               nr_devs++;
+               array_insert_item(dev_space, nr_devs, pos, space);
        }
        rcu_read_unlock();
 
        if (nr_devs < nr_devs_want)
                return (struct journal_space) { 0, 0 };
 
-       return (struct journal_space) {
-               .next_entry     = sectors_next_entry,
-               .remaining      = max_t(int, 0, sectors_total - sectors_next_entry),
-       };
+       /*
+        * We sorted largest to smallest, and we want the smallest out of the
+        * @nr_devs_want largest devices:
+        */
+       return dev_space[nr_devs_want - 1];
 }
 
 void bch2_journal_space_available(struct journal *j)
 {
        struct bch_fs *c = container_of(j, struct bch_fs, journal);
        struct bch_dev *ca;
-       struct journal_space discarded, clean_ondisk, clean;
+       unsigned clean;
        unsigned overhead, u64s_remaining = 0;
        unsigned max_entry_size  = min(j->buf[0].buf_size >> 9,
                                       j->buf[1].buf_size >> 9);
@@ -189,27 +199,25 @@ void bch2_journal_space_available(struct journal *j)
                goto out;
        }
 
-       if (!fifo_free(&j->pin)) {
-               ret = cur_entry_journal_pin_full;
-               goto out;
-       }
-
        nr_devs_want = min_t(unsigned, nr_online, c->opts.metadata_replicas);
 
-       discarded       = __journal_space_available(j, nr_devs_want, journal_space_discarded);
-       clean_ondisk    = __journal_space_available(j, nr_devs_want, journal_space_clean_ondisk);
-       clean           = __journal_space_available(j, nr_devs_want, journal_space_clean);
+       for (i = 0; i < journal_space_nr; i++)
+               j->space[i] = __journal_space_available(j, nr_devs_want, i);
 
-       if (!discarded.next_entry)
+       clean           = j->space[journal_space_clean].total;
+
+       if (!j->space[journal_space_discarded].next_entry)
                ret = cur_entry_journal_full;
+       else if (!fifo_free(&j->pin))
+               ret = cur_entry_journal_pin_full;
 
-       overhead = DIV_ROUND_UP(clean.remaining, max_entry_size) *
+       overhead = DIV_ROUND_UP(clean, max_entry_size) *
                journal_entry_overhead(j);
-       u64s_remaining = clean.remaining << 6;
+       u64s_remaining = clean << 6;
        u64s_remaining = max_t(int, 0, u64s_remaining - overhead);
        u64s_remaining /= 4;
 out:
-       j->cur_entry_sectors    = !ret ? discarded.next_entry : 0;
+       j->cur_entry_sectors    = !ret ? j->space[journal_space_discarded].next_entry : 0;
        j->cur_entry_error      = ret;
        journal_set_remaining(j, u64s_remaining);
        journal_check_may_get_unreserved(j);
index b0f05839396d7b29744241f1fa43eb9176c8a1fa..f02caa3d49ea74daf97d1054cc3ddbfba250d254 100644 (file)
@@ -4,12 +4,6 @@
 
 #define JOURNAL_PIN    (32 * 1024)
 
-enum journal_space_from {
-       journal_space_discarded,
-       journal_space_clean_ondisk,
-       journal_space_clean,
-};
-
 static inline void journal_reclaim_kick(struct journal *j)
 {
        struct task_struct *p = READ_ONCE(j->reclaim_thread);
index ec19f75f8ede740e97be6f0d2dfa14888a85b366..6b525dc6ab7cae73a7b5b731a244fbb083d3e051 100644 (file)
@@ -9,8 +9,6 @@
 #include "super_types.h"
 #include "fifo.h"
 
-struct journal_res;
-
 #define JOURNAL_BUF_BITS       2
 #define JOURNAL_BUF_NR         (1U << JOURNAL_BUF_BITS)
 #define JOURNAL_BUF_MASK       (JOURNAL_BUF_NR - 1)
@@ -122,6 +120,20 @@ union journal_preres_state {
 #define JOURNAL_ENTRY_CLOSED_VAL       (JOURNAL_ENTRY_OFFSET_MAX - 1)
 #define JOURNAL_ENTRY_ERROR_VAL                (JOURNAL_ENTRY_OFFSET_MAX)
 
+struct journal_space {
+       /* Units of 512 bytes sectors: */
+       unsigned        next_entry; /* How big the next journal entry can be */
+       unsigned        total; /* Sectors available overall; next_entry is counted in this */
+};
+
+enum journal_space_from {
+       journal_space_discarded,
+       journal_space_clean_ondisk,
+       journal_space_clean,
+       journal_space_total, /* Per device: bucket_size * journal_device.nr, usage ignored */
+       journal_space_nr, /* Count of the kinds above; sizes journal.space[] */
+};
+
 /*
  * JOURNAL_NEED_WRITE - current (pending) journal entry should be written ASAP,
  * either because something's waiting on the write to complete or because it's
@@ -216,6 +228,8 @@ struct journal {
                struct journal_entry_pin_list *data;
        }                       pin;
 
+       struct journal_space    space[journal_space_nr];
+
        u64                     replay_journal_seq;
        u64                     replay_journal_seq_end;