bcachefs: Add a mechanism for blocking the journal
author Kent Overstreet <kent.overstreet@gmail.com>
Thu, 14 Feb 2019 23:38:52 +0000 (18:38 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:16 +0000 (17:08 -0400)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
13 files changed:
fs/bcachefs/alloc_foreground.c
fs/bcachefs/btree_gc.c
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/buckets_types.h
fs/bcachefs/chardev.c
fs/bcachefs/journal.c
fs/bcachefs/journal.h
fs/bcachefs/journal_types.h
fs/bcachefs/recovery.c
fs/bcachefs/replicas.c
fs/bcachefs/super-io.c
fs/bcachefs/sysfs.c

index f40fca9328f9f72df062729b4739f3824a66d650..ba0640e3f981c44fbade80d6fe28ffbdeaf73f15 100644 (file)
@@ -724,7 +724,7 @@ static struct write_point *__writepoint_find(struct hlist_head *head,
 static inline bool too_many_writepoints(struct bch_fs *c, unsigned factor)
 {
        u64 stranded    = c->write_points_nr * c->bucket_size_max;
-       u64 free        = bch2_fs_sectors_free(c);
+       u64 free        = bch2_fs_usage_read_short(c).free;
 
        return stranded * factor > free;
 }
index 922d34abc675d0d16f5d7fadfde3c636df10876a..5091966b7b5426f2b209b867d782d4eb0a330308 100644 (file)
@@ -612,11 +612,11 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
                struct bch_fs_usage *src = (void *)
                        bch2_acc_percpu_u64s((void *) c->usage[1], nr);
 
-               copy_fs_field(s.hidden,         "hidden");
-               copy_fs_field(s.data,           "data");
-               copy_fs_field(s.cached,         "cached");
-               copy_fs_field(s.reserved,       "reserved");
-               copy_fs_field(s.nr_inodes,      "nr_inodes");
+               copy_fs_field(hidden,           "hidden");
+               copy_fs_field(data,             "data");
+               copy_fs_field(cached,           "cached");
+               copy_fs_field(reserved,         "reserved");
+               copy_fs_field(nr_inodes,        "nr_inodes");
 
                for (i = 0; i < BCH_REPLICAS_MAX; i++)
                        copy_fs_field(persistent_reserved[i],
@@ -629,7 +629,7 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
 
                        bch2_replicas_entry_to_text(&PBUF(buf), e);
 
-                       copy_fs_field(data[i], "%s", buf);
+                       copy_fs_field(replicas[i], "%s", buf);
                }
        }
 
index 3286ee26f7e23bf5f0c614b3175a4bfa2c9ab6a4..ac54d82f9e110cd70477ae26311a3bb5ab7cd3c8 100644 (file)
@@ -124,7 +124,7 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
        usage = (void *) bch2_acc_percpu_u64s((void *) c->usage[0], nr);
 
        for (i = 0; i < BCH_REPLICAS_MAX; i++)
-               usage->s.reserved += usage->persistent_reserved[i];
+               usage->reserved += usage->persistent_reserved[i];
 
        for (i = 0; i < c->replicas.nr; i++) {
                struct bch_replicas_entry *e =
@@ -133,10 +133,10 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
                switch (e->data_type) {
                case BCH_DATA_BTREE:
                case BCH_DATA_USER:
-                       usage->s.data   += usage->data[i];
+                       usage->data     += usage->replicas[i];
                        break;
                case BCH_DATA_CACHED:
-                       usage->s.cached += usage->data[i];
+                       usage->cached   += usage->replicas[i];
                        break;
                }
        }
@@ -144,21 +144,16 @@ void bch2_fs_usage_initialize(struct bch_fs *c)
        percpu_up_write(&c->mark_lock);
 }
 
-#define bch2_usage_read_raw(_stats)                                    \
-({                                                                     \
-       typeof(*this_cpu_ptr(_stats)) _acc;                             \
-                                                                       \
-       memset(&_acc, 0, sizeof(_acc));                                 \
-       acc_u64s_percpu((u64 *) &_acc,                                  \
-                       (u64 __percpu *) _stats,                        \
-                       sizeof(_acc) / sizeof(u64));                    \
-                                                                       \
-       _acc;                                                           \
-})
-
 struct bch_dev_usage bch2_dev_usage_read(struct bch_fs *c, struct bch_dev *ca)
 {
-       return bch2_usage_read_raw(ca->usage[0]);
+       struct bch_dev_usage ret;
+
+       memset(&ret, 0, sizeof(ret));
+       acc_u64s_percpu((u64 *) &ret,
+                       (u64 __percpu *) ca->usage[0],
+                       sizeof(ret) / sizeof(u64));
+
+       return ret;
 }
 
 struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *c)
@@ -198,27 +193,44 @@ static u64 avail_factor(u64 r)
        return (r << RESERVE_FACTOR) / ((1 << RESERVE_FACTOR) + 1);
 }
 
-u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage fs_usage)
+u64 bch2_fs_sectors_used(struct bch_fs *c, struct bch_fs_usage *fs_usage)
 {
-       return min(fs_usage.s.hidden +
-                  fs_usage.s.data +
-                  reserve_factor(fs_usage.s.reserved +
-                                 fs_usage.s.online_reserved),
+       return min(fs_usage->hidden +
+                  fs_usage->data +
+                  reserve_factor(fs_usage->reserved +
+                                 fs_usage->online_reserved),
                   c->capacity);
 }
 
+static struct bch_fs_usage_short
+__bch2_fs_usage_read_short(struct bch_fs *c)
+{
+       struct bch_fs_usage_short ret;
+       u64 data, reserved;
+
+       ret.capacity = c->capacity -
+               percpu_u64_get(&c->usage[0]->hidden);
+
+       data            = percpu_u64_get(&c->usage[0]->data);
+       reserved        = percpu_u64_get(&c->usage[0]->reserved) +
+               percpu_u64_get(&c->usage[0]->online_reserved);
+
+       ret.used        = min(ret.capacity, data + reserve_factor(reserved));
+       ret.free        = ret.capacity - ret.used;
+
+       ret.nr_inodes   = percpu_u64_get(&c->usage[0]->nr_inodes);
+
+       return ret;
+}
+
 struct bch_fs_usage_short
 bch2_fs_usage_read_short(struct bch_fs *c)
 {
-       struct bch_fs_usage_summarized usage =
-               bch2_usage_read_raw(&c->usage[0]->s);
        struct bch_fs_usage_short ret;
 
-       ret.capacity    = READ_ONCE(c->capacity) - usage.hidden;
-       ret.used        = min(ret.capacity, usage.data +
-                             reserve_factor(usage.reserved +
-                                            usage.online_reserved));
-       ret.nr_inodes   = usage.nr_inodes;
+       percpu_down_read(&c->mark_lock);
+       ret = __bch2_fs_usage_read_short(c);
+       percpu_up_read(&c->mark_lock);
 
        return ret;
 }
@@ -257,7 +269,7 @@ int bch2_fs_usage_apply(struct bch_fs *c,
                        struct bch_fs_usage *fs_usage,
                        struct disk_reservation *disk_res)
 {
-       s64 added = fs_usage->s.data + fs_usage->s.reserved;
+       s64 added = fs_usage->data + fs_usage->reserved;
        s64 should_not_have_added;
        int ret = 0;
 
@@ -277,7 +289,7 @@ int bch2_fs_usage_apply(struct bch_fs *c,
 
        if (added > 0) {
                disk_res->sectors               -= added;
-               fs_usage->s.online_reserved     -= added;
+               fs_usage->online_reserved       -= added;
        }
 
        preempt_disable();
@@ -295,7 +307,7 @@ static inline void account_bucket(struct bch_fs_usage *fs_usage,
                                  int nr, s64 size)
 {
        if (type == BCH_DATA_SB || type == BCH_DATA_JOURNAL)
-               fs_usage->s.hidden      += size;
+               fs_usage->hidden        += size;
 
        dev_usage->buckets[type]        += nr;
 }
@@ -381,10 +393,10 @@ static inline void update_replicas(struct bch_fs *c,
        BUG_ON(!sectors);
 
        if (r->data_type == BCH_DATA_CACHED)
-               fs_usage->s.cached      += sectors;
+               fs_usage->cached        += sectors;
        else
-               fs_usage->s.data        += sectors;
-       fs_usage->data[idx]             += sectors;
+               fs_usage->data          += sectors;
+       fs_usage->replicas[idx]         += sectors;
 }
 
 static inline void update_cached_sectors(struct bch_fs *c,
@@ -911,9 +923,9 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
                                fs_usage, journal_seq, flags, gc);
        case KEY_TYPE_inode:
                if (inserting)
-                       fs_usage->s.nr_inodes++;
+                       fs_usage->nr_inodes++;
                else
-                       fs_usage->s.nr_inodes--;
+                       fs_usage->nr_inodes--;
                return 0;
        case KEY_TYPE_reservation: {
                unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
@@ -922,7 +934,7 @@ static int __bch2_mark_key(struct bch_fs *c, struct bkey_s_c k,
                replicas = clamp_t(unsigned, replicas, 1,
                                   ARRAY_SIZE(fs_usage->persistent_reserved));
 
-               fs_usage->s.reserved                            += sectors;
+               fs_usage->reserved                              += sectors;
                fs_usage->persistent_reserved[replicas - 1]     += sectors;
                return 0;
        }
@@ -1074,13 +1086,13 @@ static u64 bch2_recalc_sectors_available(struct bch_fs *c)
 {
        percpu_u64_set(&c->pcpu->sectors_available, 0);
 
-       return avail_factor(bch2_fs_sectors_free(c));
+       return avail_factor(__bch2_fs_usage_read_short(c).free);
 }
 
 void __bch2_disk_reservation_put(struct bch_fs *c, struct disk_reservation *res)
 {
        percpu_down_read(&c->mark_lock);
-       this_cpu_sub(c->usage[0]->s.online_reserved, res->sectors);
+       this_cpu_sub(c->usage[0]->online_reserved, res->sectors);
        percpu_up_read(&c->mark_lock);
 
        res->sectors = 0;
@@ -1120,7 +1132,7 @@ int bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res,
 
 out:
        pcpu->sectors_available         -= sectors;
-       this_cpu_add(c->usage[0]->s.online_reserved, sectors);
+       this_cpu_add(c->usage[0]->online_reserved, sectors);
        res->sectors                    += sectors;
 
        preempt_enable();
@@ -1136,7 +1148,7 @@ recalculate:
            (flags & BCH_DISK_RESERVATION_NOFAIL)) {
                atomic64_set(&c->sectors_available,
                             max_t(s64, 0, sectors_available - sectors));
-               this_cpu_add(c->usage[0]->s.online_reserved, sectors);
+               this_cpu_add(c->usage[0]->online_reserved, sectors);
                res->sectors                    += sectors;
                ret = 0;
        } else {
index 973bf605cbd93849c69398b083044a26b26ee075..67a1d17610f3ce34221bfdcf9cb1317a96ccaac8 100644 (file)
@@ -225,18 +225,11 @@ static inline struct bch_fs_usage *bch2_fs_usage_get_scratch(struct bch_fs *c)
 
 struct bch_fs_usage *bch2_fs_usage_read(struct bch_fs *);
 
-u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage);
+u64 bch2_fs_sectors_used(struct bch_fs *, struct bch_fs_usage *);
 
 struct bch_fs_usage_short
 bch2_fs_usage_read_short(struct bch_fs *);
 
-static inline u64 bch2_fs_sectors_free(struct bch_fs *c)
-{
-       struct bch_fs_usage_short usage = bch2_fs_usage_read_short(c);
-
-       return usage.capacity - usage.used;
-}
-
 /* key/bucket marking: */
 
 void bch2_bucket_seq_cleanup(struct bch_fs *);
index 6eaee889f1e1acd9750b29e275cc8fe72ea7d370..348d062dd744dda54e55319b80e2e80160c16263 100644 (file)
@@ -64,35 +64,33 @@ struct bch_dev_usage {
 struct bch_fs_usage {
        /* all fields are in units of 512 byte sectors: */
 
-       /* summarized: */
-       struct bch_fs_usage_summarized {
-               u64             online_reserved;
+       u64                     online_reserved;
 
-               /* fields after online_reserved are cleared/recalculated by gc: */
-               u64             gc_start[0];
+       /* fields after online_reserved are cleared/recalculated by gc: */
+       u64                     gc_start[0];
 
-               u64             hidden;
-               u64             data;
-               u64             cached;
-               u64             reserved;
-               u64             nr_inodes;
+       u64                     hidden;
+       u64                     data;
+       u64                     cached;
+       u64                     reserved;
+       u64                     nr_inodes;
 
-               /* XXX: add stats for compression ratio */
+       /* XXX: add stats for compression ratio */
 #if 0
-               u64             uncompressed;
-               u64             compressed;
+       u64                     uncompressed;
+       u64                     compressed;
 #endif
-       } s;
 
        /* broken out: */
 
        u64                     persistent_reserved[BCH_REPLICAS_MAX];
-       u64                     data[];
+       u64                     replicas[];
 };
 
 struct bch_fs_usage_short {
        u64                     capacity;
        u64                     used;
+       u64                     free;
        u64                     nr_inodes;
 };
 
index f090b61f23f1c87880be3a856a42307a8001e08f..5ee38a6a442f601fa226d07e9807e692f8d333db 100644 (file)
@@ -403,10 +403,10 @@ static long bch2_ioctl_usage(struct bch_fs *c,
                if (!src)
                        return -ENOMEM;
 
-               percpu_up_read(&c->mark_lock);
+               dst.used                = bch2_fs_sectors_used(c, src);
+               dst.online_reserved     = src->online_reserved;
 
-               dst.used                = bch2_fs_sectors_used(c, *src);
-               dst.online_reserved     = src->s.online_reserved;
+               percpu_up_read(&c->mark_lock);
 
                for (i = 0; i < BCH_REPLICAS_MAX; i++) {
                        dst.persistent_reserved[i] =
index dd10f1c993e5a6e40dfbef5f18247d41573ded7e..cf4729b7a083547e47634d922136375ae8848f53 100644 (file)
@@ -212,6 +212,9 @@ static int journal_entry_open(struct journal *j)
        lockdep_assert_held(&j->lock);
        BUG_ON(journal_entry_is_open(j));
 
+       if (j->blocked)
+               return -EAGAIN;
+
        if (!fifo_free(&j->pin))
                return 0;
 
@@ -287,7 +290,7 @@ static bool __journal_entry_close(struct journal *j)
                spin_unlock(&j->lock);
                fallthrough;
        case JOURNAL_UNLOCKED:
-               return true;
+               return false;
        }
 }
 
@@ -297,6 +300,22 @@ static bool journal_entry_close(struct journal *j)
        return __journal_entry_close(j);
 }
 
+static bool journal_quiesced(struct journal *j)
+{
+       bool ret;
+
+       spin_lock(&j->lock);
+       ret = !j->reservations.prev_buf_unwritten &&
+               !journal_entry_is_open(j);
+       __journal_entry_close(j);
+       return ret;
+}
+
+static void journal_quiesce(struct journal *j)
+{
+       wait_event(j->wait, journal_quiesced(j));
+}
+
 static void journal_write_work(struct work_struct *work)
 {
        struct journal *j = container_of(work, struct journal, write_work.work);
@@ -722,6 +741,26 @@ int bch2_journal_flush(struct journal *j)
        return bch2_journal_flush_seq(j, seq);
 }
 
+/* block/unblock the journal: */
+
+void bch2_journal_unblock(struct journal *j)
+{
+       spin_lock(&j->lock);
+       j->blocked--;
+       spin_unlock(&j->lock);
+
+       journal_wake(j);
+}
+
+void bch2_journal_block(struct journal *j)
+{
+       spin_lock(&j->lock);
+       j->blocked++;
+       spin_unlock(&j->lock);
+
+       journal_quiesce(j);
+}
+
 /* allocate journal on a device: */
 
 static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
@@ -931,8 +970,7 @@ void bch2_fs_journal_stop(struct journal *j)
            c->btree_roots_dirty)
                bch2_journal_meta(j);
 
-       BUG_ON(journal_entry_is_open(j) ||
-              j->reservations.prev_buf_unwritten);
+       journal_quiesce(j);
 
        BUG_ON(!bch2_journal_error(j) &&
               test_bit(JOURNAL_NOT_EMPTY, &j->flags));
index 6ef34bdae628a02496df9f5c45ea9cd2d39057cc..5290cdeab585c940107c06fad23f7e8fb7d95da0 100644 (file)
@@ -370,6 +370,9 @@ static inline void bch2_journal_set_replay_done(struct journal *j)
        set_bit(JOURNAL_REPLAY_DONE, &j->flags);
 }
 
+void bch2_journal_unblock(struct journal *);
+void bch2_journal_block(struct journal *);
+
 ssize_t bch2_journal_print_debug(struct journal *, char *);
 ssize_t bch2_journal_print_pins(struct journal *, char *);
 
index 5f6d2320c5cd8fd2cd82ddaaa95a183a05f5551c..e952eb06eff52739525ad36a60ee46671231452b 100644 (file)
@@ -142,6 +142,9 @@ struct journal {
 
        spinlock_t              lock;
 
+       /* if nonzero, we may not open a new journal entry: */
+       unsigned                blocked;
+
        /* Used when waiting because the journal was full */
        wait_queue_head_t       wait;
        struct closure_waitlist async_wait;
index e28917cf2cec71f2c629fe8f3da28a9ce3110671..5ceab8c14d72e8cf9ce372df04338b9932e42629 100644 (file)
@@ -83,7 +83,7 @@ static int journal_replay_entry_early(struct bch_fs *c,
                                               le64_to_cpu(u->v));
                        break;
                case FS_USAGE_INODES:
-                       percpu_u64_set(&c->usage[0]->s.nr_inodes,
+                       percpu_u64_set(&c->usage[0]->nr_inodes,
                                       le64_to_cpu(u->v));
                        break;
                case FS_USAGE_KEY_VERSION:
index 6fee8fe376888b34fec8bd07285061c266cf8934..03bb6b51d15fe61c331dc1734de2815a8f30017b 100644 (file)
@@ -245,14 +245,14 @@ static void __replicas_table_update(struct bch_fs_usage __percpu *dst_p,
        *dst = *src;
 
        for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
-               if (!src->data[src_idx])
+               if (!src->replicas[src_idx])
                        continue;
 
                dst_idx = __replicas_entry_idx(dst_r,
                                cpu_replicas_entry(src_r, src_idx));
                BUG_ON(dst_idx < 0);
 
-               dst->data[dst_idx] = src->data[src_idx];
+               dst->replicas[dst_idx] = src->replicas[src_idx];
        }
 }
 
@@ -457,7 +457,7 @@ int bch2_replicas_gc_end(struct bch_fs *c, int ret)
                if (__replicas_has_entry(&c->replicas_gc, e))
                        continue;
 
-               v = percpu_u64_get(&c->usage[0]->data[i]);
+               v = percpu_u64_get(&c->usage[0]->replicas[i]);
                if (!v)
                        continue;
 
@@ -558,7 +558,7 @@ int bch2_replicas_set_usage(struct bch_fs *c,
                BUG_ON(ret < 0);
        }
 
-       percpu_u64_set(&c->usage[0]->data[idx], sectors);
+       percpu_u64_set(&c->usage[0]->replicas[idx], sectors);
 
        return 0;
 }
index 0b3a761fe93e4fe95578266b30d5711af8661c0f..66e174d93a9c296efe39a2d5b33cebb8c04e48fd 100644 (file)
@@ -930,7 +930,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
        percpu_down_write(&c->mark_lock);
 
        {
-               u64 nr_inodes = percpu_u64_get(&c->usage[0]->s.nr_inodes);
+               u64 nr_inodes = percpu_u64_get(&c->usage[0]->nr_inodes);
                struct jset_entry_usage *u =
                        container_of(entry, struct jset_entry_usage, entry);
 
@@ -977,7 +977,7 @@ bch2_journal_super_entries_add_common(struct bch_fs *c,
        for (i = 0; i < c->replicas.nr; i++) {
                struct bch_replicas_entry *e =
                        cpu_replicas_entry(&c->replicas, i);
-               u64 sectors = percpu_u64_get(&c->usage[0]->data[i]);
+               u64 sectors = percpu_u64_get(&c->usage[0]->replicas[i]);
                struct jset_entry_data_usage *u =
                        container_of(entry, struct jset_entry_data_usage, entry);
 
index 8ad7b6026d1b6e3272236009fba7a51b5df45249..361f7b7addcfec7fd794d140cd8a0f8879e16e25 100644 (file)
@@ -244,17 +244,17 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
        pr_buf(&out, "capacity:\t\t\t%llu\n", c->capacity);
 
        pr_buf(&out, "hidden:\t\t\t\t%llu\n",
-              fs_usage->s.hidden);
+              fs_usage->hidden);
        pr_buf(&out, "data:\t\t\t\t%llu\n",
-              fs_usage->s.data);
+              fs_usage->data);
        pr_buf(&out, "cached:\t\t\t\t%llu\n",
-              fs_usage->s.cached);
+              fs_usage->cached);
        pr_buf(&out, "reserved:\t\t\t%llu\n",
-              fs_usage->s.reserved);
+              fs_usage->reserved);
        pr_buf(&out, "nr_inodes:\t\t\t%llu\n",
-              fs_usage->s.nr_inodes);
+              fs_usage->nr_inodes);
        pr_buf(&out, "online reserved:\t\t%llu\n",
-              fs_usage->s.online_reserved);
+              fs_usage->online_reserved);
 
        for (i = 0;
             i < ARRAY_SIZE(fs_usage->persistent_reserved);
@@ -270,7 +270,7 @@ static ssize_t show_fs_alloc_debug(struct bch_fs *c, char *buf)
 
                pr_buf(&out, "\t");
                bch2_replicas_entry_to_text(&out, e);
-               pr_buf(&out, ":\t%llu\n", fs_usage->data[i]);
+               pr_buf(&out, ":\t%llu\n", fs_usage->replicas[i]);
        }
 
        percpu_up_read(&c->mark_lock);