bcachefs: Stripes now properly subject to gc
author Kent Overstreet <kent.overstreet@gmail.com>
Sat, 24 Nov 2018 22:09:44 +0000 (17:09 -0500)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:12 +0000 (17:08 -0400)
gc now verifies the contents of the stripes radix tree, important for
persistent alloc info

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_gc.c
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/ec.c
fs/bcachefs/ec.h
fs/bcachefs/ec_types.h
fs/bcachefs/extents.c
fs/bcachefs/quota.c
fs/bcachefs/recovery.c
fs/bcachefs/super.c

index eaa2055000b6ccf53f096c6a50b15c027986c344..258a67d4437bd8658ec1b697b2884119a9e8c1ed 100644 (file)
        printk(KERN_WARNING bch2_fmt(c, fmt), ##__VA_ARGS__)
 #define bch_err(c, fmt, ...) \
        printk(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
+#define bch_err_ratelimited(c, fmt, ...) \
+       printk_ratelimited(KERN_ERR bch2_fmt(c, fmt), ##__VA_ARGS__)
 
 #define bch_verbose(c, fmt, ...)                                       \
 do {                                                                   \
@@ -334,6 +336,7 @@ enum bch_time_stats {
 struct btree;
 
 enum gc_phase {
+       GC_PHASE_NOT_RUNNING,
        GC_PHASE_START,
        GC_PHASE_SB,
 
@@ -687,16 +690,17 @@ struct bch_fs {
        /* REBALANCE */
        struct bch_fs_rebalance rebalance;
 
-       /* ERASURE CODING */
-       struct list_head        ec_new_stripe_list;
-       struct mutex            ec_new_stripe_lock;
-
-       GENRADIX(struct ec_stripe) ec_stripes;
-       struct mutex            ec_stripes_lock;
+       /* STRIPES: */
+       GENRADIX(struct stripe) stripes[2];
+       struct mutex            ec_stripe_create_lock;
 
        ec_stripes_heap         ec_stripes_heap;
        spinlock_t              ec_stripes_heap_lock;
 
+       /* ERASURE CODING */
+       struct list_head        ec_new_stripe_list;
+       struct mutex            ec_new_stripe_lock;
+
        struct bio_set          ec_bioset;
 
        struct work_struct      ec_stripe_delete_work;
index 55d49677d5fe6f6078e5780ed6ecf15fa0b169be..f350634ce7a0b23d1b8271753cf4322e7c73d878 100644 (file)
@@ -332,9 +332,6 @@ static int bch2_gc_btree(struct bch_fs *c, enum btree_id btree_id,
 
        gc_pos_set(c, gc_pos_btree(btree_id, POS_MIN, 0));
 
-       if (!c->btree_roots[btree_id].b)
-               return 0;
-
        /*
         * if expensive_debug_checks is on, run range_checks on all leaf nodes:
         *
@@ -582,6 +579,8 @@ static void bch2_gc_free(struct bch_fs *c)
        struct bch_dev *ca;
        unsigned i;
 
+       genradix_free(&c->stripes[1]);
+
        for_each_member_device(ca, c, i) {
                kvpfree(rcu_dereference_protected(ca->buckets[1], 1),
                        sizeof(struct bucket_array) +
@@ -602,6 +601,25 @@ static void bch2_gc_done_nocheck(struct bch_fs *c)
        unsigned i;
        int cpu;
 
+       {
+               struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
+               struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
+               struct stripe *dst, *src;
+
+               c->ec_stripes_heap.used = 0;
+
+               while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) &&
+                      (src = genradix_iter_peek(&src_iter, &c->stripes[1]))) {
+                       *dst = *src;
+
+                       if (dst->alive)
+                               bch2_stripes_heap_insert(c, dst, dst_iter.pos);
+
+                       genradix_iter_advance(&dst_iter, &c->stripes[0]);
+                       genradix_iter_advance(&src_iter, &c->stripes[1]);
+               }
+       }
+
        for_each_member_device(ca, c, i) {
                struct bucket_array *src = __bucket_array(ca, 1);
 
@@ -649,13 +667,21 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
 
 #define copy_field(_f, _msg, ...)                                      \
        if (dst._f != src._f) {                                         \
-               pr_info(_msg ": got %llu, should be %llu, fixing"       \
+               bch_err(c, _msg ": got %llu, should be %llu, fixing"\
                        , ##__VA_ARGS__, dst._f, src._f);               \
                dst._f = src._f;                                        \
        }
+#define copy_stripe_field(_f, _msg, ...)                               \
+       if (dst->_f != src->_f) {                                       \
+               bch_err_ratelimited(c, "stripe %zu has wrong "_msg      \
+                       ": got %u, should be %u, fixing",               \
+                       dst_iter.pos, ##__VA_ARGS__,                    \
+                       dst->_f, src->_f);                              \
+               dst->_f = src->_f;                                      \
+       }
 #define copy_bucket_field(_f)                                          \
        if (dst->b[b].mark._f != src->b[b].mark._f) {                   \
-               pr_info("dev %u bucket %zu has wrong " #_f              \
+               bch_err_ratelimited(c, "dev %u bucket %zu has wrong " #_f\
                        ": got %u, should be %u, fixing",               \
                        i, b, dst->b[b].mark._f, src->b[b].mark._f);    \
                dst->b[b]._mark._f = src->b[b].mark._f;                 \
@@ -672,6 +698,36 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
                goto out;
        }
 
+       {
+               struct genradix_iter dst_iter = genradix_iter_init(&c->stripes[0], 0);
+               struct genradix_iter src_iter = genradix_iter_init(&c->stripes[1], 0);
+               struct stripe *dst, *src;
+               unsigned i;
+
+               c->ec_stripes_heap.used = 0;
+
+               while ((dst = genradix_iter_peek(&dst_iter, &c->stripes[0])) &&
+                      (src = genradix_iter_peek(&src_iter, &c->stripes[1]))) {
+                       copy_stripe_field(alive,        "alive");
+                       copy_stripe_field(sectors,      "sectors");
+                       copy_stripe_field(algorithm,    "algorithm");
+                       copy_stripe_field(nr_blocks,    "nr_blocks");
+                       copy_stripe_field(nr_redundant, "nr_redundant");
+                       copy_stripe_field(blocks_nonempty.counter,
+                                         "blocks_nonempty");
+
+                       for (i = 0; i < ARRAY_SIZE(dst->block_sectors); i++)
+                               copy_stripe_field(block_sectors[i].counter,
+                                                 "block_sectors[%u]", i);
+
+                       if (dst->alive)
+                               bch2_stripes_heap_insert(c, dst, dst_iter.pos);
+
+                       genradix_iter_advance(&dst_iter, &c->stripes[0]);
+                       genradix_iter_advance(&src_iter, &c->stripes[1]);
+               }
+       }
+
        for_each_member_device(ca, c, i) {
                struct bucket_array *dst = __bucket_array(ca, 0);
                struct bucket_array *src = __bucket_array(ca, 1);
@@ -756,10 +812,11 @@ static void bch2_gc_done(struct bch_fs *c, bool initial)
 out:
        percpu_up_write(&c->usage_lock);
 
-#undef copy_field
 #undef copy_fs_field
 #undef copy_dev_field
 #undef copy_bucket_field
+#undef copy_stripe_field
+#undef copy_field
 }
 
 static int bch2_gc_start(struct bch_fs *c)
@@ -767,6 +824,12 @@ static int bch2_gc_start(struct bch_fs *c)
        struct bch_dev *ca;
        unsigned i;
 
+       /*
+        * indicate to stripe code that we need to allocate for the gc stripes
+        * radix tree, too
+        */
+       gc_pos_set(c, gc_phase(GC_PHASE_START));
+
        BUG_ON(c->usage[1]);
 
        c->usage[1] = alloc_percpu(struct bch_fs_usage);
@@ -808,7 +871,7 @@ static int bch2_gc_start(struct bch_fs *c)
 
        percpu_up_write(&c->usage_lock);
 
-       return 0;
+       return bch2_ec_mem_alloc(c, true);
 }
 
 /**
@@ -873,7 +936,7 @@ out:
                bch2_gc_done(c, initial);
 
        /* Indicates that gc is no longer in progress: */
-       __gc_pos_set(c, gc_phase(GC_PHASE_START));
+       __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
 
        bch2_gc_free(c);
        up_write(&c->gc_lock);
index 2ebe8bad978e78dc7477e663256d2477de2e24b4..87ff4b2c8434c48ee287773569776a5c81d6ff7f 100644 (file)
@@ -303,7 +303,7 @@ static inline int is_fragmented_bucket(struct bucket_mark m,
 static inline enum bch_data_type bucket_type(struct bucket_mark m)
 {
        return m.cached_sectors && !m.dirty_sectors
-               ?  BCH_DATA_CACHED
+               ? BCH_DATA_CACHED
                : m.data_type;
 }
 
@@ -375,14 +375,14 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
        preempt_disable();
        dev_usage = this_cpu_ptr(ca->usage[gc]);
 
-       if (bucket_type(old) != bucket_type(new)) {
-               if (bucket_type(old)) {
-                       fs_usage->buckets[bucket_type(old)] -= ca->mi.bucket_size;
-                       dev_usage->buckets[bucket_type(old)]--;
-               } else {
-                       fs_usage->buckets[bucket_type(new)] += ca->mi.bucket_size;
-                       dev_usage->buckets[bucket_type(new)]++;
-               }
+       if (bucket_type(old)) {
+               fs_usage->buckets[bucket_type(old)] -= ca->mi.bucket_size;
+               dev_usage->buckets[bucket_type(old)]--;
+       }
+
+       if (bucket_type(new)) {
+               fs_usage->buckets[bucket_type(new)] += ca->mi.bucket_size;
+               dev_usage->buckets[bucket_type(new)]++;
        }
 
        dev_usage->buckets_alloc +=
@@ -406,11 +406,11 @@ static void bch2_dev_usage_update(struct bch_fs *c, struct bch_dev *ca,
        bch2_dev_stats_verify(ca);
 }
 
-#define bucket_data_cmpxchg(c, ca, stats, g, new, expr)                \
+#define bucket_data_cmpxchg(c, ca, fs_usage, g, new, expr)             \
 ({                                                             \
        struct bucket_mark _old = bucket_cmpxchg(g, new, expr); \
                                                                \
-       bch2_dev_usage_update(c, ca, stats, _old, new, gc);     \
+       bch2_dev_usage_update(c, ca, fs_usage, _old, new, gc);  \
        _old;                                                   \
 })
 
@@ -638,23 +638,25 @@ static void bch2_mark_pointer(struct bch_fs *c,
        BUG_ON(!gc && bucket_became_unavailable(old, new));
 }
 
-static void bch2_mark_stripe_ptr(struct bch_fs *c,
-                                struct bch_extent_stripe_ptr p,
-                                s64 sectors, unsigned flags,
-                                s64 *adjusted_disk_sectors,
-                                unsigned *redundancy)
+static int bch2_mark_stripe_ptr(struct bch_fs *c,
+                               struct bch_extent_stripe_ptr p,
+                               s64 sectors, unsigned flags,
+                               s64 *adjusted_disk_sectors,
+                               unsigned *redundancy,
+                               bool gc)
 {
-       struct ec_stripe *m;
+       struct stripe *m;
        unsigned old, new, nr_data;
        int blocks_nonempty_delta;
        s64 parity_sectors;
 
-       m = genradix_ptr(&c->ec_stripes, p.idx);
-       if (WARN_ON(!m))
-               return;
+       m = genradix_ptr(&c->stripes[gc], p.idx);
 
-       if (WARN_ON(!m->alive))
-               return;
+       if (!m || !m->alive) {
+               bch_err_ratelimited(c, "pointer to nonexistent stripe %llu",
+                                   (u64) p.idx);
+               return -1;
+       }
 
        nr_data = m->nr_blocks - m->nr_redundant;
 
@@ -672,20 +674,23 @@ static void bch2_mark_stripe_ptr(struct bch_fs *c,
 
        blocks_nonempty_delta = (int) !!new - (int) !!old;
        if (!blocks_nonempty_delta)
-               return;
+               return 0;
 
        atomic_add(blocks_nonempty_delta, &m->blocks_nonempty);
 
        BUG_ON(atomic_read(&m->blocks_nonempty) < 0);
 
-       bch2_stripes_heap_update(c, m, p.idx);
+       if (!gc)
+               bch2_stripes_heap_update(c, m, p.idx);
+
+       return 0;
 }
 
-static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
-                            s64 sectors, enum bch_data_type data_type,
-                            struct bch_fs_usage *stats,
-                            u64 journal_seq, unsigned flags,
-                            bool gc)
+static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
+                           s64 sectors, enum bch_data_type data_type,
+                           struct bch_fs_usage *stats,
+                           u64 journal_seq, unsigned flags,
+                           bool gc)
 {
        BUG_ON(!sectors);
 
@@ -701,6 +706,7 @@ static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
                unsigned replicas       = 0;
                unsigned ec_redundancy  = 0;
                unsigned i;
+               int ret;
 
                extent_for_each_ptr_decode(e, p, entry) {
                        s64 disk_sectors = ptr_disk_sectors(e, p, sectors);
@@ -710,11 +716,14 @@ static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
                                          stats, journal_seq, flags, gc);
 
                        if (!p.ptr.cached)
-                               for (i = 0; i < p.ec_nr; i++)
-                                       bch2_mark_stripe_ptr(c, p.ec[i],
+                               for (i = 0; i < p.ec_nr; i++) {
+                                       ret = bch2_mark_stripe_ptr(c, p.ec[i],
                                                        disk_sectors, flags,
                                                        &adjusted_disk_sectors,
-                                                       &ec_redundancy);
+                                                       &ec_redundancy, gc);
+                                       if (ret)
+                                               return ret;
+                               }
                        if (!p.ptr.cached)
                                replicas++;
 
@@ -747,6 +756,8 @@ static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
                break;
        }
        }
+
+       return 0;
 }
 
 static void bucket_set_stripe(struct bch_fs *c,
@@ -767,7 +778,7 @@ static void bucket_set_stripe(struct bch_fs *c,
 
                BUG_ON(ptr_stale(ca, ptr));
 
-               old = bucket_cmpxchg(g, new, ({
+               old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
                        new.stripe                      = enabled;
                        if (journal_seq) {
                                new.journal_seq_valid   = 1;
@@ -776,26 +787,33 @@ static void bucket_set_stripe(struct bch_fs *c,
                }));
 
                BUG_ON(old.stripe == enabled);
-
-               bch2_dev_usage_update(c, ca, fs_usage, old, new, gc);
        }
 }
 
-static void bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
-                            bool inserting,
-                            struct bch_fs_usage *fs_usage,
-                            u64 journal_seq, unsigned flags,
-                            bool gc)
+static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
+                           bool inserting,
+                           struct bch_fs_usage *fs_usage,
+                           u64 journal_seq, unsigned flags,
+                           bool gc)
 {
        switch (k.k->type) {
        case BCH_STRIPE: {
                struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
                size_t idx = s.k->p.offset;
-               struct ec_stripe *m = genradix_ptr(&c->ec_stripes, idx);
+               struct stripe *m = genradix_ptr(&c->stripes[gc], idx);
                unsigned i;
 
-               BUG_ON(!m);
-               BUG_ON(m->alive == inserting);
+               if (!m || (!inserting && !m->alive)) {
+                       bch_err_ratelimited(c, "error marking nonexistent stripe %zu",
+                                           idx);
+                       return -1;
+               }
+
+               if (inserting && m->alive) {
+                       bch_err_ratelimited(c, "error marking stripe %zu: already exists",
+                                           idx);
+                       return -1;
+               }
 
                BUG_ON(atomic_read(&m->blocks_nonempty));
 
@@ -809,70 +827,88 @@ static void bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
                        m->nr_redundant = s.v->nr_redundant;
                }
 
-               if (inserting)
-                       bch2_stripes_heap_insert(c, m, idx);
-               else
-                       bch2_stripes_heap_del(c, m, idx);
+               if (!gc) {
+                       if (inserting)
+                               bch2_stripes_heap_insert(c, m, idx);
+                       else
+                               bch2_stripes_heap_del(c, m, idx);
+               } else {
+                       m->alive = inserting;
+               }
 
                bucket_set_stripe(c, s.v, inserting, fs_usage, 0, gc);
                break;
        }
        }
+
+       return 0;
 }
 
-static void __bch2_mark_key(struct bch_fs *c,
-                           enum bkey_type type, struct bkey_s_c k,
-                           bool inserting, s64 sectors,
-                           struct bch_fs_usage *stats,
-                           u64 journal_seq, unsigned flags,
-                           bool gc)
+static int __bch2_mark_key(struct bch_fs *c,
+                          enum bkey_type type, struct bkey_s_c k,
+                          bool inserting, s64 sectors,
+                          struct bch_fs_usage *stats,
+                          u64 journal_seq, unsigned flags,
+                          bool gc)
 {
+       int ret = 0;
+
        switch (type) {
        case BKEY_TYPE_BTREE:
-               bch2_mark_extent(c, k, inserting
-                                ?  c->opts.btree_node_size
-                                : -c->opts.btree_node_size,
-                                BCH_DATA_BTREE,
-                                stats, journal_seq, flags, gc);
+               ret = bch2_mark_extent(c, k, inserting
+                                      ?  c->opts.btree_node_size
+                                      : -c->opts.btree_node_size,
+                                      BCH_DATA_BTREE,
+                                      stats, journal_seq, flags, gc);
                break;
        case BKEY_TYPE_EXTENTS:
-               bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
-                                stats, journal_seq, flags, gc);
+               ret = bch2_mark_extent(c, k, sectors, BCH_DATA_USER,
+                                      stats, journal_seq, flags, gc);
                break;
        case BKEY_TYPE_EC:
-               bch2_mark_stripe(c, k, inserting,
-                                stats, journal_seq, flags, gc);
+               ret = bch2_mark_stripe(c, k, inserting,
+                                      stats, journal_seq, flags, gc);
                break;
        default:
                break;
        }
+
+       return ret;
 }
 
-void bch2_mark_key(struct bch_fs *c,
-                  enum bkey_type type, struct bkey_s_c k,
-                  bool inserting, s64 sectors,
-                  struct gc_pos pos,
-                  struct bch_fs_usage *stats,
-                  u64 journal_seq, unsigned flags)
+int bch2_mark_key(struct bch_fs *c,
+                 enum bkey_type type, struct bkey_s_c k,
+                 bool inserting, s64 sectors,
+                 struct gc_pos pos,
+                 struct bch_fs_usage *stats,
+                 u64 journal_seq, unsigned flags)
 {
+       int ret = 0;
+
        percpu_down_read(&c->usage_lock);
 
        if (!(flags & BCH_BUCKET_MARK_GC)) {
                if (!stats)
                        stats = this_cpu_ptr(c->usage[0]);
 
-               __bch2_mark_key(c, type, k, inserting, sectors,
-                               stats, journal_seq, flags, false);
+               ret = __bch2_mark_key(c, type, k, inserting, sectors,
+                                     stats, journal_seq, flags, false);
+               if (ret)
+                       goto out;
        }
 
        if ((flags & BCH_BUCKET_MARK_GC) ||
            gc_visited(c, pos)) {
-               __bch2_mark_key(c, type, k, inserting, sectors,
-                               this_cpu_ptr(c->usage[1]),
-                               journal_seq, flags, true);
+               ret = __bch2_mark_key(c, type, k, inserting, sectors,
+                                     this_cpu_ptr(c->usage[1]),
+                                     journal_seq, flags, true);
+               if (ret)
+                       goto out;
        }
-
+out:
        percpu_up_read(&c->usage_lock);
+
+       return ret;
 }
 
 void bch2_mark_update(struct btree_insert *trans,
index 813e0c44e1076a802a07a0f2b4c4b37f426edb35..4eec96101bf6eeb867d1ce9b9c12a569266a2506 100644 (file)
@@ -220,9 +220,9 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
 #define BCH_BUCKET_MARK_NOATOMIC               (1 << 0)
 #define BCH_BUCKET_MARK_GC                     (1 << 1)
 
-void bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c,
-                  bool, s64, struct gc_pos,
-                  struct bch_fs_usage *, u64, unsigned);
+int bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c,
+                 bool, s64, struct gc_pos,
+                 struct bch_fs_usage *, u64, unsigned);
 void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *);
 
 void __bch2_disk_reservation_put(struct bch_fs *, struct disk_reservation *);
index 727324f15f43a9ebaddcddd5e96cf67f2f2d6cb4..091a1f0a043272095c7b6088919ca31fbc1b8d65 100644 (file)
@@ -530,7 +530,7 @@ err:
        return ret;
 }
 
-/* ec_stripe bucket accounting: */
+/* stripe bucket accounting: */
 
 static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
 {
@@ -551,7 +551,11 @@ static int __ec_stripe_mem_alloc(struct bch_fs *c, size_t idx, gfp_t gfp)
                free_heap(&n);
        }
 
-       if (!genradix_ptr_alloc(&c->ec_stripes, idx, gfp))
+       if (!genradix_ptr_alloc(&c->stripes[0], idx, gfp))
+               return -ENOMEM;
+
+       if (c->gc_pos.phase != GC_PHASE_NOT_RUNNING &&
+           !genradix_ptr_alloc(&c->stripes[1], idx, gfp))
                return -ENOMEM;
 
        return 0;
@@ -592,27 +596,26 @@ static inline void ec_stripes_heap_set_backpointer(ec_stripes_heap *h,
 {
        struct bch_fs *c = container_of(h, struct bch_fs, ec_stripes_heap);
 
-       genradix_ptr(&c->ec_stripes, h->data[i].idx)->heap_idx = i;
+       genradix_ptr(&c->stripes[0], h->data[i].idx)->heap_idx = i;
 }
 
 static void heap_verify_backpointer(struct bch_fs *c, size_t idx)
 {
        ec_stripes_heap *h = &c->ec_stripes_heap;
-       struct ec_stripe *m = genradix_ptr(&c->ec_stripes, idx);
+       struct stripe *m = genradix_ptr(&c->stripes[0], idx);
 
        BUG_ON(!m->alive);
        BUG_ON(m->heap_idx >= h->used);
        BUG_ON(h->data[m->heap_idx].idx != idx);
 }
 
-static inline unsigned stripe_entry_blocks(struct ec_stripe *m)
+static inline unsigned stripe_entry_blocks(struct stripe *m)
 {
-       return atomic_read(&m->pin)
-               ? UINT_MAX : atomic_read(&m->blocks_nonempty);
+       return atomic_read(&m->blocks_nonempty);
 }
 
 void bch2_stripes_heap_update(struct bch_fs *c,
-                             struct ec_stripe *m, size_t idx)
+                             struct stripe *m, size_t idx)
 {
        ec_stripes_heap *h = &c->ec_stripes_heap;
        bool queue_delete;
@@ -646,7 +649,7 @@ void bch2_stripes_heap_update(struct bch_fs *c,
 }
 
 void bch2_stripes_heap_del(struct bch_fs *c,
-                          struct ec_stripe *m, size_t idx)
+                          struct stripe *m, size_t idx)
 {
        spin_lock(&c->ec_stripes_heap_lock);
        heap_verify_backpointer(c, idx);
@@ -659,7 +662,7 @@ void bch2_stripes_heap_del(struct bch_fs *c,
 }
 
 void bch2_stripes_heap_insert(struct bch_fs *c,
-                             struct ec_stripe *m, size_t idx)
+                             struct stripe *m, size_t idx)
 {
        spin_lock(&c->ec_stripes_heap_lock);
 
@@ -678,7 +681,9 @@ void bch2_stripes_heap_insert(struct bch_fs *c,
        spin_unlock(&c->ec_stripes_heap_lock);
 }
 
-static void ec_stripe_delete(struct bch_fs *c, unsigned idx)
+/* stripe deletion */
+
+static void ec_stripe_delete(struct bch_fs *c, size_t idx)
 {
        struct btree_iter iter;
        struct bch_stripe *v = NULL;
@@ -717,6 +722,7 @@ static void ec_stripe_delete_work(struct work_struct *work)
        ssize_t idx;
 
        down_read(&c->gc_lock);
+       mutex_lock(&c->ec_stripe_create_lock);
 
        while (1) {
                spin_lock(&c->ec_stripes_heap_lock);
@@ -729,13 +735,15 @@ static void ec_stripe_delete_work(struct work_struct *work)
                ec_stripe_delete(c, idx);
        }
 
+       mutex_unlock(&c->ec_stripe_create_lock);
        up_read(&c->gc_lock);
 }
 
+/* stripe creation: */
+
 static int ec_stripe_bkey_insert(struct bch_fs *c,
                                 struct bkey_i_stripe *stripe)
 {
-       struct ec_stripe *m;
        struct btree_iter iter;
        struct bkey_s_c k;
        int ret;
@@ -755,18 +763,13 @@ retry:
 
        return bch2_btree_iter_unlock(&iter) ?: -ENOSPC;
 found_slot:
-       mutex_lock(&c->ec_stripes_lock);
        ret = ec_stripe_mem_alloc(c, &iter);
-       mutex_unlock(&c->ec_stripes_lock);
 
        if (ret == -EINTR)
                goto retry;
        if (ret)
                return ret;
 
-       m = genradix_ptr(&c->ec_stripes, iter.pos.offset);
-       atomic_inc(&m->pin);
-
        stripe->k.p = iter.pos;
 
        ret = bch2_btree_insert_at(c, NULL, NULL,
@@ -775,14 +778,9 @@ found_slot:
                                   BTREE_INSERT_ENTRY(&iter, &stripe->k_i));
        bch2_btree_iter_unlock(&iter);
 
-       if (ret)
-               atomic_dec(&m->pin);
-
        return ret;
 }
 
-/* stripe creation: */
-
 static void extent_stripe_ptr_add(struct bkey_s_extent e,
                                  struct ec_stripe_buf *s,
                                  struct bch_extent_ptr *ptr,
@@ -858,7 +856,6 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
  */
 static void ec_stripe_create(struct ec_stripe_new *s)
 {
-       struct ec_stripe *ec_stripe;
        struct bch_fs *c = s->c;
        struct open_bucket *ob;
        struct bkey_i *k;
@@ -898,10 +895,12 @@ static void ec_stripe_create(struct ec_stripe_new *s)
                        goto err_put_writes;
                }
 
+       mutex_lock(&c->ec_stripe_create_lock);
+
        ret = ec_stripe_bkey_insert(c, &s->stripe.key);
        if (ret) {
                bch_err(c, "error creating stripe: error creating stripe key");
-               goto err_put_writes;
+               goto err_unlock;
        }
 
        for_each_keylist_key(&s->keys, k) {
@@ -910,12 +909,8 @@ static void ec_stripe_create(struct ec_stripe_new *s)
                        break;
        }
 
-       ec_stripe = genradix_ptr(&c->ec_stripes, s->stripe.key.k.p.offset);
-
-       atomic_dec(&ec_stripe->pin);
-       bch2_stripes_heap_update(c, ec_stripe,
-                                s->stripe.key.k.p.offset);
-
+err_unlock:
+       mutex_unlock(&c->ec_stripe_create_lock);
 err_put_writes:
        percpu_ref_put(&c->writes);
 err:
@@ -1222,7 +1217,7 @@ unlock:
        mutex_unlock(&c->ec_new_stripe_lock);
 }
 
-int bch2_fs_ec_start(struct bch_fs *c)
+int bch2_ec_mem_alloc(struct bch_fs *c, bool gc)
 {
        struct btree_iter iter;
        struct bkey_s_c k;
@@ -1238,19 +1233,25 @@ int bch2_fs_ec_start(struct bch_fs *c)
        if (ret)
                return ret;
 
-       if (!init_heap(&c->ec_stripes_heap, roundup_pow_of_two(idx),
+       if (!gc &&
+           !init_heap(&c->ec_stripes_heap, roundup_pow_of_two(idx),
                       GFP_KERNEL))
                return -ENOMEM;
 #if 0
-       ret = genradix_prealloc(&c->ec_stripes, idx, GFP_KERNEL);
+       ret = genradix_prealloc(&c->stripes[gc], idx, GFP_KERNEL);
 #else
        for (i = 0; i < idx; i++)
-               if (!genradix_ptr_alloc(&c->ec_stripes, i, GFP_KERNEL))
+               if (!genradix_ptr_alloc(&c->stripes[gc], i, GFP_KERNEL))
                        return -ENOMEM;
 #endif
        return 0;
 }
 
+int bch2_fs_ec_start(struct bch_fs *c)
+{
+       return bch2_ec_mem_alloc(c, false);
+}
+
 void bch2_fs_ec_exit(struct bch_fs *c)
 {
        struct ec_stripe_head *h;
@@ -1271,7 +1272,7 @@ void bch2_fs_ec_exit(struct bch_fs *c)
        }
 
        free_heap(&c->ec_stripes_heap);
-       genradix_free(&c->ec_stripes);
+       genradix_free(&c->stripes[0]);
        bioset_exit(&c->ec_bioset);
 }
 
index bcf06529dcfc8a9db7d4641097855671e26b92bf..c35de8b1ef64d272384debe52d5f4314a6db2ee8 100644 (file)
@@ -93,14 +93,16 @@ void bch2_ec_stripe_head_put(struct ec_stripe_head *);
 struct ec_stripe_head *bch2_ec_stripe_head_get(struct bch_fs *, unsigned,
                                               unsigned, unsigned);
 
-void bch2_stripes_heap_update(struct bch_fs *, struct ec_stripe *, size_t);
-void bch2_stripes_heap_del(struct bch_fs *, struct ec_stripe *, size_t);
-void bch2_stripes_heap_insert(struct bch_fs *, struct ec_stripe *, size_t);
+void bch2_stripes_heap_update(struct bch_fs *, struct stripe *, size_t);
+void bch2_stripes_heap_del(struct bch_fs *, struct stripe *, size_t);
+void bch2_stripes_heap_insert(struct bch_fs *, struct stripe *, size_t);
 
 void bch2_ec_stop_dev(struct bch_fs *, struct bch_dev *);
 
 void bch2_ec_flush_new_stripes(struct bch_fs *);
 
+int bch2_ec_mem_alloc(struct bch_fs *, bool);
+
 int bch2_fs_ec_start(struct bch_fs *);
 
 void bch2_fs_ec_exit(struct bch_fs *);
index 00e89c3b776772c6092aad9039edf33f560a7707..a3216ca01913660a2153afc4caa48768b6bed107 100644 (file)
@@ -6,7 +6,7 @@
 
 #define EC_STRIPE_MAX  16
 
-struct ec_stripe {
+struct stripe {
        size_t                  heap_idx;
 
        u16                     sectors;
@@ -16,7 +16,6 @@ struct ec_stripe {
        u8                      nr_redundant;
 
        u8                      alive;
-       atomic_t                pin;
        atomic_t                blocks_nonempty;
        atomic_t                block_sectors[EC_STRIPE_MAX];
 };
index eeeebfaa4557dfaa3b61d12c6dea4260d393046b..30852090ce755374b6d3f5a919c761e45c0d6dd6 100644 (file)
@@ -208,8 +208,8 @@ static unsigned bch2_extent_ptr_durability(struct bch_fs *c,
                durability = max_t(unsigned, durability, ca->mi.durability);
 
        for (i = 0; i < p.ec_nr; i++) {
-               struct ec_stripe *s =
-                       genradix_ptr(&c->ec_stripes, p.idx);
+               struct stripe *s =
+                       genradix_ptr(&c->stripes[0], p.idx);
 
                if (WARN_ON(!s))
                        continue;
index 8127f4454dacf82744d3ffdc199e4663a53a6f17..cc20742d542b973e824828ca92e998280a01662a 100644 (file)
@@ -700,22 +700,19 @@ static int bch2_get_next_quota(struct super_block *sb, struct kqid *kqid,
        struct bch_fs *c                = sb->s_fs_info;
        struct bch_memquota_type *q     = &c->quotas[kqid->type];
        qid_t qid                       = from_kqid(&init_user_ns, *kqid);
-       struct genradix_iter iter       = genradix_iter_init(&q->table, qid);
+       struct genradix_iter iter;
        struct bch_memquota *mq;
        int ret = 0;
 
        mutex_lock(&q->lock);
 
-       while ((mq = genradix_iter_peek(&iter, &q->table))) {
+       genradix_for_each_from(&q->table, iter, mq, qid)
                if (memcmp(mq, page_address(ZERO_PAGE(0)), sizeof(*mq))) {
                        __bch2_quota_get(qdq, mq);
                        *kqid = make_kqid(current_user_ns(), kqid->type, iter.pos);
                        goto found;
                }
 
-               genradix_iter_advance(&iter, &q->table);
-       }
-
        ret = -ENOENT;
 found:
        mutex_unlock(&q->lock);
index ddfba16a29985f11effb5ab2a47ef6b13710dbbd..1cb0c9940ec1867cc26d6b027594c3a8459330c1 100644 (file)
@@ -305,6 +305,9 @@ int bch2_fs_initialize(struct bch_fs *c)
 
        set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
 
+       for (i = 0; i < BTREE_ID_NR; i++)
+               bch2_btree_root_alloc(c, i);
+
        ret = bch2_initial_gc(c, &journal);
        if (ret)
                goto err;
@@ -316,9 +319,6 @@ int bch2_fs_initialize(struct bch_fs *c)
                        goto err;
                }
 
-       for (i = 0; i < BTREE_ID_NR; i++)
-               bch2_btree_root_alloc(c, i);
-
        /*
         * journal_res_get() will crash if called before this has
         * set up the journal.pin FIFO and journal.cur pointer:
index 59f2aa7e047c3da7756adb18b775c2651f123f39..78a2668fc03e2fa22654211e7985d97bb83f94c8 100644 (file)
@@ -548,7 +548,7 @@ static struct bch_fs *bch2_fs_alloc(struct bch_sb *sb, struct bch_opts opts)
 
        INIT_LIST_HEAD(&c->ec_new_stripe_list);
        mutex_init(&c->ec_new_stripe_lock);
-       mutex_init(&c->ec_stripes_lock);
+       mutex_init(&c->ec_stripe_create_lock);
        spin_lock_init(&c->ec_stripes_heap_lock);
 
        seqcount_init(&c->gc_pos_lock);