bcachefs: bch2_trans_mark_update()
author Kent Overstreet <kent.overstreet@gmail.com>
Mon, 11 Mar 2019 18:59:58 +0000 (14:59 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:21 +0000 (17:08 -0400)
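Add a transactional version of bch2_mark_update(): instead of marking
buckets only in memory, updates to the extents and inodes btrees now
also update the corresponding alloc and stripe keys within the same
btree transaction, accumulating their disk usage and replicas changes
in a per-transaction replicas_delta_list that is applied at commit
time.
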
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
18 files changed:
fs/bcachefs/alloc_background.c
fs/bcachefs/alloc_background.h
fs/bcachefs/bcachefs_format.h
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update.h
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/buckets_types.h
fs/bcachefs/ec.c
fs/bcachefs/extents.c
fs/bcachefs/migrate.c
fs/bcachefs/move.c
fs/bcachefs/recovery.c
fs/bcachefs/replicas.c
fs/bcachefs/replicas.h
fs/bcachefs/super-io.c

index 5988971521eb7410b84cda04e63eb51bb82a2397..82a68fabdc5f6fd41a006b3acc4b6ca0b4f2b341 100644 (file)
@@ -141,8 +141,8 @@ struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *a)
        return ret;
 }
 
-static void bch2_alloc_pack(struct bkey_i_alloc *dst,
-                           const struct bkey_alloc_unpacked src)
+void bch2_alloc_pack(struct bkey_i_alloc *dst,
+                    const struct bkey_alloc_unpacked src)
 {
        unsigned idx = 0;
        void *d = dst->v.data;
@@ -962,7 +962,6 @@ retry:
 
        invalidating_cached_data = m.cached_sectors != 0;
 
-       //BUG_ON(u.dirty_sectors);
        u.data_type     = 0;
        u.dirty_sectors = 0;
        u.cached_sectors = 0;
@@ -974,6 +973,7 @@ retry:
         * we have to trust the in memory bucket @m, not the version in the
         * btree:
         */
+       //BUG_ON(u.dirty_sectors);
        u.gen           = m.gen + 1;
 
        a = bkey_alloc_init(&alloc_key.k);
index b75c56a5dae0107074de04b67f3f865c66b154f5..02354c80a102ae3549eb2c7120112309bc2d9dfa 100644 (file)
@@ -14,6 +14,8 @@ struct bkey_alloc_unpacked {
 };
 
 struct bkey_alloc_unpacked bch2_alloc_unpack(const struct bch_alloc *);
+void bch2_alloc_pack(struct bkey_i_alloc *,
+                    const struct bkey_alloc_unpacked);
 
 #define ALLOC_SCAN_BATCH(ca)           max_t(size_t, 1, (ca)->mi.nbuckets >> 9)
 
index 7edc410c53910d6a31241d6e077513ccdbcf5f97..8715a444f6d55013cca0a831d5805b1f8ec14228 100644 (file)
@@ -1300,6 +1300,7 @@ enum bch_sb_features {
 
 enum bch_sb_compat {
        BCH_COMPAT_FEAT_ALLOC_INFO      = 0,
+       BCH_COMPAT_FEAT_ALLOC_METADATA  = 1,
 };
 
 /* options: */
index 3fdf5ab255785ce1cc3727bfbdb9dfa795e50304..afede965102487990d443f3f606d753e4df50a02 100644 (file)
@@ -1005,7 +1005,7 @@ retry_all:
                        goto retry_all;
        }
 
-       ret = btree_trans_has_multiple_iters(trans) ? -EINTR : 0;
+       ret = hweight64(trans->iters_live) > 1 ? -EINTR : 0;
 out:
        bch2_btree_cache_cannibalize_unlock(c);
        return ret;
@@ -1103,8 +1103,6 @@ int __must_check bch2_btree_iter_traverse(struct btree_iter *iter)
        if (unlikely(ret))
                ret = __btree_iter_traverse_all(iter->trans, iter, ret);
 
-       BUG_ON(ret == -EINTR && !btree_trans_has_multiple_iters(iter->trans));
-
        return ret;
 }
 
index 7bd3adcd4b52fcdaea4232fc1cbabe728d82f1dd..ece4f30b3f858e6a4607c5c82f838f3a7a5d8262 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/rhashtable.h>
 
 #include "bkey_methods.h"
+#include "buckets_types.h"
 #include "journal_types.h"
 #include "six.h"
 
@@ -264,6 +265,7 @@ struct btree_insert_entry {
        };
 
        bool                    deferred;
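+       /* set if this update was generated by a transactional trigger: */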
+       bool                    triggered;
 };
 
 #define BTREE_ITER_MAX         64
@@ -302,6 +304,8 @@ struct btree_trans {
 
        struct btree_iter       iters_onstack[2];
        struct btree_insert_entry updates_onstack[6];
+
+       struct replicas_delta_list fs_usage_deltas;
 };
 
 #define BTREE_FLAG(flag)                                               \
index 7a638a76634f1105a28f52869e8f1f19bd99185e..4438a999244264bd45c18823c23491e1301b7741 100644 (file)
@@ -43,8 +43,11 @@ enum {
        __BTREE_INSERT_USE_ALLOC_RESERVE,
        __BTREE_INSERT_JOURNAL_REPLAY,
        __BTREE_INSERT_JOURNAL_RESERVED,
+       __BTREE_INSERT_NOMARK_INSERT,
        __BTREE_INSERT_NOMARK_OVERWRITES,
        __BTREE_INSERT_NOMARK,
+       __BTREE_INSERT_MARK_INMEM,
+       __BTREE_INSERT_NO_CLEAR_REPLICAS,
        __BTREE_INSERT_NOWAIT,
        __BTREE_INSERT_GC_LOCK_HELD,
        __BCH_HASH_SET_MUST_CREATE,
@@ -77,12 +80,20 @@ enum {
 
 #define BTREE_INSERT_JOURNAL_RESERVED  (1 << __BTREE_INSERT_JOURNAL_RESERVED)
 
+/* Don't mark new key, just overwrites: */
+#define BTREE_INSERT_NOMARK_INSERT     (1 << __BTREE_INSERT_NOMARK_INSERT)
+
 /* Don't mark overwrites, just new key: */
 #define BTREE_INSERT_NOMARK_OVERWRITES (1 << __BTREE_INSERT_NOMARK_OVERWRITES)
 
-/* Don't call bch2_mark_key: */
+/* Don't mark at all: */
 #define BTREE_INSERT_NOMARK            (1 << __BTREE_INSERT_NOMARK)
 
+/* Don't mark transactionally: */
+#define BTREE_INSERT_MARK_INMEM                (1 << __BTREE_INSERT_MARK_INMEM)
+
+#define BTREE_INSERT_NO_CLEAR_REPLICAS (1 << __BTREE_INSERT_NO_CLEAR_REPLICAS)
+
 /* Don't block on allocation failure (for new btree nodes): */
 #define BTREE_INSERT_NOWAIT            (1 << __BTREE_INSERT_NOWAIT)
 #define BTREE_INSERT_GC_LOCK_HELD      (1 << __BTREE_INSERT_GC_LOCK_HELD)
index 5e13ad34ec42e9135150cc568c94411229fe77bc..b9b9accfb38c63e7cc6eefa07f6e6b71fa5f6ec0 100644 (file)
@@ -526,6 +526,22 @@ static inline void do_btree_insert_one(struct btree_trans *trans,
                btree_insert_key_deferred(trans, insert);
 }
 
+static inline bool update_triggers_transactional(struct btree_trans *trans,
+                                                struct btree_insert_entry *i)
+{
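+       /* so far, only the extents and inodes btrees have transactional triggers: */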
+       return likely(!(trans->flags & BTREE_INSERT_MARK_INMEM)) &&
+               (i->iter->btree_id == BTREE_ID_EXTENTS ||
+                i->iter->btree_id == BTREE_ID_INODES);
+}
+
+static inline bool update_has_triggers(struct btree_trans *trans,
+                                      struct btree_insert_entry *i)
+{
+       return likely(!(trans->flags & BTREE_INSERT_NOMARK)) &&
+               !i->deferred &&
+               btree_node_type_needs_gc(i->iter->btree_id);
+}
+
 /*
  * Get journal reservation, take write locks, and attempt to do btree update(s):
  */
@@ -538,29 +554,25 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
        struct btree_iter *linked;
        int ret;
 
+       if (likely(!(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS))) {
+               memset(&trans->fs_usage_deltas.fs_usage, 0,
+                      sizeof(trans->fs_usage_deltas.fs_usage));
+               trans->fs_usage_deltas.top = trans->fs_usage_deltas.d;
+       }
+
        trans_for_each_update_iter(trans, i)
                BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
 
-       btree_trans_lock_write(c, trans);
-
-       if (likely(!(trans->flags & BTREE_INSERT_NOMARK))) {
-               trans_for_each_update_iter(trans, i) {
-                       if (i->deferred ||
-                           !btree_node_type_needs_gc(i->iter->btree_id))
-                               continue;
-
-                       if (!fs_usage) {
-                               percpu_down_read(&c->mark_lock);
-                               fs_usage = bch2_fs_usage_scratch_get(c);
-                       }
-
-                       if (!bch2_bkey_replicas_marked_locked(c,
-                                       bkey_i_to_s_c(i->k), true)) {
-                               ret = BTREE_INSERT_NEED_MARK_REPLICAS;
-                               goto out;
-                       }
+       trans_for_each_update_iter(trans, i)
+               if (update_has_triggers(trans, i) &&
+                   update_triggers_transactional(trans, i)) {
+                       ret = bch2_trans_mark_update(trans, i,
+                                               &trans->fs_usage_deltas);
+                       if (ret)
+                               return ret;
                }
-       }
+
+       btree_trans_lock_write(c, trans);
 
        if (race_fault()) {
                ret = -EINTR;
@@ -578,6 +590,23 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
        if (ret)
                goto out;
 
+       trans_for_each_update_iter(trans, i) {
+               if (i->deferred ||
+                   !btree_node_type_needs_gc(i->iter->btree_id))
+                       continue;
+
+               if (!fs_usage) {
+                       percpu_down_read(&c->mark_lock);
+                       fs_usage = bch2_fs_usage_scratch_get(c);
+               }
+
+               if (!bch2_bkey_replicas_marked_locked(c,
+                       bkey_i_to_s_c(i->k), true)) {
+                       ret = BTREE_INSERT_NEED_MARK_REPLICAS;
+                       goto out;
+               }
+       }
+
        /*
         * Don't get journal reservation until after we know insert will
         * succeed:
@@ -606,20 +635,24 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
                                linked->flags |= BTREE_ITER_NOUNLOCK;
        }
 
-       if (likely(!(trans->flags & BTREE_INSERT_NOMARK))) {
-               trans_for_each_update_iter(trans, i)
+       trans_for_each_update_iter(trans, i)
+               if (update_has_triggers(trans, i) &&
+                   !update_triggers_transactional(trans, i))
                        bch2_mark_update(trans, i, &fs_usage->u, 0);
-               if (fs_usage)
-                       bch2_trans_fs_usage_apply(trans, fs_usage);
-
-               if (unlikely(c->gc_pos.phase)) {
-                       trans_for_each_update_iter(trans, i)
-                               if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
-                                       bch2_mark_update(trans, i, NULL,
-                                                        BCH_BUCKET_MARK_GC);
-               }
+
+       if (fs_usage) {
+               bch2_replicas_delta_list_apply(c, &fs_usage->u,
+                                              &trans->fs_usage_deltas);
+               bch2_trans_fs_usage_apply(trans, fs_usage);
        }
 
+       if (likely(!(trans->flags & BTREE_INSERT_NOMARK)) &&
+           unlikely(c->gc_pos.phase))
+               trans_for_each_update_iter(trans, i)
+                       if (gc_visited(c, gc_pos_btree_node(i->iter->l[0].b)))
+                               bch2_mark_update(trans, i, NULL,
+                                                BCH_BUCKET_MARK_GC);
+
        trans_for_each_update(trans, i)
                do_btree_insert_one(trans, i);
 out:
@@ -646,6 +679,19 @@ int bch2_trans_commit_error(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        unsigned flags = trans->flags;
+       struct btree_insert_entry *src, *dst;
+
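+       /* drop updates generated by triggers; they'll be regenerated on retry: */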
+       src = dst = trans->updates;
+
+       while (src < trans->updates + trans->nr_updates) {
+               if (!src->triggered) {
+                       *dst = *src;
+                       dst++;
+               }
+               src++;
+       }
+
+       trans->nr_updates = dst - trans->updates;
 
        /*
         * BTREE_INSERT_NOUNLOCK means don't unlock _after_ successful btree
@@ -808,6 +854,7 @@ int bch2_trans_commit(struct btree_trans *trans,
 {
        struct bch_fs *c = trans->c;
        struct btree_insert_entry *i;
+       unsigned orig_mem_top = trans->mem_top;
        int ret = 0;
 
        if (!trans->nr_updates)
@@ -885,8 +932,16 @@ out_noupdates:
        return ret;
 err:
        ret = bch2_trans_commit_error(trans, i, ret);
-       if (!ret)
+
+       /* can't loop if the replicas delta list was passed in and we've changed it: */
+       if (unlikely(trans->flags & BTREE_INSERT_NO_CLEAR_REPLICAS) && !ret)
+               ret = -EINTR;
+
+       if (!ret) {
+               /* free memory used by triggers; they'll be re-executed on retry: */
+               trans->mem_top = orig_mem_top;
                goto retry;
+       }
 
        goto out;
 }
@@ -969,6 +1024,7 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
        int ret = 0;
 
        bch2_trans_init(&trans, c);
+       bch2_trans_preload_iters(&trans);
 
        iter = bch2_trans_get_iter(&trans, id, start, BTREE_ITER_INTENT);
 
@@ -1014,5 +1070,6 @@ int bch2_btree_delete_range(struct bch_fs *c, enum btree_id id,
        }
 
        bch2_trans_exit(&trans);
+       BUG_ON(ret == -EINTR);
        return ret;
 }
index fb5461df3bbf9f5ef79b77646c91ba1073dd955b..6d04474f0e3a2776ead4f1b27ffc1e69c33b3086 100644 (file)
@@ -653,19 +653,16 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
        ca = bch_dev_bkey_exists(c, k.k->p.inode);
        g = __bucket(ca, k.k->p.offset, gc);
 
-       /*
-        * this should currently only be getting called from the bucket
-        * invalidate path:
-        */
-       BUG_ON(u.dirty_sectors);
-       BUG_ON(u.cached_sectors);
-       BUG_ON(!g->mark.owned_by_allocator);
-
        old = bucket_data_cmpxchg(c, ca, fs_usage, g, m, ({
                m.gen                   = u.gen;
                m.data_type             = u.data_type;
                m.dirty_sectors         = u.dirty_sectors;
                m.cached_sectors        = u.cached_sectors;
+
+               if (!(flags & BCH_BUCKET_MARK_GC)) {
+                       m.journal_seq_valid     = 1;
+                       m.journal_seq           = journal_seq;
+               }
        }));
 
        g->io_time[READ]        = u.read_time;
@@ -673,6 +670,11 @@ static int bch2_mark_alloc(struct bch_fs *c, struct bkey_s_c k,
        g->oldest_gen           = u.oldest_gen;
        g->gen_valid            = 1;
 
+       /*
+        * need to know if we're getting called from the invalidate path or
+        * not:
+        */
+
        if (old.cached_sectors) {
                update_cached_sectors(c, fs_usage, ca->dev_idx,
                                      -old.cached_sectors);
@@ -762,11 +764,34 @@ static s64 ptr_disk_sectors_delta(struct extent_ptr_decoded p,
        }
 }
 
-/*
- * Checking against gc's position has to be done here, inside the cmpxchg()
- * loop, to avoid racing with the start of gc clearing all the marks - GC does
- * that with the gc pos seqlock held.
- */
+static void bucket_set_stripe(struct bch_fs *c,
+                             const struct bch_stripe *v,
+                             bool enabled,
+                             struct bch_fs_usage *fs_usage,
+                             u64 journal_seq,
+                             bool gc)
+{
+       unsigned i;
+
+       for (i = 0; i < v->nr_blocks; i++) {
+               const struct bch_extent_ptr *ptr = v->ptrs + i;
+               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+               struct bucket *g = PTR_BUCKET(ca, ptr, gc);
+               struct bucket_mark new, old;
+
+               BUG_ON(ptr_stale(ca, ptr));
+
+               old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
+                       new.dirty                       = true;
+                       new.stripe                      = enabled;
+                       if (journal_seq) {
+                               new.journal_seq_valid   = 1;
+                               new.journal_seq         = journal_seq;
+                       }
+               }));
+       }
+}
+
 static bool bch2_mark_pointer(struct bch_fs *c,
                              struct extent_ptr_decoded p,
                              s64 sectors, enum bch_data_type data_type,
@@ -776,8 +801,7 @@ static bool bch2_mark_pointer(struct bch_fs *c,
 {
        struct bucket_mark old, new;
        struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
-       size_t b = PTR_BUCKET_NR(ca, &p.ptr);
-       struct bucket *g = __bucket(ca, b, gc);
+       struct bucket *g = PTR_BUCKET(ca, &p.ptr, gc);
        bool overflow;
        u64 v;
 
@@ -946,35 +970,6 @@ static int bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
        return 0;
 }
 
-static void bucket_set_stripe(struct bch_fs *c,
-                             const struct bch_stripe *v,
-                             bool enabled,
-                             struct bch_fs_usage *fs_usage,
-                             u64 journal_seq,
-                             bool gc)
-{
-       unsigned i;
-
-       for (i = 0; i < v->nr_blocks; i++) {
-               const struct bch_extent_ptr *ptr = v->ptrs + i;
-               struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
-               size_t b = PTR_BUCKET_NR(ca, ptr);
-               struct bucket *g = __bucket(ca, b, gc);
-               struct bucket_mark new, old;
-
-               BUG_ON(ptr_stale(ca, ptr));
-
-               old = bucket_data_cmpxchg(c, ca, fs_usage, g, new, ({
-                       new.dirty                       = true;
-                       new.stripe                      = enabled;
-                       if (journal_seq) {
-                               new.journal_seq_valid   = 1;
-                               new.journal_seq         = journal_seq;
-                       }
-               }));
-       }
-}
-
 static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
                            bool inserting,
                            struct bch_fs_usage *fs_usage,
@@ -1006,14 +1001,7 @@ static int bch2_mark_stripe(struct bch_fs *c, struct bkey_s_c k,
                m->nr_blocks    = s.v->nr_blocks;
                m->nr_redundant = s.v->nr_redundant;
 
-               memset(&m->r, 0, sizeof(m->r));
-
-               m->r.e.data_type        = BCH_DATA_USER;
-               m->r.e.nr_devs          = s.v->nr_blocks;
-               m->r.e.nr_required      = s.v->nr_blocks - s.v->nr_redundant;
-
-               for (i = 0; i < s.v->nr_blocks; i++)
-                       m->r.e.devs[i] = s.v->ptrs[i].dev;
+               bch2_bkey_to_replicas(&m->r.e, k);
 
        /*
         * XXX: account for stripes somehow here
@@ -1180,10 +1168,11 @@ int bch2_mark_update(struct btree_trans *trans,
        if (!btree_node_type_needs_gc(iter->btree_id))
                return 0;
 
-       bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
-               bpos_min(insert->k->k.p, b->key.k.p).offset -
-               bkey_start_offset(&insert->k->k),
-               fs_usage, trans->journal_res.seq, flags);
+       if (!(trans->flags & BTREE_INSERT_NOMARK_INSERT))
+               bch2_mark_key_locked(c, bkey_i_to_s_c(insert->k), true,
+                       bpos_min(insert->k->k.p, b->key.k.p).offset -
+                       bkey_start_offset(&insert->k->k),
+                       fs_usage, trans->journal_res.seq, flags);
 
        if (unlikely(trans->flags & BTREE_INSERT_NOMARK_OVERWRITES))
                return 0;
@@ -1262,6 +1251,391 @@ void bch2_trans_fs_usage_apply(struct btree_trans *trans,
        }
 }
 
+/* trans_mark: */
+
+static inline void update_replicas_list(struct replicas_delta_list *d,
+                                       struct bch_replicas_entry *r,
+                                       s64 sectors)
+{
+       d->top->delta = sectors;
+       memcpy(&d->top->r, r, replicas_entry_bytes(r));
+
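+       /* entries are variable length: advance by the entry size plus the 8-byte delta field */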
+       d->top = (void *) d->top + replicas_entry_bytes(r) + 8;
+
+       BUG_ON((void *) d->top > (void *) d->d + sizeof(d->pad));
+}
+
+static inline void update_cached_sectors_list(struct replicas_delta_list *d,
+                                             unsigned dev, s64 sectors)
+{
+       struct bch_replicas_padded r;
+
+       bch2_replicas_entry_cached(&r.e, dev);
+
+       update_replicas_list(d, &r.e, sectors);
+}
+
+void bch2_replicas_delta_list_apply(struct bch_fs *c,
+                                   struct bch_fs_usage *fs_usage,
+                                   struct replicas_delta_list *r)
+{
+       struct replicas_delta *d = r->d;
+
+       acc_u64s((u64 *) fs_usage,
+                (u64 *) &r->fs_usage, sizeof(*fs_usage) / sizeof(u64));
+
+       while (d != r->top) {
+               BUG_ON((void *) d > (void *) r->top);
+
+               update_replicas(c, fs_usage, &d->r, d->delta);
+
+               d = (void *) d + replicas_entry_bytes(&d->r) + 8;
+       }
+}
+
+static int trans_get_key(struct btree_trans *trans,
+                        enum btree_id btree_id, struct bpos pos,
+                        struct btree_insert_entry **insert,
+                        struct btree_iter **iter,
+                        struct bkey_s_c *k)
+{
+       unsigned i;
+       int ret;
+
+       *insert = NULL;
+
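+       /* check whether the transaction already has an update for this key: */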
+       for (i = 0; i < trans->nr_updates; i++)
+               if (!trans->updates[i].deferred &&
+                   trans->updates[i].iter->btree_id == btree_id &&
+                   !bkey_cmp(pos, trans->updates[i].iter->pos)) {
+                       *insert = &trans->updates[i];
+                       *iter   = (*insert)->iter;
+                       *k      = bkey_i_to_s_c((*insert)->k);
+                       return 0;
+               }
+
+       *iter = __bch2_trans_get_iter(trans, btree_id, pos,
+                                  BTREE_ITER_SLOTS|BTREE_ITER_INTENT, 0);
+       if (IS_ERR(*iter))
+               return PTR_ERR(*iter);
+
+       *k = bch2_btree_iter_peek_slot(*iter);
+       ret = bkey_err(*k);
+       if (ret)
+               bch2_trans_iter_put(trans, *iter);
+       return ret;
+}
+
+static int trans_update_key(struct btree_trans *trans,
+                           struct btree_insert_entry **insert,
+                           struct btree_iter *iter,
+                           struct bkey_s_c k,
+                           unsigned extra_u64s)
+{
+       struct bkey_i *new_k;
+
+       if (*insert)
+               return 0;
+
+       new_k = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
+                                  extra_u64s * sizeof(u64));
+       if (IS_ERR(new_k))
+               return PTR_ERR(new_k);
+
+       *insert = bch2_trans_update(trans, ((struct btree_insert_entry) {
+                               .iter = iter,
+                               .k = new_k,
+                               .triggered = true,
+       }));
+
+       bkey_reassemble((*insert)->k, k);
+       return 0;
+}
+
+static int bch2_trans_mark_pointer(struct btree_trans *trans,
+                       struct extent_ptr_decoded p,
+                       s64 sectors, enum bch_data_type data_type,
+                       struct replicas_delta_list *d)
+{
+       struct bch_fs *c = trans->c;
+       struct bch_dev *ca = bch_dev_bkey_exists(c, p.ptr.dev);
+       struct btree_insert_entry *insert;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct bkey_alloc_unpacked u;
+       struct bkey_i_alloc *a;
+       bool overflow;
+       int ret;
+
+       ret = trans_get_key(trans, BTREE_ID_ALLOC,
+                           POS(p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr)),
+                           &insert, &iter, &k);
+       if (ret)
+               return ret;
+
+       if (k.k->type != KEY_TYPE_alloc) {
+               bch_err_ratelimited(c, "pointer to nonexistent bucket %u:%zu",
+                                   p.ptr.dev,
+                                   PTR_BUCKET_NR(ca, &p.ptr));
+               ret = -1;
+               goto out;
+       }
+
+       u = bch2_alloc_unpack(bkey_s_c_to_alloc(k).v);
+
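+       /* the pointer is stale if the bucket gen has been incremented past it: */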
+       if (gen_after(u.gen, p.ptr.gen)) {
+               ret = 1;
+               goto out;
+       }
+
+       if (!p.ptr.cached)
+               overflow = checked_add(u.dirty_sectors, sectors);
+       else
+               overflow = checked_add(u.cached_sectors, sectors);
+
+       u.data_type = u.dirty_sectors || u.cached_sectors
+               ? data_type : 0;
+
+       bch2_fs_inconsistent_on(overflow, c,
+               "bucket sector count overflow: %u + %lli > U16_MAX",
+               !p.ptr.cached
+               ? u.dirty_sectors
+               : u.cached_sectors, sectors);
+
+       ret = trans_update_key(trans, &insert, iter, k, 1);
+       if (ret)
+               goto out;
+
+       a = bkey_alloc_init(insert->k);
+       a->k.p = iter->pos;
+       bch2_alloc_pack(a, u);
+out:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+static int bch2_trans_mark_stripe_ptr(struct btree_trans *trans,
+                       struct bch_extent_stripe_ptr p,
+                       s64 sectors, enum bch_data_type data_type,
+                       struct replicas_delta_list *d)
+{
+       struct bch_replicas_padded r;
+       struct btree_insert_entry *insert;
+       struct btree_iter *iter;
+       struct bkey_s_c k;
+       struct bkey_s_stripe s;
+       unsigned nr_data;
+       s64 parity_sectors;
+       int ret = 0;
+
+       BUG_ON(!sectors);
+
+       ret = trans_get_key(trans, BTREE_ID_EC, POS(0, p.idx),
+                           &insert, &iter, &k);
+       if (ret)
+               return ret;
+
+       if (k.k->type != KEY_TYPE_stripe) {
+               bch_err_ratelimited(trans->c,
+                                   "pointer to nonexistent stripe %llu",
+                                   (u64) p.idx);
+               ret = -1;
+               goto out;
+       }
+
+       ret = trans_update_key(trans, &insert, iter, k, 1);
+       if (ret)
+               goto out;
+
+       s = bkey_i_to_s_stripe(insert->k);
+
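+       /* charge this block's proportional share of the parity blocks: */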
+       nr_data = s.v->nr_blocks - s.v->nr_redundant;
+
+       parity_sectors = DIV_ROUND_UP(abs(sectors) * s.v->nr_redundant, nr_data);
+
+       if (sectors < 0)
+               parity_sectors = -parity_sectors;
+
+       stripe_blockcount_set(s.v, p.block,
+               stripe_blockcount_get(s.v, p.block) +
+               sectors + parity_sectors);
+
+       bch2_bkey_to_replicas(&r.e, s.s_c);
+
+       update_replicas_list(d, &r.e, sectors);
+out:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
+static int bch2_trans_mark_extent(struct btree_trans *trans,
+                       struct bkey_s_c k,
+                       s64 sectors, enum bch_data_type data_type,
+                       struct replicas_delta_list *d)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       struct extent_ptr_decoded p;
+       struct bch_replicas_padded r;
+       s64 dirty_sectors = 0;
+       bool stale;
+       unsigned i;
+       int ret;
+
+       r.e.data_type   = data_type;
+       r.e.nr_devs     = 0;
+       r.e.nr_required = 1;
+
+       BUG_ON(!sectors);
+
+       bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+               s64 disk_sectors = data_type == BCH_DATA_BTREE
+                       ? sectors
+                       : ptr_disk_sectors_delta(p, sectors);
+
+               ret = bch2_trans_mark_pointer(trans, p, disk_sectors,
+                                             data_type, d);
+               if (ret < 0)
+                       return ret;
+
+               stale = ret > 0;
+
+               if (p.ptr.cached) {
+                       if (disk_sectors && !stale)
+                               update_cached_sectors_list(d, p.ptr.dev,
+                                                          disk_sectors);
+               } else if (!p.ec_nr) {
+                       dirty_sectors          += disk_sectors;
+                       r.e.devs[r.e.nr_devs++] = p.ptr.dev;
+               } else {
+                       for (i = 0; i < p.ec_nr; i++) {
+                               ret = bch2_trans_mark_stripe_ptr(trans, p.ec[i],
+                                               disk_sectors, data_type, d);
+                               if (ret)
+                                       return ret;
+                       }
+
+                       r.e.nr_required = 0;
+               }
+       }
+
+       if (dirty_sectors)
+               update_replicas_list(d, &r.e, dirty_sectors);
+
+       return 0;
+}
+
+int bch2_trans_mark_key(struct btree_trans *trans,
+                       struct bkey_s_c k,
+                       bool inserting, s64 sectors,
+                       struct replicas_delta_list *d)
+{
+       struct bch_fs *c = trans->c;
+
+       switch (k.k->type) {
+       case KEY_TYPE_btree_ptr:
+               return bch2_trans_mark_extent(trans, k, inserting
+                               ?  c->opts.btree_node_size
+                               : -c->opts.btree_node_size,
+                               BCH_DATA_BTREE, d);
+       case KEY_TYPE_extent:
+               return bch2_trans_mark_extent(trans, k,
+                               sectors, BCH_DATA_USER, d);
+       case KEY_TYPE_inode:
+               if (inserting)
+                       d->fs_usage.nr_inodes++;
+               else
+                       d->fs_usage.nr_inodes--;
+               return 0;
+       case KEY_TYPE_reservation: {
+               unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
+
+               sectors *= replicas;
+               replicas = clamp_t(unsigned, replicas, 1,
+                                  ARRAY_SIZE(d->fs_usage.persistent_reserved));
+
+               d->fs_usage.reserved                            += sectors;
+               d->fs_usage.persistent_reserved[replicas - 1]   += sectors;
+               return 0;
+       }
+       default:
+               return 0;
+       }
+}
+
+int bch2_trans_mark_update(struct btree_trans *trans,
+                          struct btree_insert_entry *insert,
+                          struct replicas_delta_list *d)
+{
+       struct btree_iter       *iter = insert->iter;
+       struct btree            *b = iter->l[0].b;
+       struct btree_node_iter  node_iter = iter->l[0].iter;
+       struct bkey_packed      *_k;
+       int ret;
+
+       if (!btree_node_type_needs_gc(iter->btree_id))
+               return 0;
+
+       ret = bch2_trans_mark_key(trans,
+                       bkey_i_to_s_c(insert->k), true,
+                       bpos_min(insert->k->k.p, b->key.k.p).offset -
+                       bkey_start_offset(&insert->k->k), d);
+       if (ret)
+               return ret;
+
+       while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
+                                                     KEY_TYPE_discard))) {
+               struct bkey             unpacked;
+               struct bkey_s_c         k;
+               s64                     sectors = 0;
+
+               k = bkey_disassemble(b, _k, &unpacked);
+
+               if (btree_node_is_extents(b)
+                   ? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
+                   : bkey_cmp(insert->k->k.p, k.k->p))
+                       break;
+
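+               /* compute how many sectors of the overwritten key this update deletes: */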
+               if (btree_node_is_extents(b)) {
+                       switch (bch2_extent_overlap(&insert->k->k, k.k)) {
+                       case BCH_EXTENT_OVERLAP_ALL:
+                               sectors = -((s64) k.k->size);
+                               break;
+                       case BCH_EXTENT_OVERLAP_BACK:
+                               sectors = bkey_start_offset(&insert->k->k) -
+                                       k.k->p.offset;
+                               break;
+                       case BCH_EXTENT_OVERLAP_FRONT:
+                               sectors = bkey_start_offset(k.k) -
+                                       insert->k->k.p.offset;
+                               break;
+                       case BCH_EXTENT_OVERLAP_MIDDLE:
+                               sectors = k.k->p.offset - insert->k->k.p.offset;
+                               BUG_ON(sectors <= 0);
+
+                               ret = bch2_trans_mark_key(trans, k, true,
+                                                         sectors, d);
+                               if (ret)
+                                       return ret;
+
+                               sectors = bkey_start_offset(&insert->k->k) -
+                                       k.k->p.offset;
+                               break;
+                       }
+
+                       BUG_ON(sectors >= 0);
+               }
+
+               ret = bch2_trans_mark_key(trans, k, false, sectors, d);
+               if (ret)
+                       return ret;
+
+               bch2_btree_node_iter_advance(&node_iter, b);
+       }
+
+       return 0;
+}
+
 /* Disk reservations: */
 
 #define SECTORS_CACHE  1024
index 86431cffb660dcb818d059e98b69f00757cf71e8..578019089a91ff9a0f34bb357bb354abc7267481 100644 (file)
@@ -100,7 +100,7 @@ static inline struct bucket_mark ptr_bucket_mark(struct bch_dev *ca,
        struct bucket_mark m;
 
        rcu_read_lock();
-       m = READ_ONCE(bucket(ca, PTR_BUCKET_NR(ca, ptr))->mark);
+       m = READ_ONCE(PTR_BUCKET(ca, ptr, 0)->mark);
        rcu_read_unlock();
 
        return m;
@@ -266,6 +266,15 @@ int bch2_mark_overwrite(struct btree_trans *, struct btree_iter *,
                        struct bch_fs_usage *, unsigned);
 int bch2_mark_update(struct btree_trans *, struct btree_insert_entry *,
                     struct bch_fs_usage *, unsigned);
+
+void bch2_replicas_delta_list_apply(struct bch_fs *,
+                                   struct bch_fs_usage *,
+                                   struct replicas_delta_list *);
+int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
+                       bool, s64, struct replicas_delta_list *);
+int bch2_trans_mark_update(struct btree_trans *,
+                          struct btree_insert_entry *,
+                          struct replicas_delta_list *);
 void bch2_trans_fs_usage_apply(struct btree_trans *, struct bch_fs_usage_online *);
 
 /* disk reservations: */
index 8e47b273360c3629dc60e8b1834ec60da99e93e1..ec4294d41518e275d45bc0520b87639bfc544fb2 100644 (file)
@@ -93,6 +93,19 @@ struct bch_fs_usage_short {
        u64                     nr_inodes;
 };
 
+struct replicas_delta {
+       s64                     delta;
+       struct bch_replicas_entry r;
+} __packed;
+
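+/* accumulates disk usage changes from transactional triggers, applied at commit: */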
+struct replicas_delta_list {
+       struct bch_fs_usage     fs_usage;
+
+       struct replicas_delta   *top;
+       struct replicas_delta   d[0];
+       u8                      pad[256];
+};
+
 /*
  * A reservation for space on disk:
  */
index 1eacd9665c7d83ff6c19e4980422b308048988b0..6761b5c24a12a3b9882955e117e9ec179e40e589 100644 (file)
@@ -539,14 +539,17 @@ static int ec_stripe_mem_alloc(struct bch_fs *c,
                               struct btree_iter *iter)
 {
        size_t idx = iter->pos.offset;
+       int ret = 0;
 
        if (!__ec_stripe_mem_alloc(c, idx, GFP_NOWAIT|__GFP_NOWARN))
-               return 0;
+               return ret;
 
        bch2_btree_trans_unlock(iter->trans);
+       ret = -EINTR;
 
        if (!__ec_stripe_mem_alloc(c, idx, GFP_KERNEL))
-               return -EINTR;
+               return ret;
+
        return -ENOMEM;
 }
 
@@ -692,23 +695,22 @@ retry:
 
        if (!ret)
                ret = -ENOSPC;
-       goto out;
+       goto err;
 found_slot:
        ret = ec_stripe_mem_alloc(c, iter);
-
-       if (ret == -EINTR)
-               goto retry;
        if (ret)
-               return ret;
+               goto err;
 
        stripe->k.p = iter->pos;
 
        bch2_trans_update(&trans, BTREE_INSERT_ENTRY(iter, &stripe->k_i));
 
        ret = bch2_trans_commit(&trans, NULL, NULL,
-                               BTREE_INSERT_NOFAIL|
-                               BTREE_INSERT_USE_RESERVE);
-out:
+                               BTREE_INSERT_ATOMIC|
+                               BTREE_INSERT_NOFAIL);
+err:
+       if (ret == -EINTR)
+               goto retry;
        bch2_trans_exit(&trans);
 
        return ret;
@@ -745,6 +747,7 @@ static int ec_stripe_update_ptrs(struct bch_fs *c,
        int ret = 0, dev, idx;
 
        bch2_trans_init(&trans, c);
+       bch2_trans_preload_iters(&trans);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
                                   bkey_start_pos(pos),
index a975f8f72da40cce4ac8abdf58ea0328f7ceb729..2ebde20c74f8b32fa03cffce4f3a81915470462a 100644 (file)
@@ -903,15 +903,54 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
        bch2_btree_iter_verify(iter, l->b);
 }
 
+static unsigned bch2_bkey_nr_alloc_ptrs(struct bkey_s_c k)
+{
+       struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+       const union bch_extent_entry *entry;
+       unsigned ret = 0;
+
+       bkey_extent_entry_for_each(ptrs, entry) {
+               switch (__extent_entry_type(entry)) {
+               case BCH_EXTENT_ENTRY_ptr:
+               case BCH_EXTENT_ENTRY_stripe_ptr:
+                       ret++;
+               }
+       }
+
+       return ret;
+}
+
 static inline struct bpos
-bch2_extent_atomic_end(struct bkey_i *k, struct btree_iter *iter)
+bch2_extent_atomic_end(struct bkey_i *insert, struct btree_iter *iter)
 {
        struct btree *b = iter->l[0].b;
+       struct btree_node_iter  node_iter = iter->l[0].iter;
+       struct bkey_packed      *_k;
+       unsigned                nr_alloc_ptrs =
+               bch2_bkey_nr_alloc_ptrs(bkey_i_to_s_c(insert));
 
        BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
-       BUG_ON(bkey_cmp(bkey_start_pos(&k->k), b->data->min_key) < 0);
+       BUG_ON(bkey_cmp(bkey_start_pos(&insert->k), b->data->min_key) < 0);
+
+       while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
+                                                     KEY_TYPE_discard))) {
+               struct bkey     unpacked;
+               struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked);
+
+               if (bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0)
+                       break;
+
+               nr_alloc_ptrs += bch2_bkey_nr_alloc_ptrs(k);
+
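+               /* bound the number of alloc/stripe keys a single extent update will touch: */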
+               if (nr_alloc_ptrs > 20) {
+                       BUG_ON(bkey_cmp(k.k->p, bkey_start_pos(&insert->k)) <= 0);
+                       return bpos_min(insert->k.p, k.k->p);
+               }
+
+               bch2_btree_node_iter_advance(&node_iter, b);
+       }
 
-       return bpos_min(k->k.p, b->key.k.p);
+       return bpos_min(insert->k.p, b->key.k.p);
 }
 
 void bch2_extent_trim_atomic(struct bkey_i *k, struct btree_iter *iter)
index 98202fbabfafb4f69a23fa833c1d40e166296593..f9e6c9d9ef046ea564f0bb35cbf972d4dd155642 100644 (file)
@@ -43,6 +43,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
        int ret = 0;
 
        bch2_trans_init(&trans, c);
+       bch2_trans_preload_iters(&trans);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
                                   POS_MIN, BTREE_ITER_PREFETCH);
@@ -96,6 +97,8 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
                        break;
        }
 
+       BUG_ON(ret == -EINTR);
+
        bch2_trans_exit(&trans);
 
        bch2_replicas_gc_end(c, ret);
index 1ad585ee27ca117cd9855332a9803c1112d20e00..ff426a2c8e7abff8c3b4df7e94f44b6f61535429 100644 (file)
@@ -62,6 +62,7 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
        int ret = 0;
 
        bch2_trans_init(&trans, c);
+       bch2_trans_preload_iters(&trans);
 
        iter = bch2_trans_get_iter(&trans, BTREE_ID_EXTENTS,
                                   bkey_start_pos(&bch2_keylist_front(keys)->k),
@@ -184,6 +185,7 @@ nomatch:
        }
 out:
        bch2_trans_exit(&trans);
+       BUG_ON(ret == -EINTR);
        return ret;
 }
 
index a3f07565efb0444e2715ff49927650086b0697f6..a80de5d814d69dc1e1a231c091b68f4eeb54a19e 100644 (file)
@@ -212,11 +212,6 @@ static int bch2_extent_replay_key(struct bch_fs *c, struct bkey_i *k)
                bch2_disk_reservation_init(c, 0);
        struct bkey_i *split;
        bool split_compressed = false;
-       unsigned flags = BTREE_INSERT_ATOMIC|
-               BTREE_INSERT_NOFAIL|
-               BTREE_INSERT_LAZY_RW|
-               BTREE_INSERT_JOURNAL_REPLAY|
-               BTREE_INSERT_NOMARK;
        int ret;
 
        bch2_trans_init(&trans, c);
@@ -252,9 +247,6 @@ retry:
                                        BCH_DISK_RESERVATION_NOFAIL);
                        BUG_ON(ret);
 
-                       flags &= ~BTREE_INSERT_JOURNAL_REPLAY;
-                       flags &= ~BTREE_INSERT_NOMARK;
-                       flags |=  BTREE_INSERT_NOMARK_OVERWRITES;
                        split_compressed = true;
                }
 
@@ -266,24 +258,31 @@ retry:
                bch2_btree_iter_set_pos(iter, split->k.p);
        } while (bkey_cmp(iter->pos, k->k.p) < 0);
 
-       ret = bch2_trans_commit(&trans, &disk_res, NULL, flags);
-       if (ret)
-               goto err;
-
        if (split_compressed) {
-               /*
-                * This isn't strictly correct - we should only be relying on
-                * the btree node lock for synchronization with gc when we've
-                * got a write lock held.
-                *
-                * but - there are other correctness issues if btree gc were to
-                * run before journal replay finishes
-                */
-               BUG_ON(c->gc_pos.phase);
-
-               bch2_mark_key(c, bkey_i_to_s_c(k), false, -((s64) k->k.size),
-                             NULL, 0, 0);
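+               /* reset the delta list by hand, since we commit with NO_CLEAR_REPLICAS: */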
+               memset(&trans.fs_usage_deltas.fs_usage, 0,
+                      sizeof(trans.fs_usage_deltas.fs_usage));
+               trans.fs_usage_deltas.top = trans.fs_usage_deltas.d;
+
+               ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k), false,
+                                         -((s64) k->k.size),
+                                         &trans.fs_usage_deltas) ?:
+                     bch2_trans_commit(&trans, &disk_res, NULL,
+                                       BTREE_INSERT_ATOMIC|
+                                       BTREE_INSERT_NOFAIL|
+                                       BTREE_INSERT_LAZY_RW|
+                                       BTREE_INSERT_NOMARK_OVERWRITES|
+                                       BTREE_INSERT_NO_CLEAR_REPLICAS);
+       } else {
+               ret = bch2_trans_commit(&trans, &disk_res, NULL,
+                                       BTREE_INSERT_ATOMIC|
+                                       BTREE_INSERT_NOFAIL|
+                                       BTREE_INSERT_LAZY_RW|
+                                       BTREE_INSERT_JOURNAL_REPLAY|
+                                       BTREE_INSERT_NOMARK);
        }
+
+       if (ret)
+               goto err;
 err:
        if (ret == -EINTR)
                goto retry;
@@ -527,7 +526,7 @@ static int verify_superblock_clean(struct bch_fs *c,
        struct bch_sb_field_clean *clean = *cleanp;
        int ret = 0;
 
-       if (!clean || !j)
+       if (!c->sb.clean || !j)
                return 0;
 
        if (mustfix_fsck_err_on(j->seq != clean->journal_seq, c,
@@ -653,6 +652,7 @@ int bch2_fs_recovery(struct bch_fs *c)
        u64 journal_seq;
        LIST_HEAD(journal_entries);
        struct journal_keys journal_keys = { NULL };
+       bool wrote = false, write_sb = false;
        int ret;
 
        if (c->sb.clean)
@@ -677,8 +677,12 @@ int bch2_fs_recovery(struct bch_fs *c)
                if (ret)
                        goto err;
 
-               fsck_err_on(c->sb.clean && !journal_empty(&journal_entries), c,
-                           "filesystem marked clean but journal not empty");
+               if (mustfix_fsck_err_on(c->sb.clean && !journal_empty(&journal_entries), c,
+                               "filesystem marked clean but journal not empty")) {
+                       c->sb.compat &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
+                       SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
+                       c->sb.clean = false;
+               }
 
                if (!c->sb.clean && list_empty(&journal_entries)) {
                        bch_err(c, "no journal entries found");
@@ -736,12 +740,15 @@ int bch2_fs_recovery(struct bch_fs *c)
        if (ret)
                goto err;
 
+       bch_verbose(c, "starting alloc read");
        err = "error reading allocation information";
        ret = bch2_alloc_read(c, &journal_keys);
        if (ret)
                goto err;
+       bch_verbose(c, "alloc read done");
 
        bch_verbose(c, "starting stripes_read");
+       err = "error reading stripes";
        ret = bch2_stripes_read(c, &journal_keys);
        if (ret)
                goto err;
@@ -749,11 +756,26 @@ int bch2_fs_recovery(struct bch_fs *c)
 
        set_bit(BCH_FS_ALLOC_READ_DONE, &c->flags);
 
+       if ((c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) &&
+           !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA))) {
+               /*
+                * interior btree node updates aren't consistent with the
+                * journal; after an unclean shutdown we have to walk all
+                * pointers to metadata:
+                */
+               bch_verbose(c, "starting metadata mark and sweep:");
+               err = "error in mark and sweep";
+               ret = bch2_gc(c, NULL, true, true);
+               if (ret)
+                       goto err;
+               bch_verbose(c, "mark and sweep done");
+       }
+
        if (c->opts.fsck ||
            !(c->sb.compat & (1ULL << BCH_COMPAT_FEAT_ALLOC_INFO)) ||
            test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags)) {
                bch_verbose(c, "starting mark and sweep:");
-               err = "error in recovery";
+               err = "error in mark and sweep";
                ret = bch2_gc(c, &journal_keys, true, false);
                if (ret)
                        goto err;
@@ -780,6 +802,16 @@ int bch2_fs_recovery(struct bch_fs *c)
                goto err;
        bch_verbose(c, "journal replay done");
 
+       bch_verbose(c, "writing allocation info:");
+       err = "error writing out alloc info";
+       ret = bch2_stripes_write(c, BTREE_INSERT_LAZY_RW, &wrote) ?:
+               bch2_alloc_write(c, BTREE_INSERT_LAZY_RW, &wrote);
+       if (ret) {
+               bch_err(c, "error writing alloc info");
+               goto err;
+       }
+       bch_verbose(c, "alloc write done");
+
        if (c->opts.norecovery)
                goto out;
 
@@ -802,13 +834,23 @@ int bch2_fs_recovery(struct bch_fs *c)
                        c->disk_sb.sb->version_min =
                                le16_to_cpu(bcachefs_metadata_version_min);
                c->disk_sb.sb->version = le16_to_cpu(bcachefs_metadata_version_current);
+               write_sb = true;
+       }
+
+       if (!test_bit(BCH_FS_ERROR, &c->flags)) {
+               c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
+               write_sb = true;
        }
 
        if (c->opts.fsck &&
            !test_bit(BCH_FS_ERROR, &c->flags)) {
                c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_ATOMIC_NLINK;
                SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
+               write_sb = true;
        }
+
+       if (write_sb)
+               bch2_write_super(c);
        mutex_unlock(&c->sb_lock);
 
        if (c->journal_seq_blacklist_table &&
@@ -821,7 +863,7 @@ out:
        return ret;
 err:
 fsck_err:
-       pr_err("Error in recovery: %s (%i)", err, ret);
+       bch_err(c, "Error in recovery: %s (%i)", err, ret);
        goto out;
 }
 
index cf13a628682f3e536d283f0ba5ac6184343d6b66..2482dbbbad38fb49cabd0e4f7f0f615f04326d84 100644 (file)
@@ -102,8 +102,8 @@ static void stripe_to_replicas(struct bkey_s_c k,
                r->devs[r->nr_devs++] = ptr->dev;
 }
 
-static void bkey_to_replicas(struct bch_replicas_entry *e,
-                            struct bkey_s_c k)
+void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
+                          struct bkey_s_c k)
 {
        e->nr_devs = 0;
 
@@ -439,7 +439,7 @@ bool bch2_bkey_replicas_marked_locked(struct bch_fs *c,
                        return false;
        }
 
-       bkey_to_replicas(&search.e, k);
+       bch2_bkey_to_replicas(&search.e, k);
 
        return bch2_replicas_marked_locked(c, &search.e, check_gc_replicas);
 }
@@ -472,7 +472,7 @@ int bch2_mark_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
                        return ret;
        }
 
-       bkey_to_replicas(&search.e, k);
+       bch2_bkey_to_replicas(&search.e, k);
 
        return bch2_mark_replicas(c, &search.e);
 }
index 0777e7056d55dbc3f14e9eba003d940542274c5b..1ceedb6231fd26f7847cc73a245d9e3b69e2df48 100644 (file)
@@ -28,6 +28,7 @@ int bch2_mark_replicas(struct bch_fs *,
 
 bool bch2_bkey_replicas_marked_locked(struct bch_fs *,
                                      struct bkey_s_c, bool);
+void bch2_bkey_to_replicas(struct bch_replicas_entry *, struct bkey_s_c);
 bool bch2_bkey_replicas_marked(struct bch_fs *,
                               struct bkey_s_c, bool);
 int bch2_mark_bkey_replicas(struct bch_fs *, struct bkey_s_c);
index 6e69a4f74ca0db485b733f30eab98b164093699d..b991238c5bd23fa5c94fe3589f4177218b7c97e0 100644 (file)
@@ -946,7 +946,7 @@ int bch2_fs_mark_dirty(struct bch_fs *c)
 
        mutex_lock(&c->sb_lock);
        SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
-       c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_INFO);
+       c->disk_sb.sb->compat[0] &= ~(1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA);
        ret = bch2_write_super(c);
        mutex_unlock(&c->sb_lock);
 
@@ -1063,6 +1063,7 @@ void bch2_fs_mark_clean(struct bch_fs *c)
        SET_BCH_SB_CLEAN(c->disk_sb.sb, true);
 
        c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_INFO;
+       c->disk_sb.sb->compat[0] |= 1ULL << BCH_COMPAT_FEAT_ALLOC_METADATA;
 
        u64s = sizeof(*sb_clean) / sizeof(u64) + c->journal.entry_u64s_reserved;