bcachefs: Update transactional triggers interface to pass old & new keys
author     Kent Overstreet <kent.overstreet@gmail.com>
           Thu, 10 Dec 2020 18:13:56 +0000 (13:13 -0500)
committer  Kent Overstreet <kent.overstreet@linux.dev>
           Sun, 22 Oct 2023 21:08:49 +0000 (17:08 -0400)
This is needed to fix a bug where we're overflowing the number of iterators
in a btree transaction: we're updating the stripes btree (to update block
counts), and the stripes btree trigger was unnecessarily updating the alloc
btree - it doesn't need to update the alloc btree when the pointers within a
stripe aren't changing.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
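
For orientation, here is a minimal, self-contained C sketch of the idea
(the struct layouts and function name below are simplified stand-ins, not
the real bcachefs types): a transactional trigger that receives both the
old and the new key can detect that a stripe update leaves the block
pointers untouched and skip the alloc btree work entirely.

	#include <string.h>

	/*
	 * Simplified stand-ins for the real bcachefs types -- the actual
	 * code works on packed bkeys and struct bch_stripe:
	 */
	struct extent_ptr { unsigned dev; unsigned long long offset; };

	struct stripe {
		unsigned nr_blocks;
		struct extent_ptr ptrs[16];
	};

	/*
	 * New-style transactional trigger: it sees both the key being
	 * overwritten (old) and the key being inserted (new).  A pure
	 * insert passes old == NULL, a pure overwrite/delete passes
	 * new == NULL -- analogous to the bkey_s_c_null arguments in
	 * the patch below.
	 */
	static int trans_mark_stripe(const struct stripe *old,
				     const struct stripe *new)
	{
		/*
		 * If the block pointers aren't changing (e.g. only block
		 * counts were updated), there's nothing to do to the alloc
		 * btree -- this early return is what keeps the transaction
		 * from piling up unnecessary alloc btree iterators:
		 */
		if (old && new &&
		    old->nr_blocks == new->nr_blocks &&
		    !memcmp(old->ptrs, new->ptrs,
			    new->nr_blocks * sizeof(struct extent_ptr)))
			return 0;

		/* ...otherwise, update alloc info for the old/new pointers... */
		return 0;
	}

In the actual patch this check is the memcmp() of old_s->ptrs against
new_s->ptrs at the top of bch2_trans_mark_stripe().
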
fs/bcachefs/btree_update_interior.c
fs/bcachefs/buckets.c
fs/bcachefs/buckets.h
fs/bcachefs/recovery.c

diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 594bcd7975169f581043d4375f55af92a10af6f9..3ae920a223f9979e0c628be9130d5c5f652f1dac 100644
@@ -519,14 +519,18 @@ static int btree_update_nodes_written_trans(struct btree_trans *trans,
        trans->journal_pin = &as->journal;
 
        for_each_keylist_key(&as->new_keys, k) {
-               ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k),
+               ret = bch2_trans_mark_key(trans,
+                                         bkey_s_c_null,
+                                         bkey_i_to_s_c(k),
                                          0, 0, BTREE_TRIGGER_INSERT);
                if (ret)
                        return ret;
        }
 
        for_each_keylist_key(&as->old_keys, k) {
-               ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(k),
+               ret = bch2_trans_mark_key(trans,
+                                         bkey_i_to_s_c(k),
+                                         bkey_s_c_null,
                                          0, 0, BTREE_TRIGGER_OVERWRITE);
                if (ret)
                        return ret;
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index 4762c5465ef05da627c47351762bf003ffcb958c..44d08434855dc0365cd02b121bf978916ba2673a 100644
@@ -1338,10 +1338,8 @@ static int bch2_mark_key_locked(struct bch_fs *c,
                ret = bch2_mark_stripe(c, old, new, fs_usage, journal_seq, flags);
                break;
        case KEY_TYPE_inode:
-               if (!(flags & BTREE_TRIGGER_OVERWRITE))
-                       fs_usage->nr_inodes++;
-               else
-                       fs_usage->nr_inodes--;
+               fs_usage->nr_inodes += new.k->type == KEY_TYPE_inode;
+               fs_usage->nr_inodes -= old.k->type == KEY_TYPE_inode;
                break;
        case KEY_TYPE_reservation: {
                unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
@@ -1405,10 +1403,10 @@ int bch2_mark_update(struct btree_trans *trans,
        old = (struct bkey_s_c) { &unpacked, NULL };
 
        if (!btree_node_type_is_extents(iter->btree_id)) {
+               /* iterators should be uptodate, shouldn't get errors here: */
                if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
-                       _old = bch2_btree_node_iter_peek(&node_iter, b);
-                       if (_old)
-                               old = bkey_disassemble(b, _old, &unpacked);
+                       old = bch2_btree_iter_peek_slot(iter);
+                       BUG_ON(bkey_err(old));
                } else {
                        struct bkey_cached *ck = (void *) iter->l[0].b;
 
@@ -1753,59 +1751,92 @@ static int bch2_trans_mark_extent(struct btree_trans *trans,
        return 0;
 }
 
+static int bch2_trans_mark_stripe_alloc_ref(struct btree_trans *trans,
+                                           const struct bch_extent_ptr *ptr,
+                                           s64 sectors, bool parity)
+{
+       struct bkey_i_alloc *a;
+       struct btree_iter *iter;
+       struct bkey_alloc_unpacked u;
+       int ret;
+
+       ret = bch2_trans_start_alloc_update(trans, &iter, ptr, &u);
+       if (ret)
+               return ret;
+
+       if (parity) {
+               u.dirty_sectors += sectors;
+               u.data_type = u.dirty_sectors
+                       ? BCH_DATA_parity
+                       : 0;
+       }
+
+       a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
+       ret = PTR_ERR_OR_ZERO(a);
+       if (ret)
+               goto err;
+
+       bkey_alloc_init(&a->k_i);
+       a->k.p = iter->pos;
+       bch2_alloc_pack(a, u);
+       bch2_trans_update(trans, iter, &a->k_i, 0);
+err:
+       bch2_trans_iter_put(trans, iter);
+       return ret;
+}
+
 static int bch2_trans_mark_stripe(struct btree_trans *trans,
-                                 struct bkey_s_c k,
+                                 struct bkey_s_c old, struct bkey_s_c new,
                                  unsigned flags)
 {
-       const struct bch_stripe *s = bkey_s_c_to_stripe(k).v;
-       unsigned nr_data = s->nr_blocks - s->nr_redundant;
+       const struct bch_stripe *old_s = old.k->type == KEY_TYPE_stripe
+               ? bkey_s_c_to_stripe(old).v : NULL;
+       const struct bch_stripe *new_s = new.k->type == KEY_TYPE_stripe
+               ? bkey_s_c_to_stripe(new).v : NULL;
        struct bch_replicas_padded r;
-       struct bkey_alloc_unpacked u;
-       struct bkey_i_alloc *a;
-       struct btree_iter *iter;
-       bool deleting = flags & BTREE_TRIGGER_OVERWRITE;
-       s64 sectors = le16_to_cpu(s->sectors);
        unsigned i;
        int ret = 0;
 
-       if (deleting)
-               sectors = -sectors;
-
-       bch2_bkey_to_replicas(&r.e, k);
-       update_replicas_list(trans, &r.e, sectors * s->nr_redundant);
-
        /*
-        * The allocator code doesn't necessarily update bucket gens in the
-        * btree when incrementing them, right before handing out new buckets -
-        * we just need to persist those updates here along with the new stripe:
+        * If the pointers aren't changing, we don't need to do anything:
         */
+       if (new_s && old_s &&
+           !memcmp(old_s->ptrs, new_s->ptrs,
+                   new_s->nr_blocks * sizeof(struct bch_extent_ptr)))
+               return 0;
 
-       for (i = 0; i < s->nr_blocks && !ret; i++) {
-               bool parity = i >= nr_data;
+       if (new_s) {
+               unsigned nr_data = new_s->nr_blocks - new_s->nr_redundant;
+               s64 sectors = le16_to_cpu(new_s->sectors);
 
-               ret = bch2_trans_start_alloc_update(trans, &iter,
-                                                   &s->ptrs[i], &u);
-               if (ret)
-                       break;
+               bch2_bkey_to_replicas(&r.e, new);
+               update_replicas_list(trans, &r.e, sectors * new_s->nr_redundant);
 
-               if (parity) {
-                       u.dirty_sectors += sectors;
-                       u.data_type = u.dirty_sectors
-                               ? BCH_DATA_parity
-                               : 0;
+               for (i = 0; i < new_s->nr_blocks; i++) {
+                       bool parity = i >= nr_data;
+
+                       ret = bch2_trans_mark_stripe_alloc_ref(trans,
+                                       &new_s->ptrs[i], sectors, parity);
+                       if (ret)
+                               return ret;
                }
+       }
 
-               a = bch2_trans_kmalloc(trans, BKEY_ALLOC_U64s_MAX * 8);
-               ret = PTR_ERR_OR_ZERO(a);
-               if (ret)
-                       goto put_iter;
-
-               bkey_alloc_init(&a->k_i);
-               a->k.p = iter->pos;
-               bch2_alloc_pack(a, u);
-               bch2_trans_update(trans, iter, &a->k_i, 0);
-put_iter:
-               bch2_trans_iter_put(trans, iter);
+       if (old_s) {
+               unsigned nr_data = old_s->nr_blocks - old_s->nr_redundant;
+               s64 sectors = -((s64) le16_to_cpu(old_s->sectors));
+
+               bch2_bkey_to_replicas(&r.e, old);
+               update_replicas_list(trans, &r.e, sectors * old_s->nr_redundant);
+
+               for (i = 0; i < old_s->nr_blocks; i++) {
+                       bool parity = i >= nr_data;
+
+                       ret = bch2_trans_mark_stripe_alloc_ref(trans,
+                                       &old_s->ptrs[i], sectors, parity);
+                       if (ret)
+                               return ret;
+               }
        }
 
        return ret;
@@ -1904,11 +1935,16 @@ static int bch2_trans_mark_reflink_p(struct btree_trans *trans,
        return ret;
 }
 
-int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
+int bch2_trans_mark_key(struct btree_trans *trans,
+                       struct bkey_s_c old,
+                       struct bkey_s_c new,
                        unsigned offset, s64 sectors, unsigned flags)
 {
-       struct replicas_delta_list *d;
        struct bch_fs *c = trans->c;
+       struct bkey_s_c k = flags & BTREE_TRIGGER_INSERT ? new : old;
+       struct replicas_delta_list *d;
+
+       BUG_ON(!(flags & (BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE)));
 
        switch (k.k->type) {
        case KEY_TYPE_btree_ptr:
@@ -1924,15 +1960,18 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
                return bch2_trans_mark_extent(trans, k, offset, sectors,
                                              flags, BCH_DATA_user);
        case KEY_TYPE_stripe:
-               return bch2_trans_mark_stripe(trans, k, flags);
-       case KEY_TYPE_inode:
-               d = replicas_deltas_realloc(trans, 0);
+               return bch2_trans_mark_stripe(trans, old, new, flags);
+       case KEY_TYPE_inode: {
+               int nr = (new.k->type == KEY_TYPE_inode) -
+                        (old.k->type == KEY_TYPE_inode);
+
+               if (nr) {
+                       d = replicas_deltas_realloc(trans, 0);
+                       d->nr_inodes += nr;
+               }
 
-               if (!(flags & BTREE_TRIGGER_OVERWRITE))
-                       d->nr_inodes++;
-               else
-                       d->nr_inodes--;
                return 0;
+       }
        case KEY_TYPE_reservation: {
                unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
 
@@ -1956,12 +1995,10 @@ int bch2_trans_mark_key(struct btree_trans *trans, struct bkey_s_c k,
 
 int bch2_trans_mark_update(struct btree_trans *trans,
                           struct btree_iter *iter,
-                          struct bkey_i *insert,
+                          struct bkey_i *new,
                           unsigned flags)
 {
-       struct btree            *b = iter_l(iter)->b;
-       struct btree_node_iter  node_iter = iter_l(iter)->iter;
-       struct bkey_packed      *_k;
+       struct bkey_s_c old;
        int ret;
 
        if (unlikely(flags & BTREE_TRIGGER_NORUN))
@@ -1970,68 +2007,93 @@ int bch2_trans_mark_update(struct btree_trans *trans,
        if (!btree_node_type_needs_gc(iter->btree_id))
                return 0;
 
-       ret = bch2_trans_mark_key(trans, bkey_i_to_s_c(insert),
-                       0, insert->k.size, BTREE_TRIGGER_INSERT);
-       if (ret)
-               return ret;
-
-       if (btree_iter_type(iter) == BTREE_ITER_CACHED) {
-               struct bkey_cached *ck = (void *) iter->l[0].b;
+       if (!btree_node_type_is_extents(iter->btree_id)) {
+               /* iterators should be uptodate, shouldn't get errors here: */
+               if (btree_iter_type(iter) != BTREE_ITER_CACHED) {
+                       old = bch2_btree_iter_peek_slot(iter);
+                       BUG_ON(bkey_err(old));
+               } else {
+                       struct bkey_cached *ck = (void *) iter->l[0].b;
 
-               return bch2_trans_mark_key(trans, bkey_i_to_s_c(ck->k),
-                                          0, 0, BTREE_TRIGGER_OVERWRITE);
-       }
+                       BUG_ON(!ck->valid);
+                       old = bkey_i_to_s_c(ck->k);
+               }
 
-       while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) {
+               if (old.k->type == new->k.type) {
+                       ret   = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
+                                       BTREE_TRIGGER_INSERT|BTREE_TRIGGER_OVERWRITE|flags);
+               } else {
+                       ret   = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
+                                       BTREE_TRIGGER_INSERT|flags) ?:
+                               bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new), 0, 0,
+                                       BTREE_TRIGGER_OVERWRITE|flags);
+               }
+       } else {
+               struct btree            *b = iter_l(iter)->b;
+               struct btree_node_iter  node_iter = iter_l(iter)->iter;
+               struct bkey_packed      *_old;
                struct bkey             unpacked;
-               struct bkey_s_c         k;
-               unsigned                offset = 0;
-               s64                     sectors = 0;
-               unsigned                flags = BTREE_TRIGGER_OVERWRITE;
 
-               k = bkey_disassemble(b, _k, &unpacked);
+               EBUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
 
-               if (btree_node_is_extents(b)
-                   ? bkey_cmp(insert->k.p, bkey_start_pos(k.k)) <= 0
-                   : bkey_cmp(insert->k.p, k.k->p))
-                       break;
+               bkey_init(&unpacked);
+               old = (struct bkey_s_c) { &unpacked, NULL };
+
+               ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
+                                         0, new->k.size,
+                                         BTREE_TRIGGER_INSERT);
+               if (ret)
+                       return ret;
+
+               while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) {
+                       unsigned flags = BTREE_TRIGGER_OVERWRITE;
+                       unsigned offset = 0;
+                       s64 sectors;
+
+                       old = bkey_disassemble(b, _old, &unpacked);
+                       sectors = -((s64) old.k->size);
+
+                       flags |= BTREE_TRIGGER_OVERWRITE;
+
+                       if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0)
+                               return 0;
 
-               if (btree_node_is_extents(b)) {
-                       switch (bch2_extent_overlap(&insert->k, k.k)) {
+                       switch (bch2_extent_overlap(&new->k, old.k)) {
                        case BCH_EXTENT_OVERLAP_ALL:
                                offset = 0;
-                               sectors = -((s64) k.k->size);
+                               sectors = -((s64) old.k->size);
                                break;
                        case BCH_EXTENT_OVERLAP_BACK:
-                               offset = bkey_start_offset(&insert->k) -
-                                       bkey_start_offset(k.k);
-                               sectors = bkey_start_offset(&insert->k) -
-                                       k.k->p.offset;
+                               offset = bkey_start_offset(&new->k) -
+                                       bkey_start_offset(old.k);
+                               sectors = bkey_start_offset(&new->k) -
+                                       old.k->p.offset;
                                break;
                        case BCH_EXTENT_OVERLAP_FRONT:
                                offset = 0;
-                               sectors = bkey_start_offset(k.k) -
-                                       insert->k.p.offset;
+                               sectors = bkey_start_offset(old.k) -
+                                       new->k.p.offset;
                                break;
                        case BCH_EXTENT_OVERLAP_MIDDLE:
-                               offset = bkey_start_offset(&insert->k) -
-                                       bkey_start_offset(k.k);
-                               sectors = -((s64) insert->k.size);
+                               offset = bkey_start_offset(&new->k) -
+                                       bkey_start_offset(old.k);
+                               sectors = -((s64) new->k.size);
                                flags |= BTREE_TRIGGER_OVERWRITE_SPLIT;
                                break;
                        }
 
                        BUG_ON(sectors >= 0);
-               }
 
-               ret = bch2_trans_mark_key(trans, k, offset, sectors, flags);
-               if (ret)
-                       return ret;
+                       ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
+                                       offset, sectors, flags);
+                       if (ret)
+                               return ret;
 
-               bch2_btree_node_iter_advance(&node_iter, b);
+                       bch2_btree_node_iter_advance(&node_iter, b);
+               }
        }
 
-       return 0;
+       return ret;
 }
 
 /* Disk reservations: */
diff --git a/fs/bcachefs/buckets.h b/fs/bcachefs/buckets.h
index c85015071c6d8c0594e8a49c1b15bd86fbf07717..7ee63413f83c018fbf9bbcbf4af87ee4a46010a9 100644
@@ -270,7 +270,7 @@ int bch2_mark_update(struct btree_trans *, struct btree_iter *,
 int bch2_replicas_delta_list_apply(struct bch_fs *,
                                   struct bch_fs_usage *,
                                   struct replicas_delta_list *);
-int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c,
+int bch2_trans_mark_key(struct btree_trans *, struct bkey_s_c, struct bkey_s_c,
                        unsigned, s64, unsigned);
 int bch2_trans_mark_update(struct btree_trans *, struct btree_iter *iter,
                           struct bkey_i *insert, unsigned);
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index ecd51d45743a3705da4f1a6261b2469876f73add..1883a1faf380c9d69bfd0bd1d720a1c127a94bb3 100644
@@ -458,7 +458,9 @@ retry:
                bch2_btree_iter_set_pos(iter, split->k.p);
 
                if (remark) {
-                       ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(split),
+                       ret = bch2_trans_mark_key(&trans,
+                                                 bkey_s_c_null,
+                                                 bkey_i_to_s_c(split),
                                                  0, split->k.size,
                                                  BTREE_TRIGGER_INSERT);
                        if (ret)
@@ -467,7 +469,9 @@ retry:
        } while (bkey_cmp(iter->pos, k->k.p) < 0);
 
        if (remark) {
-               ret = bch2_trans_mark_key(&trans, bkey_i_to_s_c(k),
+               ret = bch2_trans_mark_key(&trans,
+                                         bkey_i_to_s_c(k),
+                                         bkey_s_c_null,
                                          0, -((s64) k->k.size),
                                          BTREE_TRIGGER_OVERWRITE);
                if (ret)