bcachefs: BTREE_INSERT_JOURNAL_RES_FULL is no longer possible
author Kent Overstreet <kent.overstreet@gmail.com>
Sun, 5 Aug 2018 19:21:52 +0000 (15:21 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:08:09 +0000 (17:08 -0400)
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/bset.c
fs/bcachefs/bset.h
fs/bcachefs/btree_types.h
fs/bcachefs/btree_update_interior.h
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/extents.c

index 27fa3e230e6eaaf086e0ec180350fa307f1567cf..b95cfe7ece9a1ca1edd0ebbbd0d34d3cb8d2aede 100644 (file)
@@ -255,13 +255,6 @@ void bch2_verify_insert_pos(struct btree *b, struct bkey_packed *where,
 #endif
 }
 
-void bch2_verify_key_order(struct btree *b,
-                          struct btree_node_iter *_iter,
-                          struct bkey_packed *where)
-{
-       bch2_verify_insert_pos(b, where, where, where->u64s);
-}
-
 #else
 
 static inline void bch2_btree_node_iter_next_check(struct btree_node_iter *iter,
index 0787030ccc7e7f9ae870e38d75eeb09d6d157e17..66a8da2192edf4fcd94288055a028643c6334eb7 100644 (file)
@@ -632,8 +632,6 @@ void __bch2_verify_btree_nr_keys(struct btree *);
 void bch2_btree_node_iter_verify(struct btree_node_iter *, struct btree *);
 void bch2_verify_insert_pos(struct btree *, struct bkey_packed *,
                            struct bkey_packed *, unsigned);
-void bch2_verify_key_order(struct btree *, struct btree_node_iter *,
-                         struct bkey_packed *);
 
 #else
 
@@ -644,9 +642,6 @@ static inline void bch2_verify_insert_pos(struct btree *b,
                                          struct bkey_packed *where,
                                          struct bkey_packed *insert,
                                          unsigned clobber_u64s) {}
-static inline void bch2_verify_key_order(struct btree *b,
-                                       struct btree_node_iter *iter,
-                                       struct bkey_packed *where) {}
 #endif
 
 static inline void bch2_verify_btree_nr_keys(struct btree *b)
index 14d8c75a4e8d1097a42d0164c6b373d082c59c31..03c319611d72366c273e86de5bf2f3a58b02f115 100644 (file)
@@ -463,7 +463,6 @@ enum btree_insert_ret {
        /* write lock held for too long */
        /* leaf node needs to be split */
        BTREE_INSERT_BTREE_NODE_FULL,
-       BTREE_INSERT_JOURNAL_RES_FULL,
        BTREE_INSERT_ENOSPC,
        BTREE_INSERT_NEED_GC_LOCK,
 };
index 711fbe63eb3a3b636f47d69a12dedb27c64d8d37..4125cddded614cb18e00d7f4d47e6889b3d5a16a 100644 (file)
@@ -336,40 +336,14 @@ static inline void reserve_whiteout(struct btree *b, struct bkey_packed *k)
  * insert into could be written out from under us)
  */
 static inline bool bch2_btree_node_insert_fits(struct bch_fs *c,
-                                             struct btree *b, unsigned u64s)
+                                              struct btree *b, unsigned u64s)
 {
        if (unlikely(btree_node_fake(b)))
                return false;
 
-       if (btree_node_is_extents(b)) {
-               /* The insert key might split an existing key
-                * (bch2_insert_fixup_extent() -> BCH_EXTENT_OVERLAP_MIDDLE case:
-                */
-               u64s += BKEY_EXTENT_U64s_MAX;
-       }
-
        return u64s <= bch_btree_keys_u64s_remaining(c, b);
 }
 
-static inline bool journal_res_insert_fits(struct btree_insert *trans,
-                                          struct btree_insert_entry *insert)
-{
-       unsigned u64s = 0;
-       struct btree_insert_entry *i;
-
-       /*
-        * If we didn't get a journal reservation, we're in journal replay and
-        * we're not journalling updates:
-        */
-       if (!trans->journal_res.ref)
-               return true;
-
-       for (i = insert; i < trans->entries + trans->nr; i++)
-               u64s += jset_u64s(i->k->k.u64s + i->extra_res);
-
-       return u64s <= trans->journal_res.u64s;
-}
-
 ssize_t bch2_btree_updates_print(struct bch_fs *, char *);
 
 size_t bch2_btree_interior_updates_nr_pending(struct bch_fs *);
index 0ef519e8feed9cb087508b2425d95d8f4a3c964d..598d7a10779254c9d6a280f10c116c90857e12bf 100644 (file)
@@ -408,7 +408,6 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
                case BTREE_INSERT_OK:
                        i->done = true;
                        break;
-               case BTREE_INSERT_JOURNAL_RES_FULL:
                case BTREE_INSERT_NEED_TRAVERSE:
                        ret = -EINTR;
                        break;
index 6cc6961047cf3e8842c244663ebbb004293c0e27..b2f50e16278447f0d7b1da5e4c7f8b1bcb9c2684 100644 (file)
@@ -1060,7 +1060,8 @@ struct extent_insert_state {
 
        /* for deleting: */
        struct bkey_i                   whiteout;
-       bool                            do_journal;
+       bool                            update_journal;
+       bool                            update_btree;
        bool                            deleting;
 };
 
@@ -1117,28 +1118,6 @@ static bool bch2_extent_merge_inline(struct bch_fs *,
                                     struct bkey_packed *,
                                     bool);
 
-static enum btree_insert_ret
-extent_insert_should_stop(struct extent_insert_state *s)
-{
-       struct btree *b = s->insert->iter->l[0].b;
-
-       /*
-        * Check if we have sufficient space in both the btree node and the
-        * journal reservation:
-        *
-        * Each insert checks for room in the journal entry, but we check for
-        * room in the btree node up-front. In the worst case, bkey_cmpxchg()
-        * will insert two keys, and one iteration of this room will insert one
-        * key, so we need room for three keys.
-        */
-       if (!bch2_btree_node_insert_fits(s->trans->c, b, s->insert->k->k.u64s))
-               return BTREE_INSERT_BTREE_NODE_FULL;
-       else if (!journal_res_insert_fits(s->trans, s->insert))
-               return BTREE_INSERT_JOURNAL_RES_FULL; /* XXX worth tracing */
-       else
-               return BTREE_INSERT_OK;
-}
-
 static void verify_extent_nonoverlapping(struct btree *b,
                                         struct btree_node_iter *_iter,
                                         struct bkey_i *insert)
@@ -1193,55 +1172,30 @@ static void extent_bset_insert(struct bch_fs *c, struct btree_iter *iter,
 {
        struct btree_iter_level *l = &iter->l[0];
        struct bset_tree *t = bset_tree_last(l->b);
-       struct bkey_packed *where =
-               bch2_btree_node_iter_bset_pos(&l->iter, l->b, t);
-       struct bkey_packed *prev = bch2_bkey_prev_filter(l->b, t, where,
-                                                        KEY_TYPE_DISCARD);
-       struct bkey_packed *next_live_key = where;
-       unsigned clobber_u64s;
+       struct btree_node_iter node_iter;
+       struct bkey_packed *k;
+
+       BUG_ON(insert->k.u64s > bch_btree_keys_u64s_remaining(c, l->b));
 
        EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
        verify_extent_nonoverlapping(l->b, &l->iter, insert);
 
-       if (!prev) {
-               while ((prev = bch2_bkey_prev_all(l->b, t, where)) &&
-                      (bkey_cmp_left_packed(l->b, prev, &insert->k.p) ?:
-                       ((int) bkey_deleted(&insert->k) - (int) bkey_deleted(prev))) > 0)
-                       where = prev;
-       }
-
-       if (prev)
-               where = bkey_next(prev);
-
-       while (next_live_key != btree_bkey_last(l->b, t) &&
-              bkey_deleted(next_live_key))
-               next_live_key = bkey_next(next_live_key);
-
-       /*
-        * Everything between where and next_live_key is now deleted keys, and
-        * is overwritten:
-        */
-       clobber_u64s = (u64 *) next_live_key - (u64 *) where;
+       node_iter = l->iter;
+       k = bch2_btree_node_iter_prev_filter(&node_iter, l->b, KEY_TYPE_DISCARD);
+       if (k && !bkey_written(l->b, k) &&
+           bch2_extent_merge_inline(c, iter, k, bkey_to_packed(insert), true))
+               return;
 
-       if (prev &&
-           bch2_extent_merge_inline(c, iter, prev, bkey_to_packed(insert), true))
-               goto drop_deleted_keys;
+       node_iter = l->iter;
+       k = bch2_btree_node_iter_peek_filter(&node_iter, l->b, KEY_TYPE_DISCARD);
+       if (k && !bkey_written(l->b, k) &&
+           bch2_extent_merge_inline(c, iter, bkey_to_packed(insert), k, false))
+               return;
 
-       if (next_live_key != btree_bkey_last(l->b, t) &&
-           bch2_extent_merge_inline(c, iter, bkey_to_packed(insert),
-                                   next_live_key, false))
-               goto drop_deleted_keys;
+       k = bch2_btree_node_iter_bset_pos(&l->iter, l->b, t);
 
-       bch2_bset_insert(l->b, &l->iter, where, insert, clobber_u64s);
-       bch2_btree_node_iter_fix(iter, l->b, &l->iter, t, where,
-                                clobber_u64s, where->u64s);
-       bch2_verify_key_order(l->b, &l->iter, where);
-       bch2_btree_iter_verify(iter, l->b);
-       return;
-drop_deleted_keys:
-       bch2_bset_delete(l->b, where, clobber_u64s);
-       bch2_btree_node_iter_fix(iter, l->b, &l->iter, t,
-                                where, clobber_u64s, 0);
+       bch2_bset_insert(l->b, &l->iter, k, insert, 0);
+       bch2_btree_node_iter_fix(iter, l->b, &l->iter, t, k, 0, k->u64s);
        bch2_btree_iter_verify(iter, l->b);
 }
 
@@ -1249,56 +1203,52 @@ static void extent_insert_committed(struct extent_insert_state *s)
 {
        struct bch_fs *c = s->trans->c;
        struct btree_iter *iter = s->insert->iter;
-       struct bkey_i *insert = !s->deleting
-               ? s->insert->k
-               : &s->whiteout;
+       struct bkey_i *insert = s->insert->k;
        BKEY_PADDED(k) split;
 
-       EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
        EBUG_ON(bkey_cmp(insert->k.p, s->committed) < 0);
        EBUG_ON(bkey_cmp(s->committed, bkey_start_pos(&insert->k)) < 0);
 
-       if (!bkey_cmp(s->committed, bkey_start_pos(&insert->k)))
+       bkey_copy(&split.k, insert);
+       if (s->deleting)
+               split.k.k.type = KEY_TYPE_DISCARD;
+
+       if (!(s->trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
+               bch2_cut_subtract_back(s, s->committed,
+                                      bkey_i_to_s(&split.k));
+       else
+               bch2_cut_back(s->committed, &split.k.k);
+
+       if (!bkey_cmp(s->committed, iter->pos))
                return;
 
-       if (s->deleting && !s->do_journal) {
-               bch2_cut_front(s->committed, insert);
-               goto done;
-       }
+       bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
 
-       EBUG_ON(bkey_deleted(&insert->k) || !insert->k.size);
+       if (s->update_btree) {
+               if (debug_check_bkeys(c))
+                       bch2_bkey_debugcheck(c, iter->l[0].b,
+                                            bkey_i_to_s_c(&split.k));
 
-       bkey_copy(&split.k, insert);
+               EBUG_ON(bkey_deleted(&split.k.k) || !split.k.k.size);
 
-       if (!(s->trans->flags & BTREE_INSERT_JOURNAL_REPLAY) &&
-           bkey_cmp(s->committed, insert->k.p) &&
-           bch2_extent_is_compressed(bkey_i_to_s_c(insert))) {
-               /* XXX: possibly need to increase our reservation? */
-               bch2_cut_subtract_back(s, s->committed,
-                                     bkey_i_to_s(&split.k));
-               bch2_cut_front(s->committed, insert);
-               bch2_add_sectors(s, bkey_i_to_s_c(insert),
-                               bkey_start_offset(&insert->k),
-                               insert->k.size);
-       } else {
-               bch2_cut_back(s->committed, &split.k.k);
-               bch2_cut_front(s->committed, insert);
+               extent_bset_insert(c, iter, &split.k);
        }
 
-       if (debug_check_bkeys(c))
-               bch2_bkey_debugcheck(c, iter->l[0].b, bkey_i_to_s_c(&split.k));
+       if (s->update_journal) {
+               bkey_copy(&split.k, !s->deleting ? insert : &s->whiteout);
+               if (s->deleting)
+                       split.k.k.type = KEY_TYPE_DISCARD;
 
-       bch2_btree_journal_key(s->trans, iter, &split.k);
+               bch2_cut_back(s->committed, &split.k.k);
 
-       if (!s->deleting) {
-               bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
-               extent_bset_insert(c, iter, &split.k);
+               EBUG_ON(bkey_deleted(&split.k.k) || !split.k.k.size);
+
+               bch2_btree_journal_key(s->trans, iter, &split.k);
        }
-done:
-       bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
+
+       bch2_cut_front(s->committed, insert);
 
        insert->k.needs_whiteout        = false;
-       s->do_journal                   = false;
        s->trans->did_work              = true;
 }
 
@@ -1333,9 +1283,6 @@ extent_insert_advance_pos(struct extent_insert_state *s, struct bkey_s_c k)
                                        k.k ? k.k->p : b->key.k.p);
        enum btree_insert_ret ret;
 
-       if (race_fault())
-               return BTREE_INSERT_NEED_TRAVERSE;
-
        /* hole? */
        if (k.k && bkey_cmp(s->committed, bkey_start_pos(k.k)) < 0) {
                ret = __extent_insert_advance_pos(s, bkey_start_pos(k.k),
@@ -1364,6 +1311,15 @@ bch2_extent_can_insert(struct btree_insert *trans,
        struct bkey_s_c k;
        int sectors;
 
+       /*
+        * We avoid creating whiteouts whenever possible when deleting, but
+        * those optimizations mean we may potentially insert two whiteouts
+        * instead of one (when we overlap with the front of one extent and the
+        * back of another):
+        */
+       if (bkey_whiteout(&insert->k->k))
+               *u64s += BKEY_U64s;
+
        _k = bch2_btree_node_iter_peek_filter(&node_iter, l->b,
                                              KEY_TYPE_DISCARD);
        if (!_k)
@@ -1418,7 +1374,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
                bch2_cut_subtract_front(s, insert->k.p, k);
                BUG_ON(bkey_deleted(k.k));
                extent_save(b, _k, k.k);
-               bch2_verify_key_order(b, &l->iter, _k);
+               verify_modified_extent(iter, _k);
                break;
 
        case BCH_EXTENT_OVERLAP_BACK:
@@ -1435,7 +1391,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
                bch2_bset_fix_invalidated_key(b, t, _k);
                bch2_btree_node_iter_fix(iter, b, &l->iter, t,
                                         _k, _k->u64s, _k->u64s);
-               bch2_verify_key_order(b, &l->iter, _k);
+               verify_modified_extent(iter, _k);
                break;
 
        case BCH_EXTENT_OVERLAP_ALL: {
@@ -1457,7 +1413,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
                        extent_save(b, _k, k.k);
                        bch2_btree_node_iter_fix(iter, b, &l->iter, t,
                                                 _k, _k->u64s, _k->u64s);
-                       bch2_verify_key_order(b, &l->iter, _k);
+                       verify_modified_extent(iter, _k);
                }
 
                break;
@@ -1487,7 +1443,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
                bch2_cut_subtract_front(s, insert->k.p, k);
                BUG_ON(bkey_deleted(k.k));
                extent_save(b, _k, k.k);
-               bch2_verify_key_order(b, &l->iter, _k);
+               verify_modified_extent(iter, _k);
 
                bch2_add_sectors(s, bkey_i_to_s_c(&split.k),
                                bkey_start_offset(&split.k.k),
@@ -1501,7 +1457,6 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
 static enum btree_insert_ret
 __bch2_insert_fixup_extent(struct extent_insert_state *s)
 {
-       struct bch_fs *c = s->trans->c;
        struct btree_iter *iter = s->insert->iter;
        struct btree_iter_level *l = &iter->l[0];
        struct btree *b = l->b;
@@ -1511,13 +1466,12 @@ __bch2_insert_fixup_extent(struct extent_insert_state *s)
        enum btree_insert_ret ret = BTREE_INSERT_OK;
 
        while (bkey_cmp(s->committed, insert->k.p) < 0 &&
-              (ret = extent_insert_should_stop(s)) == BTREE_INSERT_OK &&
-              (_k = bch2_btree_node_iter_peek_filter(&l->iter, b, KEY_TYPE_DISCARD))) {
+              (_k = bch2_btree_node_iter_peek_filter(&l->iter, b,
+                                                     KEY_TYPE_DISCARD))) {
                struct bset_tree *t = bch2_bkey_to_bset(b, _k);
                struct bkey_s k = __bkey_disassemble(b, _k, &unpacked);
-               enum bch_extent_overlap overlap;
+               enum bch_extent_overlap overlap = bch2_extent_overlap(&insert->k, k.k);
 
-               EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k)));
                EBUG_ON(bkey_cmp(iter->pos, k.k->p) >= 0);
 
                if (bkey_cmp(bkey_start_pos(k.k), insert->k.p) >= 0)
@@ -1527,63 +1481,53 @@ __bch2_insert_fixup_extent(struct extent_insert_state *s)
                if (ret)
                        break;
 
-               overlap = bch2_extent_overlap(&insert->k, k.k);
-
-               if (!s->deleting) {
-                       if (k.k->needs_whiteout || bkey_written(b, _k))
-                               insert->k.needs_whiteout = true;
-
-                       if (overlap == BCH_EXTENT_OVERLAP_ALL &&
-                           bkey_whiteout(k.k) &&
-                           k.k->needs_whiteout) {
-                               unreserve_whiteout(b, _k);
-                               _k->needs_whiteout = false;
-                       }
-
-                       extent_squash(s, insert, t, _k, k, overlap);
-               } else {
-                       if (bkey_whiteout(k.k))
-                               goto next;
+               if (!bkey_whiteout(k.k))
+                       s->update_journal = true;
 
-                       s->do_journal = true;
+               if (!s->update_journal) {
+                       bch2_cut_front(s->committed, insert);
+                       bch2_cut_front(s->committed, &s->whiteout);
+                       bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
+                       goto next;
+               }
 
-                       if (overlap == BCH_EXTENT_OVERLAP_ALL) {
-                               btree_keys_account_key_drop(&b->nr,
-                                                       t - b->set, _k);
+               /*
+                * When deleting, if possible just do it by switching the type
+                * of the key we're deleting, instead of creating and inserting
+                * a new whiteout:
+                */
+               if (s->deleting &&
+                   !s->update_btree &&
+                   !bkey_cmp(insert->k.p, k.k->p) &&
+                   !bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) {
+                       if (!bkey_whiteout(k.k)) {
+                               btree_keys_account_key_drop(&b->nr, t - b->set, _k);
                                bch2_subtract_sectors(s, k.s_c,
-                                                    bkey_start_offset(k.k), k.k->size);
+                                                     bkey_start_offset(k.k), k.k->size);
                                _k->type = KEY_TYPE_DISCARD;
                                reserve_whiteout(b, _k);
-                       } else if (k.k->needs_whiteout ||
-                                  bkey_written(b, _k)) {
-                               struct bkey_i discard = *insert;
-
-                               discard.k.type = KEY_TYPE_DISCARD;
+                       }
+                       break;
+               }
 
-                               switch (overlap) {
-                               case BCH_EXTENT_OVERLAP_FRONT:
-                                       bch2_cut_front(bkey_start_pos(k.k), &discard);
-                                       break;
-                               case BCH_EXTENT_OVERLAP_BACK:
-                                       bch2_cut_back(k.k->p, &discard.k);
-                                       break;
-                               default:
-                                       break;
-                               }
+               if (k.k->needs_whiteout || bkey_written(b, _k)) {
+                       insert->k.needs_whiteout = true;
+                       s->update_btree = true;
+               }
 
-                               discard.k.needs_whiteout = true;
+               if (s->update_btree &&
+                   overlap == BCH_EXTENT_OVERLAP_ALL &&
+                   bkey_whiteout(k.k) &&
+                   k.k->needs_whiteout) {
+                       unreserve_whiteout(b, _k);
+                       _k->needs_whiteout = false;
+               }
 
-                               extent_squash(s, insert, t, _k, k, overlap);
+               extent_squash(s, insert, t, _k, k, overlap);
 
-                               extent_bset_insert(c, iter, &discard);
-                       } else {
-                               extent_squash(s, insert, t, _k, k, overlap);
-                       }
-next:
+               if (!s->update_btree)
                        bch2_cut_front(s->committed, insert);
-                       bch2_btree_iter_set_pos_same_leaf(iter, s->committed);
-               }
-
+next:
                if (overlap == BCH_EXTENT_OVERLAP_FRONT ||
                    overlap == BCH_EXTENT_OVERLAP_MIDDLE)
                        break;
@@ -1600,11 +1544,9 @@ next:
         */
        {
                struct btree_node_iter node_iter = l->iter;
-               struct bkey uk;
 
                while ((_k = bch2_btree_node_iter_prev_all(&node_iter, l->b)) &&
-                      (uk = bkey_unpack_key(l->b, _k),
-                       bkey_cmp(uk.p, s->committed) > 0))
+                      bkey_cmp_left_packed(b, _k, &s->committed) > 0)
                        l->iter = node_iter;
        }
 
@@ -1664,14 +1606,13 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
                .trans          = trans,
                .insert         = insert,
                .committed      = insert->iter->pos,
+
+               .whiteout       = *insert->k,
+               .update_journal = !bkey_whiteout(&insert->k->k),
+               .update_btree   = !bkey_whiteout(&insert->k->k),
                .deleting       = bkey_whiteout(&insert->k->k),
        };
 
-       if (s.deleting) {
-               s.whiteout = *insert->k;
-               s.whiteout.k.type = KEY_TYPE_DISCARD;
-       }
-
        EBUG_ON(iter->level);
        EBUG_ON(!insert->k->k.size);
 
@@ -1682,7 +1623,6 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
         * @insert->k and the node iterator that we're advancing:
         */
        EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
-       bch2_btree_iter_verify(iter, b);
 
        if (!s.deleting &&
            !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
@@ -1694,20 +1634,6 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
 
        extent_insert_committed(&s);
 
-       if (s.deleting)
-               bch2_cut_front(iter->pos, insert->k);
-
-       /*
-        * Subtract any remaining sectors from @insert, if we bailed out early
-        * and didn't fully insert @insert:
-        */
-       if (!s.deleting &&
-           !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY) &&
-           insert->k->k.size)
-               bch2_subtract_sectors(&s, bkey_i_to_s_c(insert->k),
-                                    bkey_start_offset(&insert->k->k),
-                                    insert->k->k.size);
-
        bch2_fs_usage_apply(c, &s.stats, trans->disk_res,
                           gc_pos_btree_node(b));