From 2177147b39098e6f08b3d8d45bbcf7dedd7ebdad Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Tue, 6 Apr 2021 15:33:19 -0400
Subject: [PATCH] bcachefs: Improve bset compaction

The previous patch, which fixed btree nodes being written too
aggressively, meant that we were no longer sorting btree node bsets
optimally - this patch fixes that.

Signed-off-by: Kent Overstreet
Signed-off-by: Kent Overstreet
---
 fs/bcachefs/btree_cache.c           |  2 +-
 fs/bcachefs/btree_io.c              | 51 +++++++++++++++++++----------
 fs/bcachefs/btree_io.h              |  3 +-
 fs/bcachefs/btree_update_interior.h |  4 ++-
 4 files changed, 39 insertions(+), 21 deletions(-)

diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index 2ec668c3427e9..8ed8610796fbe 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -215,7 +215,7 @@ static int __btree_node_reclaim(struct bch_fs *c, struct btree *b, bool flush)
 		if (bch2_verify_btree_ondisk)
 			bch2_btree_node_write(c, b, SIX_LOCK_intent);
 		else
-			__bch2_btree_node_write(c, b, SIX_LOCK_read);
+			__bch2_btree_node_write(c, b);
 
 		/* wait for any in flight btree write */
 		btree_node_wait_on_io(b);
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index 3b45389a8e068..fd90e434c78c5 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -241,7 +241,6 @@ bool bch2_compact_whiteouts(struct bch_fs *c, struct btree *b,
 }
 
 static void btree_node_sort(struct bch_fs *c, struct btree *b,
-			    struct btree_iter *iter,
 			    unsigned start_idx,
 			    unsigned end_idx,
 			    bool filter_whiteouts)
@@ -377,8 +376,7 @@ void bch2_btree_sort_into(struct bch_fs *c,
  * We're about to add another bset to the btree node, so if there's currently
  * too many bsets - sort some of them together:
  */
-static bool btree_node_compact(struct bch_fs *c, struct btree *b,
-			       struct btree_iter *iter)
+static bool btree_node_compact(struct bch_fs *c, struct btree *b)
 {
 	unsigned unwritten_idx;
 	bool ret = false;
@@ -390,13 +388,13 @@ static bool btree_node_compact(struct bch_fs *c, struct btree *b,
 			break;
 
 	if (b->nsets - unwritten_idx > 1) {
-		btree_node_sort(c, b, iter, unwritten_idx,
+		btree_node_sort(c, b, unwritten_idx,
 				b->nsets, false);
 		ret = true;
 	}
 
 	if (unwritten_idx > 1) {
-		btree_node_sort(c, b, iter, 0, unwritten_idx, false);
+		btree_node_sort(c, b, 0, unwritten_idx, false);
 		ret = true;
 	}
 
@@ -426,12 +424,30 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b,
 			  struct btree_iter *iter)
 {
 	struct btree_node_entry *bne;
-	bool did_sort;
+	bool reinit_iter = false;
 
 	EBUG_ON(!(b->c.lock.state.seq & 1));
 	EBUG_ON(iter && iter->l[b->c.level].b != b);
+	BUG_ON(bset_written(b, bset(b, &b->set[1])));
+
+	if (b->nsets == MAX_BSETS) {
+		unsigned log_u64s[] = {
+			ilog2(bset_u64s(&b->set[0])),
+			ilog2(bset_u64s(&b->set[1])),
+			ilog2(bset_u64s(&b->set[2])),
+		};
+
+		if (log_u64s[1] >= (log_u64s[0] + log_u64s[2]) / 2) {
+			bch2_btree_node_write(c, b, SIX_LOCK_write);
+			reinit_iter = true;
+		}
+	}
+
+	if (b->nsets == MAX_BSETS &&
+	    btree_node_compact(c, b))
+		reinit_iter = true;
 
-	did_sort = btree_node_compact(c, b, iter);
+	BUG_ON(b->nsets >= MAX_BSETS);
 
 	bne = want_new_bset(c, b);
 	if (bne)
@@ -439,7 +455,7 @@ void bch2_btree_init_next(struct bch_fs *c, struct btree *b,
 
 	bch2_btree_build_aux_trees(b);
 
-	if (iter && did_sort)
+	if (iter && reinit_iter)
 		bch2_btree_iter_reinit_node(iter, b);
 }
 
@@ -1324,8 +1340,7 @@ static int validate_bset_for_write(struct bch_fs *c, struct btree *b,
 	return ret;
 }
 
-void __bch2_btree_node_write(struct bch_fs *c, struct btree *b,
-			     enum six_lock_type lock_type_held)
+void __bch2_btree_node_write(struct bch_fs *c, struct btree *b)
 {
 	struct btree_write_bio *wbio;
 	struct bset_tree *t;
@@ -1596,7 +1611,7 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
 	 * single bset:
 	 */
 	if (b->nsets > 1) {
-		btree_node_sort(c, b, NULL, 0, b->nsets, true);
+		btree_node_sort(c, b, 0, b->nsets, true);
 		invalidated_iter = true;
 	} else {
 		invalidated_iter = bch2_drop_whiteouts(b, COMPACT_ALL);
@@ -1626,13 +1641,12 @@ bool bch2_btree_post_write_cleanup(struct bch_fs *c, struct btree *b)
  * Use this one if the node is intent locked:
  */
 void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
-			  enum six_lock_type lock_type_held)
+			   enum six_lock_type lock_type_held)
 {
-	BUG_ON(lock_type_held == SIX_LOCK_write);
-
 	if (lock_type_held == SIX_LOCK_intent ||
-	    six_lock_tryupgrade(&b->c.lock)) {
-		__bch2_btree_node_write(c, b, SIX_LOCK_intent);
+	    (lock_type_held == SIX_LOCK_read &&
+	     six_lock_tryupgrade(&b->c.lock))) {
+		__bch2_btree_node_write(c, b);
 
 		/* don't cycle lock unnecessarily: */
 		if (btree_node_just_written(b) &&
@@ -1644,7 +1658,10 @@ void bch2_btree_node_write(struct bch_fs *c, struct btree *b,
 		if (lock_type_held == SIX_LOCK_read)
 			six_lock_downgrade(&b->c.lock);
 	} else {
-		__bch2_btree_node_write(c, b, SIX_LOCK_read);
+		__bch2_btree_node_write(c, b);
+		if (lock_type_held == SIX_LOCK_write &&
+		    btree_node_just_written(b))
+			bch2_btree_post_write_cleanup(c, b);
 	}
 }
 
diff --git a/fs/bcachefs/btree_io.h b/fs/bcachefs/btree_io.h
index 9c14cd30a09e1..95c351611045a 100644
--- a/fs/bcachefs/btree_io.h
+++ b/fs/bcachefs/btree_io.h
@@ -144,8 +144,7 @@ void bch2_btree_complete_write(struct bch_fs *, struct btree *,
 			       struct btree_write *);
 void bch2_btree_write_error_work(struct work_struct *);
 
-void __bch2_btree_node_write(struct bch_fs *, struct btree *,
-			     enum six_lock_type);
+void __bch2_btree_node_write(struct bch_fs *, struct btree *);
 bool bch2_btree_post_write_cleanup(struct bch_fs *, struct btree *);
 
 void bch2_btree_node_write(struct bch_fs *, struct btree *,
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index f2925b0d7f17e..7eef3dbb6ef17 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -256,13 +256,15 @@ static inline size_t bch_btree_keys_u64s_remaining(struct bch_fs *c,
 	return remaining;
 }
 
+#define BTREE_WRITE_SET_U64s_BITS	9
+
 static inline unsigned btree_write_set_buffer(struct btree *b)
 {
 	/*
 	 * Could buffer up larger amounts of keys for btrees with larger keys,
 	 * pending benchmarking:
 	 */
-	return 4 << 10;
+	return 8 << BTREE_WRITE_SET_U64s_BITS;
 }
 
 static inline struct btree_node_entry *want_new_bset(struct bch_fs *c,
-- 
2.30.2
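
A note on the new write-vs-compact heuristic in bch2_btree_init_next(): a
btree node buffers up to MAX_BSETS (3) sorted bsets, and once all three slots
are in use the node must either sort some bsets together or be written out so
it can start fresh. The idea, roughly, is that merging is cheap when bset
sizes fall off steeply; if the middle bset's log2 size is at least the average
of its neighbours' log2 sizes, the sets are too evenly sized for a cheap
merge, so the node is written instead. Below is a minimal userspace sketch of
just that comparison - the bset sizes are invented for the demo, and ilog2_u()
is a hand-rolled stand-in for the kernel's ilog2(), so treat it as an
illustration rather than bcachefs code:

#include <stdio.h>

#define MAX_BSETS 3

/* hand-rolled stand-in for the kernel's ilog2() */
static unsigned ilog2_u(unsigned v)
{
	unsigned r = 0;

	while (v >>= 1)
		r++;
	return r;
}

int main(void)
{
	/* hypothetical bset sizes in u64s: big sorted set, medium, small */
	unsigned bset_u64s[MAX_BSETS] = { 4096, 512, 16 };
	unsigned log_u64s[MAX_BSETS], i;

	for (i = 0; i < MAX_BSETS; i++)
		log_u64s[i] = ilog2_u(bset_u64s[i]);

	/*
	 * Same comparison as in bch2_btree_init_next(): if the middle
	 * bset is (logarithmically) at least halfway between its
	 * neighbours, compaction would be expensive - write the node.
	 */
	if (log_u64s[1] >= (log_u64s[0] + log_u64s[2]) / 2)
		printf("write node (log2 sizes %u %u %u)\n",
		       log_u64s[0], log_u64s[1], log_u64s[2]);
	else
		printf("compact bsets (log2 sizes %u %u %u)\n",
		       log_u64s[0], log_u64s[1], log_u64s[2]);
	return 0;
}

With the sizes above, the log2 sizes are 12, 9 and 4; since 9 >= (12 + 4) / 2
= 8, the sketch takes the write-the-node branch - the case the patch now
handles by calling bch2_btree_node_write() with SIX_LOCK_write held.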
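One more observation, on the btree_update_interior.h hunk: the
btree_write_set_buffer() change does not retune anything. 8 <<
BTREE_WRITE_SET_U64s_BITS is 8 << 9 = 4096, the same value as the old 4 << 10;
the new constant just gives the magnitude a name, presumably so other code can
reference it.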