From: Kent Overstreet <kent.overstreet@gmail.com>
Date: Fri, 15 Mar 2019 21:11:58 +0000 (-0400)
Subject: bcachefs: Btree update path cleanup
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=9623ab27ab27e211e66aa1dd9a5ec90307160c8b;p=linux.git

bcachefs: Btree update path cleanup

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
---

diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index 674a617a8018a..2904239b79477 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -289,12 +289,12 @@ struct btree_trans {
 	struct btree_iter	*iters;
 	struct btree_insert_entry *updates;
 
-	struct disk_reservation *disk_res;
 
 	/* update path: */
 	struct journal_res	journal_res;
 	struct journal_preres	journal_preres;
 	u64			*journal_seq;
+	struct disk_reservation *disk_res;
 	unsigned		flags;
 
 	struct btree_iter	iters_onstack[2];
@@ -501,6 +501,7 @@ enum btree_insert_ret {
 	BTREE_INSERT_BTREE_NODE_FULL,
 	BTREE_INSERT_ENOSPC,
 	BTREE_INSERT_NEED_MARK_REPLICAS,
+	BTREE_INSERT_NEED_JOURNAL_RES,
 };
 
 enum btree_gc_coalesce_fail_reason {
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 876b0e78f982a..4d7cef75a017a 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -128,4 +128,39 @@ int bch2_trans_commit(struct btree_trans *,
 	_ret;								\
 })
 
+/*
+ * We sort transaction entries so that if multiple iterators point to the same
+ * leaf node they'll be adjacent:
+ */
+static inline bool same_leaf_as_prev(struct btree_trans *trans,
+				     struct btree_insert_entry *i)
+{
+	return i != trans->updates &&
+		!i->deferred &&
+		i[0].iter->l[0].b == i[-1].iter->l[0].b;
+}
+
+#define __trans_next_update(_trans, _i, _filter)			\
+({									\
+	while ((_i) < (_trans)->updates + (_trans->nr_updates) && !(_filter))\
+		(_i)++;							\
+									\
+	(_i) < (_trans)->updates + (_trans->nr_updates);		\
+})
+
+#define __trans_for_each_update(_trans, _i, _filter)			\
+	for ((_i) = (_trans)->updates;					\
+	     __trans_next_update(_trans, _i, _filter);			\
+	     (_i)++)
+
+#define trans_for_each_update(trans, i)					\
+	__trans_for_each_update(trans, i, true)
+
+#define trans_for_each_update_iter(trans, i)				\
+	__trans_for_each_update(trans, i, !(i)->deferred)
+
+#define trans_for_each_update_leaf(trans, i)				\
+	__trans_for_each_update(trans, i, !(i)->deferred &&		\
+			       !same_leaf_as_prev(trans, i))
+
 #endif /* _BCACHEFS_BTREE_UPDATE_H */
diff --git a/fs/bcachefs/btree_update_leaf.c b/fs/bcachefs/btree_update_leaf.c
index d239aff7c13c8..42fdb6c2963aa 100644
--- a/fs/bcachefs/btree_update_leaf.c
+++ b/fs/bcachefs/btree_update_leaf.c
@@ -18,8 +18,64 @@
 
 #include <trace/events/bcachefs.h>
 
-static bool btree_trans_relock(struct btree_trans *);
-static void btree_trans_unlock(struct btree_trans *);
+inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
+					    struct btree_iter *iter)
+{
+	bch2_btree_node_lock_write(b, iter);
+
+	if (btree_node_just_written(b) &&
+	    bch2_btree_post_write_cleanup(c, b))
+		bch2_btree_iter_reinit_node(iter, b);
+
+	/*
+	 * If the last bset has been written, or if it's gotten too big - start
+	 * a new bset to insert into:
+	 */
+	if (want_new_bset(c, b))
+		bch2_btree_init_next(c, b, iter);
+}
+
+static void btree_trans_lock_write(struct bch_fs *c, struct btree_trans *trans)
+{
+	struct btree_insert_entry *i;
+
+	trans_for_each_update_leaf(trans, i)
+		bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
+}
+
+static void btree_trans_unlock_write(struct btree_trans *trans)
+{
+	struct btree_insert_entry *i;
+
+	trans_for_each_update_leaf(trans, i)
+		bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
+}
+
+static bool btree_trans_relock(struct btree_trans *trans)
+{
+	struct btree_insert_entry *i;
+
+	trans_for_each_update_iter(trans, i)
+		return bch2_btree_iter_relock(i->iter);
+	return true;
+}
+
+static void btree_trans_unlock(struct btree_trans *trans)
+{
+	struct btree_insert_entry *i;
+
+	trans_for_each_update_iter(trans, i) {
+		bch2_btree_iter_unlock(i->iter);
+		break;
+	}
+}
+
+static inline int btree_trans_cmp(struct btree_insert_entry l,
+				  struct btree_insert_entry r)
+{
+	return (l.deferred > r.deferred) - (l.deferred < r.deferred) ?:
+		btree_iter_cmp(l.iter, r.iter);
+}
 
 /* Inserting into a given leaf node (last stage of insert): */
 
@@ -350,103 +406,86 @@ bch2_deferred_update_alloc(struct bch_fs *c,
 
 	return d;
 }
 
-/* struct btree_insert operations: */
+/* Normal update interface: */
 
-/*
- * We sort transaction entries so that if multiple iterators point to the same
- * leaf node they'll be adjacent:
- */
-static bool same_leaf_as_prev(struct btree_trans *trans,
-			      struct btree_insert_entry *i)
+static inline void btree_insert_entry_checks(struct bch_fs *c,
+					     struct btree_insert_entry *i)
 {
-	return i != trans->updates &&
-		!i->deferred &&
-		i[0].iter->l[0].b == i[-1].iter->l[0].b;
-}
-
-#define __trans_next_entry(_trans, _i, _filter)				\
-({									\
-	while ((_i) < (_trans)->updates + (_trans->nr_updates) && !(_filter))\
-		(_i)++;							\
-									\
-	(_i) < (_trans)->updates + (_trans->nr_updates);		\
-})
-
-#define __trans_for_each_entry(_trans, _i, _filter)			\
-	for ((_i) = (_trans)->updates;					\
-	     __trans_next_entry(_trans, _i, _filter);			\
-	     (_i)++)
+	enum btree_id btree_id = !i->deferred
+		? i->iter->btree_id
+		: i->d->btree_id;
 
-#define trans_for_each_entry(trans, i)					\
-	__trans_for_each_entry(trans, i, true)
+	if (!i->deferred) {
+		BUG_ON(i->iter->level);
+		BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
+		EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) &&
+			!bch2_extent_is_atomic(i->k, i->iter));
 
-#define trans_for_each_iter(trans, i)					\
-	__trans_for_each_entry(trans, i, !(i)->deferred)
+		bch2_btree_iter_verify_locks(i->iter);
+	}
 
-#define trans_for_each_leaf(trans, i)					\
-	__trans_for_each_entry(trans, i, !(i)->deferred &&		\
-			       !same_leaf_as_prev(trans, i))
+	BUG_ON(debug_check_bkeys(c) &&
+	       !bkey_deleted(&i->k->k) &&
+	       bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), btree_id));
+}
 
-inline void bch2_btree_node_lock_for_insert(struct bch_fs *c, struct btree *b,
-					    struct btree_iter *iter)
+static int bch2_trans_journal_preres_get(struct btree_trans *trans)
 {
-	bch2_btree_node_lock_write(b, iter);
+	struct bch_fs *c = trans->c;
+	struct btree_insert_entry *i;
+	unsigned u64s = 0;
+	int ret;
 
-	if (btree_node_just_written(b) &&
-	    bch2_btree_post_write_cleanup(c, b))
-		bch2_btree_iter_reinit_node(iter, b);
+	trans_for_each_update(trans, i)
+		if (i->deferred)
+			u64s += jset_u64s(i->k->k.u64s);
 
-	/*
-	 * If the last bset has been written, or if it's gotten too big - start
-	 * a new bset to insert into:
-	 */
-	if (want_new_bset(c, b))
-		bch2_btree_init_next(c, b, iter);
-}
+	if (!u64s)
+		return 0;
 
-static void multi_lock_write(struct bch_fs *c, struct btree_trans *trans)
-{
-	struct btree_insert_entry *i;
+	ret = bch2_journal_preres_get(&c->journal,
+			&trans->journal_preres, u64s,
+			JOURNAL_RES_GET_NONBLOCK);
+	if (ret != -EAGAIN)
+		return ret;
 
-	trans_for_each_leaf(trans, i)
-		bch2_btree_node_lock_for_insert(c, i->iter->l[0].b, i->iter);
-}
+	btree_trans_unlock(trans);
 
-static void multi_unlock_write(struct btree_trans *trans)
-{
-	struct btree_insert_entry *i;
+	ret = bch2_journal_preres_get(&c->journal,
+			&trans->journal_preres, u64s, 0);
+	if (ret)
+		return ret;
 
-	trans_for_each_leaf(trans, i)
-		bch2_btree_node_unlock_write(i->iter->l[0].b, i->iter);
-}
+	if (!btree_trans_relock(trans)) {
+		trans_restart(" (iter relock after journal preres get blocked)");
+		return -EINTR;
+	}
 
-static inline int btree_trans_cmp(struct btree_insert_entry l,
-				  struct btree_insert_entry r)
-{
-	return (l.deferred > r.deferred) - (l.deferred < r.deferred) ?:
-		btree_iter_cmp(l.iter, r.iter);
+	return 0;
 }
 
-static bool btree_trans_relock(struct btree_trans *trans)
+static int bch2_trans_journal_res_get(struct btree_trans *trans,
+				      unsigned flags)
 {
+	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i;
+	unsigned u64s = 0;
+	int ret;
 
-	trans_for_each_iter(trans, i)
-		return bch2_btree_iter_relock(i->iter);
-	return true;
-}
+	if (unlikely(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
+		return 0;
 
-static void btree_trans_unlock(struct btree_trans *trans)
-{
-	struct btree_insert_entry *i;
+	if (trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
+		flags |= JOURNAL_RES_GET_RESERVED;
 
-	trans_for_each_iter(trans, i) {
-		bch2_btree_iter_unlock(i->iter);
-		break;
-	}
-}
+	trans_for_each_update(trans, i)
+		u64s += jset_u64s(i->k->k.u64s);
 
-/* Normal update interface: */
+	ret = bch2_journal_res_get(&c->journal, &trans->journal_res,
+				   u64s, flags);
+
+	return ret == -EAGAIN ? BTREE_INSERT_NEED_JOURNAL_RES : ret;
+}
 
 static enum btree_insert_ret
 btree_key_can_insert(struct btree_trans *trans,
@@ -477,6 +516,29 @@ btree_key_can_insert(struct btree_trans *trans,
 	return BTREE_INSERT_OK;
 }
 
+static int btree_trans_check_can_insert(struct btree_trans *trans,
+					struct btree_insert_entry **stopped_at)
+{
+	struct btree_insert_entry *i;
+	unsigned u64s = 0;
+	int ret;
+
+	trans_for_each_update_iter(trans, i) {
+		/* Multiple inserts might go to same leaf: */
+		if (!same_leaf_as_prev(trans, i))
+			u64s = 0;
+
+		u64s += i->k->k.u64s;
+		ret = btree_key_can_insert(trans, i, &u64s);
+		if (ret) {
+			*stopped_at = i;
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
 static inline void do_btree_insert_one(struct btree_trans *trans,
 				       struct btree_insert_entry *insert)
 {
@@ -495,15 +557,12 @@ static inline int do_btree_insert_at(struct btree_trans *trans,
 	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i;
 	struct btree_iter *linked;
-	unsigned u64s;
 	int ret;
-retry:
-	trans_for_each_iter(trans, i)
-		BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
 
-	memset(&trans->journal_res, 0, sizeof(trans->journal_res));
+	trans_for_each_update_iter(trans, i)
+		BUG_ON(i->iter->uptodate >= BTREE_ITER_NEED_RELOCK);
 
-	multi_lock_write(c, trans);
+	btree_trans_lock_write(c, trans);
 
 	if (race_fault()) {
 		ret = -EINTR;
@@ -516,59 +575,24 @@ retry:
 	 * held, otherwise another thread could write the node changing the
 	 * amount of space available:
	 */
-	u64s = 0;
-	trans_for_each_iter(trans, i) {
-		/* Multiple inserts might go to same leaf: */
-		if (!same_leaf_as_prev(trans, i))
-			u64s = 0;
-
-		u64s += i->k->k.u64s;
-		ret = btree_key_can_insert(trans, i, &u64s);
-		if (ret) {
-			*stopped_at = i;
-			goto out;
-		}
-	}
-
-	if (likely(!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))) {
-		unsigned flags = (trans->flags & BTREE_INSERT_JOURNAL_RESERVED)
-			? JOURNAL_RES_GET_RESERVED : 0;
-
-		u64s = 0;
-		trans_for_each_entry(trans, i)
-			u64s += jset_u64s(i->k->k.u64s);
-
-		ret = bch2_journal_res_get(&c->journal,
-					   &trans->journal_res, u64s,
-					   flags|JOURNAL_RES_GET_NONBLOCK);
-		if (likely(!ret))
-			goto got_journal_res;
-		if (ret != -EAGAIN)
-			goto out;
-
-		multi_unlock_write(trans);
-		btree_trans_unlock(trans);
-
-		ret = bch2_journal_res_get(&c->journal,
-					   &trans->journal_res, u64s,
-					   flags|JOURNAL_RES_GET_CHECK);
-		if (ret)
-			return ret;
-
-		if (!btree_trans_relock(trans)) {
-			trans_restart(" (iter relock after journal res get blocked)");
-			return -EINTR;
-		}
-
-		goto retry;
-	}
-got_journal_res:
+	ret = btree_trans_check_can_insert(trans, stopped_at);
+	if (ret)
+		goto out;
+
+	/*
+	 * Don't get journal reservation until after we know insert will
+	 * succeed:
+	 */
+	ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_NONBLOCK);
+	if (ret)
+		goto out;
+
 	if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY)) {
 		if (journal_seq_verify(c))
-			trans_for_each_entry(trans, i)
+			trans_for_each_update(trans, i)
 				i->k->k.version.lo = trans->journal_res.seq;
 		else if (inject_invalid_keys(c))
-			trans_for_each_entry(trans, i)
+			trans_for_each_update(trans, i)
 				i->k->k.version = MAX_VERSION;
 	}
@@ -578,7 +602,7 @@ got_journal_res:
 	 * have been traversed/locked, depending on what the caller was
 	 * doing:
 	 */
-	trans_for_each_iter(trans, i) {
+	trans_for_each_update_iter(trans, i) {
 		for_each_btree_iter(i->iter, linked)
 			if (linked->uptodate < BTREE_ITER_NEED_RELOCK)
 				linked->flags |= BTREE_ITER_NOUNLOCK;
@@ -586,40 +610,19 @@ got_journal_res:
 		}
 	}
 
-	trans_for_each_entry(trans, i)
+	trans_for_each_update(trans, i)
 		do_btree_insert_one(trans, i);
 out:
 	BUG_ON(ret &&
 	       (trans->flags & BTREE_INSERT_JOURNAL_RESERVED) &&
 	       trans->journal_res.ref);
 
-	multi_unlock_write(trans);
+	btree_trans_unlock_write(trans);
 	bch2_journal_res_put(&c->journal, &trans->journal_res);
 
 	return ret;
 }
 
-static inline void btree_insert_entry_checks(struct bch_fs *c,
-					     struct btree_insert_entry *i)
-{
-	enum btree_id btree_id = !i->deferred
-		? i->iter->btree_id
-		: i->d->btree_id;
-
-	if (!i->deferred) {
-		BUG_ON(i->iter->level);
-		BUG_ON(bkey_cmp(bkey_start_pos(&i->k->k), i->iter->pos));
-		EBUG_ON((i->iter->flags & BTREE_ITER_IS_EXTENTS) &&
-			!bch2_extent_is_atomic(i->k, i->iter));
-
-		bch2_btree_iter_verify_locks(i->iter);
-	}
-
-	BUG_ON(debug_check_bkeys(c) &&
-	       !bkey_deleted(&i->k->k) &&
-	       bch2_bkey_invalid(c, bkey_i_to_s_c(i->k), btree_id));
-}
-
 /**
  * __bch_btree_insert_at - insert keys at given iterator positions
  *
@@ -631,60 +634,15 @@ static inline void btree_insert_entry_checks(struct bch_fs *c,
  * -EROFS: filesystem read only
  * -EIO: journal or btree node IO error
  */
-static int __bch2_btree_insert_at(struct btree_trans *trans)
+static int __bch2_trans_commit(struct btree_trans *trans)
 {
 	struct bch_fs *c = trans->c;
 	struct btree_insert_entry *i;
 	struct btree_iter *linked;
-	unsigned flags, u64s = 0;
+	unsigned flags;
 	int ret;
-
-	BUG_ON(!trans->nr_updates);
-
-	/* for the sake of sanity: */
-	BUG_ON(trans->nr_updates > 1 && !(trans->flags & BTREE_INSERT_ATOMIC));
-
-	if (trans->flags & BTREE_INSERT_GC_LOCK_HELD)
-		lockdep_assert_held(&c->gc_lock);
-
-	memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
-
-	bubble_sort(trans->updates, trans->nr_updates, btree_trans_cmp);
-
-	trans_for_each_entry(trans, i)
-		btree_insert_entry_checks(c, i);
-
-	trans_for_each_entry(trans, i)
-		if (i->deferred)
-			u64s += jset_u64s(i->k->k.u64s);
-
-	if (u64s) {
-		ret = bch2_journal_preres_get(&c->journal,
-				&trans->journal_preres, u64s,
-				JOURNAL_RES_GET_NONBLOCK);
-		if (!ret)
-			goto got_journal_preres;
-		if (ret != -EAGAIN)
-			return ret;
-
-		btree_trans_unlock(trans);
-		ret = bch2_journal_preres_get(&c->journal,
-				&trans->journal_preres, u64s, 0);
-		if (ret)
-			return ret;
-
-		if (!btree_trans_relock(trans)) {
-			trans_restart(" (iter relock after journal preres get blocked)");
-			bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-			return -EINTR;
-		}
-	}
-got_journal_preres:
-	if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
-		     !percpu_ref_tryget(&c->writes)))
-		return -EROFS;
 retry:
-	trans_for_each_iter(trans, i) {
+	trans_for_each_update_iter(trans, i) {
 		unsigned old_locks_want = i->iter->locks_want;
 		unsigned old_uptodate = i->iter->uptodate;
@@ -705,24 +663,19 @@ retry:
 	if (unlikely(ret))
 		goto err;
 
-	trans_for_each_leaf(trans, i)
+	trans_for_each_update_leaf(trans, i)
 		bch2_foreground_maybe_merge(c, i->iter, 0, trans->flags);
 
-	trans_for_each_iter(trans, i)
+	trans_for_each_update_iter(trans, i)
 		bch2_btree_iter_downgrade(i->iter);
 out:
-	bch2_journal_preres_put(&c->journal, &trans->journal_preres);
-
-	if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
-		percpu_ref_put(&c->writes);
-
 	/* make sure we didn't drop or screw up locks: */
-	trans_for_each_iter(trans, i) {
+	trans_for_each_update_iter(trans, i) {
 		bch2_btree_iter_verify_locks(i->iter);
 		break;
 	}
 
-	trans_for_each_iter(trans, i) {
+	trans_for_each_update_iter(trans, i) {
 		for_each_btree_iter(i->iter, linked)
 			linked->flags &= ~BTREE_ITER_NOUNLOCK;
 		break;
@@ -784,12 +737,25 @@ err:
 		bch2_trans_unlock(trans);
 		ret = -EINTR;
 
-		trans_for_each_iter(trans, i) {
+		trans_for_each_update_iter(trans, i) {
 			int ret2 = bch2_mark_bkey_replicas(c, bkey_i_to_s_c(i->k));
 			if (ret2)
 				ret = ret2;
 		}
 		break;
+	case BTREE_INSERT_NEED_JOURNAL_RES:
+		btree_trans_unlock(trans);
+
+		ret = bch2_trans_journal_res_get(trans, JOURNAL_RES_GET_CHECK);
+		if (ret)
+			goto out;
+
+		if (btree_trans_relock(trans))
+			goto retry;
+
+		trans_restart(" (iter relock after journal res get blocked)");
+		ret = -EINTR;
+		break;
 	default:
 		BUG_ON(ret >= 0);
 		break;
@@ -801,7 +767,7 @@ err:
 		goto out;
 	}
 
-	trans_for_each_iter(trans, i) {
+	trans_for_each_update_iter(trans, i) {
 		int ret2 = bch2_btree_iter_traverse(i->iter);
 		if (ret2) {
 			ret = ret2;
@@ -830,16 +796,44 @@ int bch2_trans_commit(struct btree_trans *trans,
 		      u64 *journal_seq,
 		      unsigned flags)
 {
+	struct bch_fs *c = trans->c;
+	struct btree_insert_entry *i;
 	int ret;
 
 	if (!trans->nr_updates)
 		return 0;
 
+	/* for the sake of sanity: */
+	BUG_ON(trans->nr_updates > 1 && !(flags & BTREE_INSERT_ATOMIC));
+
+	if (flags & BTREE_INSERT_GC_LOCK_HELD)
+		lockdep_assert_held(&c->gc_lock);
+
+	memset(&trans->journal_res, 0, sizeof(trans->journal_res));
+	memset(&trans->journal_preres, 0, sizeof(trans->journal_preres));
 	trans->disk_res		= disk_res;
 	trans->journal_seq	= journal_seq;
 	trans->flags		= flags;
 
-	ret = __bch2_btree_insert_at(trans);
+	bubble_sort(trans->updates, trans->nr_updates, btree_trans_cmp);
+
+	trans_for_each_update(trans, i)
+		btree_insert_entry_checks(c, i);
+
+	if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW) &&
+		     !percpu_ref_tryget(&c->writes)))
+		return -EROFS;
+
+	ret = bch2_trans_journal_preres_get(trans);
+	if (ret)
+		goto err;
+
+	ret = __bch2_trans_commit(trans);
+err:
+	bch2_journal_preres_put(&c->journal, &trans->journal_preres);
+
+	if (unlikely(!(trans->flags & BTREE_INSERT_NOCHECK_RW)))
+		percpu_ref_put(&c->writes);
 
 	trans->nr_updates	= 0;
 
@@ -861,7 +855,7 @@ int bch2_btree_delete_at(struct btree_trans *trans,
 }
 
 /**
- * bch_btree_insert - insert keys into the extent btree
+ * bch2_btree_insert - insert keys into the extent btree
  * @c: pointer to struct bch_fs
 * @id: btree to insert into
 * @insert_keys: list of keys to insert