return b;
}
-struct btree *bch2_btree_node_get_sibling(struct bch_fs *c,
- struct btree_iter *iter,
- struct btree *b,
- enum btree_node_sibling sib)
-{
- struct btree_trans *trans = iter->trans;
- struct btree *parent;
- struct btree_node_iter node_iter;
- struct bkey_packed *k;
- struct bkey_buf tmp;
- struct btree *ret = NULL;
- unsigned level = b->c.level;
-
- bch2_bkey_buf_init(&tmp);
-
- parent = btree_iter_node(iter, level + 1);
- if (!parent)
- return NULL;
-
- /*
- * There's a corner case where a btree_iter might have a node locked
- * that is just outside its current pos - when
- * bch2_btree_iter_set_pos_same_leaf() gets to the end of the node.
- *
- * But the lock ordering checks in __bch2_btree_node_lock() go off of
- * iter->pos, not the node's key: so if the iterator is marked as
- * needing to be traversed, we risk deadlock if we don't bail out here:
- */
- if (iter->uptodate >= BTREE_ITER_NEED_TRAVERSE)
- return ERR_PTR(-EINTR);
-
- if (!bch2_btree_node_relock(iter, level + 1)) {
- ret = ERR_PTR(-EINTR);
- goto out;
- }
-
- node_iter = iter->l[parent->c.level].iter;
-
- k = bch2_btree_node_iter_peek_all(&node_iter, parent);
- BUG_ON(bkey_cmp_left_packed(parent, k, &b->key.k.p));
-
- k = sib == btree_prev_sib
- ? bch2_btree_node_iter_prev(&node_iter, parent)
- : (bch2_btree_node_iter_advance(&node_iter, parent),
- bch2_btree_node_iter_peek(&node_iter, parent));
- if (!k)
- goto out;
-
- bch2_bkey_buf_unpack(&tmp, c, parent, k);
-
- ret = bch2_btree_node_get(c, iter, tmp.k, level,
- SIX_LOCK_intent, _THIS_IP_);
-
- if (PTR_ERR_OR_ZERO(ret) == -EINTR && !trans->nounlock) {
- struct btree_iter *linked;
-
- if (!bch2_btree_node_relock(iter, level + 1))
- goto out;
-
- /*
- * We might have got -EINTR because trylock failed, and we're
- * holding other locks that would cause us to deadlock:
- */
- trans_for_each_iter(trans, linked)
- if (btree_iter_lock_cmp(iter, linked) < 0)
- __bch2_btree_iter_unlock(linked);
-
- if (sib == btree_prev_sib)
- btree_node_unlock(iter, level);
-
- ret = bch2_btree_node_get(c, iter, tmp.k, level,
- SIX_LOCK_intent, _THIS_IP_);
-
- /*
- * before btree_iter_relock() calls btree_iter_verify_locks():
- */
- if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED)
- btree_node_unlock(iter, level + 1);
-
- if (!bch2_btree_node_relock(iter, level)) {
- btree_iter_set_dirty(iter, BTREE_ITER_NEED_RELOCK);
-
- if (!IS_ERR(ret)) {
- six_unlock_intent(&ret->c.lock);
- ret = ERR_PTR(-EINTR);
- }
- }
-
- bch2_trans_relock(trans);
- }
-out:
- if (btree_lock_want(iter, level + 1) == BTREE_NODE_UNLOCKED)
- btree_node_unlock(iter, level + 1);
-
- if (PTR_ERR_OR_ZERO(ret) == -EINTR)
- bch2_btree_iter_upgrade(iter, level + 2);
-
- BUG_ON(!IS_ERR(ret) && !btree_node_locked(iter, level));
-
- if (!IS_ERR_OR_NULL(ret)) {
- struct btree *n1 = ret, *n2 = b;
-
- if (sib != btree_prev_sib)
- swap(n1, n2);
-
- if (bpos_cmp(bpos_successor(n1->key.k.p),
- n2->data->min_key)) {
- char buf1[200], buf2[200];
-
- bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&n1->key));
- bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&n2->key));
-
- bch2_fs_inconsistent(c, "btree topology error at btree %s level %u:\n"
- "prev: %s\n"
- "next: %s\n",
- bch2_btree_ids[iter->btree_id], level,
- buf1, buf2);
-
- six_unlock_intent(&ret->c.lock);
- ret = NULL;
- }
- }
-
- bch2_btree_trans_verify_locks(trans);
-
- bch2_bkey_buf_exit(&tmp, c);
-
- return ret;
-}
-
void bch2_btree_node_prefetch(struct bch_fs *c, struct btree_iter *iter,
const struct bkey_i *k,
enum btree_id btree_id, unsigned level)
" format: u64s %u fields %u %u %u %u %u\n"
" unpack fn len: %u\n"
" bytes used %zu/%zu (%zu%% full)\n"
- " sib u64s: %u, %u (merge threshold %zu)\n"
+ " sib u64s: %u, %u (merge threshold %u)\n"
" nr packed keys %u\n"
" nr unpacked keys %u\n"
" floats %zu\n"
@@ ... @@ void bch2_btree_node_to_text(struct printbuf *out, struct bch_fs *c,
b->nr.live_u64s * 100 / btree_max_u64s(c),
b->sib_u64s[0],
b->sib_u64s[1],
- BTREE_FOREGROUND_MERGE_THRESHOLD(c),
+ c->btree_foreground_merge_threshold,
b->nr.packed_keys,
b->nr.unpacked_keys,
stats.floats,
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ ... @@ void __bch2_foreground_maybe_merge(struct bch_fs *c,
enum btree_node_sibling sib)
{
struct btree_trans *trans = iter->trans;
+ struct btree_iter *sib_iter = NULL;
struct btree_update *as;
struct bkey_format_state new_s;
struct bkey_format new_f;
struct bkey_i delete;
struct btree *b, *m, *n, *prev, *next, *parent;
+ struct bpos sib_pos;
size_t sib_u64s;
int ret = 0;
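+ /*
+ * Merging may have to drop and retake locks, so bail out now if this
+ * transaction isn't allowed to unlock:
+ */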
+ if (trans->nounlock)
+ return;
+
BUG_ON(!btree_node_locked(iter, level));
retry:
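+ /* After a retry, re-traverse so iter->l[level].b is valid and locked: */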
+ ret = bch2_btree_iter_traverse(iter);
+ if (ret)
+ goto err;
+
BUG_ON(!btree_node_locked(iter, level));
b = iter->l[level].b;
- parent = btree_node_parent(iter, b);
- if (!parent)
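+ /* No sibling on this side if b already spans the start/end of the keyspace: */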
+ if ((sib == btree_prev_sib && !bpos_cmp(b->data->min_key, POS_MIN)) ||
+ (sib == btree_next_sib && !bpos_cmp(b->data->max_key, POS_MAX))) {
+ b->sib_u64s[sib] = U16_MAX;
goto out;
+ }
- if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c))
- goto out;
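+ /* A position that lands within the sibling node: */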
+ sib_pos = sib == btree_prev_sib
+ ? bpos_predecessor(b->data->min_key)
+ : bpos_successor(b->data->max_key);
- /* XXX: can't be holding read locks */
- m = bch2_btree_node_get_sibling(c, iter, b, sib);
- if (IS_ERR(m)) {
- ret = PTR_ERR(m);
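+ /* Walk down to the sibling with its own iterator, instead of locking
+ * it via the parent node: */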
+ sib_iter = bch2_trans_get_node_iter(trans, iter->btree_id,
+ sib_pos, U8_MAX, level,
+ BTREE_ITER_INTENT);
+ ret = bch2_btree_iter_traverse(sib_iter);
+ if (ret)
goto err;
- }
- /* NULL means no sibling: */
- if (!m) {
+ m = sib_iter->l[level].b;
+
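+ /*
+ * A merge has to update a single shared parent; nodes with different
+ * parents can't be merged:
+ */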
+ if (btree_node_parent(iter, b) !=
+ btree_node_parent(sib_iter, m)) {
b->sib_u64s[sib] = U16_MAX;
goto out;
}
@@ ... @@ void __bch2_foreground_maybe_merge(struct bch_fs *c,
next = m;
}
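+ /* prev and next must be exactly adjacent in the keyspace: */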
+ BUG_ON(bpos_cmp(bpos_successor(prev->data->max_key), next->data->min_key));
+
bch2_bkey_format_init(&new_s);
bch2_bkey_format_add_pos(&new_s, prev->data->min_key);
__bch2_btree_calc_format(&new_s, prev);
@@ ... @@ void __bch2_foreground_maybe_merge(struct bch_fs *c,
}
sib_u64s = min(sib_u64s, btree_max_u64s(c));
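+ /* U16_MAX is used above to mean no mergeable sibling - stay below it: */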
+ sib_u64s = min(sib_u64s, (size_t) U16_MAX - 1);
b->sib_u64s[sib] = sib_u64s;
- if (b->sib_u64s[sib] > BTREE_FOREGROUND_MERGE_THRESHOLD(c)) {
- six_unlock_intent(&m->c.lock);
+ if (b->sib_u64s[sib] > c->btree_foreground_merge_threshold)
goto out;
- }
+ parent = btree_node_parent(iter, b);
as = bch2_btree_update_start(iter, level,
btree_update_reserve_required(c, parent) + 1,
flags|
BTREE_INSERT_NOFAIL|
BTREE_INSERT_USE_RESERVE);
ret = PTR_ERR_OR_ZERO(as);
- if (ret) {
- six_unlock_intent(&m->c.lock);
+ if (ret)
goto err;
- }
trace_btree_merge(c, b);
@@ ... @@ void __bch2_foreground_maybe_merge(struct bch_fs *c,
bch2_btree_update_get_open_buckets(as, n);
six_lock_increment(&b->c.lock, SIX_LOCK_intent);
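+ /*
+ * Take an extra ref on m's intent lock: bch2_btree_iter_node_drop()
+ * will unlock it in sib_iter, and we still need it held:
+ */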
+ six_lock_increment(&m->c.lock, SIX_LOCK_intent);
bch2_btree_iter_node_drop(iter, b);
bch2_btree_iter_node_drop(iter, m);
@@ ... @@ void __bch2_foreground_maybe_merge(struct bch_fs *c,
bch2_btree_update_done(as);
out:
bch2_btree_trans_verify_locks(trans);
+ bch2_trans_iter_free(trans, sib_iter);
/*
* Don't downgrade locks here: we're called after successful insert,
* and the caller will downgrade locks after a successful merge anyways
* (in case the merge failed)
*/
return;
err:
- BUG_ON(ret == -EAGAIN && (flags & BTREE_INSERT_NOUNLOCK));
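+ /* The retry paths recreate sib_iter from scratch: */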
+ bch2_trans_iter_put(trans, sib_iter);
+ sib_iter = NULL;
+
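+ /* If the whole transaction can be relocked, just retry: */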
+ if (ret == -EINTR && bch2_trans_relock(trans)) {
+ ret = 0;
+ goto retry;
+ }
if (ret == -EINTR && !(flags & BTREE_INSERT_NOUNLOCK)) {
- bch2_trans_unlock(trans);
- ret = bch2_btree_iter_traverse(iter);
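+ /* Deadlock avoidance: unlock, then re-traverse every iterator in the
+ * transaction in order: */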
+ ret = bch2_btree_iter_traverse_all(trans);
if (!ret)
goto retry;
}