x(reflink_inline_data, 14) \
x(new_varint, 15) \
x(journal_no_flush, 16) \
- x(alloc_v2, 17)
+ x(alloc_v2, 17) \
+ x(extents_across_btree_nodes, 18)
+
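+/*
+ * Features the running kernel always enables: OR'd into the superblock's
+ * feature bits whenever the filesystem is marked dirty (see the
+ * SET_BCH_SB_CLEAN hunk below):
+ */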
+#define BCH_SB_FEATURES_ALWAYS \
+ ((1ULL << BCH_FEATURE_new_extent_overwrite)| \
+ (1ULL << BCH_FEATURE_extents_above_btree_updates)|\
+ (1ULL << BCH_FEATURE_btree_updates_journalled)|\
+ (1ULL << BCH_FEATURE_extents_across_btree_nodes))
#define BCH_SB_FEATURES_ALL \
- ((1ULL << BCH_FEATURE_new_siphash)| \
- (1ULL << BCH_FEATURE_new_extent_overwrite)| \
+ (BCH_SB_FEATURES_ALWAYS| \
+ (1ULL << BCH_FEATURE_new_siphash)| \
(1ULL << BCH_FEATURE_btree_ptr_v2)| \
- (1ULL << BCH_FEATURE_extents_above_btree_updates)|\
(1ULL << BCH_FEATURE_new_varint)| \
(1ULL << BCH_FEATURE_journal_no_flush)| \
(1ULL << BCH_FEATURE_alloc_v2))
static inline struct bkey_s_c
__bch2_btree_iter_peek_slot_extents(struct btree_iter *iter)
{
- struct btree_iter_level *l = &iter->l[0];
- struct btree_node_iter node_iter;
struct bkey_s_c k;
- struct bkey n;
- int ret;
+ struct bpos pos, next_start;
/* keys & holes can't span inode numbers: */
	if (iter->pos.offset == KEY_OFFSET_MAX) {
		if (iter->pos.inode == KEY_INODE_MAX)
			return bkey_s_c_null;

		bch2_btree_iter_set_pos(iter, bkey_successor(iter->pos));
-
- ret = bch2_btree_iter_traverse(iter);
- if (unlikely(ret))
- return bkey_s_c_err(ret);
}
- /*
- * iterator is now at the correct position for inserting at iter->pos,
- * but we need to keep iterating until we find the first non whiteout so
- * we know how big a hole we have, if any:
- */
-
- node_iter = l->iter;
- k = __btree_iter_unpack(iter, l, &iter->k,
- bch2_btree_node_iter_peek(&node_iter, l->b));
-
- if (k.k && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0) {
- /*
- * We're not setting iter->uptodate because the node iterator
- * doesn't necessarily point at the key we're returning:
- */
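+	/*
+	 * bch2_btree_iter_peek() may advance iter->pos, but peek_slot() must
+	 * not change the iterator's position: save it and restore it after:
+	 */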
+ pos = iter->pos;
+ k = bch2_btree_iter_peek(iter);
+ iter->pos = pos;
- EBUG_ON(bkey_cmp(k.k->p, iter->pos) <= 0);
- bch2_btree_iter_verify(iter);
+ if (bkey_err(k))
return k;
- }
- /* hole */
+ if (k.k && bkey_cmp(bkey_start_pos(k.k), iter->pos) <= 0)
+ return k;
- if (!k.k)
- k.k = &l->b->key.k;
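+	/*
+	 * No key covers iter->pos: synthesize a deleted key in iter->k
+	 * spanning the hole, up to the next key or the end of the inode:
+	 */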
+ next_start = k.k ? bkey_start_pos(k.k) : POS_MAX;
- bkey_init(&n);
- n.p = iter->pos;
- bch2_key_resize(&n,
+ bkey_init(&iter->k);
+ iter->k.p = iter->pos;
+ bch2_key_resize(&iter->k,
min_t(u64, KEY_SIZE_MAX,
- (k.k->p.inode == n.p.inode
- ? bkey_start_offset(k.k)
+ (next_start.inode == iter->pos.inode
+ ? next_start.offset
: KEY_OFFSET_MAX) -
- n.p.offset));
+ iter->pos.offset));
- EBUG_ON(!n.size);
+ EBUG_ON(!iter->k.size);
- iter->k = n;
iter->uptodate = BTREE_ITER_UPTODATE;
bch2_btree_iter_verify_entry_exit(iter);
if (iter->uptodate == BTREE_ITER_UPTODATE)
return btree_iter_peek_uptodate(iter);
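+	/*
+	 * The extents path traverses via bch2_btree_iter_peek(), so hand off
+	 * before the explicit traverse below:
+	 */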
+ if (iter->flags & BTREE_ITER_IS_EXTENTS)
+ return __bch2_btree_iter_peek_slot_extents(iter);
+
ret = bch2_btree_iter_traverse(iter);
if (unlikely(ret))
return bkey_s_c_err(ret);
- if (iter->flags & BTREE_ITER_IS_EXTENTS)
- return __bch2_btree_iter_peek_slot_extents(iter);
-
k = __btree_iter_peek_all(iter, l, &iter->k);
EBUG_ON(k.k && bkey_deleted(k.k) && bkey_cmp(k.k->p, iter->pos) == 0);
EBUG_ON(btree_node_just_written(b));
EBUG_ON(bset_written(b, btree_bset_last(b)));
EBUG_ON(bkey_deleted(&insert->k) && bkey_val_u64s(&insert->k));
- EBUG_ON(bkey_cmp(b->data->min_key, POS_MIN) &&
- bkey_cmp(bkey_start_pos(&insert->k),
- bkey_predecessor(b->data->min_key)) < 0);
EBUG_ON(bkey_cmp(insert->k.p, b->data->min_key) < 0);
EBUG_ON(bkey_cmp(insert->k.p, b->data->max_key) > 0);
EBUG_ON(insert->k.u64s >
bkey_cmp(l->pos, r->pos);
}
-static void bch2_trans_update2(struct btree_trans *trans,
+static int bch2_trans_update2(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_i *insert)
{
struct btree_insert_entry *i, n = (struct btree_insert_entry) {
.iter = iter, .k = insert
};
+ int ret;
btree_insert_entry_checks(trans, n.iter, n.k);
- BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
-
EBUG_ON(trans->nr_updates2 >= BTREE_ITER_MAX);
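+	/*
+	 * Callers are no longer required to have traversed the iterator, so
+	 * do it here before asserting on its uptodate state:
+	 */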
+ ret = bch2_btree_iter_traverse(iter);
+ if (unlikely(ret))
+ return ret;
+
+ BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
+
iter->flags |= BTREE_ITER_KEEP_UNTIL_COMMIT;
trans_for_each_update2(trans, i) {
if (btree_iter_pos_cmp(n.iter, i->iter) == 0) {
*i = n;
- return;
+ return 0;
}
		if (btree_iter_pos_cmp(n.iter, i->iter) <= 0)
			break;
	}

	array_insert_item(trans->updates2, trans->nr_updates2,
			  i - trans->updates2, n);
+	return 0;
}
static int extent_update_to_keys(struct btree_trans *trans,
iter->flags |= BTREE_ITER_INTENT;
__bch2_btree_iter_set_pos(iter, insert->k.p, false);
- bch2_trans_update2(trans, iter, insert);
+ ret = bch2_trans_update2(trans, iter, insert);
bch2_trans_iter_put(trans, iter);
- return 0;
+ return ret;
}
static int extent_handle_overwrites(struct btree_trans *trans,
bch2_cut_back(start, update);
__bch2_btree_iter_set_pos(update_iter, update->k.p, false);
- bch2_trans_update2(trans, update_iter, update);
+ ret = bch2_trans_update2(trans, update_iter, update);
bch2_trans_iter_put(trans, update_iter);
+ if (ret)
+ goto err;
}
if (bkey_cmp(k.k->p, end) > 0) {
bch2_cut_front(end, update);
__bch2_btree_iter_set_pos(update_iter, update->k.p, false);
- bch2_trans_update2(trans, update_iter, update);
+ ret = bch2_trans_update2(trans, update_iter, update);
bch2_trans_iter_put(trans, update_iter);
+ if (ret)
+ goto err;
} else {
update_iter = bch2_trans_copy_iter(trans, iter);
update->k.size = 0;
__bch2_btree_iter_set_pos(update_iter, update->k.p, false);
- bch2_trans_update2(trans, update_iter, update);
+ ret = bch2_trans_update2(trans, update_iter, update);
bch2_trans_iter_put(trans, update_iter);
+ if (ret)
+ goto err;
}
k = bch2_btree_iter_next_with_updates(iter);
trans_for_each_update(trans, i) {
if (i->iter->flags & BTREE_ITER_IS_EXTENTS) {
ret = extent_update_to_keys(trans, i->iter, i->k);
- if (ret)
- goto out;
} else {
- bch2_trans_update2(trans, i->iter, i->k);
+ ret = bch2_trans_update2(trans, i->iter, i->k);
}
+ if (ret)
+ goto out;
}
trans_for_each_update2(trans, i) {
unsigned flags)
{
struct bch_fs *c = trans->c;
- struct btree *b = iter_l(iter)->b;
- struct btree_node_iter node_iter = iter_l(iter)->iter;
- struct bkey_packed *_old;
struct bkey_s_c old;
struct bkey unpacked;
int ret = 0;
BTREE_TRIGGER_OVERWRITE|flags);
}
} else {
+ struct btree_iter *copy;
+
BUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
bch2_mark_key_locked(c, old, bkey_i_to_s_c(new),
0, new->k.size,
fs_usage, trans->journal_res.seq,
BTREE_TRIGGER_INSERT|flags);
- while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) {
- unsigned offset = 0;
- s64 sectors;
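+		/*
+		 * Walk the keys being overwritten with a copy of the
+		 * iterator, leaving the original's position undisturbed;
+		 * unlike the old node_iter loop, this can cross btree node
+		 * boundaries:
+		 */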
+ copy = bch2_trans_copy_iter(trans, iter);
- old = bkey_disassemble(b, _old, &unpacked);
- sectors = -((s64) old.k->size);
+ for_each_btree_key_continue(copy, 0, old, ret) {
+ unsigned offset = 0;
+ s64 sectors = -((s64) old.k->size);
flags |= BTREE_TRIGGER_OVERWRITE;
if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0)
- return 0;
+ break;
switch (bch2_extent_overlap(&new->k, old.k)) {
case BCH_EXTENT_OVERLAP_ALL:
trans->journal_res.seq, flags) ?: 1;
if (ret <= 0)
break;
-
- bch2_btree_node_iter_advance(&node_iter, b);
}
+ bch2_trans_iter_put(trans, copy);
}
return ret;
pr_err("overlapping with");
if (btree_iter_type(i->iter) != BTREE_ITER_CACHED) {
- struct btree *b = iter_l(i->iter)->b;
- struct btree_node_iter node_iter = iter_l(i->iter)->iter;
- struct bkey_packed *_k;
-
- while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) {
- struct bkey unpacked;
- struct bkey_s_c k;
+ struct btree_iter *copy = bch2_trans_copy_iter(trans, i->iter);
+ struct bkey_s_c k;
+ int ret;
- pr_info("_k %px format %u", _k, _k->format);
- k = bkey_disassemble(b, _k, &unpacked);
-
- if (btree_node_is_extents(b)
+ for_each_btree_key_continue(copy, 0, k, ret) {
+ if (btree_node_type_is_extents(i->iter->btree_id)
? bkey_cmp(i->k->k.p, bkey_start_pos(k.k)) <= 0
: bkey_cmp(i->k->k.p, k.k->p))
break;
bch2_bkey_val_to_text(&PBUF(buf), c, k);
pr_err("%s", buf);
-
- bch2_btree_node_iter_advance(&node_iter, b);
}
+ bch2_trans_iter_put(trans, copy);
} else {
struct bkey_cached *ck = (void *) i->iter->l[0].b;
}
bch2_btree_iter_set_pos(iter, bkey_start_pos(k.k));
- BUG_ON(iter->uptodate > BTREE_ITER_NEED_PEEK);
-
bch2_trans_update(trans, iter, n, 0);
out:
ret = sectors;
BTREE_TRIGGER_OVERWRITE|flags);
}
} else {
- struct btree *b = iter_l(iter)->b;
- struct btree_node_iter node_iter = iter_l(iter)->iter;
- struct bkey_packed *_old;
- struct bkey unpacked;
+ struct btree_iter *copy;
+ struct bkey _old;
EBUG_ON(btree_iter_type(iter) == BTREE_ITER_CACHED);
- bkey_init(&unpacked);
- old = (struct bkey_s_c) { &unpacked, NULL };
+ bkey_init(&_old);
+ old = (struct bkey_s_c) { &_old, NULL };
ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
0, new->k.size,
if (ret)
return ret;
- while ((_old = bch2_btree_node_iter_peek(&node_iter, b))) {
- unsigned flags = BTREE_TRIGGER_OVERWRITE;
- unsigned offset = 0;
- s64 sectors;
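+		/* as in bch2_mark_update(): walk overwrites via a copied iterator */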
+ copy = bch2_trans_copy_iter(trans, iter);
- old = bkey_disassemble(b, _old, &unpacked);
- sectors = -((s64) old.k->size);
+ for_each_btree_key_continue(copy, 0, old, ret) {
+ unsigned offset = 0;
+ s64 sectors = -((s64) old.k->size);
flags |= BTREE_TRIGGER_OVERWRITE;
if (bkey_cmp(new->k.p, bkey_start_pos(old.k)) <= 0)
- return 0;
+ break;
switch (bch2_extent_overlap(&new->k, old.k)) {
case BCH_EXTENT_OVERLAP_ALL:
ret = bch2_trans_mark_key(trans, old, bkey_i_to_s_c(new),
offset, sectors, flags);
if (ret)
- return ret;
-
- bch2_btree_node_iter_advance(&node_iter, b);
+ break;
}
+ bch2_trans_iter_put(trans, copy);
}
return ret;
struct bpos *end)
{
struct btree_trans *trans = iter->trans;
- struct btree *b;
- struct btree_node_iter node_iter;
- struct bkey_packed *_k;
- unsigned nr_iters = 0;
+ struct btree_iter *copy;
+ struct bkey_s_c k;
+ unsigned nr_iters = 0;
int ret;
- ret = bch2_btree_iter_traverse(iter);
- if (ret)
- return ret;
-
- b = iter->l[0].b;
- node_iter = iter->l[0].iter;
-
- BUG_ON(bkey_cmp(b->data->min_key, POS_MIN) &&
- bkey_cmp(bkey_start_pos(&insert->k),
- bkey_predecessor(b->data->min_key)) < 0);
-
- *end = bpos_min(insert->k.p, b->key.k.p);
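+	/*
+	 * Extents may now cross btree node boundaries, so the atomic end is
+	 * no longer clamped to the end of the current btree node:
+	 */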
+ *end = insert->k.p;
/* extent_update_to_keys(): */
nr_iters += 1;
if (ret < 0)
return ret;
- while ((_k = bch2_btree_node_iter_peek(&node_iter, b))) {
- struct bkey unpacked;
- struct bkey_s_c k = bkey_disassemble(b, _k, &unpacked);
+ copy = bch2_trans_copy_iter(trans, iter);
+
+ for_each_btree_key_continue(copy, 0, k, ret) {
unsigned offset = 0;
if (bkey_cmp(bkey_start_pos(k.k), *end) >= 0)
&nr_iters, EXTENT_ITERS_MAX);
if (ret)
break;
-
- bch2_btree_node_iter_advance(&node_iter, b);
}
+ bch2_trans_iter_put(trans, copy);
return ret < 0 ? ret : 0;
}
bch2_journal_pin_put(j, j->replay_journal_seq++);
}
-static int bch2_extent_replay_key(struct bch_fs *c, enum btree_id btree_id,
- struct bkey_i *k)
-{
- struct btree_trans trans;
- struct btree_iter *iter, *split_iter;
- /*
- * We might cause compressed extents to be split, so we need to pass in
- * a disk_reservation:
- */
- struct disk_reservation disk_res =
- bch2_disk_reservation_init(c, 0);
- struct bkey_i *split;
- struct bpos atomic_end;
- /*
- * Some extents aren't equivalent - w.r.t. what the triggers do
- * - if they're split:
- */
- bool remark_if_split = bch2_bkey_sectors_compressed(bkey_i_to_s_c(k)) ||
- k->k.type == KEY_TYPE_reflink_p;
- bool remark = false;
- int ret;
-
- bch2_trans_init(&trans, c, BTREE_ITER_MAX, 0);
-retry:
- bch2_trans_begin(&trans);
-
- iter = bch2_trans_get_iter(&trans, btree_id,
- bkey_start_pos(&k->k),
- BTREE_ITER_INTENT);
-
- do {
- ret = bch2_btree_iter_traverse(iter);
- if (ret)
- goto err;
-
- atomic_end = bpos_min(k->k.p, iter->l[0].b->key.k.p);
-
- split = bch2_trans_kmalloc(&trans, bkey_bytes(&k->k));
- ret = PTR_ERR_OR_ZERO(split);
- if (ret)
- goto err;
-
- if (!remark &&
- remark_if_split &&
- bkey_cmp(atomic_end, k->k.p) < 0) {
- ret = bch2_disk_reservation_add(c, &disk_res,
- k->k.size *
- bch2_bkey_nr_ptrs_allocated(bkey_i_to_s_c(k)),
- BCH_DISK_RESERVATION_NOFAIL);
- BUG_ON(ret);
-
- remark = true;
- }
-
- bkey_copy(split, k);
- bch2_cut_front(iter->pos, split);
- bch2_cut_back(atomic_end, split);
-
- split_iter = bch2_trans_copy_iter(&trans, iter);
-
- /*
- * It's important that we don't go through the
- * extent_handle_overwrites() and extent_update_to_keys() path
- * here: journal replay is supposed to treat extents like
- * regular keys
- */
- __bch2_btree_iter_set_pos(split_iter, split->k.p, false);
- bch2_trans_update(&trans, split_iter, split,
- BTREE_TRIGGER_NORUN);
- bch2_trans_iter_put(&trans, split_iter);
-
- bch2_btree_iter_set_pos(iter, split->k.p);
-
- if (remark) {
- ret = bch2_trans_mark_key(&trans,
- bkey_s_c_null,
- bkey_i_to_s_c(split),
- 0, split->k.size,
- BTREE_TRIGGER_INSERT);
- if (ret)
- goto err;
- }
- } while (bkey_cmp(iter->pos, k->k.p) < 0);
-
- if (remark) {
- ret = bch2_trans_mark_key(&trans,
- bkey_i_to_s_c(k),
- bkey_s_c_null,
- 0, -((s64) k->k.size),
- BTREE_TRIGGER_OVERWRITE);
- if (ret)
- goto err;
- }
-
- ret = bch2_trans_commit(&trans, &disk_res, NULL,
- BTREE_INSERT_NOFAIL|
- BTREE_INSERT_LAZY_RW|
- BTREE_INSERT_JOURNAL_REPLAY);
-err:
- bch2_trans_iter_put(&trans, iter);
-
- if (ret == -EINTR)
- goto retry;
-
- bch2_disk_reservation_put(c, &disk_res);
-
- return bch2_trans_exit(&trans) ?: ret;
-}
-
static int __bch2_journal_replay_key(struct btree_trans *trans,
enum btree_id id, unsigned level,
struct bkey_i *k)
replay_now_at(j, keys.journal_seq_base + i->journal_seq);
- ret = i->k->k.size
- ? bch2_extent_replay_key(c, i->btree_id, i->k)
- : bch2_journal_replay_key(c, i);
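+		/*
+		 * Extents no longer need to be split at btree node boundaries
+		 * during replay; all keys go through the same replay path:
+		 */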
+ ret = bch2_journal_replay_key(c, i);
if (ret)
goto err;
}
mutex_lock(&c->sb_lock);
SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
- c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_new_extent_overwrite;
- c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_extents_above_btree_updates;
- c->disk_sb.sb->features[0] |= 1ULL << BCH_FEATURE_btree_updates_journalled;
+ c->disk_sb.sb->features[0] |= BCH_SB_FEATURES_ALWAYS;
ret = bch2_write_super(c);
mutex_unlock(&c->sb_lock);