#include "btree_iter.h"
#include "btree_locking.h"
#include "debug.h"
+#include "error.h"
#include "trace.h"
#include <linux/prefetch.h>
return ERR_PTR(-EIO);
}
- EBUG_ON(b->c.btree_id != iter->btree_id ||
- BTREE_NODE_LEVEL(b->data) != level ||
- bkey_cmp(b->data->max_key, k->k.p));
+ EBUG_ON(b->c.btree_id != iter->btree_id);
+ EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
+ EBUG_ON(bkey_cmp(b->data->max_key, k->k.p));
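+ /* btree_ptr_v2 keys also store the node's min_key, so verify it: */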
+ EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+ bkey_cmp(b->data->min_key,
+ bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
return b;
}
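+/*
+ * Looks a btree node up by key, without a btree iterator: @nofill means only
+ * return the node if it's already in the btree node cache, never reading it
+ * in from disk.
+ */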
struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
const struct bkey_i *k,
enum btree_id btree_id,
- unsigned level)
+ unsigned level,
+ bool nofill)
{
struct btree_cache *bc = &c->btree_cache;
struct btree *b;
retry:
b = btree_cache_find(bc, k);
if (unlikely(!b)) {
+ if (nofill)
+ return NULL;
+
b = bch2_btree_node_fill(c, NULL, k, btree_id,
level, SIX_LOCK_read, true);
return ERR_PTR(-EIO);
}
- EBUG_ON(b->c.btree_id != btree_id ||
- BTREE_NODE_LEVEL(b->data) != level ||
- bkey_cmp(b->data->max_key, k->k.p));
+ EBUG_ON(b->c.btree_id != btree_id);
+ EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
+ EBUG_ON(bkey_cmp(b->data->max_key, k->k.p));
+ EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+ bkey_cmp(b->data->min_key,
+ bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
return b;
}
if (sib != btree_prev_sib)
swap(n1, n2);
- BUG_ON(bkey_cmp(bkey_successor(n1->key.k.p),
- n2->data->min_key));
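+ /*
+ * The two nodes should be exactly adjacent: if not, we've found a
+ * topology error, so report it and give the caller no sibling instead
+ * of BUG()ing:
+ */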
+ if (bkey_cmp(bkey_successor(n1->key.k.p),
+ n2->data->min_key)) {
+ char buf1[200], buf2[200];
+
+ bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&n1->key));
+ bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&n2->key));
+
+ bch2_fs_inconsistent(c, "btree topology error at btree %s level %u:\n"
+ "prev: %s\n"
+ "next: %s\n",
+ bch2_btree_ids[iter->btree_id], level,
+ buf1, buf2);
+
+ six_unlock_intent(&ret->c.lock);
+ ret = NULL;
+ }
}
bch2_btree_trans_verify_locks(trans);
__gc_pos_set(c, new_pos);
}
+/*
+ * Missing: if an interior btree node is empty, we need to do something -
+ * perhaps just kill it
+ */
static int bch2_gc_check_topology(struct bch_fs *c,
struct btree *b,
struct bkey_buf *prev,
? node_start
: bkey_successor(prev->k->k.p);
char buf1[200], buf2[200];
+ bool update_min = false;
+ bool update_max = false;
int ret = 0;
if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) {
bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev->k));
if (fsck_err_on(bkey_cmp(expected_start, bp->v.min_key), c,
- "btree node with incorrect min_key:\n prev %s\n cur %s",
+ "btree node with incorrect min_key at btree %s level %u:\n"
+ " prev %s\n"
+ " cur %s",
+ bch2_btree_ids[b->c.btree_id], b->c.level,
buf1,
- (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(cur.k)), buf2))) {
- BUG();
- }
+ (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(cur.k)), buf2)))
+ update_min = true;
}
if (fsck_err_on(is_last &&
bkey_cmp(cur.k->k.p, node_end), c,
- "btree node with incorrect max_key:\n %s\n expected %s",
+ "btree node with incorrect max_key at btree %s level %u:\n"
+ " %s\n"
+ " expected %s",
+ bch2_btree_ids[b->c.btree_id], b->c.level,
(bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(cur.k)), buf1),
- (bch2_bpos_to_text(&PBUF(buf2), node_end), buf2))) {
- BUG();
- }
+ (bch2_bpos_to_text(&PBUF(buf2), node_end), buf2)))
+ update_max = true;
bch2_bkey_buf_copy(prev, c, cur.k);
+
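+ /*
+ * This early in recovery we can't yet write to the btree, so repairs
+ * are staged as updates to the in-memory journal keys, which are
+ * overlaid over the btree and replayed later:
+ */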
+ if (update_min || update_max) {
+ struct bkey_i *new;
+ struct bkey_i_btree_ptr_v2 *bp = NULL;
+ struct btree *n;
+
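+ /*
+ * Journal keys are indexed by position: updating max_key changes this
+ * key's position, so delete the stale key at the old position first:
+ */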
+ if (update_max) {
+ ret = bch2_journal_key_delete(c, b->c.btree_id,
+ b->c.level, cur.k->k.p);
+ if (ret)
+ return ret;
+ }
+
+ new = kmalloc(bkey_bytes(&cur.k->k), GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
+
+ bkey_copy(new, cur.k);
+
+ if (new->k.type == KEY_TYPE_btree_ptr_v2)
+ bp = bkey_i_to_btree_ptr_v2(new);
+
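+ /* update_min is only set for btree_ptr_v2 keys, so bp is non-NULL here: */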
+ if (update_min)
+ bp->v.min_key = expected_start;
+ if (update_max)
+ new->k.p = node_end;
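+ /*
+ * Flag that this key's range was repaired, so the mismatch with the
+ * node's header isn't flagged as an error when the node is next read:
+ */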
+ if (bp)
+ SET_BTREE_PTR_RANGE_UPDATED(&bp->v, true);
+
+ ret = bch2_journal_key_insert(c, b->c.btree_id, b->c.level, new);
+ if (ret) {
+ kfree(new);
+ return ret;
+ }
+
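+ /*
+ * If the node is in the btree cache, its key and header have to be
+ * patched to match; the cache hash is indexed by the node's key, so
+ * rehash it:
+ */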
+ n = bch2_btree_node_get_noiter(c, cur.k, b->c.btree_id,
+ b->c.level - 1, true);
+ if (!IS_ERR_OR_NULL(n)) {
+ mutex_lock(&c->btree_cache.lock);
+ bch2_btree_node_hash_remove(&c->btree_cache, n);
+
+ bkey_copy(&n->key, new);
+ if (update_min)
+ n->data->min_key = expected_start;
+ if (update_max)
+ n->data->max_key = node_end;
+
+ ret = __bch2_btree_node_hash_insert(&c->btree_cache, n);
+ BUG_ON(ret);
+ mutex_unlock(&c->btree_cache.lock);
+ six_unlock_read(&n->c.lock);
+ }
+ }
fsck_err:
return ret;
}
ptr->dev, PTR_BUCKET_NR(ca, ptr),
bch2_data_types[ptr_data_type(k.k, ptr)],
ptr->gen, g->mark.gen)) {
+ /* XXX if it's a cached ptr, drop it */
g2->_mark.gen = g->_mark.gen = ptr->gen;
g2->gen_valid = g->gen_valid = true;
g2->_mark.data_type = 0;
g2->_mark.dirty_sectors = 0;
g2->_mark.cached_sectors = 0;
- set_bit(BCH_FS_FIXED_GENS, &c->flags);
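+ /* Marks must be redone with the corrected gen, on another GC pass: */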
+ set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
}
}
break;
if (b->c.level) {
- struct btree *child;
-
bch2_bkey_buf_reassemble(&cur, c, k);
k = bkey_i_to_s_c(cur.k);
!bch2_btree_and_journal_iter_peek(&iter).k);
if (ret)
break;
+ } else {
+ bch2_btree_and_journal_iter_advance(&iter);
+ }
+ }
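+
+ /*
+ * The loop above marked this node's keys; now do a second pass to walk
+ * the child nodes, so that pointers to unreadable nodes can be dropped
+ * and the topology rechecked:
+ */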
- if (b->c.level > target_depth) {
- child = bch2_btree_node_get_noiter(c, cur.k,
- b->c.btree_id, b->c.level - 1);
- ret = PTR_ERR_OR_ZERO(child);
- if (ret)
- break;
+ if (b->c.level > target_depth) {
+ bch2_btree_and_journal_iter_exit(&iter);
+ bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
- ret = bch2_gc_btree_init_recurse(c, child,
- target_depth);
- six_unlock_read(&child->c.lock);
+ while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+ struct btree *child;
+
+ bch2_bkey_buf_reassemble(&cur, c, k);
+ bch2_btree_and_journal_iter_advance(&iter);
+ child = bch2_btree_node_get_noiter(c, cur.k,
+ b->c.btree_id, b->c.level - 1,
+ false);
+ ret = PTR_ERR_OR_ZERO(child);
+
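+ /*
+ * Unreadable child node: delete the key that points to it, and
+ * re-run mark and sweep so marking is redone without it:
+ */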
+ if (fsck_err_on(ret == -EIO, c,
+ "unreadable btree node")) {
+ ret = bch2_journal_key_delete(c, b->c.btree_id,
+ b->c.level, cur.k->k.p);
if (ret)
- break;
+ goto fsck_err;
+
+ set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+ continue;
}
- } else {
- bch2_btree_and_journal_iter_advance(&iter);
+
+ if (ret)
+ break;
+
+ ret = bch2_gc_btree_init_recurse(c, child,
+ target_depth);
+ six_unlock_read(&child->c.lock);
+
+ if (ret)
+ break;
}
}
-
+fsck_err:
bch2_bkey_buf_exit(&cur, c);
bch2_bkey_buf_exit(&prev, c);
bch2_btree_and_journal_iter_exit(&iter);
bch2_mark_allocator_buckets(c);
c->gc_count++;
-out:
- if (!ret &&
- (test_bit(BCH_FS_FIXED_GENS, &c->flags) ||
- (!iter && bch2_test_restart_gc))) {
+
+ if (test_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags) ||
+ (!iter && bch2_test_restart_gc)) {
/*
* XXX: make sure gens we fixed got saved
*/
if (iter++ <= 2) {
- bch_info(c, "Fixed gens, restarting mark and sweep:");
- clear_bit(BCH_FS_FIXED_GENS, &c->flags);
+ bch_info(c, "Second GC pass needed, restarting:");
+ clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
__gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
percpu_down_write(&c->mark_lock);
bch_info(c, "Unable to fix bucket gens, looping");
ret = -EINVAL;
}
-
+out:
if (!ret) {
bch2_journal_block(&c->journal);