 #include "btree_iter.h"
 #include "btree_locking.h"
 #include "debug.h"
+#include "error.h"
 #include "trace.h"
 
 #include <linux/prefetch.h>
                return ERR_PTR(-EIO);
        }
 
-       EBUG_ON(b->c.btree_id != iter->btree_id ||
-               BTREE_NODE_LEVEL(b->data) != level ||
-               bkey_cmp(b->data->max_key, k->k.p));
+       EBUG_ON(b->c.btree_id != iter->btree_id);
+       EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
+       EBUG_ON(bkey_cmp(b->data->max_key, k->k.p));
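+       /* v2 btree pointers record min_key: check it against the node header */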
+       EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+               bkey_cmp(b->data->min_key,
+                        bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
 
        return b;
 }
 struct btree *bch2_btree_node_get_noiter(struct bch_fs *c,
                                         const struct bkey_i *k,
                                         enum btree_id btree_id,
-                                        unsigned level)
+                                        unsigned level,
+                                        bool nofill)
 {
        struct btree_cache *bc = &c->btree_cache;
        struct btree *b;
 retry:
        b = btree_cache_find(bc, k);
        if (unlikely(!b)) {
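+               /* nofill: don't read the node in if it isn't already cached */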
+               if (nofill)
+                       return NULL;
+
                b = bch2_btree_node_fill(c, NULL, k, btree_id,
                                         level, SIX_LOCK_read, true);
 
                return ERR_PTR(-EIO);
        }
 
-       EBUG_ON(b->c.btree_id != btree_id ||
-               BTREE_NODE_LEVEL(b->data) != level ||
-               bkey_cmp(b->data->max_key, k->k.p));
+       EBUG_ON(b->c.btree_id != btree_id);
+       EBUG_ON(BTREE_NODE_LEVEL(b->data) != level);
+       EBUG_ON(bkey_cmp(b->data->max_key, k->k.p));
+       EBUG_ON(b->key.k.type == KEY_TYPE_btree_ptr_v2 &&
+               bkey_cmp(b->data->min_key,
+                        bkey_i_to_btree_ptr_v2(&b->key)->v.min_key));
 
        return b;
 }
                if (sib != btree_prev_sib)
                        swap(n1, n2);
 
-               BUG_ON(bkey_cmp(bkey_successor(n1->key.k.p),
-                               n2->data->min_key));
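+               /*
+                * Sibling nodes that don't line up are a topology error:
+                * report it and return NULL rather than a bad sibling.
+                */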
+               if (bkey_cmp(bkey_successor(n1->key.k.p),
+                            n2->data->min_key)) {
+                       char buf1[200], buf2[200];
+
+                       bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(&n1->key));
+                       bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(&n2->key));
+
+                       bch2_fs_inconsistent(c, "btree topology error at btree %s level %u:\n"
+                                            "prev: %s\n"
+                                            "next: %s\n",
+                                            bch2_btree_ids[iter->btree_id], level,
+                                            buf1, buf2);
+
+                       six_unlock_intent(&ret->c.lock);
+                       ret = NULL;
+               }
        }
 
        bch2_btree_trans_verify_locks(trans);
 
        __gc_pos_set(c, new_pos);
 }
 
+/*
+ * Missing: if an interior btree node is empty, we need to do something -
+ * perhaps just kill it
+ */
 static int bch2_gc_check_topology(struct bch_fs *c,
                                  struct btree *b,
                                  struct bkey_buf *prev,
                ? node_start
                : bkey_successor(prev->k->k.p);
        char buf1[200], buf2[200];
+       bool update_min = false;
+       bool update_max = false;
        int ret = 0;
 
        if (cur.k->k.type == KEY_TYPE_btree_ptr_v2) {
                        bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(prev->k));
 
                if (fsck_err_on(bkey_cmp(expected_start, bp->v.min_key), c,
-                               "btree node with incorrect min_key:\n  prev %s\n  cur %s",
+                               "btree node with incorrect min_key at btree %s level %u:\n"
+                               "  prev %s\n"
+                               "  cur %s",
+                               bch2_btree_ids[b->c.btree_id], b->c.level,
                                buf1,
-                               (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(cur.k)), buf2))) {
-                       BUG();
-               }
+                               (bch2_bkey_val_to_text(&PBUF(buf2), c, bkey_i_to_s_c(cur.k)), buf2)))
+                       update_min = true;
        }
 
        if (fsck_err_on(is_last &&
                        bkey_cmp(cur.k->k.p, node_end), c,
-                       "btree node with incorrect max_key:\n  %s\n  expected %s",
+                       "btree node with incorrect max_key at btree %s level %u:\n"
+                       "  %s\n"
+                       "  expected %s",
+                       bch2_btree_ids[b->c.btree_id], b->c.level,
                        (bch2_bkey_val_to_text(&PBUF(buf1), c, bkey_i_to_s_c(cur.k)), buf1),
-                       (bch2_bpos_to_text(&PBUF(buf2), node_end), buf2))) {
-               BUG();
-       }
+                       (bch2_bpos_to_text(&PBUF(buf2), node_end), buf2)))
+               update_max = true;
 
        bch2_bkey_buf_copy(prev, c, cur.k);
+
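+       /*
+        * Repair: emit a corrected key via the journal.  If max_key changed,
+        * the key's position changes too, so delete the old key first.  Also
+        * patch any cached copy of the child node so it matches the new key.
+        */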
+       if (update_min || update_max) {
+               struct bkey_i *new;
+               struct bkey_i_btree_ptr_v2 *bp = NULL;
+               struct btree *n;
+
+               if (update_max) {
+                       ret = bch2_journal_key_delete(c, b->c.btree_id,
+                                                     b->c.level, cur.k->k.p);
+                       if (ret)
+                               return ret;
+               }
+
+               new = kmalloc(bkey_bytes(&cur.k->k), GFP_KERNEL);
+               if (!new)
+                       return -ENOMEM;
+
+               bkey_copy(new, cur.k);
+
+               if (new->k.type == KEY_TYPE_btree_ptr_v2)
+                       bp = bkey_i_to_btree_ptr_v2(new);
+
+               if (update_min)
+                       bp->v.min_key = expected_start;
+               if (update_max)
+                       new->k.p = node_end;
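+               /* record in the pointer that its range was updated by repair */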
+               if (bp)
+                       SET_BTREE_PTR_RANGE_UPDATED(&bp->v, true);
+
+               ret = bch2_journal_key_insert(c, b->c.btree_id, b->c.level, new);
+               if (ret) {
+                       kfree(new);
+                       return ret;
+               }
+
+               n = bch2_btree_node_get_noiter(c, cur.k, b->c.btree_id,
+                                              b->c.level - 1, true);
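+               /* n may be an ERR_PTR if the cached node had a read error */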
+               if (!IS_ERR_OR_NULL(n)) {
+                       mutex_lock(&c->btree_cache.lock);
+                       bch2_btree_node_hash_remove(&c->btree_cache, n);
+
+                       bkey_copy(&n->key, new);
+                       if (update_min)
+                               n->data->min_key = expected_start;
+                       if (update_max)
+                               n->data->max_key = node_end;
+
+                       ret = __bch2_btree_node_hash_insert(&c->btree_cache, n);
+                       BUG_ON(ret);
+                       mutex_unlock(&c->btree_cache.lock);
+                       six_unlock_read(&n->c.lock);
+               }
+       }
 fsck_err:
        return ret;
 }
                                        ptr->dev, PTR_BUCKET_NR(ca, ptr),
                                        bch2_data_types[ptr_data_type(k.k, ptr)],
                                        ptr->gen, g->mark.gen)) {
+                               /* XXX if it's a cached ptr, drop it */
                                g2->_mark.gen   = g->_mark.gen          = ptr->gen;
                                g2->gen_valid   = g->gen_valid          = true;
                                g2->_mark.data_type             = 0;
                                g2->_mark.dirty_sectors         = 0;
                                g2->_mark.cached_sectors        = 0;
-                               set_bit(BCH_FS_FIXED_GENS, &c->flags);
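+                               /* gens were repaired; schedule another mark-and-sweep pass to pick up the fix */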
+                               set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
                                set_bit(BCH_FS_NEED_ALLOC_WRITE, &c->flags);
                        }
                }
                        break;
 
                if (b->c.level) {
-                       struct btree *child;
-
                        bch2_bkey_buf_reassemble(&cur, c, k);
                        k = bkey_i_to_s_c(cur.k);
 
                                        !bch2_btree_and_journal_iter_peek(&iter).k);
                        if (ret)
                                break;
+               } else {
+                       bch2_btree_and_journal_iter_advance(&iter);
+               }
+       }
 
-                       if (b->c.level > target_depth) {
-                               child = bch2_btree_node_get_noiter(c, cur.k,
-                                                       b->c.btree_id, b->c.level - 1);
-                               ret = PTR_ERR_OR_ZERO(child);
-                               if (ret)
-                                       break;
+       if (b->c.level > target_depth) {
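+               /*
+                * Second pass: keys at this level have been checked (and
+                * possibly repaired) above, so walk them again and recurse
+                * into each child node.
+                */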
+               bch2_btree_and_journal_iter_exit(&iter);
+               bch2_btree_and_journal_iter_init_node_iter(&iter, c, b);
 
-                               ret = bch2_gc_btree_init_recurse(c, child,
-                                               target_depth);
-                               six_unlock_read(&child->c.lock);
+               while ((k = bch2_btree_and_journal_iter_peek(&iter)).k) {
+                       struct btree *child;
+
+                       bch2_bkey_buf_reassemble(&cur, c, k);
+                       bch2_btree_and_journal_iter_advance(&iter);
 
+                       child = bch2_btree_node_get_noiter(c, cur.k,
+                                               b->c.btree_id, b->c.level - 1,
+                                               false);
+                       ret = PTR_ERR_OR_ZERO(child);
+
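+                       /*
+                        * If the child node can't be read, drop its key and
+                        * flag another GC pass to remark the tree without it.
+                        */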
+                       if (fsck_err_on(ret == -EIO, c,
+                                       "unreadable btree node")) {
+                               ret = bch2_journal_key_delete(c, b->c.btree_id,
+                                                             b->c.level, cur.k->k.p);
                                if (ret)
-                                       break;
+                                       return ret;
+
+                               set_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
+                               continue;
                        }
-               } else {
-                       bch2_btree_and_journal_iter_advance(&iter);
+
+                       if (ret)
+                               break;
+
+                       ret = bch2_gc_btree_init_recurse(c, child,
+                                                        target_depth);
+                       six_unlock_read(&child->c.lock);
+
+                       if (ret)
+                               break;
                }
        }
-
+fsck_err:
        bch2_bkey_buf_exit(&cur, c);
        bch2_bkey_buf_exit(&prev, c);
        bch2_btree_and_journal_iter_exit(&iter);
        bch2_mark_allocator_buckets(c);
 
        c->gc_count++;
-out:
-       if (!ret &&
-           (test_bit(BCH_FS_FIXED_GENS, &c->flags) ||
-            (!iter && bch2_test_restart_gc))) {
+
+       if (test_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags) ||
+           (!iter && bch2_test_restart_gc)) {
                /*
                 * XXX: make sure gens we fixed got saved
                 */
                if (iter++ <= 2) {
-                       bch_info(c, "Fixed gens, restarting mark and sweep:");
-                       clear_bit(BCH_FS_FIXED_GENS, &c->flags);
+                       bch_info(c, "Second GC pass needed, restarting:");
+                       clear_bit(BCH_FS_NEED_ANOTHER_GC, &c->flags);
                        __gc_pos_set(c, gc_phase(GC_PHASE_NOT_RUNNING));
 
                        percpu_down_write(&c->mark_lock);
                bch_info(c, "Unable to fix bucket gens, looping");
                ret = -EINVAL;
        }
-
+out:
        if (!ret) {
                bch2_journal_block(&c->journal);