From: Kent Overstreet Date: Tue, 15 Feb 2022 05:06:59 +0000 (-0500) Subject: bcachefs: Check for stale dirty pointer before reads X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=eb331fe5a4e801dc11d96ba7fbda0a91c8bd626c;p=linux.git bcachefs: Check for stale dirty pointer before reads Since we retry reads when we discover we read from a pointer that went stale, if a dirty pointer is erroniously stale it would cause us to loop retrying that read forever - unless we check before issuing the read, while the btree is still locked, when we know that a dirty pointer should never be stale. This patch adds that check, along with printing some helpful debug info. Signed-off-by: Kent Overstreet --- diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c index 5fce958bafc90..9161125aec177 100644 --- a/fs/bcachefs/fs-io.c +++ b/fs/bcachefs/fs-io.c @@ -1043,8 +1043,6 @@ retry: sectors = min(sectors, k.k->size - offset_into_extent); - bch2_trans_unlock(trans); - if (readpages_iter) readpage_bio_extend(readpages_iter, &rbio->bio, sectors, extent_partial_reads_expensive(k)); diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c index 218934b4e19b3..914e22c5c2478 100644 --- a/fs/bcachefs/io.c +++ b/fs/bcachefs/io.c @@ -2032,6 +2032,33 @@ err: return ret; } +static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, + struct bkey_s_c k, + struct bch_extent_ptr ptr) +{ + struct bch_fs *c = trans->c; + struct bch_dev *ca = bch_dev_bkey_exists(c, ptr.dev); + struct btree_iter iter; + char buf[200]; + int ret; + + bch2_bkey_val_to_text(&PBUF(buf), c, k); + bch2_fs_inconsistent(c, "Attempting to read from stale dirty pointer: %s", buf); + + bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, + POS(ptr.dev, PTR_BUCKET_NR(ca, &ptr)), + BTREE_ITER_CACHED); + + ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); + if (ret) + return; + + bch2_bkey_val_to_text(&PBUF(buf), c, k); + bch_err(c, "%s", buf); + bch_err(c, "memory gen: %u", *bucket_gen(ca, iter.pos.offset)); + bch2_trans_iter_exit(trans, &iter); +} + int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, struct bvec_iter iter, struct bpos read_pos, enum btree_id data_btree, struct bkey_s_c k, @@ -2041,7 +2068,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, struct bch_fs *c = trans->c; struct extent_ptr_decoded pick; struct bch_read_bio *rbio = NULL; - struct bch_dev *ca; + struct bch_dev *ca = NULL; struct promote_op *promote = NULL; bool bounce = false, read_full = false, narrow_crcs = false; struct bpos data_pos = bkey_start_pos(k.k); @@ -2058,7 +2085,7 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, zero_fill_bio_iter(&orig->bio, iter); goto out_read_done; } - +retry_pick: pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick); /* hole or reservation - just zero fill: */ @@ -2071,8 +2098,27 @@ int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, goto err; } - if (pick_ret > 0) - ca = bch_dev_bkey_exists(c, pick.ptr.dev); + ca = bch_dev_bkey_exists(c, pick.ptr.dev); + + /* + * Stale dirty pointers are treated as IO errors, but @failed isn't + * allocated unless we're in the retry path - so if we're not in the + * retry path, don't check here, it'll be caught in bch2_read_endio() + * and we'll end up in the retry path: + */ + if ((flags & BCH_READ_IN_RETRY) && + !pick.ptr.cached && + unlikely(ptr_stale(ca, &pick.ptr))) { + read_from_stale_dirty_pointer(trans, k, pick.ptr); + bch2_mark_io_failure(failed, &pick); + goto retry_pick; + } + + /* + * Unlock the iterator while the btree node's lock is still in + * cache, before doing the IO: + */ + bch2_trans_unlock(trans); if (flags & BCH_READ_NODECODE) { /* @@ -2367,12 +2413,6 @@ retry: */ sectors = min(sectors, k.k->size - offset_into_extent); - /* - * Unlock the iterator while the btree node's lock is still in - * cache, before doing the IO: - */ - bch2_trans_unlock(&trans); - bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9; swap(bvec_iter.bi_size, bytes); diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 04971bf847bf8..4751d79219cb2 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -752,10 +752,12 @@ static int __bch2_move_data(struct bch_fs *c, BUG(); } - /* unlock before doing IO: */ + /* + * The iterator gets unlocked by __bch2_read_extent - need to + * save a copy of @k elsewhere: + */ bch2_bkey_buf_reassemble(&sk, c, k); k = bkey_i_to_s_c(sk.k); - bch2_trans_unlock(&trans); ret2 = bch2_move_extent(&trans, ctxt, wp, io_opts, btree_id, k, data_cmd, data_opts);