From 3d86f13df67b554a7b27e28a4b144425710409bf Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 30 Mar 2023 16:04:02 -0400 Subject: [PATCH] bcachefs: Improve trans_restart_split_race tracepoint Seeing occasional test failures where we get stuck in a livelock that involves this event - this will help track it down. Signed-off-by: Kent Overstreet --- fs/bcachefs/btree_update_interior.c | 4 ++-- fs/bcachefs/trace.c | 2 ++ fs/bcachefs/trace.h | 31 ++++++++++++++++++++++++++--- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index a58d2a142b671..6ba0954e648e6 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -1680,7 +1680,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t BUG_ON(!as || as->b); bch2_verify_keylist_sorted(keys); - if (!(local_clock() & 63)) + if ((local_clock() & 63) == 63) return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race); ret = bch2_btree_node_lock_write(trans, path, &b->c); @@ -1720,7 +1720,7 @@ split: * bch2_btree_path_upgrade() and allocating more nodes: */ if (b->c.level >= as->update_level) { - trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_); + trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b); return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race); } diff --git a/fs/bcachefs/trace.c b/fs/bcachefs/trace.c index 5c1d724cbb554..33efa6005c6f2 100644 --- a/fs/bcachefs/trace.c +++ b/fs/bcachefs/trace.c @@ -2,8 +2,10 @@ #include "bcachefs.h" #include "alloc_types.h" #include "buckets.h" +#include "btree_cache.h" #include "btree_iter.h" #include "btree_locking.h" +#include "btree_update_interior.h" #include "keylist.h" #include "opts.h" #include "six.h" diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h index 7e48e7676980f..65521c046254d 100644 --- a/fs/bcachefs/trace.h +++ b/fs/bcachefs/trace.h @@ -831,10 +831,35 @@ DEFINE_EVENT(transaction_event, trans_restart_injected, TP_ARGS(trans, caller_ip) ); -DEFINE_EVENT(transaction_event, trans_restart_split_race, +TRACE_EVENT(trans_restart_split_race, TP_PROTO(struct btree_trans *trans, - unsigned long caller_ip), - TP_ARGS(trans, caller_ip) + unsigned long caller_ip, + struct btree *b), + TP_ARGS(trans, caller_ip, b), + + TP_STRUCT__entry( + __array(char, trans_fn, 32 ) + __field(unsigned long, caller_ip ) + __field(u8, level ) + __field(u16, written ) + __field(u16, blocks ) + __field(u16, u64s_remaining ) + ), + + TP_fast_assign( + strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn)); + __entry->caller_ip = caller_ip; + __entry->level = b->c.level; + __entry->written = b->written; + __entry->blocks = btree_blocks(trans->c); + __entry->u64s_remaining = bch_btree_keys_u64s_remaining(trans->c, b); + ), + + TP_printk("%s %pS l=%u written %u/%u u64s remaining %u", + __entry->trans_fn, (void *) __entry->caller_ip, + __entry->level, + __entry->written, __entry->blocks, + __entry->u64s_remaining) ); DEFINE_EVENT(transaction_event, trans_blocked_journal_reclaim, -- 2.30.2