bcachefs: Improve trans_restart_split_race tracepoint
author: Kent Overstreet <kent.overstreet@linux.dev>
Thu, 30 Mar 2023 20:04:02 +0000 (16:04 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:58 +0000 (17:09 -0400)
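We are seeing occasional test failures where we get stuck in a livelock
that involves this event. Make the tracepoint also record the btree node's
level, how many blocks have been written out of the total, and how many
u64s remain - this will help track it down.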

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_update_interior.c
fs/bcachefs/trace.c
fs/bcachefs/trace.h

index a58d2a142b67106ff9e7bf0692dee4866bebbe95..6ba0954e648e6ac9161f84fbdc3aa654c780d349 100644 (file)
@@ -1680,7 +1680,7 @@ static int bch2_btree_insert_node(struct btree_update *as, struct btree_trans *t
        BUG_ON(!as || as->b);
        bch2_verify_keylist_sorted(keys);
 
-       if (!(local_clock() & 63))
+       if ((local_clock() & 63) == 63)
                return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
 
        ret = bch2_btree_node_lock_write(trans, path, &b->c);
@@ -1720,7 +1720,7 @@ split:
         * bch2_btree_path_upgrade() and allocating more nodes:
         */
        if (b->c.level >= as->update_level) {
-               trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_);
+               trace_and_count(c, trans_restart_split_race, trans, _THIS_IP_, b);
                return btree_trans_restart(trans, BCH_ERR_transaction_restart_split_race);
        }
 
index 5c1d724cbb554d3f31bed2440cda3a3c3fc0cb9b..33efa6005c6f2b1f0885a1f07f146bfd5de0a0a4 100644 (file)
@@ -2,8 +2,10 @@
 #include "bcachefs.h"
 #include "alloc_types.h"
 #include "buckets.h"
+#include "btree_cache.h"
 #include "btree_iter.h"
 #include "btree_locking.h"
+#include "btree_update_interior.h"
 #include "keylist.h"
 #include "opts.h"
 #include "six.h"
index 7e48e7676980f590a4500b32257425615990a7b9..65521c046254d239283fc77b5ab71535fa4349ef 100644 (file)
@@ -831,10 +831,35 @@ DEFINE_EVENT(transaction_event,   trans_restart_injected,
        TP_ARGS(trans, caller_ip)
 );
 
-DEFINE_EVENT(transaction_event,        trans_restart_split_race,
+TRACE_EVENT(trans_restart_split_race,
        TP_PROTO(struct btree_trans *trans,
-                unsigned long caller_ip),
-       TP_ARGS(trans, caller_ip)
+                unsigned long caller_ip,
+                struct btree *b),
+       TP_ARGS(trans, caller_ip, b),
+
+       TP_STRUCT__entry(
+               __array(char,                   trans_fn, 32    )
+               __field(unsigned long,          caller_ip       )
+               __field(u8,                     level           )
+               __field(u16,                    written         )
+               __field(u16,                    blocks          )
+               __field(u16,                    u64s_remaining  )
+       ),
+
+       TP_fast_assign(
+               strscpy(__entry->trans_fn, trans->fn, sizeof(__entry->trans_fn));
+               __entry->caller_ip              = caller_ip;
+               __entry->level          = b->c.level;
+               __entry->written        = b->written;
+               __entry->blocks         = btree_blocks(trans->c);
+               __entry->u64s_remaining = bch_btree_keys_u64s_remaining(trans->c, b);
+       ),
+
+       TP_printk("%s %pS l=%u written %u/%u u64s remaining %u",
+                 __entry->trans_fn, (void *) __entry->caller_ip,
+                 __entry->level,
+                 __entry->written, __entry->blocks,
+                 __entry->u64s_remaining)
 );
 
 DEFINE_EVENT(transaction_event,        trans_blocked_journal_reclaim,