bcachefs: Improve trace_trans_restart_would_deadlock
author Kent Overstreet <kent.overstreet@linux.dev>
Fri, 26 May 2023 20:59:07 +0000 (16:59 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Mon, 1 Jan 2024 16:47:39 +0000 (11:47 -0500)
In the CI, we're seeing tests failing due to excessive would_deadlock
transaction restarts - the tracepoint now includes the lock cycle that
occurred.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_locking.c
fs/bcachefs/trace.h

index f83fab9e62fc1bdf9f959761ffaea904b992476b..b304c7fc58b1a1d0d1073855ee9a87fb4a50587f 100644 (file)
@@ -3054,6 +3054,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
        struct btree_path *path;
        struct btree_bkey_cached_common *b;
        static char lock_types[] = { 'r', 'i', 'w' };
+       struct task_struct *task = READ_ONCE(trans->locking_wait.task);
        unsigned l, idx;
 
        if (!out->nr_tabstops) {
@@ -3061,7 +3062,7 @@ void bch2_btree_trans_to_text(struct printbuf *out, struct btree_trans *trans)
                printbuf_tabstop_push(out, 32);
        }
 
-       prt_printf(out, "%i %s\n", trans->locking_wait.task->pid, trans->fn);
+       prt_printf(out, "%i %s\n", task ? task->pid : 0, trans->fn);
 
        trans_for_each_path_safe(trans, path, idx) {
                if (!path->nodes_locked)
index 6039278121dc3966ba2a2f3237c4b06042f2fefd..1eca320e7574ef251c2f62a5463fb93da7530f7c 100644 (file)
@@ -142,10 +142,28 @@ static bool lock_graph_remove_non_waiters(struct lock_graph *g)
        return false;
 }
 
+static void trace_would_deadlock(struct lock_graph *g, struct btree_trans *trans,
+                                unsigned long ip)
+{
+       struct bch_fs *c = trans->c;
+
+       count_event(c, trans_restart_would_deadlock);
+
+       if (trace_trans_restart_would_deadlock_enabled()) {
+               struct printbuf buf = PRINTBUF;
+
+               buf.atomic++;
+               print_cycle(&buf, g);
+
+               trace_trans_restart_would_deadlock(trans, ip, buf.buf);
+               printbuf_exit(&buf);
+       }
+}
+
 static int abort_lock(struct lock_graph *g, struct trans_waiting_for_lock *i)
 {
        if (i == g->g) {
-               trace_and_count(i->trans->c, trans_restart_would_deadlock, i->trans, _RET_IP_);
+               trace_would_deadlock(g, i->trans, _RET_IP_);
                return btree_trans_restart(i->trans, BCH_ERR_transaction_restart_would_deadlock);
        } else {
                i->trans->lock_must_abort = true;
@@ -266,15 +284,16 @@ int bch2_check_for_deadlock(struct btree_trans *trans, struct printbuf *cycle)
        unsigned path_idx;
        int ret;
 
+       g.nr = 0;
+
        if (trans->lock_must_abort) {
                if (cycle)
                        return -1;
 
-               trace_and_count(trans->c, trans_restart_would_deadlock, trans, _RET_IP_);
+               trace_would_deadlock(&g, trans, _RET_IP_);
                return btree_trans_restart(trans, BCH_ERR_transaction_restart_would_deadlock);
        }
 
-       g.nr = 0;
        lock_graph_down(&g, trans);
 next:
        if (!g.nr)
index e0c8db352bff673c570b42fda0c99e25a3b74d11..6e2ad6f3db980d7d116c2800a2bdc37acc172e24 100644 (file)
@@ -1205,10 +1205,11 @@ DEFINE_EVENT(transaction_restart_iter,  trans_restart_memory_allocation_failure,
        TP_ARGS(trans, caller_ip, path)
 );
 
-DEFINE_EVENT(transaction_event,        trans_restart_would_deadlock,
+DEFINE_EVENT(trans_str, trans_restart_would_deadlock,
        TP_PROTO(struct btree_trans *trans,
-                unsigned long caller_ip),
-       TP_ARGS(trans, caller_ip)
+                unsigned long caller_ip,
+                const char *cycle),
+       TP_ARGS(trans, caller_ip, cycle)
 );
 
 DEFINE_EVENT(transaction_event,        trans_restart_would_deadlock_recursion_limit,