bcachefs: Add lock held time stats
authorDaniel Hill <daniel@gluo.nz>
Thu, 14 Jul 2022 08:33:09 +0000 (20:33 +1200)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:35 +0000 (17:09 -0400)
We now record the length of time btree locks are held and expose this in debugfs.

Enabled via CONFIG_BCACHEFS_LOCK_TIME_STATS.

Signed-off-by: Daniel Hill <daniel@gluo.nz>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/Kconfig
fs/bcachefs/bcachefs.h
fs/bcachefs/btree_iter.c
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_locking.h
fs/bcachefs/btree_types.h
fs/bcachefs/debug.c

index d2eb65e9032b14e891925e591b3d78a5273c14e5..7ae85900e5b4b00d9740c407791bda9a9a79e775 100644 (file)
@@ -59,6 +59,12 @@ config BCACHEFS_TESTS
        help
        Include some unit and performance tests for the core btree code
 
+config BCACHEFS_LOCK_TIME_STATS
+       bool "bcachefs lock time statistics"
+       depends on BCACHEFS_FS
+       help
+       Expose statistics for how long we held a lock in debugfs
+
 config BCACHEFS_NO_LATENCY_ACCT
        bool "disable latency accounting and time stats"
        depends on BCACHEFS_FS
index 7020eee5de21862d39fd5a26bd7cfd6288b26372..9cd6f840b71a7b862b6f4d58de517e837f84bf13 100644 (file)
@@ -319,6 +319,8 @@ BCH_DEBUG_PARAMS_DEBUG()
 #undef BCH_DEBUG_PARAM
 #endif
 
+#define BCH_LOCK_TIME_NR 128
+
 #define BCH_TIME_STATS()                       \
        x(btree_node_mem_alloc)                 \
        x(btree_node_split)                     \
@@ -527,6 +529,11 @@ struct btree_debug {
        unsigned                id;
 };
 
+struct lock_held_stats {
+       struct bch2_time_stats  times[BCH_LOCK_TIME_NR];
+       const char              *names[BCH_LOCK_TIME_NR];
+};
+
 struct bch_fs_pcpu {
        u64                     sectors_available;
 };
@@ -920,6 +927,8 @@ mempool_t           bio_bounce_pages;
        bool                    promote_whole_extents;
 
        struct bch2_time_stats  times[BCH_TIME_STAT_NR];
+
+       struct lock_held_stats  lock_held_stats;
 };
 
 static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
index d708bf32d40858fe6e16a8cab4f990e279399460..30958cbb95322a7f030b0905761ff15704d2efd8 100644 (file)
@@ -177,7 +177,7 @@ bool __bch2_btree_node_relock(struct btree_trans *trans,
        if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
            (btree_node_lock_seq_matches(path, b, level) &&
             btree_node_lock_increment(trans, b, level, want))) {
-               mark_btree_node_locked(path, level, want);
+               mark_btree_node_locked(trans, path, level, want);
                return true;
        }
 fail:
@@ -230,7 +230,7 @@ bool bch2_btree_node_upgrade(struct btree_trans *trans,
 
        return false;
 success:
-       mark_btree_node_intent_locked(path, level);
+       mark_btree_node_intent_locked(trans, path, level);
        return true;
 }
 
@@ -1161,7 +1161,7 @@ void bch2_trans_node_add(struct btree_trans *trans, struct btree *b)
                            t != BTREE_NODE_UNLOCKED) {
                                btree_node_unlock(trans, path, b->c.level);
                                six_lock_increment(&b->c.lock, (enum six_lock_type) t);
-                               mark_btree_node_locked(path, b->c.level, (enum six_lock_type) t);
+                               mark_btree_node_locked(trans, path, b->c.level, (enum six_lock_type) t);
                        }
 
                        btree_path_level_init(trans, path, b);
@@ -1238,7 +1238,7 @@ static inline int btree_path_lock_root(struct btree_trans *trans,
                        for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
                                path->l[i].b = NULL;
 
-                       mark_btree_node_locked(path, path->level, lock_type);
+                       mark_btree_node_locked(trans, path, path->level, lock_type);
                        btree_path_level_init(trans, path, b);
                        return 0;
                }
@@ -1402,7 +1402,7 @@ static __always_inline int btree_path_down(struct btree_trans *trans,
        if (unlikely(ret))
                goto err;
 
-       mark_btree_node_locked(path, level, lock_type);
+       mark_btree_node_locked(trans, path, level, lock_type);
        btree_path_level_init(trans, path, b);
 
        if (likely(!trans->journal_replay_not_finished &&
@@ -3273,6 +3273,21 @@ void __bch2_trans_init(struct btree_trans *trans, struct bch_fs *c,
        trans->journal_replay_not_finished =
                !test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
 
+       /*
+        * Map this trans's init function to a slot in lock_held_stats:
+        * bound the scan so a full names[] table cannot be indexed past
+        * BCH_LOCK_TIME_NR (the unbounded scan read out of bounds before
+        * the range check below ever ran).
+        */
+       while (trans->lock_name_idx < BCH_LOCK_TIME_NR &&
+              c->lock_held_stats.names[trans->lock_name_idx] != fn &&
+              c->lock_held_stats.names[trans->lock_name_idx] != 0)
+               trans->lock_name_idx++;
+
+       if (trans->lock_name_idx >= BCH_LOCK_TIME_NR)
+               pr_warn_once("lock_times array not big enough!");
+       else
+               c->lock_held_stats.names[trans->lock_name_idx] = fn;
+
        bch2_trans_alloc_paths(trans, c);
 
        if (expected_mem_bytes) {
index baf1f25b91cad37cc9ec4accc0d7d93558a0ccdd..e5a29240bbcc7dfbdaeb06e067d57c0baa3be29d 100644 (file)
@@ -382,7 +382,7 @@ retry:
                if (!ck)
                        goto retry;
 
-               mark_btree_node_locked(path, 0, SIX_LOCK_intent);
+               mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
                path->locks_want = 1;
        } else {
                enum six_lock_type lock_want = __btree_lock_want(path, 0);
@@ -403,7 +403,7 @@ retry:
                        goto retry;
                }
 
-               mark_btree_node_locked(path, 0, lock_want);
+               mark_btree_node_locked(trans, path, 0, lock_want);
        }
 
        path->l[0].lock_seq     = ck->c.lock.state.seq;
index 230f376993ae2f108d5288162a8adef926bd923d..b8708466c4e3bce260d9b19bdf7f59fdfb52487d 100644 (file)
@@ -57,7 +57,8 @@ static inline void mark_btree_node_unlocked(struct btree_path *path,
        path->nodes_intent_locked &= ~(1 << level);
 }
 
-static inline void mark_btree_node_locked(struct btree_path *path,
+static inline void mark_btree_node_locked_noreset(struct btree_trans *trans,
+                                         struct btree_path *path,
                                          unsigned level,
                                          enum six_lock_type type)
 {
@@ -69,10 +70,22 @@ static inline void mark_btree_node_locked(struct btree_path *path,
        path->nodes_intent_locked |= type << level;
 }
 
-static inline void mark_btree_node_intent_locked(struct btree_path *path,
+static inline void mark_btree_node_locked(struct btree_trans *trans,
+                                         struct btree_path *path,
+                                         unsigned level,
+                                         enum six_lock_type type)
+{
+       mark_btree_node_locked_noreset(trans, path, level, type);
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+       path->l[level].lock_taken_time = ktime_get_ns();
+#endif
+}
+
+static inline void mark_btree_node_intent_locked(struct btree_trans *trans,
+                                                struct btree_path *path,
                                                 unsigned level)
 {
-       mark_btree_node_locked(path, level, SIX_LOCK_intent);
+       mark_btree_node_locked_noreset(trans, path, level, SIX_LOCK_intent);
 }
 
 static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level)
@@ -101,8 +114,18 @@ static inline void btree_node_unlock(struct btree_trans *trans,
 
        EBUG_ON(level >= BTREE_MAX_DEPTH);
 
-       if (lock_type != BTREE_NODE_UNLOCKED)
+       if (lock_type != BTREE_NODE_UNLOCKED) {
                six_unlock_type(&path->l[level].b->c.lock, lock_type);
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+               if (trans->lock_name_idx < BCH_LOCK_TIME_NR) {
+                       struct bch_fs *c = trans->c;
+
+                       __bch2_time_stats_update(&c->lock_held_stats.times[trans->lock_name_idx],
+                                              path->l[level].lock_taken_time,
+                                                ktime_get_ns());
+               }
+#endif
+       }
        mark_btree_node_unlocked(path, level);
 }
 
@@ -196,10 +219,17 @@ static inline bool btree_node_lock(struct btree_trans *trans,
        EBUG_ON(level >= BTREE_MAX_DEPTH);
        EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
 
-       return likely(six_trylock_type(&b->c.lock, type)) ||
+       if (likely(six_trylock_type(&b->c.lock, type)) ||
                btree_node_lock_increment(trans, b, level, type) ||
                __bch2_btree_node_lock(trans, path, b, pos, level, type,
-                                      should_sleep_fn, p, ip);
+                                      should_sleep_fn, p, ip)) {
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+               path->l[b->c.level].lock_taken_time = ktime_get_ns();
+#endif
+               return true;
+       } else {
+               return false;
+       }
 }
 
 bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned);
@@ -252,5 +282,3 @@ static inline void bch2_btree_node_lock_write(struct btree_trans *trans,
 }
 
 #endif /* _BCACHEFS_BTREE_LOCKING_H */
-
-
index b184ec512499e51ae8bc1c4c7a4fdbcab710b322..8cf3ef74902037a8851a042b1b3420325148419f 100644 (file)
@@ -251,6 +251,9 @@ struct btree_path {
                struct btree    *b;
                struct btree_node_iter iter;
                u32             lock_seq;
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+               u64             lock_taken_time;
+#endif
        }                       l[BTREE_MAX_DEPTH];
 #ifdef CONFIG_BCACHEFS_DEBUG
        unsigned long           ip_allocated;
@@ -436,6 +439,7 @@ struct btree_trans {
        unsigned                journal_u64s;
        unsigned                journal_preres_u64s;
        struct replicas_delta_list *fs_usage_deltas;
+       int                      lock_name_idx;
 };
 
 #define BTREE_FLAGS()                                                  \
index 0f25b75e3de7474efccfb8f537b0d403bf6c7572..45f5229f20ebf6553eefea7372119ca5dc7335a1 100644 (file)
@@ -638,6 +638,77 @@ static const struct file_operations journal_pins_ops = {
        .read           = bch2_journal_pins_read,
 };
 
+/* debugfs interface exposing per-function btree lock hold-time stats */
+static int lock_held_stats_open(struct inode *inode, struct file *file)
+{
+       struct bch_fs *c = inode->i_private;
+       struct dump_iter *i;
+
+       i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
+
+       if (!i)
+               return -ENOMEM;
+
+       i->iter = 0;
+       i->c    = c;
+       i->buf  = PRINTBUF;
+       file->private_data = i;
+
+       return 0;
+}
+
+static int lock_held_stats_release(struct inode *inode, struct file *file)
+{
+       struct dump_iter *i = file->private_data;
+
+       printbuf_exit(&i->buf);
+       kfree(i);
+
+       return 0;
+}
+
+static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
+                                     size_t size, loff_t *ppos)
+{
+       struct dump_iter        *i = file->private_data;
+       struct lock_held_stats *lhs = &i->c->lock_held_stats;
+       int err;
+
+       i->ubuf = buf;
+       i->size = size;
+       i->ret  = 0;
+
+       /* Check the bound before indexing names[], not after: */
+       while (i->iter < BCH_LOCK_TIME_NR && lhs->names[i->iter] != 0) {
+               err = flush_buf(i);
+               if (err)
+                       return err;
+
+               if (!i->size)
+                       break;
+
+               prt_printf(&i->buf, "%s:", lhs->names[i->iter]);
+               prt_newline(&i->buf);
+               printbuf_indent_add(&i->buf, 8);
+               bch2_time_stats_to_text(&i->buf, &lhs->times[i->iter]);
+               printbuf_indent_sub(&i->buf, 8);
+               prt_newline(&i->buf);
+               i->iter++;
+       }
+
+       if (i->buf.allocation_failure)
+               return -ENOMEM;
+
+       return i->ret;
+}
+
+static const struct file_operations lock_held_stats_op = {
+       .owner = THIS_MODULE,
+       .open = lock_held_stats_open,
+       .release = lock_held_stats_release,
+       .read = lock_held_stats_read,
+};
+
 void bch2_fs_debug_exit(struct bch_fs *c)
 {
        if (!IS_ERR_OR_NULL(c->fs_debug_dir))
@@ -668,6 +737,11 @@ void bch2_fs_debug_init(struct bch_fs *c)
        debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
                            c->btree_debug, &journal_pins_ops);
 
+       if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
+               debugfs_create_file("lock_held_stats", 0400, c->fs_debug_dir,
+                               c, &lock_held_stats_op);
+       }
+
        c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
        if (IS_ERR_OR_NULL(c->btree_debug_dir))
                return;