We now record how long btree node locks are held, aggregated per transaction-init callsite, and expose the statistics in debugfs.
Enabled via CONFIG_BCACHEFS_LOCK_TIME_STATS.
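
In outline: when a btree node lock is taken, the btree_path level stashes a
ktime_get_ns() timestamp, and when that level is unlocked the elapsed time is
fed into a bch2_time_stats slot in the filesystem's lock_held_stats table,
indexed by the transaction's lock_name_idx (looked up once at transaction init
from the callsite name). A minimal sketch of that record/account pattern, with
simplified stand-in types (hold_stats, level_lock and the helpers below are
illustrative only, not the structures this patch adds):

  #include <linux/ktime.h>
  #include <linux/types.h>

  struct hold_stats {
          u64 count;      /* number of hold intervals recorded */
          u64 total_ns;   /* sum of hold times, in nanoseconds */
          u64 max_ns;     /* longest single hold seen */
  };

  struct level_lock {
          u64 taken_ns;   /* set when the lock is acquired */
  };

  /* analogous to mark_btree_node_locked() recording lock_taken_time */
  static inline void level_lock_taken(struct level_lock *l)
  {
          l->taken_ns = ktime_get_ns();
  }

  /* analogous to btree_node_unlock() calling __bch2_time_stats_update() */
  static inline void level_lock_released(struct level_lock *l,
                                         struct hold_stats *s)
  {
          u64 held = ktime_get_ns() - l->taken_ns;

          s->count++;
          s->total_ns += held;
          if (held > s->max_ns)
                  s->max_ns = held;
  }

The aggregated numbers can then be read back from the "lock_held_stats" file
this patch creates in the filesystem's debugfs directory.
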
Signed-off-by: Daniel Hill <daniel@gluo.nz>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
help
Include some unit and performance tests for the core btree code
+config BCACHEFS_LOCK_TIME_STATS
+ bool "bcachefs lock time statistics"
+ depends on BCACHEFS_FS
+ help
+ Expose statistics in debugfs for how long btree node locks are held
+
config BCACHEFS_NO_LATENCY_ACCT
bool "disable latency accounting and time stats"
depends on BCACHEFS_FS
#undef BCH_DEBUG_PARAM
#endif
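+/* maximum number of distinct lock-time stats slots (one per trans-init callsite) */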
+#define BCH_LOCK_TIME_NR 128
+
#define BCH_TIME_STATS() \
x(btree_node_mem_alloc) \
x(btree_node_split) \
unsigned id;
};
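+/*
+ * Per-filesystem lock hold time accounting: one time_stats entry per
+ * transaction-init callsite, identified by name.
+ */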
+struct lock_held_stats {
+ struct bch2_time_stats times[BCH_LOCK_TIME_NR];
+ const char *names[BCH_LOCK_TIME_NR];
+};
+
struct bch_fs_pcpu {
u64 sectors_available;
};
bool promote_whole_extents;
struct bch2_time_stats times[BCH_TIME_STAT_NR];
+
+ struct lock_held_stats lock_held_stats;
};
static inline void bch2_set_ra_pages(struct bch_fs *c, unsigned ra_pages)
if (six_relock_type(&b->c.lock, want, path->l[level].lock_seq) ||
(btree_node_lock_seq_matches(path, b, level) &&
btree_node_lock_increment(trans, b, level, want))) {
- mark_btree_node_locked(path, level, want);
+ mark_btree_node_locked(trans, path, level, want);
return true;
}
fail:
return false;
success:
- mark_btree_node_intent_locked(path, level);
+ mark_btree_node_intent_locked(trans, path, level);
return true;
}
t != BTREE_NODE_UNLOCKED) {
btree_node_unlock(trans, path, b->c.level);
six_lock_increment(&b->c.lock, (enum six_lock_type) t);
- mark_btree_node_locked(path, b->c.level, (enum six_lock_type) t);
+ mark_btree_node_locked(trans, path, b->c.level, (enum six_lock_type) t);
}
btree_path_level_init(trans, path, b);
for (i = path->level + 1; i < BTREE_MAX_DEPTH; i++)
path->l[i].b = NULL;
- mark_btree_node_locked(path, path->level, lock_type);
+ mark_btree_node_locked(trans, path, path->level, lock_type);
btree_path_level_init(trans, path, b);
return 0;
}
if (unlikely(ret))
goto err;
- mark_btree_node_locked(path, level, lock_type);
+ mark_btree_node_locked(trans, path, level, lock_type);
btree_path_level_init(trans, path, b);
if (likely(!trans->journal_replay_not_finished &&
trans->journal_replay_not_finished =
!test_bit(JOURNAL_REPLAY_DONE, &c->journal.flags);
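+
+ /* Find the stats slot whose name matches this callsite, claiming a free one if needed */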
+ while (trans->lock_name_idx < BCH_LOCK_TIME_NR
+ && c->lock_held_stats.names[trans->lock_name_idx] != fn
+ && c->lock_held_stats.names[trans->lock_name_idx] != NULL)
+ trans->lock_name_idx++;
+
+ if (trans->lock_name_idx >= BCH_LOCK_TIME_NR)
+ pr_warn_once("lock_times array not big enough!");
+ else
+ c->lock_held_stats.names[trans->lock_name_idx] = fn;
+
bch2_trans_alloc_paths(trans, c);
if (expected_mem_bytes) {
if (!ck)
goto retry;
- mark_btree_node_locked(path, 0, SIX_LOCK_intent);
+ mark_btree_node_locked(trans, path, 0, SIX_LOCK_intent);
path->locks_want = 1;
} else {
enum six_lock_type lock_want = __btree_lock_want(path, 0);
goto retry;
}
- mark_btree_node_locked(path, 0, lock_want);
+ mark_btree_node_locked(trans, path, 0, lock_want);
}
path->l[0].lock_seq = ck->c.lock.state.seq;
path->nodes_intent_locked &= ~(1 << level);
}
-static inline void mark_btree_node_locked(struct btree_path *path,
+static inline void mark_btree_node_locked_noreset(struct btree_trans *trans,
+ struct btree_path *path,
unsigned level,
enum six_lock_type type)
{
path->nodes_intent_locked |= type << level;
}
-static inline void mark_btree_node_intent_locked(struct btree_path *path,
+static inline void mark_btree_node_locked(struct btree_trans *trans,
+ struct btree_path *path,
+ unsigned level,
+ enum six_lock_type type)
+{
+ mark_btree_node_locked_noreset(trans, path, level, type);
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
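+ /* record when the lock was taken so btree_node_unlock() can account the hold time */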
+ path->l[level].lock_taken_time = ktime_get_ns();
+#endif
+}
+
+static inline void mark_btree_node_intent_locked(struct btree_trans *trans,
+ struct btree_path *path,
unsigned level)
{
- mark_btree_node_locked(path, level, SIX_LOCK_intent);
+ mark_btree_node_locked_noreset(trans, path, level, SIX_LOCK_intent);
}
static inline enum six_lock_type __btree_lock_want(struct btree_path *path, int level)
EBUG_ON(level >= BTREE_MAX_DEPTH);
- if (lock_type != BTREE_NODE_UNLOCKED)
+ if (lock_type != BTREE_NODE_UNLOCKED) {
six_unlock_type(&path->l[level].b->c.lock, lock_type);
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+ if (trans->lock_name_idx < BCH_LOCK_TIME_NR) {
+ struct bch_fs *c = trans->c;
+
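+ /* account the time this lock was held to this transaction's stats slot */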
+ __bch2_time_stats_update(&c->lock_held_stats.times[trans->lock_name_idx],
+ path->l[level].lock_taken_time,
+ ktime_get_ns());
+ }
+#endif
+ }
mark_btree_node_unlocked(path, level);
}
EBUG_ON(level >= BTREE_MAX_DEPTH);
EBUG_ON(!(trans->paths_allocated & (1ULL << path->idx)));
- return likely(six_trylock_type(&b->c.lock, type)) ||
+ if (likely(six_trylock_type(&b->c.lock, type)) ||
btree_node_lock_increment(trans, b, level, type) ||
__bch2_btree_node_lock(trans, path, b, pos, level, type,
- should_sleep_fn, p, ip);
+ should_sleep_fn, p, ip)) {
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
+ path->l[b->c.level].lock_taken_time = ktime_get_ns();
+#endif
+ return true;
+ } else {
+ return false;
+ }
}
bool __bch2_btree_node_relock(struct btree_trans *, struct btree_path *, unsigned);
}
#endif /* _BCACHEFS_BTREE_LOCKING_H */
-
-
struct btree *b;
struct btree_node_iter iter;
u32 lock_seq;
+#ifdef CONFIG_BCACHEFS_LOCK_TIME_STATS
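+ /* ktime_get_ns() timestamp from when this level's lock was taken */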
+ u64 lock_taken_time;
+#endif
} l[BTREE_MAX_DEPTH];
#ifdef CONFIG_BCACHEFS_DEBUG
unsigned long ip_allocated;
unsigned journal_u64s;
unsigned journal_preres_u64s;
struct replicas_delta_list *fs_usage_deltas;
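+ /* index into bch_fs lock_held_stats, chosen from the trans-init callsite name */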
+ int lock_name_idx;
};
#define BTREE_FLAGS() \
.read = bch2_journal_pins_read,
};
+static int lock_held_stats_open(struct inode *inode, struct file *file)
+{
+ struct bch_fs *c = inode->i_private;
+ struct dump_iter *i;
+
+ i = kzalloc(sizeof(struct dump_iter), GFP_KERNEL);
+
+ if (!i)
+ return -ENOMEM;
+
+ i->iter = 0;
+ i->c = c;
+ i->buf = PRINTBUF;
+ file->private_data = i;
+
+ return 0;
+}
+
+static int lock_held_stats_release(struct inode *inode, struct file *file)
+{
+ struct dump_iter *i = file->private_data;
+
+ printbuf_exit(&i->buf);
+ kfree(i);
+
+ return 0;
+}
+
+static ssize_t lock_held_stats_read(struct file *file, char __user *buf,
+ size_t size, loff_t *ppos)
+{
+ struct dump_iter *i = file->private_data;
+ struct lock_held_stats *lhs = &i->c->lock_held_stats;
+ int err;
+
+ i->ubuf = buf;
+ i->size = size;
+ i->ret = 0;
+
+ while (i->iter < BCH_LOCK_TIME_NR && lhs->names[i->iter] != NULL) {
+ err = flush_buf(i);
+ if (err)
+ return err;
+
+ if (!i->size)
+ break;
+
+ prt_printf(&i->buf, "%s:", lhs->names[i->iter]);
+ prt_newline(&i->buf);
+ printbuf_indent_add(&i->buf, 8);
+ bch2_time_stats_to_text(&i->buf, &lhs->times[i->iter]);
+ printbuf_indent_sub(&i->buf, 8);
+ prt_newline(&i->buf);
+ i->iter++;
+ }
+
+ if (i->buf.allocation_failure)
+ return -ENOMEM;
+
+ return i->ret;
+}
+
+static const struct file_operations lock_held_stats_op = {
+ .owner = THIS_MODULE,
+ .open = lock_held_stats_open,
+ .release = lock_held_stats_release,
+ .read = lock_held_stats_read,
+};
+
void bch2_fs_debug_exit(struct bch_fs *c)
{
if (!IS_ERR_OR_NULL(c->fs_debug_dir))
debugfs_create_file("journal_pins", 0400, c->fs_debug_dir,
c->btree_debug, &journal_pins_ops);
+ if (IS_ENABLED(CONFIG_BCACHEFS_LOCK_TIME_STATS)) {
+ debugfs_create_file("lock_held_stats", 0400, c->fs_debug_dir,
+ c, &lock_held_stats_op);
+ }
+
c->btree_debug_dir = debugfs_create_dir("btrees", c->fs_debug_dir);
if (IS_ERR_OR_NULL(c->btree_debug_dir))
return;