bcachefs: Btree key cache instrumentation
author: Kent Overstreet <kent.overstreet@linux.dev>
Sat, 20 Apr 2024 18:49:22 +0000 (14:49 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Wed, 8 May 2024 21:29:20 +0000 (17:29 -0400)
It turns out the btree key cache shrinker wasn't actually reclaiming
anything, prior to the previous patch. This adds instrumentation so that
if we have further issues we can see what's going on.

Specifically, sysfs internal/btree_key_cache is greatly expanded with
new counters, and the SRCU sequence numbers of the first 10 entries on
each pending freelist, and we also add trigger_btree_key_cache_shrink
for testing without having to prune all the system caches.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/btree_key_cache.c
fs/bcachefs/btree_key_cache_types.h
fs/bcachefs/sysfs.c

index 6645264a481bdd060ec941439d86b827e12a966b..203fbb38e9d476a0a6a43a77d1deaaef71feb9be 100644 (file)
@@ -822,6 +822,8 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
        int srcu_idx;
 
        mutex_lock(&bc->lock);
+       bc->requested_to_free += sc->nr_to_scan;
+
        srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
        flags = memalloc_nofs_save();
 
@@ -840,6 +842,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
                atomic_long_dec(&bc->nr_freed);
                freed++;
                bc->nr_freed_nonpcpu--;
+               bc->freed++;
        }
 
        list_for_each_entry_safe(ck, t, &bc->freed_pcpu, list) {
@@ -853,6 +856,7 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
                atomic_long_dec(&bc->nr_freed);
                freed++;
                bc->nr_freed_pcpu--;
+               bc->freed++;
        }
 
        rcu_read_lock();
@@ -871,13 +875,18 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
                        ck = container_of(pos, struct bkey_cached, hash);
 
                        if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
+                               bc->skipped_dirty++;
                                goto next;
                        } else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) {
                                clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
+                               bc->skipped_accessed++;
                                goto next;
                        } else if (bkey_cached_lock_for_evict(ck)) {
                                bkey_cached_evict(bc, ck);
                                bkey_cached_free(bc, ck);
+                               bc->moved_to_freelist++;
+                       } else {
+                               bc->skipped_lock_fail++;
                        }
 
                        scanned++;
@@ -1024,11 +1033,47 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
        return 0;
 }
 
-void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *c)
+void bch2_btree_key_cache_to_text(struct printbuf *out, struct btree_key_cache *bc)
 {
-       prt_printf(out, "nr_freed:\t%lu\n",     atomic_long_read(&c->nr_freed));
-       prt_printf(out, "nr_keys:\t%lu\n",      atomic_long_read(&c->nr_keys));
-       prt_printf(out, "nr_dirty:\t%lu\n",     atomic_long_read(&c->nr_dirty));
+       struct bch_fs *c = container_of(bc, struct bch_fs, btree_key_cache);
+
+       printbuf_tabstop_push(out, 24);
+       printbuf_tabstop_push(out, 12);
+
+       unsigned flags = memalloc_nofs_save();
+       mutex_lock(&bc->lock);
+       prt_printf(out, "keys:\t%lu\r\n",               atomic_long_read(&bc->nr_keys));
+       prt_printf(out, "dirty:\t%lu\r\n",              atomic_long_read(&bc->nr_dirty));
+       prt_printf(out, "freelist:\t%lu\r\n",           atomic_long_read(&bc->nr_freed));
+       prt_printf(out, "nonpcpu freelist:\t%lu\r\n",   bc->nr_freed_nonpcpu);
+       prt_printf(out, "pcpu freelist:\t%lu\r\n",      bc->nr_freed_pcpu);
+
+       prt_printf(out, "\nshrinker:\n");
+       prt_printf(out, "requested_to_free:\t%lu\r\n",  bc->requested_to_free);
+       prt_printf(out, "freed:\t%lu\r\n",              bc->freed);
+       prt_printf(out, "moved_to_freelist:\t%lu\r\n",  bc->moved_to_freelist);
+       prt_printf(out, "skipped_dirty:\t%lu\r\n",      bc->skipped_dirty);
+       prt_printf(out, "skipped_accessed:\t%lu\r\n",   bc->skipped_accessed);
+       prt_printf(out, "skipped_lock_fail:\t%lu\r\n",  bc->skipped_lock_fail);
+
+       prt_printf(out, "srcu seq:\t%lu\r\n",           get_state_synchronize_srcu(&c->btree_trans_barrier));
+
+       struct bkey_cached *ck;
+       unsigned iter = 0;
+       list_for_each_entry(ck, &bc->freed_nonpcpu, list) {
+               prt_printf(out, "freed_nonpcpu:\t%lu\r\n", ck->btree_trans_barrier_seq);
+               if (++iter >= 10)
+                       break;
+       }
+
+       iter = 0;
+       list_for_each_entry(ck, &bc->freed_pcpu, list) {
+               prt_printf(out, "freed_pcpu:\t%lu\r\n", ck->btree_trans_barrier_seq);
+               if (++iter >= 10)
+                       break;
+       }
+       mutex_unlock(&bc->lock);
+       memalloc_nofs_restore(flags);
 }
 
 void bch2_btree_key_cache_exit(void)
index 290e4e57df5bbcfeffb38d666aa18c89a7c1c5a6..237e8bb3ac407f1273ee781069b945fade71f0cb 100644 (file)
@@ -24,6 +24,14 @@ struct btree_key_cache {
        atomic_long_t           nr_freed;
        atomic_long_t           nr_keys;
        atomic_long_t           nr_dirty;
+
+       /* shrinker stats */
+       unsigned long           requested_to_free;
+       unsigned long           freed;
+       unsigned long           moved_to_freelist;
+       unsigned long           skipped_dirty;
+       unsigned long           skipped_accessed;
+       unsigned long           skipped_lock_fail;
 };
 
 struct bkey_cached_key {
index 43edda74d3cbcf9df765a910385f0508f30827f5..4627b0ba179e7a917110d852be53d2d6056504b7 100644 (file)
@@ -140,8 +140,8 @@ write_attribute(trigger_gc);
 write_attribute(trigger_discards);
 write_attribute(trigger_invalidates);
 write_attribute(trigger_journal_flush);
-write_attribute(prune_cache);
-write_attribute(btree_wakeup);
+write_attribute(trigger_btree_cache_shrink);
+write_attribute(trigger_btree_key_cache_shrink);
 rw_attribute(gc_gens_pos);
 
 read_attribute(uuid);
@@ -346,21 +346,6 @@ static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
        prt_printf(out, "\n");
 }
 
-static void bch2_btree_wakeup_all(struct bch_fs *c)
-{
-       struct btree_trans *trans;
-
-       seqmutex_lock(&c->btree_trans_lock);
-       list_for_each_entry(trans, &c->btree_trans_list, list) {
-               struct btree_bkey_cached_common *b = READ_ONCE(trans->locking);
-
-               if (b)
-                       six_lock_wakeup_all(&b->lock);
-
-       }
-       seqmutex_unlock(&c->btree_trans_lock);
-}
-
 static void fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
 {
        unsigned nr[BCH_DATA_NR];
@@ -513,7 +498,7 @@ STORE(bch2_fs)
        if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_sysfs))
                return -EROFS;
 
-       if (attr == &sysfs_prune_cache) {
+       if (attr == &sysfs_trigger_btree_cache_shrink) {
                struct shrink_control sc;
 
                sc.gfp_mask = GFP_KERNEL;
@@ -521,8 +506,13 @@ STORE(bch2_fs)
                c->btree_cache.shrink->scan_objects(c->btree_cache.shrink, &sc);
        }
 
-       if (attr == &sysfs_btree_wakeup)
-               bch2_btree_wakeup_all(c);
+       if (attr == &sysfs_trigger_btree_key_cache_shrink) {
+               struct shrink_control sc;
+
+               sc.gfp_mask = GFP_KERNEL;
+               sc.nr_to_scan = strtoul_or_return(buf);
+               c->btree_key_cache.shrink->scan_objects(c->btree_key_cache.shrink, &sc);
+       }
 
        if (attr == &sysfs_trigger_gc)
                bch2_gc_gens(c);
@@ -656,8 +646,8 @@ struct attribute *bch2_fs_internal_files[] = {
        &sysfs_trigger_discards,
        &sysfs_trigger_invalidates,
        &sysfs_trigger_journal_flush,
-       &sysfs_prune_cache,
-       &sysfs_btree_wakeup,
+       &sysfs_trigger_btree_cache_shrink,
+       &sysfs_trigger_btree_key_cache_shrink,
 
        &sysfs_gc_gens_pos,