bcachefs: bch2_print_allocator_stuck()
authorKent Overstreet <kent.overstreet@linux.dev>
Fri, 3 May 2024 18:49:23 +0000 (14:49 -0400)
committerKent Overstreet <kent.overstreet@linux.dev>
Wed, 8 May 2024 21:29:22 +0000 (17:29 -0400)
If we block on the allocator for more than 10 seconds, print out some
useful debugging info.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_foreground.c
fs/bcachefs/alloc_foreground.h
fs/bcachefs/io_write.c
fs/bcachefs/journal.c
fs/bcachefs/sysfs.c

index 438e00c8316bd39288cad71143d141d1f2b5dde3..13460aab3cae66bdd51f674bbecc7614e31622b5 100644 (file)
@@ -1630,3 +1630,104 @@ void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
        prt_str(out, "Btree write point\n");
        bch2_write_point_to_text(out, c, &c->btree_write_point);
 }
+
+void bch2_fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
+{
+       unsigned nr[BCH_DATA_NR];
+
+       memset(nr, 0, sizeof(nr));
+
+       for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
+               nr[c->open_buckets[i].data_type]++;
+
+       printbuf_tabstop_push(out, 24);
+
+       percpu_down_read(&c->mark_lock);
+       prt_printf(out, "hidden\t%llu\n",                       bch2_fs_usage_read_one(c, &c->usage_base->b.hidden));
+       prt_printf(out, "btree\t%llu\n",                        bch2_fs_usage_read_one(c, &c->usage_base->b.btree));
+       prt_printf(out, "data\t%llu\n",                         bch2_fs_usage_read_one(c, &c->usage_base->b.data));
+       prt_printf(out, "cached\t%llu\n",                       bch2_fs_usage_read_one(c, &c->usage_base->b.cached));
+       prt_printf(out, "reserved\t%llu\n",                     bch2_fs_usage_read_one(c, &c->usage_base->b.reserved));
+       prt_printf(out, "online_reserved\t%llu\n",              percpu_u64_get(c->online_reserved));
+       prt_printf(out, "nr_inodes\t%llu\n",                    bch2_fs_usage_read_one(c, &c->usage_base->b.nr_inodes));
+       percpu_up_read(&c->mark_lock);
+
+       prt_newline(out);
+       prt_printf(out, "freelist_wait\t%s\n",                  c->freelist_wait.list.first ? "waiting" : "empty");
+       prt_printf(out, "open buckets allocated\t%i\n",         OPEN_BUCKETS_COUNT - c->open_buckets_nr_free);
+       prt_printf(out, "open buckets total\t%u\n",             OPEN_BUCKETS_COUNT);
+       prt_printf(out, "open_buckets_wait\t%s\n",              c->open_buckets_wait.list.first ? "waiting" : "empty");
+       prt_printf(out, "open_buckets_btree\t%u\n",             nr[BCH_DATA_btree]);
+       prt_printf(out, "open_buckets_user\t%u\n",              nr[BCH_DATA_user]);
+       prt_printf(out, "btree reserve cache\t%u\n",            c->btree_reserve_cache_nr);
+}
+
+void bch2_dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
+{
+       struct bch_fs *c = ca->fs;
+       struct bch_dev_usage stats = bch2_dev_usage_read(ca);
+       unsigned nr[BCH_DATA_NR];
+
+       memset(nr, 0, sizeof(nr));
+
+       for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
+               nr[c->open_buckets[i].data_type]++;
+
+       printbuf_tabstop_push(out, 12);
+       printbuf_tabstop_push(out, 16);
+       printbuf_tabstop_push(out, 16);
+       printbuf_tabstop_push(out, 16);
+       printbuf_tabstop_push(out, 16);
+
+       bch2_dev_usage_to_text(out, &stats);
+
+       prt_newline(out);
+
+       prt_printf(out, "reserves:\n");
+       for (unsigned i = 0; i < BCH_WATERMARK_NR; i++)
+               prt_printf(out, "%s\t%llu\r\n", bch2_watermarks[i], bch2_dev_buckets_reserved(ca, i));
+
+       prt_newline(out);
+
+       printbuf_tabstops_reset(out);
+       printbuf_tabstop_push(out, 12);
+       printbuf_tabstop_push(out, 16);
+
+       prt_printf(out, "open buckets\t%i\r\n", ca->nr_open_buckets);
+       prt_printf(out, "buckets to invalidate\t%llu\r\n",      should_invalidate_buckets(ca, stats));
+}
+
+void bch2_print_allocator_stuck(struct bch_fs *c)
+{
+       struct printbuf buf = PRINTBUF;
+
+       prt_printf(&buf, "Allocator stuck? Waited for 10 seconds\n");
+
+       prt_printf(&buf, "Allocator debug:\n");
+       printbuf_indent_add(&buf, 2);
+       bch2_fs_alloc_debug_to_text(&buf, c);
+       printbuf_indent_sub(&buf, 2);
+       prt_newline(&buf);
+
+       for_each_online_member(c, ca) {
+               prt_printf(&buf, "Dev %u:\n", ca->dev_idx);
+               printbuf_indent_add(&buf, 2);
+               bch2_dev_alloc_debug_to_text(&buf, ca);
+               printbuf_indent_sub(&buf, 2);
+               prt_newline(&buf);
+       }
+
+       prt_printf(&buf, "Copygc debug:\n");
+       printbuf_indent_add(&buf, 2);
+       bch2_copygc_wait_to_text(&buf, c);
+       printbuf_indent_sub(&buf, 2);
+       prt_newline(&buf);
+
+       prt_printf(&buf, "Journal debug:\n");
+       printbuf_indent_add(&buf, 2);
+       bch2_journal_debug_to_text(&buf, &c->journal);
+       printbuf_indent_sub(&buf, 2);
+
+       bch2_print_string_as_lines(KERN_ERR, buf.buf);
+       printbuf_exit(&buf);
+}
index f0339eb7e30e90493172aacafbe85b4a5d79634f..96f24b67d0663c0fa14722a10cc14734efb8de65 100644 (file)
@@ -222,4 +222,9 @@ void bch2_open_buckets_partial_to_text(struct printbuf *, struct bch_fs *);
 
 void bch2_write_points_to_text(struct printbuf *, struct bch_fs *);
 
+void bch2_fs_alloc_debug_to_text(struct printbuf *, struct bch_fs *);
+void bch2_dev_alloc_debug_to_text(struct printbuf *, struct bch_dev *);
+
+void bch2_print_allocator_stuck(struct bch_fs *);
+
 #endif /* _BCACHEFS_ALLOC_FOREGROUND_H */
index e96dad0dd60d4a8f2f72d7272fc17ff78c51ac5f..55e24c83fb19788829a85bafc10021b4007263b9 100644 (file)
@@ -1494,7 +1494,11 @@ err:
        if ((op->flags & BCH_WRITE_SYNC) ||
            (!(op->flags & BCH_WRITE_DONE) &&
             !(op->flags & BCH_WRITE_IN_WORKER))) {
-               closure_sync(&op->cl);
+               if (closure_sync_timeout(&op->cl, HZ * 10)) {
+                       bch2_print_allocator_stuck(c);
+                       closure_sync(&op->cl);
+               }
+
                __bch2_write_index(op);
 
                if (!(op->flags & BCH_WRITE_DONE))
index c984cdc2795d38be07a9e0ac4a52401bb1df9848..adec8e1ea73eab37b144f4aaa461773ceb123e1d 100644 (file)
@@ -1416,7 +1416,7 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
        u64 nr_writes = j->nr_flush_writes + j->nr_noflush_writes;
 
        if (!out->nr_tabstops)
-               printbuf_tabstop_push(out, 24);
+               printbuf_tabstop_push(out, 28);
        out->atomic++;
 
        rcu_read_lock();
index 4627b0ba179e7a917110d852be53d2d6056504b7..df3d28659bfd4a216267f95286eb8fa82c36c930 100644 (file)
@@ -346,37 +346,6 @@ static void bch2_gc_gens_pos_to_text(struct printbuf *out, struct bch_fs *c)
        prt_printf(out, "\n");
 }
 
-static void fs_alloc_debug_to_text(struct printbuf *out, struct bch_fs *c)
-{
-       unsigned nr[BCH_DATA_NR];
-
-       memset(nr, 0, sizeof(nr));
-
-       for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
-               nr[c->open_buckets[i].data_type]++;
-
-       printbuf_tabstop_push(out, 24);
-
-       percpu_down_read(&c->mark_lock);
-       prt_printf(out, "hidden\t%llu\n",               bch2_fs_usage_read_one(c, &c->usage_base->b.hidden));
-       prt_printf(out, "btree\t%llu\n",                bch2_fs_usage_read_one(c, &c->usage_base->b.btree));
-       prt_printf(out, "data\t%llu\n",                 bch2_fs_usage_read_one(c, &c->usage_base->b.data));
-       prt_printf(out, "cached\t%llu\n",               bch2_fs_usage_read_one(c, &c->usage_base->b.cached));
-       prt_printf(out, "reserved\t%llu\n",             bch2_fs_usage_read_one(c, &c->usage_base->b.reserved));
-       prt_printf(out, "online_reserved\t%llu\n",      percpu_u64_get(c->online_reserved));
-       prt_printf(out, "nr_inodes\t%llu\n",            bch2_fs_usage_read_one(c, &c->usage_base->b.nr_inodes));
-       percpu_up_read(&c->mark_lock);
-
-       prt_newline(out);
-       prt_printf(out, "freelist_wait\t%s\n",                  c->freelist_wait.list.first ? "waiting" : "empty");
-       prt_printf(out, "open buckets allocated\t%i\n",         OPEN_BUCKETS_COUNT - c->open_buckets_nr_free);
-       prt_printf(out, "open buckets total\t%u\n",             OPEN_BUCKETS_COUNT);
-       prt_printf(out, "open_buckets_wait\t%s\n",              c->open_buckets_wait.list.first ? "waiting" : "empty");
-       prt_printf(out, "open_buckets_btree\t%u\n",             nr[BCH_DATA_btree]);
-       prt_printf(out, "open_buckets_user\t%u\n",              nr[BCH_DATA_user]);
-       prt_printf(out, "btree reserve cache\t%u\n",            c->btree_reserve_cache_nr);
-}
-
 SHOW(bch2_fs)
 {
        struct bch_fs *c = container_of(kobj, struct bch_fs, kobj);
@@ -458,7 +427,7 @@ SHOW(bch2_fs)
                bch2_disk_groups_to_text(out, c);
 
        if (attr == &sysfs_alloc_debug)
-               fs_alloc_debug_to_text(out, c);
+               bch2_fs_alloc_debug_to_text(out, c);
 
        return 0;
 }
@@ -779,47 +748,6 @@ struct attribute *bch2_fs_time_stats_files[] = {
        NULL
 };
 
-static void dev_alloc_debug_to_text(struct printbuf *out, struct bch_dev *ca)
-{
-       struct bch_fs *c = ca->fs;
-       struct bch_dev_usage stats = bch2_dev_usage_read(ca);
-       unsigned nr[BCH_DATA_NR];
-
-       memset(nr, 0, sizeof(nr));
-
-       for (unsigned i = 0; i < ARRAY_SIZE(c->open_buckets); i++)
-               nr[c->open_buckets[i].data_type]++;
-
-       printbuf_tabstop_push(out, 8);
-       printbuf_tabstop_push(out, 16);
-       printbuf_tabstop_push(out, 16);
-       printbuf_tabstop_push(out, 16);
-       printbuf_tabstop_push(out, 16);
-
-       bch2_dev_usage_to_text(out, &stats);
-
-       prt_newline(out);
-
-       prt_printf(out, "reserves:\n");
-       for (unsigned i = 0; i < BCH_WATERMARK_NR; i++)
-               prt_printf(out, "%s\t%llu\r\n", bch2_watermarks[i], bch2_dev_buckets_reserved(ca, i));
-
-       prt_newline(out);
-
-       printbuf_tabstops_reset(out);
-       printbuf_tabstop_push(out, 24);
-
-       prt_printf(out, "freelist_wait\t%s\n",                  c->freelist_wait.list.first ? "waiting" : "empty");
-       prt_printf(out, "open buckets allocated\t%i\n",         OPEN_BUCKETS_COUNT - c->open_buckets_nr_free);
-       prt_printf(out, "open buckets this dev\t%i\n",          ca->nr_open_buckets);
-       prt_printf(out, "open buckets total\t%u\n",             OPEN_BUCKETS_COUNT);
-       prt_printf(out, "open_buckets_wait\t%s\n",              c->open_buckets_wait.list.first ? "waiting" : "empty");
-       prt_printf(out, "open_buckets_btree\t%u\n",             nr[BCH_DATA_btree]);
-       prt_printf(out, "open_buckets_user\t%u\n",              nr[BCH_DATA_user]);
-       prt_printf(out, "buckets_to_invalidate\t%llu\n",        should_invalidate_buckets(ca, stats));
-       prt_printf(out, "btree reserve cache\t%u\n",            c->btree_reserve_cache_nr);
-}
-
 static const char * const bch2_rw[] = {
        "read",
        "write",
@@ -889,7 +817,7 @@ SHOW(bch2_dev)
                     * 100 / CONGESTED_MAX);
 
        if (attr == &sysfs_alloc_debug)
-               dev_alloc_debug_to_text(out, ca);
+               bch2_dev_alloc_debug_to_text(out, ca);
 
        return 0;
 }