bcachefs: BCH_WATERMARK_interior_updates
author: Kent Overstreet <kent.overstreet@linux.dev>
Mon, 1 Apr 2024 23:20:36 +0000 (19:20 -0400)
committer: Kent Overstreet <kent.overstreet@linux.dev>
Tue, 2 Apr 2024 01:14:02 +0000 (21:14 -0400)
This adds a new watermark, higher priority than BCH_WATERMARK_reclaim,
for interior btree updates. We've seen a deadlock where journal replay
triggers a ton of btree node merges, and these use up all available open
buckets and then interior updates get stuck.

One cause of this is that we're currently lacking btree node merging on
write buffer btrees - that needs to be fixed as well.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
fs/bcachefs/alloc_foreground.c
fs/bcachefs/alloc_types.h
fs/bcachefs/btree_io.c
fs/bcachefs/btree_trans_commit.c
fs/bcachefs/btree_update_interior.c
fs/bcachefs/buckets.h

index 214b15c84d1f3258d2ba23effacc5ad4e8852783..a1fc30adf9129da53ab8a97586dee34fd04878d9 100644 (file)
@@ -188,8 +188,10 @@ long bch2_bucket_alloc_new_fs(struct bch_dev *ca)
 static inline unsigned open_buckets_reserved(enum bch_watermark watermark)
 {
        switch (watermark) {
-       case BCH_WATERMARK_reclaim:
+       case BCH_WATERMARK_interior_updates:
                return 0;
+       case BCH_WATERMARK_reclaim:
+               return OPEN_BUCKETS_COUNT / 6;
        case BCH_WATERMARK_btree:
        case BCH_WATERMARK_btree_copygc:
                return OPEN_BUCKETS_COUNT / 4;
index b91b7a46105608d089828db3bd65d1cc359475af..c2226e947c41fbcd7e462a7baa49d8726de44ecc 100644 (file)
@@ -22,7 +22,8 @@ struct bucket_alloc_state {
        x(copygc)                       \
        x(btree)                        \
        x(btree_copygc)                 \
-       x(reclaim)
+       x(reclaim)                      \
+       x(interior_updates)
 
 enum bch_watermark {
 #define x(name)        BCH_WATERMARK_##name,
index 9c71e6fb9c41b6f47fd9a178006e5f420f53a83b..f3f27bb85a5ba4466b5ae6403837ce887073d81c 100644 (file)
@@ -1861,7 +1861,7 @@ static void btree_node_write_work(struct work_struct *work)
        } else {
                ret = bch2_trans_do(c, NULL, NULL, 0,
                        bch2_btree_node_update_key_get_iter(trans, b, &wbio->key,
-                                       BCH_WATERMARK_reclaim|
+                                       BCH_WATERMARK_interior_updates|
                                        BCH_TRANS_COMMIT_journal_reclaim|
                                        BCH_TRANS_COMMIT_no_enospc|
                                        BCH_TRANS_COMMIT_no_check_rw,
index 96669fede7d344d1214389acd11e4e06c0eeedaa..aa9da49707404015a558c9c6e9339b733d0c98c3 100644 (file)
@@ -887,6 +887,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
                            int ret, unsigned long trace_ip)
 {
        struct bch_fs *c = trans->c;
+       enum bch_watermark watermark = flags & BCH_WATERMARK_MASK;
 
        switch (ret) {
        case -BCH_ERR_btree_insert_btree_node_full:
@@ -905,7 +906,7 @@ int bch2_trans_commit_error(struct btree_trans *trans, unsigned flags,
                 * flag
                 */
                if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
-                   (flags & BCH_WATERMARK_MASK) != BCH_WATERMARK_reclaim) {
+                   watermark < BCH_WATERMARK_reclaim) {
                        ret = -BCH_ERR_journal_reclaim_would_deadlock;
                        break;
                }
index 29aee215384a68b1e64cd185399774afc6dff4b2..9fd2dd0f468259b97aa1f41d0174cd6186cd5388 100644 (file)
@@ -687,7 +687,7 @@ static void btree_update_nodes_written(struct btree_update *as)
         * which may require allocations as well.
         */
        ret = commit_do(trans, &as->disk_res, &journal_seq,
-                       BCH_WATERMARK_reclaim|
+                       BCH_WATERMARK_interior_updates|
                        BCH_TRANS_COMMIT_no_enospc|
                        BCH_TRANS_COMMIT_no_check_rw|
                        BCH_TRANS_COMMIT_journal_reclaim,
@@ -1121,7 +1121,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
                unsigned journal_flags = watermark|JOURNAL_RES_GET_CHECK;
 
                if ((flags & BCH_TRANS_COMMIT_journal_reclaim) &&
-                   watermark != BCH_WATERMARK_reclaim)
+                   watermark < BCH_WATERMARK_reclaim)
                        journal_flags |= JOURNAL_RES_GET_NONBLOCK;
 
                ret = drop_locks_do(trans,
@@ -1217,7 +1217,7 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
                 */
                if (bch2_err_matches(ret, ENOSPC) &&
                    (flags & BCH_TRANS_COMMIT_journal_reclaim) &&
-                   watermark != BCH_WATERMARK_reclaim) {
+                   watermark < BCH_WATERMARK_reclaim) {
                        ret = -BCH_ERR_journal_reclaim_would_deadlock;
                        goto err;
                }
index 6387e039f7897534e27c207dd3818dc4b6afb3b7..00aaf4bb513974a6b9c0353ea9445f92671c32eb 100644 (file)
@@ -226,6 +226,7 @@ static inline u64 bch2_dev_buckets_reserved(struct bch_dev *ca, enum bch_waterma
                fallthrough;
        case BCH_WATERMARK_btree_copygc:
        case BCH_WATERMARK_reclaim:
+       case BCH_WATERMARK_interior_updates:
                break;
        }