/*
  * Get us an open_bucket we can allocate from, return with it locked:
  */
-struct write_point *bch2_alloc_sectors_start(struct bch_fs *c,
-                               unsigned target,
-                               unsigned erasure_code,
-                               struct write_point_specifier write_point,
-                               struct bch_devs_list *devs_have,
-                               unsigned nr_replicas,
-                               unsigned nr_replicas_required,
-                               enum alloc_reserve reserve,
-                               unsigned flags,
-                               struct closure *cl)
+int bch2_alloc_sectors_start(struct bch_fs *c,
+                            unsigned target,
+                            unsigned erasure_code,
+                            struct write_point_specifier write_point,
+                            struct bch_devs_list *devs_have,
+                            unsigned nr_replicas,
+                            unsigned nr_replicas_required,
+                            enum alloc_reserve reserve,
+                            unsigned flags,
+                            struct closure *cl,
+                            struct write_point **wp_ret)
 {
        struct write_point *wp;
        struct open_bucket *ob;
        write_points_nr = c->write_points_nr;
        have_cache      = false;
 
-       wp = writepoint_find(c, write_point.v);
+       *wp_ret = wp = writepoint_find(c, write_point.v);
 
        if (wp->data_type == BCH_DATA_user)
                ob_flags |= BUCKET_MAY_ALLOC_PARTIAL;
 
        BUG_ON(!wp->sectors_free || wp->sectors_free == UINT_MAX);
 
-       return wp;
+       return 0;
 err:
        open_bucket_for_each(c, &wp->ptrs, ob, i)
                if (ptrs.nr < ARRAY_SIZE(ptrs.v))
        switch (ret) {
        case -OPEN_BUCKETS_EMPTY:
        case -FREELIST_EMPTY:
-               return cl ? ERR_PTR(-EAGAIN) : ERR_PTR(-ENOSPC);
+               return cl ? -EAGAIN : -ENOSPC;
        case -INSUFFICIENT_DEVICES:
-               return ERR_PTR(-EROFS);
+               return -EROFS;
        default:
                BUG();
        }
 void bch2_alloc_sectors_append_ptrs(struct bch_fs *c, struct write_point *wp,
                                    struct bkey_i *k, unsigned sectors,
                                    bool cached)
-
 {
        struct open_bucket *ob;
        unsigned i;
 
        BUG_ON(sectors > wp->sectors_free);
-       wp->sectors_free -= sectors;
+       wp->sectors_free        -= sectors;
+       wp->sectors_allocated   += sectors;
 
        open_bucket_for_each(c, &wp->ptrs, ob, i) {
                struct bch_dev *ca = bch_dev_bkey_exists(c, ob->dev);
 {
        mutex_init(&wp->lock);
        wp->data_type = type;
+
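+       /*
+        * Each write point gets a worker and a list of in flight writes, for
+        * running btree index updates after data writes complete:
+        */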
+       INIT_WORK(&wp->index_update_work, bch2_write_point_do_index_updates);
+       INIT_LIST_HEAD(&wp->writes);
+       spin_lock_init(&wp->writes_lock);
 }
 
 void bch2_fs_allocator_foreground_init(struct bch_fs *c)
        }
 
 }
+
+static const char * const bch2_write_point_states[] = {
+#define x(n)   #n,
+       WRITE_POINT_STATES()
+#undef x
+       NULL
+};
+
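+/*
+ * Print per write point stats: sectors allocated, time since last use, and
+ * cumulative time spent in each write point state.
+ */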
+void bch2_write_points_to_text(struct printbuf *out, struct bch_fs *c)
+{
+       struct write_point *wp;
+       unsigned i;
+
+       for (wp = c->write_points;
+            wp < c->write_points + ARRAY_SIZE(c->write_points);
+            wp++) {
+               pr_buf(out, "%lu: ", wp->write_point);
+               bch2_hprint(out, wp->sectors_allocated);
+
+               pr_buf(out, " last wrote: ");
+               bch2_pr_time_units(out, sched_clock() - wp->last_used);
+
+               for (i = 0; i < WRITE_POINT_STATE_NR; i++) {
+                       pr_buf(out, " %s: ", bch2_write_point_states[i]);
+                       bch2_pr_time_units(out, wp->time[i]);
+               }
+
+               pr_newline(out);
+       }
+}
 
        }
 }
 
-static void __bch2_write(struct closure *);
+static void __bch2_write(struct bch_write_op *);
 
 static void bch2_write_done(struct closure *cl)
 {
        goto out;
 }
 
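+/*
+ * Write point state accounting: charge the time since the last transition to
+ * the state we're leaving, then record the new state:
+ */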
+static inline void __wp_update_state(struct write_point *wp, enum write_point_state state)
+{
+       if (state != wp->state) {
+               u64 now = ktime_get_ns();
+
+               if (wp->last_state_change &&
+                   time_after64(now, wp->last_state_change))
+                       wp->time[wp->state] += now - wp->last_state_change;
+               wp->state = state;
+               wp->last_state_change = now;
+       }
+}
+
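+/* Pick the state from whether a write is running and whether any are queued: */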
+static inline void wp_update_state(struct write_point *wp, bool running)
+{
+       enum write_point_state state;
+
+       state = running                  ? WRITE_POINT_running :
+               !list_empty(&wp->writes) ? WRITE_POINT_waiting_io
+                                        : WRITE_POINT_stopped;
+
+       __wp_update_state(wp, state);
+}
+
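+/*
+ * Called via the op's closure once its data write(s) have completed: mark the
+ * op ready for its btree update and kick the write point's worker.
+ */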
 static void bch2_write_index(struct closure *cl)
 {
        struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
-       struct bch_fs *c = op->c;
+       struct write_point *wp = op->wp;
+       struct workqueue_struct *wq = index_update_wq(op);
 
-       __bch2_write_index(op);
+       barrier();
 
-       if (!(op->flags & BCH_WRITE_DONE)) {
-               continue_at(cl, __bch2_write, index_update_wq(op));
-       } else if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
-               bch2_journal_flush_seq_async(&c->journal,
-                                            *op_journal_seq(op),
-                                            cl);
-               continue_at(cl, bch2_write_done, index_update_wq(op));
-       } else {
-               continue_at_nobarrier(cl, bch2_write_done, NULL);
+       /*
+        * We're not using wp->writes_lock here, so this is racey: that's ok,
+        * because this is just for diagnostic purposes, and we're running out
+        * of interrupt context here so if we were to take the lock we'd have to
+        * switch to spin_lock_irq()/irqsave(), which is not free:
+        */
+       if (wp->state == WRITE_POINT_waiting_io)
+               __wp_update_state(wp, WRITE_POINT_waiting_work);
+
+       op->btree_update_ready = true;
+       queue_work(wq, &wp->index_update_work);
+}
+
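+/*
+ * Per write point work item: pull ops whose data writes have completed off
+ * wp->writes, run their btree updates, and then resubmit, flush the journal,
+ * or complete them as appropriate.
+ */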
+void bch2_write_point_do_index_updates(struct work_struct *work)
+{
+       struct write_point *wp =
+               container_of(work, struct write_point, index_update_work);
+       struct bch_write_op *op;
+
+       while (1) {
+               spin_lock(&wp->writes_lock);
+               list_for_each_entry(op, &wp->writes, wp_list)
+                       if (op->btree_update_ready) {
+                               list_del(&op->wp_list);
+                               goto unlock;
+                       }
+               op = NULL;
+unlock:
+               wp_update_state(wp, op != NULL);
+               spin_unlock(&wp->writes_lock);
+
+               if (!op)
+                       break;
+
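+               /* we're running out of a workqueue, so __bch2_write() must not block: */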
+               op->flags |= BCH_WRITE_IN_WORKER;
+
+               __bch2_write_index(op);
+
+               if (!(op->flags & BCH_WRITE_DONE)) {
+                       __bch2_write(op);
+               } else if (!op->error && (op->flags & BCH_WRITE_FLUSH)) {
+                       bch2_journal_flush_seq_async(&op->c->journal,
+                                                    *op_journal_seq(op),
+                                                    &op->cl);
+                       continue_at(&op->cl, bch2_write_done, index_update_wq(op));
+               } else {
+                       bch2_write_done(&op->cl);
+               }
        }
 }
 
 
        if (parent)
                bio_endio(&parent->bio);
-       else if (!(op->flags & BCH_WRITE_SKIP_CLOSURE_PUT))
-               closure_put(cl);
        else
-               continue_at_nobarrier(cl, bch2_write_index, index_update_wq(op));
+               closure_put(cl);
 }
 
 static void init_append_extent(struct bch_write_op *op,
        return ret;
 }
 
-static void __bch2_write(struct closure *cl)
+static void __bch2_write(struct bch_write_op *op)
 {
-       struct bch_write_op *op = container_of(cl, struct bch_write_op, cl);
        struct bch_fs *c = op->c;
-       struct write_point *wp;
+       struct write_point *wp = NULL;
        struct bio *bio = NULL;
-       bool skip_put = true;
        unsigned nofs_flags;
        int ret;
 
        nofs_flags = memalloc_nofs_save();
 again:
        memset(&op->failed, 0, sizeof(op->failed));
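+       /* set by bch2_write_index() once this pass's data IO completes: */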
+       op->btree_update_ready = false;
 
        do {
                struct bkey_i *key_to_write;
                /* +1 for possible cache device: */
                if (op->open_buckets.nr + op->nr_replicas + 1 >
                    ARRAY_SIZE(op->open_buckets.v))
-                       goto flush_io;
+                       break;
 
                if (bch2_keylist_realloc(&op->insert_keys,
                                        op->inline_keys,
                                        ARRAY_SIZE(op->inline_keys),
                                        BKEY_EXTENT_U64s_MAX))
-                       goto flush_io;
+                       break;
 
                if ((op->flags & BCH_WRITE_FROM_INTERNAL) &&
                    percpu_ref_is_dying(&c->writes)) {
                 * freeing up space on specific disks, which means that
                 * allocations for specific disks may hang arbitrarily long:
                 */
-               wp = bch2_alloc_sectors_start(c,
+               ret = bch2_alloc_sectors_start(c,
                        op->target,
                        op->opts.erasure_code && !(op->flags & BCH_WRITE_CACHED),
                        op->write_point,
                        op->alloc_reserve,
                        op->flags,
                        (op->flags & (BCH_WRITE_ALLOC_NOWAIT|
-                                     BCH_WRITE_ONLY_SPECIFIED_DEVS)) ? NULL : cl);
-               EBUG_ON(!wp);
-
-               if (unlikely(IS_ERR(wp))) {
-                       if (unlikely(PTR_ERR(wp) != -EAGAIN)) {
-                               ret = PTR_ERR(wp);
+                                     BCH_WRITE_ONLY_SPECIFIED_DEVS))
+                       ? NULL : &op->cl,
+                       &wp);
+               if (unlikely(ret)) {
+                       if (unlikely(ret != -EAGAIN))
                                goto err;
-                       }
 
-                       goto flush_io;
+                       break;
                }
 
-               /*
-                * It's possible for the allocator to fail, put us on the
-                * freelist waitlist, and then succeed in one of various retry
-                * paths: if that happens, we need to disable the skip_put
-                * optimization because otherwise there won't necessarily be a
-                * barrier before we free the bch_write_op:
-                */
-               if (atomic_read(&cl->remaining) & CLOSURE_WAITING)
-                       skip_put = false;
+               EBUG_ON(!wp);
 
                bch2_open_bucket_get(c, wp, &op->open_buckets);
                ret = bch2_write_extent(op, wp, &bio);
+
                bch2_alloc_sectors_done(c, wp);
 
                if (ret < 0)
                        goto err;
 
-               if (ret) {
-                       skip_put = false;
-               } else {
-                       /*
-                        * for the skip_put optimization this has to be set
-                        * before we submit the bio:
-                        */
+               if (!ret)
                        op->flags |= BCH_WRITE_DONE;
-               }
 
                bio->bi_end_io  = bch2_write_endio;
                bio->bi_private = &op->cl;
                bio->bi_opf |= REQ_OP_WRITE;
 
-               if (!skip_put)
-                       closure_get(bio->bi_private);
-               else
-                       op->flags |= BCH_WRITE_SKIP_CLOSURE_PUT;
+               closure_get(bio->bi_private);
 
                key_to_write = (void *) (op->insert_keys.keys_p +
                                         key_to_write_offset);
                bch2_submit_wbio_replicas(to_wbio(bio), c, BCH_DATA_user,
                                          key_to_write);
        } while (ret);
-
-       if (!skip_put)
-               continue_at(cl, bch2_write_index, index_update_wq(op));
 out:
-       memalloc_nofs_restore(nofs_flags);
-       return;
-err:
-       op->error = ret;
-       op->flags |= BCH_WRITE_DONE;
-
-       continue_at(cl, bch2_write_index, index_update_wq(op));
-       goto out;
-flush_io:
        /*
         * If the write can't all be submitted at once, we generally want to
         * block synchronously as that signals backpressure to the caller.
-        *
-        * However, if we're running out of a workqueue, we can't block here
-        * because we'll be blocking other work items from completing:
         */
-       if (current->flags & PF_WQ_WORKER) {
-               continue_at(cl, bch2_write_index, index_update_wq(op));
-               goto out;
-       }
-
-       closure_sync(cl);
-
-       if (!bch2_keylist_empty(&op->insert_keys)) {
+       if (!(op->flags & BCH_WRITE_DONE) &&
+           !(op->flags & BCH_WRITE_IN_WORKER)) {
+               closure_sync(&op->cl);
                __bch2_write_index(op);
 
-               if (op->error) {
-                       op->flags |= BCH_WRITE_DONE;
-                       continue_at_nobarrier(cl, bch2_write_done, NULL);
-                       goto out;
-               }
+               if (!(op->flags & BCH_WRITE_DONE))
+                       goto again;
+               bch2_write_done(&op->cl);
+       } else {
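+               /*
+                * Async completion: park the op on the write point's list; when
+                * the data write(s) complete, bch2_write_index() hands the
+                * btree update off to the write point's worker.
+                */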
+               spin_lock(&wp->writes_lock);
+               op->wp = wp;
+               list_add_tail(&op->wp_list, &wp->writes);
+               if (wp->state == WRITE_POINT_stopped)
+                       __wp_update_state(wp, WRITE_POINT_waiting_io);
+               spin_unlock(&wp->writes_lock);
+
+               continue_at(&op->cl, bch2_write_index, NULL);
        }
 
-       goto again;
+       memalloc_nofs_restore(nofs_flags);
+       return;
+err:
+       op->error = ret;
+       op->flags |= BCH_WRITE_DONE;
+       goto out;
 }
 
 static void bch2_write_data_inline(struct bch_write_op *op, unsigned data_len)
 {
-       struct closure *cl = &op->cl;
        struct bio *bio = &op->wbio.bio;
        struct bvec_iter iter;
        struct bkey_i_inline_data *id;
        unsigned sectors;
        int ret;
 
+       op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
+       op->flags |= BCH_WRITE_DONE;
+
        bch2_check_set_feature(op->c, BCH_FEATURE_inline_data);
 
        ret = bch2_keylist_realloc(&op->insert_keys, op->inline_keys,
        set_bkey_val_bytes(&id->k, data_len);
        bch2_keylist_push(&op->insert_keys);
 
-       op->flags |= BCH_WRITE_WROTE_DATA_INLINE;
-       op->flags |= BCH_WRITE_DONE;
-
-       continue_at_nobarrier(cl, bch2_write_index, NULL);
-       return;
+       __bch2_write_index(op);
 err:
        bch2_write_done(&op->cl);
 }
        struct bch_fs *c = op->c;
        unsigned data_len;
 
+       EBUG_ON(op->cl.parent);
        BUG_ON(!op->nr_replicas);
        BUG_ON(!op->write_point.v);
        BUG_ON(!bkey_cmp(op->pos, POS_MAX));
                return;
        }
 
-       continue_at_nobarrier(cl, __bch2_write, NULL);
+       __bch2_write(op);
        return;
 err:
        bch2_disk_reservation_put(c, &op->res);
 
-       if (op->end_io) {
-               EBUG_ON(cl->parent);
-               closure_debug_destroy(cl);
+       closure_debug_destroy(&op->cl);
+       if (op->end_io)
                op->end_io(op);
-       } else {
-               closure_return(cl);
-       }
 }
 
 /* Cache promotion on read */