bcachefs: Kill JOURNAL_NEED_WRITE
authorKent Overstreet <kent.overstreet@gmail.com>
Fri, 25 Feb 2022 15:28:20 +0000 (10:28 -0500)
committerKent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:26 +0000 (17:09 -0400)
This replaces the journal flag JOURNAL_NEED_WRITE with per-journal buf
state - more explicit, and solving a race in the old code that would
lead to entries being opened and written unnecessarily.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/journal.c
fs/bcachefs/journal_io.c
fs/bcachefs/journal_types.h

index 64875c8150f7fff585fc918d349c6d7ecdad0049..880ca2061012452cc652dffbddff7e337f697b29 100644 (file)
@@ -86,6 +86,7 @@ static void bch2_journal_buf_init(struct journal *j)
        buf->noflush    = false;
        buf->must_flush = false;
        buf->separate_flush = false;
+       buf->flush_time = 0;
 
        memset(buf->data, 0, sizeof(*buf->data));
        buf->data->seq  = cpu_to_le64(journal_cur_seq(j));
@@ -152,11 +153,6 @@ static bool __journal_entry_close(struct journal *j)
                        return true;
                }
 
-               if (!test_bit(JOURNAL_NEED_WRITE, &j->flags)) {
-                       set_bit(JOURNAL_NEED_WRITE, &j->flags);
-                       j->need_write_time = local_clock();
-               }
-
                new.cur_entry_offset = JOURNAL_ENTRY_CLOSED_VAL;
                new.idx++;
 
@@ -205,7 +201,6 @@ static bool __journal_entry_close(struct journal *j)
        bch2_journal_buf_init(j);
 
        cancel_delayed_work(&j->write_work);
-       clear_bit(JOURNAL_NEED_WRITE, &j->flags);
 
        bch2_journal_space_available(j);
 
@@ -216,15 +211,16 @@ static bool __journal_entry_close(struct journal *j)
 static bool journal_entry_want_write(struct journal *j)
 {
        union journal_res_state s = READ_ONCE(j->reservations);
+       struct journal_buf *buf = journal_cur_buf(j);
        bool ret = false;
 
-       /*
-        * Don't close it yet if we already have a write in flight, but do set
-        * NEED_WRITE:
-        */
-       if (s.idx != s.unwritten_idx)
-               set_bit(JOURNAL_NEED_WRITE, &j->flags);
-       else
+       if (!buf->flush_time) {
+               buf->flush_time = local_clock() ?: 1;
+               buf->expires = jiffies;
+       }
+
+       /* Don't close it yet if we already have a write in flight: */
+       if (s.idx == s.unwritten_idx)
                ret = __journal_entry_close(j);
 
        return ret;
@@ -278,6 +274,8 @@ static int journal_entry_open(struct journal *j)
         */
        BUG_ON(buf->data->u64s);
 
+       buf->expires            = jiffies +
+               msecs_to_jiffies(c->opts.journal_flush_delay);
        buf->u64s_reserved      = j->entry_u64s_reserved;
        buf->disk_sectors       = j->cur_entry_sectors;
        buf->sectors            = min(buf->disk_sectors, buf->buf_size >> 9);
@@ -337,8 +335,19 @@ static void journal_quiesce(struct journal *j)
 static void journal_write_work(struct work_struct *work)
 {
        struct journal *j = container_of(work, struct journal, write_work.work);
+       struct bch_fs *c = container_of(j, struct bch_fs, journal);
+       struct journal_buf *buf;
+       long delta;
 
-       journal_entry_close(j);
+       spin_lock(&j->lock);
+       buf = journal_cur_buf(j);
+       delta = buf->expires - jiffies;
+
+       if (delta > 0)
+               mod_delayed_work(c->io_complete_wq, &j->write_work, delta);
+       else
+               __journal_entry_close(j);
+       spin_unlock(&j->lock);
 }
 
 static int __journal_res_get(struct journal *j, struct journal_res *res,
@@ -591,7 +600,11 @@ recheck_need_open:
                seq = res.seq;
                buf = j->buf + (seq & JOURNAL_BUF_MASK);
                buf->must_flush = true;
-               set_bit(JOURNAL_NEED_WRITE, &j->flags);
+
+               if (!buf->flush_time) {
+                       buf->flush_time = local_clock() ?: 1;
+                       buf->expires = jiffies;
+               }
 
                if (parent && !closure_wait(&buf->wait, parent))
                        BUG();
@@ -657,7 +670,11 @@ int bch2_journal_meta(struct journal *j)
 
        buf = j->buf + (res.seq & JOURNAL_BUF_MASK);
        buf->must_flush = true;
-       set_bit(JOURNAL_NEED_WRITE, &j->flags);
+
+       if (!buf->flush_time) {
+               buf->flush_time = local_clock() ?: 1;
+               buf->expires = jiffies;
+       }
 
        bch2_journal_res_put(j, &res);
 
@@ -1233,12 +1250,22 @@ void __bch2_journal_debug_to_text(struct printbuf *out, struct journal *j)
 
                pr_buf(out, "unwritten entry:\tidx %u refcount %u sectors %u\n",
                       i, journal_state_count(s, i), j->buf[i].sectors);
+               pr_indent_push(out, 2);
+
+               pr_buf(out, "refcount %u", journal_state_count(s, i));
+               pr_newline(out);
+
+               pr_buf(out, "sectors %u", j->buf[i].sectors);
+               pr_newline(out);
+
+               pr_buf(out, "expires %li ms", jiffies_to_msecs(j->buf[i].expires - jiffies));
+               pr_newline(out);
+
+               pr_indent_pop(out, 2);
        }
 
        pr_buf(out,
-              "need write:\t\t%i\n"
               "replay done:\t\t%i\n",
-              test_bit(JOURNAL_NEED_WRITE,     &j->flags),
               test_bit(JOURNAL_REPLAY_DONE,    &j->flags));
 
        pr_buf(out, "space:\n");
index bbec4d85b6bc2b01818f0fe0c1ed45101287c22d..724a8bb69978866988e1f53677b941fb07fc8016 100644 (file)
@@ -1403,13 +1403,15 @@ static void journal_write_done(struct closure *cl)
        closure_wake_up(&w->wait);
        journal_wake(j);
 
-       if (test_bit(JOURNAL_NEED_WRITE, &j->flags))
-               mod_delayed_work(c->io_complete_wq, &j->write_work, 0);
-       spin_unlock(&j->lock);
+       if (new.unwritten_idx == new.idx) {
+               struct journal_buf *buf = journal_cur_buf(j);
+               long delta = buf->expires - jiffies;
 
-       if (new.unwritten_idx != new.idx &&
-           !journal_state_count(new, new.unwritten_idx))
+               mod_delayed_work(c->io_complete_wq, &j->write_work, max(0L, delta));
+       } else if (!journal_state_count(new, new.unwritten_idx))
                closure_call(&j->io, bch2_journal_write, c->io_complete_wq, NULL);
+
+       spin_unlock(&j->lock);
 }
 
 static void journal_write_endio(struct bio *bio)
index 3012b374625f9b285a6027f77fb280330f8f226c..36843fd0c7da79dd20d176b4c686ae00eca8442e 100644 (file)
@@ -25,6 +25,8 @@ struct journal_buf {
 
        struct closure_waitlist wait;
        u64                     last_seq;       /* copy of data->last_seq */
+       unsigned long           expires;
+       u64                     flush_time;
 
        unsigned                buf_size;       /* size in bytes of @data */
        unsigned                sectors;        /* maximum size for current entry */
@@ -139,16 +141,9 @@ enum journal_space_from {
        journal_space_nr,
 };
 
-/*
- * JOURNAL_NEED_WRITE - current (pending) journal entry should be written ASAP,
- * either because something's waiting on the write to complete or because it's
- * been dirty too long and the timer's expired.
- */
-
 enum {
        JOURNAL_REPLAY_DONE,
        JOURNAL_STARTED,
-       JOURNAL_NEED_WRITE,
        JOURNAL_MAY_GET_UNRESERVED,
        JOURNAL_MAY_SKIP_FLUSH,
 };
@@ -266,7 +261,6 @@ struct journal {
        unsigned long           last_flush_write;
 
        u64                     res_get_blocked_start;
-       u64                     need_write_time;
        u64                     write_start_time;
 
        u64                     nr_flush_writes;