From 0ce2dbbe9915af85b2ebafe6dfeca6813ba5e13c Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Sun, 3 Mar 2019 15:15:55 -0500
Subject: [PATCH] bcachefs: ja->discard_idx, ja->dirty_idx

Signed-off-by: Kent Overstreet
---
 fs/bcachefs/journal.c         | 42 ++++++++++++++---------
 fs/bcachefs/journal_io.c      | 12 ++++---
 fs/bcachefs/journal_reclaim.c | 63 ++++++++++++++++++++++-------------
 fs/bcachefs/journal_types.h   | 24 ++++++-------
 4 files changed, 83 insertions(+), 58 deletions(-)

diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 3b3c342b2df2c..17add726f2ace 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -760,6 +760,7 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 
 	while (ja->nr < nr) {
 		struct open_bucket *ob = NULL;
+		unsigned pos;
 		long bucket;
 
 		if (new_fs) {
@@ -786,20 +787,24 @@ static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
 			preempt_disable();
 		}
 
-		__array_insert_item(ja->buckets, ja->nr, ja->last_idx);
-		__array_insert_item(ja->bucket_seq, ja->nr, ja->last_idx);
-		__array_insert_item(journal_buckets->buckets, ja->nr, ja->last_idx);
+		pos = ja->nr ? (ja->cur_idx + 1) % ja->nr : 0;
+		__array_insert_item(ja->buckets, ja->nr, pos);
+		__array_insert_item(ja->bucket_seq, ja->nr, pos);
+		__array_insert_item(journal_buckets->buckets, ja->nr, pos);
+		ja->nr++;
 
-		ja->buckets[ja->last_idx] = bucket;
-		ja->bucket_seq[ja->last_idx] = 0;
-		journal_buckets->buckets[ja->last_idx] = cpu_to_le64(bucket);
+		ja->buckets[pos] = bucket;
+		ja->bucket_seq[pos] = 0;
+		journal_buckets->buckets[pos] = cpu_to_le64(bucket);
 
-		if (ja->last_idx < ja->nr) {
-			if (ja->cur_idx >= ja->last_idx)
-				ja->cur_idx++;
-			ja->last_idx++;
-		}
-		ja->nr++;
+		if (pos <= ja->discard_idx)
+			ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
+		if (pos <= ja->dirty_idx_ondisk)
+			ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
+		if (pos <= ja->dirty_idx)
+			ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
+		if (pos <= ja->cur_idx)
+			ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
 
 		bch2_mark_metadata_bucket(c, ca, bucket, BCH_DATA_JOURNAL,
 					  ca->mi.bucket_size,
@@ -1042,6 +1047,7 @@ int bch2_fs_journal_init(struct journal *j)
 	mutex_init(&j->blacklist_lock);
 	INIT_LIST_HEAD(&j->seq_blacklist);
 	mutex_init(&j->reclaim_lock);
+	mutex_init(&j->discard_lock);
 
 	lockdep_init_map(&j->res_map, "journal res", &res_key, 0);
 
@@ -1138,13 +1144,17 @@ ssize_t bch2_journal_print_debug(struct journal *j, char *buf)
 			 "dev %u:\n"
 			 "\tnr\t\t%u\n"
 			 "\tavailable\t%u:%u\n"
-			 "\tcur_idx\t\t%u (seq %llu)\n"
-			 "\tlast_idx\t%u (seq %llu)\n",
+			 "\tdiscard_idx\t\t%u\n"
+			 "\tdirty_idx_ondisk\t%u (seq %llu)\n"
+			 "\tdirty_idx\t\t%u (seq %llu)\n"
+			 "\tcur_idx\t\t%u (seq %llu)\n",
 			 iter, ja->nr,
 			 bch2_journal_dev_buckets_available(j, ja),
 			 ja->sectors_free,
-			 ja->cur_idx, ja->bucket_seq[ja->cur_idx],
-			 ja->last_idx, ja->bucket_seq[ja->last_idx]);
+			 ja->discard_idx,
+			 ja->dirty_idx_ondisk, ja->bucket_seq[ja->dirty_idx_ondisk],
+			 ja->dirty_idx, ja->bucket_seq[ja->dirty_idx],
+			 ja->cur_idx, ja->bucket_seq[ja->cur_idx]);
 	}
 
 	spin_unlock(&j->lock);
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index d4b82344221c6..b6a51dff09784 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -625,11 +625,12 @@ static void bch2_journal_read_device(struct closure *cl)
 		ja->sectors_free = 0;
 
 		/*
-		 * Set last_idx to indicate the entire journal is full and needs to be
+		 * Set dirty_idx to indicate the entire journal is full and needs to be
 		 * reclaimed - journal reclaim will immediately reclaim whatever isn't
 		 * pinned when it first runs:
 		 */
-		ja->last_idx = (ja->cur_idx + 1) % ja->nr;
+		ja->discard_idx = ja->dirty_idx_ondisk =
+			ja->dirty_idx = (ja->cur_idx + 1) % ja->nr;
 out:
 	kvpfree(buf.data, buf.size);
 	percpu_ref_put(&ca->io_ref);
@@ -1069,12 +1070,13 @@ static void journal_write_done(struct closure *cl)
 		goto err;
 
 	spin_lock(&j->lock);
-	j->seq_ondisk = seq;
-	j->last_seq_ondisk = last_seq;
-
 	if (seq >= j->pin.front)
 		journal_seq_pin(j, seq)->devs = devs;
 
+	j->seq_ondisk = seq;
+	j->last_seq_ondisk = last_seq;
+	bch2_journal_space_available(j);
+
 	/*
 	 * Updating last_seq_ondisk may let bch2_journal_reclaim_work() discard
 	 * more buckets:
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 431afeab42b00..3a85fb8b85269 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -14,22 +14,20 @@ unsigned bch2_journal_dev_buckets_available(struct journal *j,
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	unsigned next = (ja->cur_idx + 1) % ja->nr;
-	unsigned available = (ja->last_idx + ja->nr - next) % ja->nr;
+	unsigned available = (ja->discard_idx + ja->nr - next) % ja->nr;
 
 	/*
 	 * Allocator startup needs some journal space before we can do journal
 	 * replay:
 	 */
-	if (available &&
-	    test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
-		available--;
+	if (available && test_bit(BCH_FS_ALLOCATOR_STARTED, &c->flags))
+		--available;
 
 	/*
 	 * Don't use the last bucket unless writing the new last_seq
 	 * will make another bucket available:
 	 */
-	if (available &&
-	    journal_last_seq(j) <= ja->bucket_seq[ja->last_idx])
+	if (available && ja->dirty_idx_ondisk == ja->dirty_idx)
 		--available;
 
 	return available;
@@ -55,12 +53,34 @@ void bch2_journal_space_available(struct journal *j)
 	for_each_member_device_rcu(ca, c, i,
 				   &c->rw_devs[BCH_DATA_JOURNAL]) {
 		struct journal_device *ja = &ca->journal;
-		unsigned buckets_this_device, sectors_this_device;
 
 		if (!ja->nr)
 			continue;
 
+		while (ja->dirty_idx != ja->cur_idx &&
+		       ja->bucket_seq[ja->dirty_idx] < journal_last_seq(j))
+			ja->dirty_idx = (ja->dirty_idx + 1) % ja->nr;
+
+		while (ja->dirty_idx_ondisk != ja->dirty_idx &&
+		       ja->bucket_seq[ja->dirty_idx_ondisk] < j->last_seq_ondisk)
+			ja->dirty_idx_ondisk = (ja->dirty_idx_ondisk + 1) % ja->nr;
+
 		nr_online++;
+	}
+
+	if (nr_online < c->opts.metadata_replicas_required) {
+		ret = -EROFS;
+		sectors_next_entry = 0;
+		goto out;
+	}
+
+	for_each_member_device_rcu(ca, c, i,
+				   &c->rw_devs[BCH_DATA_JOURNAL]) {
+		struct journal_device *ja = &ca->journal;
+		unsigned buckets_this_device, sectors_this_device;
+
+		if (!ja->nr)
+			continue;
 
 		buckets_this_device = bch2_journal_dev_buckets_available(j, ja);
 		sectors_this_device = ja->sectors_free;
@@ -100,20 +120,17 @@ void bch2_journal_space_available(struct journal *j)
 		nr_devs++;
 	}
 
-	rcu_read_unlock();
-
-	if (nr_online < c->opts.metadata_replicas_required) {
-		ret = -EROFS;
-		sectors_next_entry = 0;
-	} else if (!sectors_next_entry ||
-		   nr_devs < min_t(unsigned, nr_online,
-				   c->opts.metadata_replicas)) {
+	if (!sectors_next_entry ||
+	    nr_devs < min_t(unsigned, nr_online, c->opts.metadata_replicas)) {
 		ret = -ENOSPC;
 		sectors_next_entry = 0;
 	} else if (!fifo_free(&j->pin)) {
 		ret = -ENOSPC;
 		sectors_next_entry = 0;
 	}
+out:
+	rcu_read_unlock();
 
 	j->cur_entry_sectors = sectors_next_entry;
 	j->cur_entry_error = ret;
@@ -129,25 +146,23 @@ static bool should_discard_bucket(struct journal *j, struct journal_device *ja)
 	bool ret;
 
 	spin_lock(&j->lock);
-	ret = ja->nr &&
-		ja->last_idx != ja->cur_idx &&
-		ja->bucket_seq[ja->last_idx] < j->last_seq_ondisk;
+	ret = ja->discard_idx != ja->dirty_idx_ondisk;
 	spin_unlock(&j->lock);
 
 	return ret;
 }
 
 /*
- * Advance ja->last_idx as long as it points to buckets that are no longer
+ * Advance ja->discard_idx as long as it points to buckets that are no longer
  * dirty, issuing discards if necessary:
  */
-static void journal_do_discards(struct journal *j)
+static void bch2_journal_do_discards(struct journal *j)
 {
 	struct bch_fs *c = container_of(j, struct bch_fs, journal);
 	struct bch_dev *ca;
 	unsigned iter;
 
-	mutex_lock(&j->reclaim_lock);
+	mutex_lock(&j->discard_lock);
 
 	for_each_rw_member(ca, c, iter) {
 		struct journal_device *ja = &ca->journal;
@@ -157,18 +172,18 @@ static void journal_do_discards(struct journal *j)
 		    bdev_max_discard_sectors(ca->disk_sb.bdev))
 			blkdev_issue_discard(ca->disk_sb.bdev,
 					bucket_to_sector(ca,
-						ja->buckets[ja->last_idx]),
+						ja->buckets[ja->discard_idx]),
 					ca->mi.bucket_size, GFP_NOIO);
 
 		spin_lock(&j->lock);
-		ja->last_idx = (ja->last_idx + 1) % ja->nr;
+		ja->discard_idx = (ja->discard_idx + 1) % ja->nr;
 
 		bch2_journal_space_available(j);
 		spin_unlock(&j->lock);
 	}
 
-	mutex_unlock(&j->reclaim_lock);
+	mutex_unlock(&j->discard_lock);
 }
 
 /*
@@ -399,7 +414,7 @@ void bch2_journal_reclaim_work(struct work_struct *work)
 	unsigned iter, bucket_to_flush, min_nr = 0;
 	u64 seq_to_flush = 0;
 
-	journal_do_discards(j);
+	bch2_journal_do_discards(j);
 
 	mutex_lock(&j->reclaim_lock);
 	spin_lock(&j->lock);
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 2f48008820ac8..09b2d22230335 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -193,9 +193,6 @@ struct journal {
 		struct journal_entry_pin_list *data;
 	}			pin;
 
-	struct journal_entry_pin *flush_in_progress;
-	wait_queue_head_t	pin_flush_wait;
-
 	u64			replay_journal_seq;
 
 	struct mutex		blacklist_lock;
@@ -206,10 +203,13 @@ struct journal {
 	spinlock_t		err_lock;
 
 	struct delayed_work	reclaim_work;
+	struct mutex		reclaim_lock;
 	unsigned long		last_flushed;
+	struct journal_entry_pin *flush_in_progress;
+	wait_queue_head_t	pin_flush_wait;
 
-	/* protects advancing ja->last_idx: */
-	struct mutex		reclaim_lock;
+	/* protects advancing ja->discard_idx: */
+	struct mutex		discard_lock;
 
 	unsigned		write_delay_ms;
 	unsigned		reclaim_delay_ms;
@@ -240,17 +240,15 @@ struct journal_device {
 	unsigned		sectors_free;
 
-	/* Journal bucket we're currently writing to */
-	unsigned		cur_idx;
-
-	/* Last journal bucket that still contains an open journal entry */
-
 	/*
-	 * j->lock and j->reclaim_lock must both be held to modify, j->lock
-	 * sufficient to read:
+	 * discard_idx <= dirty_idx_ondisk <= dirty_idx <= cur_idx:
 	 */
-	unsigned		last_idx;
+	unsigned		discard_idx;		/* Next bucket to discard */
+	unsigned		dirty_idx_ondisk;
+	unsigned		dirty_idx;
+	unsigned		cur_idx;		/* Journal bucket we're currently writing to */
 
 	unsigned		nr;
+
 	u64			*buckets;
 
 	/* Bio for journal reads/writes to this device */
-- 
2.30.2
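Illustration (not part of the patch): a standalone sketch of the circular bucket-index arithmetic the patch introduces. Walking the ring forward from ja->discard_idx, buckets up to dirty_idx_ondisk only await a discard; buckets up to dirty_idx are no longer pinned in memory, but the last_seq that unpins them has not yet reached disk; buckets up to cur_idx still hold pinned entries; and the buckets after cur_idx, up to discard_idx, are free for new journal writes. The struct name ja_sim, the helper buckets_available() and the example numbers are made up for illustration; only the modulo expression mirrors bch2_journal_dev_buckets_available(), which additionally reserves a bucket during allocator startup and when dirty_idx_ondisk == dirty_idx.

#include <stdio.h>

/* Toy model of struct journal_device's ring indices (names are illustrative). */
struct ja_sim {
	unsigned	nr;			/* number of journal buckets */
	unsigned	discard_idx;		/* next bucket to discard */
	unsigned	dirty_idx_ondisk;	/* clean on disk up to here */
	unsigned	dirty_idx;		/* clean in memory up to here */
	unsigned	cur_idx;		/* bucket currently being written */
};

/*
 * Free buckets lie between the bucket after cur_idx and discard_idx, going
 * forward around the ring - the same modulo arithmetic used by
 * bch2_journal_dev_buckets_available() before its extra reservations.
 */
static unsigned buckets_available(const struct ja_sim *ja)
{
	unsigned next = (ja->cur_idx + 1) % ja->nr;

	return (ja->discard_idx + ja->nr - next) % ja->nr;
}

int main(void)
{
	/* 8-bucket ring; 6 -> 7 -> 0 -> 2 keeps discard <= ondisk <= dirty <= cur in ring order */
	struct ja_sim ja = {
		.nr			= 8,
		.discard_idx		= 6,
		.dirty_idx_ondisk	= 7,
		.dirty_idx		= 0,
		.cur_idx		= 2,
	};

	printf("free buckets: %u\n", buckets_available(&ja));	/* 3: buckets 3, 4 and 5 */
	return 0;
}

With cur_idx = 2 and discard_idx = 6, buckets 3, 4 and 5 are reusable: (6 + 8 - 3) % 8 = 3.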