bcachefs: Add a lock to bch_page_state
author     Kent Overstreet <kent.overstreet@gmail.com>
           Wed, 9 Oct 2019 13:19:06 +0000 (09:19 -0400)
committer  Kent Overstreet <kent.overstreet@linux.dev>
           Sun, 22 Oct 2023 21:08:29 +0000 (17:08 -0400)
We can't use the page lock to protect it: on writeback IO error we need
to access the page state before calling end_page_writeback(), and the
page lock's semantics are completely insane, so taking it there
deadlocks.

Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
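
For context, a sketch of the deadlock being avoided (paraphrased call
flow, not code from the patch):

    /*
     * Before this patch, the writeback error path did roughly:
     *
     *   bch2_writepage_io_done()
     *     lock_page(page)         <-- can deadlock: writeback is still in
     *                                 flight, and the page lock holder may
     *                                 itself be waiting on writeback
     *     ... clear per-sector nr_replicas ...
     *     unlock_page(page)
     *   end_page_writeback(page)
     *
     * After: a spinlock inside bch_page_state guards the sector array, so
     * this path never needs the page lock.
     */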
fs/bcachefs/fs-io.c

index 90587a556cee668467a24daa04814680e7114f85..0756b11ae3e487bb247c30a8143046bc974eea15 100644
@@ -468,6 +468,7 @@ struct bch_page_sector {
 };
 
 struct bch_page_state {
+       spinlock_t              lock;
        atomic_t                write_count;
        struct bch_page_sector  s[PAGE_SECTORS];
 };
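
The new lock is what lets the sector array be modified from contexts
that cannot take the page lock. A minimal sketch of the resulting
pattern (hypothetical helper name, standard kernel spinlock API):

    #include <linux/spinlock.h>

    /*
     * Sketch only: a path that can't hold the page lock (e.g. the
     * writeback error path) takes s->lock before touching s->s[]:
     */
    static void bch_page_state_clear_replicas(struct bch_page_state *s)
    {
            unsigned i;

            spin_lock(&s->lock);
            for (i = 0; i < PAGE_SECTORS; i++)
                    s->s[i].nr_replicas = 0;
            spin_unlock(&s->lock);
    }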
@@ -523,6 +524,7 @@ static struct bch_page_state *__bch2_page_state_create(struct page *page,
        if (!s)
                return NULL;
 
+       spin_lock_init(&s->lock);
        /*
         * migrate_page_move_mapping() assumes that pages with private data
         * have their count elevated by 1.
@@ -670,6 +672,9 @@ static void bch2_clear_page_bits(struct page *page)
        if (!s)
                return;
 
+       EBUG_ON(!PageLocked(page));
+       EBUG_ON(PageWriteback(page));
+
        for (i = 0; i < ARRAY_SIZE(s->s); i++) {
                disk_res.sectors += s->s[i].replicas_reserved;
                s->s[i].replicas_reserved = 0;
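
EBUG_ON() is bcachefs' debug-build assertion; it compiles away unless
the filesystem is built with debugging enabled. Roughly (paraphrased
from bcachefs' headers):

    #ifdef CONFIG_BCACHEFS_DEBUG
    #define EBUG_ON(cond)           BUG_ON(cond)
    #else
    #define EBUG_ON(cond)
    #endif

So the PageLocked/PageWriteback checks are free in production builds;
they document that bch2_clear_page_bits() still runs under the page
lock, unlike the error path converted to s->lock below.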
@@ -699,6 +704,8 @@ static void bch2_set_page_dirty(struct bch_fs *c,
        WARN_ON((u64) page_offset(page) + offset + len >
                round_up((u64) i_size_read(&inode->v), block_bytes(c)));
 
+       spin_lock(&s->lock);
+
        for (i = round_down(offset, block_bytes(c)) >> 9;
             i < round_up(offset + len, block_bytes(c)) >> 9;
             i++) {
@@ -715,6 +722,8 @@ static void bch2_set_page_dirty(struct bch_fs *c,
                s->s[i].state = max_t(unsigned, s->s[i].state, SECTOR_DIRTY);
        }
 
+       spin_unlock(&s->lock);
+
        if (dirty_sectors)
                i_sectors_acct(c, inode, &res->quota, dirty_sectors);
 
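
The loop bounds convert the dirtied byte range into 512-byte sector
indices, widened out to block alignment. A standalone illustration with
assumed values (2048-byte blocks, i.e. four sectors per block):

    #include <stdio.h>

    /* generic versions; the kernel macros assume a power-of-2 multiple */
    #define round_down(x, y)        ((x) / (y) * (y))
    #define round_up(x, y)          ((((x) + (y) - 1) / (y)) * (y))

    int main(void)
    {
            unsigned block_bytes = 2048, offset = 1000, len = 100;

            unsigned first = round_down(offset, block_bytes) >> 9;
            unsigned last  = round_up(offset + len, block_bytes) >> 9;

            /* prints "sectors [0, 4)": a 100 byte write dirties the
             * whole containing 2k block, i.e. sectors 0-3 */
            printf("sectors [%u, %u)\n", first, last);
            return 0;
    }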
@@ -798,10 +807,6 @@ void bch2_invalidate_folio(struct folio *folio, size_t offset, size_t length)
 
 bool bch2_release_folio(struct folio *folio, gfp_t gfp_mask)
 {
-       /* XXX: this can't take locks that are held while we allocate memory */
-       EBUG_ON(!PageLocked(&folio->page));
-       EBUG_ON(folio_test_writeback(folio));
-
        if (folio_test_dirty(folio) || folio_test_writeback(folio))
                return false;
 
@@ -1183,11 +1188,11 @@ static void bch2_writepage_io_done(struct closure *cl)
                        SetPageError(bvec->bv_page);
                        mapping_set_error(bvec->bv_page->mapping, -EIO);
 
-                       lock_page(bvec->bv_page);
-                       s = bch2_page_state(bvec->bv_page);
+                       s = __bch2_page_state(bvec->bv_page);
+                       spin_lock(&s->lock);
                        for (i = 0; i < PAGE_SECTORS; i++)
                                s->s[i].nr_replicas = 0;
-                       unlock_page(bvec->bv_page);
+                       spin_unlock(&s->lock);
                }
        }
 
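
Dropping lock_page() in favor of __bch2_page_state() + s->lock is the
actual deadlock fix: the double-underscore accessor doesn't assert the
page lock. The two accessors look roughly like this (paraphrased from
fs-io.c, not part of this patch):

    static inline struct bch_page_state *__bch2_page_state(struct page *page)
    {
            return page_has_private(page)
                    ? (struct bch_page_state *) page_private(page)
                    : NULL;
    }

    /* for contexts where the page lock is (still) held: */
    static inline struct bch_page_state *bch2_page_state(struct page *page)
    {
            EBUG_ON(!PageLocked(page));
            return __bch2_page_state(page);
    }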
@@ -2978,9 +2983,12 @@ static void mark_range_unallocated(struct bch_inode_info *inode,
                        folio_lock(folio);
                        s = bch2_page_state(&folio->page);
 
-                       if (s)
+                       if (s) {
+                               spin_lock(&s->lock);
                                for (j = 0; j < PAGE_SECTORS; j++)
                                        s->s[j].nr_replicas = 0;
+                               spin_unlock(&s->lock);
+                       }
 
                        folio_unlock(folio);
                }
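
The net effect is a simple locking hierarchy, summarized here as a
sketch (not from the patch text):

    /*
     *   folio_lock(folio)           -- paths that also touch page flags,
     *                                  e.g. mark_range_unallocated()
     *       spin_lock(&s->lock)     -- protects the s->s[] sector array
     *
     * bch2_writepage_io_done() takes only s->lock, never the page lock,
     * so it can safely run before end_page_writeback().
     */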