From eb8e6e9ccbb4ba37c04a7cff032975b4df7d63c7 Mon Sep 17 00:00:00 2001
From: Kent Overstreet
Date: Wed, 11 Nov 2020 12:33:12 -0500
Subject: [PATCH] bcachefs: Deadlock prevention for ei_pagecache_lock

In the dio write path, when get_user_pages() invokes the fault handler we
have a recursive locking situation - we have to handle the lock ordering
ourselves or we have a deadlock: this patch addresses that by checking
for locking ordering violations and doing the unlock/relock dance if
necessary.

Signed-off-by: Kent Overstreet
Signed-off-by: Kent Overstreet
---
 fs/bcachefs/fs-io.c | 68 +++++++++++++++++++++++++++++++++++++++++++--
 fs/bcachefs/fs.c    |  5 ++++
 fs/bcachefs/fs.h    |  1 +
 3 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 658d19c04b995..1afdd775ffb3b 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -44,6 +44,22 @@ static inline bool bio_full(struct bio *bio, unsigned len)
 	return false;
 }
 
+static inline struct address_space *faults_disabled_mapping(void)
+{
+	return (void *) (((unsigned long) current->faults_disabled_mapping) & ~1UL);
+}
+
+static inline void set_fdm_dropped_locks(void)
+{
+	current->faults_disabled_mapping =
+		(void *) (((unsigned long) current->faults_disabled_mapping)|1);
+}
+
+static inline bool fdm_dropped_locks(void)
+{
+	return ((unsigned long) current->faults_disabled_mapping) & 1;
+}
+
 struct quota_res {
 	u64				sectors;
 };
@@ -501,10 +517,35 @@ static void bch2_set_page_dirty(struct bch_fs *c,
 vm_fault_t bch2_page_fault(struct vm_fault *vmf)
 {
 	struct file *file = vmf->vma->vm_file;
+	struct address_space *mapping = file->f_mapping;
+	struct address_space *fdm = faults_disabled_mapping();
 	struct bch_inode_info *inode = file_bch_inode(file);
 	int ret;
 
+	if (fdm == mapping)
+		return VM_FAULT_SIGBUS;
+
+	/* Lock ordering: */
+	if (fdm > mapping) {
+		struct bch_inode_info *fdm_host = to_bch_ei(fdm->host);
+
+		if (bch2_pagecache_add_tryget(&inode->ei_pagecache_lock))
+			goto got_lock;
+
+		bch2_pagecache_block_put(&fdm_host->ei_pagecache_lock);
+
+		bch2_pagecache_add_get(&inode->ei_pagecache_lock);
+		bch2_pagecache_add_put(&inode->ei_pagecache_lock);
+
+		bch2_pagecache_block_get(&fdm_host->ei_pagecache_lock);
+
+		/* Signal that lock has been dropped: */
+		set_fdm_dropped_locks();
+		return VM_FAULT_SIGBUS;
+	}
+
 	bch2_pagecache_add_get(&inode->ei_pagecache_lock);
+got_lock:
 	ret = filemap_fault(vmf);
 	bch2_pagecache_add_put(&inode->ei_pagecache_lock);
 
@@ -1765,14 +1806,16 @@ static long bch2_dio_write_loop(struct dio_write *dio)
 	struct bio *bio = &dio->op.wbio.bio;
 	struct bvec_iter_all iter;
 	struct bio_vec *bv;
-	unsigned unaligned;
-	bool sync = dio->sync;
+	unsigned unaligned, iter_count;
+	bool sync = dio->sync, dropped_locks;
 	long ret;
 
 	if (dio->loop)
 		goto loop;
 
 	while (1) {
+		iter_count = dio->iter.count;
+
 		if (kthread)
 			kthread_use_mm(dio->mm);
 		BUG_ON(current->faults_disabled_mapping);
@@ -1780,13 +1823,34 @@ static long bch2_dio_write_loop(struct dio_write *dio)
 
 		ret = bio_iov_iter_get_pages(bio, &dio->iter);
 
+		dropped_locks = fdm_dropped_locks();
+
 		current->faults_disabled_mapping = NULL;
 		if (kthread)
 			kthread_unuse_mm(dio->mm);
 
+		/*
+		 * If the fault handler returned an error but also signalled
+		 * that it dropped & retook ei_pagecache_lock, we just need to
+		 * re-shoot down the page cache and retry:
+		 */
+		if (dropped_locks && ret)
+			ret = 0;
+
 		if (unlikely(ret < 0))
 			goto err;
 
+		if (unlikely(dropped_locks)) {
+			ret = write_invalidate_inode_pages_range(mapping,
+					req->ki_pos,
+					req->ki_pos + iter_count - 1);
+			if (unlikely(ret))
+				goto err;
+
+			if (!bio->bi_iter.bi_size)
+				continue;
+		}
+
 		unaligned = bio->bi_iter.bi_size & (block_bytes(c) - 1);
 		bio->bi_iter.bi_size -= unaligned;
 		iov_iter_revert(&dio->iter, unaligned);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 3e3ab4e53f33f..231a5433577f1 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -93,6 +93,11 @@ void bch2_pagecache_add_put(struct pagecache_lock *lock)
 	__pagecache_lock_put(lock, 1);
 }
 
+bool bch2_pagecache_add_tryget(struct pagecache_lock *lock)
+{
+	return __pagecache_lock_tryget(lock, 1);
+}
+
 void bch2_pagecache_add_get(struct pagecache_lock *lock)
 {
 	__pagecache_lock_get(lock, 1);
diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h
index b3a2993dd9bc7..7c095b856b056 100644
--- a/fs/bcachefs/fs.h
+++ b/fs/bcachefs/fs.h
@@ -26,6 +26,7 @@ static inline void pagecache_lock_init(struct pagecache_lock *lock)
 }
 
 void bch2_pagecache_add_put(struct pagecache_lock *);
+bool bch2_pagecache_add_tryget(struct pagecache_lock *);
 void bch2_pagecache_add_get(struct pagecache_lock *);
 void bch2_pagecache_block_put(struct pagecache_lock *);
 void bch2_pagecache_block_get(struct pagecache_lock *);
-- 
2.30.2