From: Linus Torvalds Date: Sat, 9 Jun 2018 00:21:52 +0000 (-0700) Subject: Merge tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdim... X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=7d3bf613e99abbd96ac7b90ee3694a246c975021;p=linux.git Merge tag 'libnvdimm-for-4.18' of git://git./linux/kernel/git/nvdimm/nvdimm Pull libnvdimm updates from Dan Williams: "This adds a user for the new 'bytes-remaining' updates to memcpy_mcsafe() that you already received through Ingo via the x86-dax- for-linus pull. Not included here, but still targeting this cycle, is support for handling memory media errors (poison) consumed via userspace dax mappings. Summary: - DAX broke a fundamental assumption of truncate of file mapped pages. The truncate path assumed that it is safe to disconnect a pinned page from a file and let the filesystem reclaim the physical block. With DAX the page is equivalent to the filesystem block. Introduce dax_layout_busy_page() to enable filesystems to wait for pinned DAX pages to be released. Without this wait a filesystem could allocate blocks under active device-DMA to a new file. - DAX arranges for the block layer to be bypassed and uses dax_direct_access() + copy_to_iter() to satisfy read(2) calls. However, the memcpy_mcsafe() facility is available through the pmem block driver. In order to safely handle media errors, via the DAX block-layer bypass, introduce copy_to_iter_mcsafe(). - Fix cache management policy relative to the ACPI NFIT Platform Capabilities Structure to properly elide cache flushes when they are not necessary. The table indicates whether CPU caches are power-fail protected. Clarify that a deep flush is always performed on REQ_{FUA,PREFLUSH} requests" * tag 'libnvdimm-for-4.18' of git://git.kernel.org/pub/scm/linux/kernel/git/nvdimm/nvdimm: (21 commits) dax: Use dax_write_cache* helpers libnvdimm, pmem: Do not flush power-fail protected CPU caches libnvdimm, pmem: Unconditionally deep flush on *sync libnvdimm, pmem: Complete REQ_FLUSH => REQ_PREFLUSH acpi, nfit: Remove ecc_unit_size dax: dax_insert_mapping_entry always succeeds libnvdimm, e820: Register all pmem resources libnvdimm: Debug probe times linvdimm, pmem: Preserve read-only setting for pmem devices x86, nfit_test: Add unit test for memcpy_mcsafe() pmem: Switch to copy_to_iter_mcsafe() dax: Report bytes remaining in dax_iomap_actor() dax: Introduce a ->copy_to_iter dax operation uio, lib: Fix CONFIG_ARCH_HAS_UACCESS_MCSAFE compilation xfs, dax: introduce xfs_break_dax_layouts() xfs: prepare xfs_break_layouts() for another layout type xfs: prepare xfs_break_layouts() to be called with XFS_MMAPLOCK_EXCL mm, fs, dax: handle layout changes to pinned dax mappings mm: fix __gup_device_huge vs unmap mm: introduce MEMORY_DEVICE_FS_DAX and CONFIG_DEV_PAGEMAP_OPS ... --- 7d3bf613e99abbd96ac7b90ee3694a246c975021 diff --cc drivers/dax/super.c index 1d7bd96511f00,88e77b7f0c4bb..903d9c473749c --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@@ -80,11 -80,13 +80,12 @@@ EXPORT_SYMBOL_GPL(fs_dax_get_by_bdev) * This is a library function for filesystems to check if the block device * can be mounted with dax option. * - * Return: negative errno if unsupported, 0 if supported. + * Return: true if supported, false if unsupported */ -int __bdev_dax_supported(struct super_block *sb, int blocksize) +bool __bdev_dax_supported(struct block_device *bdev, int blocksize) { - struct block_device *bdev = sb->s_bdev; struct dax_device *dax_dev; + bool dax_enabled = false; pgoff_t pgoff; int err, id; void *kaddr; @@@ -134,15 -135,22 +135,22 @@@ * on being able to do (page_address(pfn_to_page())). */ WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API)); + dax_enabled = true; } else if (pfn_t_devmap(pfn)) { - /* pass */; - } else { + struct dev_pagemap *pgmap; + + pgmap = get_dev_pagemap(pfn_t_to_pfn(pfn), NULL); + if (pgmap && pgmap->type == MEMORY_DEVICE_FS_DAX) + dax_enabled = true; + put_dev_pagemap(pgmap); + } + + if (!dax_enabled) { - pr_debug("VFS (%s): error: dax support not enabled\n", - sb->s_id); - return -EOPNOTSUPP; + pr_debug("%s: error: dax support not enabled\n", + bdevname(bdev, buf)); + return false; } - - return 0; + return true; } EXPORT_SYMBOL_GPL(__bdev_dax_supported); #endif diff --cc fs/dax.c index 08656a2f2aa6c,1f5f14a2ce4c5..641192808bb69 --- a/fs/dax.c +++ b/fs/dax.c @@@ -910,9 -1007,8 +1007,8 @@@ static vm_fault_t dax_load_hole(struct { struct inode *inode = mapping->host; unsigned long vaddr = vmf->address; - int ret = VM_FAULT_NOPAGE; + vm_fault_t ret = VM_FAULT_NOPAGE; struct page *zero_page; - void *entry2; pfn_t pfn; zero_page = ZERO_PAGE(0); @@@ -922,14 -1018,9 +1018,9 @@@ } pfn = page_to_pfn_t(zero_page); - entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn, - RADIX_DAX_ZERO_PAGE, false); - if (IS_ERR(entry2)) { - ret = VM_FAULT_SIGBUS; - goto out; - } - + dax_insert_mapping_entry(mapping, vmf, entry, pfn, RADIX_DAX_ZERO_PAGE, + false); - vm_insert_mixed(vmf->vma, vaddr, pfn); + ret = vmf_insert_mixed(vmf->vma, vaddr, pfn); out: trace_dax_load_hole(inode, vmf, ret); return ret; diff --cc fs/xfs/xfs_file.c index 0e3fb8978344d,19b0c3e0e2322..bed07dfbb85e9 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@@ -724,13 -702,85 +724,76 @@@ xfs_file_write_iter * allow an operation to fall back to buffered mode. */ ret = xfs_file_dio_aio_write(iocb, from); - if (ret == -EREMCHG) - goto buffered; - } else { -buffered: - ret = xfs_file_buffered_aio_write(iocb, from); + if (ret != -EREMCHG) + return ret; } - if (ret > 0) { - XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret); - - /* Handle various SYNC-type writes */ - ret = generic_write_sync(iocb, ret); - } - return ret; + return xfs_file_buffered_aio_write(iocb, from); } + static void + xfs_wait_dax_page( + struct inode *inode, + bool *did_unlock) + { + struct xfs_inode *ip = XFS_I(inode); + + *did_unlock = true; + xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); + schedule(); + xfs_ilock(ip, XFS_MMAPLOCK_EXCL); + } + + static int + xfs_break_dax_layouts( + struct inode *inode, + uint iolock, + bool *did_unlock) + { + struct page *page; + + ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL)); + + page = dax_layout_busy_page(inode->i_mapping); + if (!page) + return 0; + + return ___wait_var_event(&page->_refcount, + atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE, + 0, 0, xfs_wait_dax_page(inode, did_unlock)); + } + + int + xfs_break_layouts( + struct inode *inode, + uint *iolock, + enum layout_break_reason reason) + { + bool retry; + int error; + + ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL)); + + do { + retry = false; + switch (reason) { + case BREAK_UNMAP: + error = xfs_break_dax_layouts(inode, *iolock, &retry); + if (error || retry) + break; + /* fall through */ + case BREAK_WRITE: + error = xfs_break_leased_layouts(inode, iolock, &retry); + break; + default: + WARN_ON_ONCE(1); + error = -EINVAL; + } + } while (error == 0 && retry); + + return error; + } + #define XFS_FALLOC_FL_SUPPORTED \ (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | \ diff --cc include/linux/dax.h index 88504e87cd6ca,b51db4264c83b..3855e3800f483 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@@ -83,11 -86,12 +86,13 @@@ static inline void fs_put_dax(struct da struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev); int dax_writeback_mapping_range(struct address_space *mapping, struct block_device *bdev, struct writeback_control *wbc); + + struct page *dax_layout_busy_page(struct address_space *mapping); #else -static inline int bdev_dax_supported(struct super_block *sb, int blocksize) +static inline bool bdev_dax_supported(struct block_device *bdev, + int blocksize) { - return -EOPNOTSUPP; + return false; } static inline struct dax_device *fs_dax_get_by_host(const char *host)