From: Linus Torvalds
Date: Thu, 15 Dec 2022 21:12:15 +0000 (-0800)
Subject: Merge tag 'vfio-v6.2-rc1' of https://github.com/awilliam/linux-vfio
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=785d21ba2f447fb26df4b22f45653763beb767ea;p=linux.git

Merge tag 'vfio-v6.2-rc1' of https://github.com/awilliam/linux-vfio

Pull VFIO updates from Alex Williamson:

- Replace deprecated git://github.com link in MAINTAINERS (Palmer Dabbelt)
- Simplify vfio/mlx5 with module_pci_driver() helper (Shang XiaoJing)
- Drop unnecessary buffer from ACPI call (Rafael Mendonca)
- Correct latent missing include issue in iova-bitmap and fix support for unaligned bitmaps. Follow-up with better fix through refactor (Joao Martins)
- Rework ccw mdev driver to split private data from parent structure, better aligning with the mdev lifecycle and allowing us to remove a temporary workaround (Eric Farman)
- Add an interface to get an estimated migration data size for a device, allowing userspace to make informed decisions, e.g.
more accurately predicting VM downtime (Yishai Hadas)
- Fix minor typo in vfio/mlx5 array declaration (Yishai Hadas)
- Simplify module and Kconfig through consolidating SPAPR/EEH code and config options and folding virqfd module into main vfio module (Jason Gunthorpe)
- Fix error path from device_register() across all vfio mdev and sample drivers (Alex Williamson)
- Define migration pre-copy interface and implement for vfio/mlx5 devices, allowing portions of the device state to be saved while the device continues operation, towards reducing the stop-copy state size (Jason Gunthorpe, Yishai Hadas, Shay Drory)
- Implement pre-copy for hisi_acc devices (Shameer Kolothum)
- Fixes to mdpy mdev driver remove path and error path on probe (Shang XiaoJing)
- vfio/mlx5 fixes for incorrect return after copy_to_user() fault and incorrect buffer freeing (Dan Carpenter)

* tag 'vfio-v6.2-rc1' of https://github.com/awilliam/linux-vfio: (42 commits)
  vfio/mlx5: error pointer dereference in error handling
  vfio/mlx5: fix error code in mlx5vf_precopy_ioctl()
  samples: vfio-mdev: Fix missing pci_disable_device() in mdpy_fb_probe()
  hisi_acc_vfio_pci: Enable PRE_COPY flag
  hisi_acc_vfio_pci: Move the dev compatibility tests for early check
  hisi_acc_vfio_pci: Introduce support for PRE_COPY state transitions
  hisi_acc_vfio_pci: Add support for precopy IOCTL
  vfio/mlx5: Enable MIGRATION_PRE_COPY flag
  vfio/mlx5: Fallback to STOP_COPY upon specific PRE_COPY error
  vfio/mlx5: Introduce multiple loads
  vfio/mlx5: Consider temporary end of stream as part of PRE_COPY
  vfio/mlx5: Introduce vfio precopy ioctl implementation
  vfio/mlx5: Introduce SW headers for migration states
  vfio/mlx5: Introduce device transitions of PRE_COPY
  vfio/mlx5: Refactor to use queue based data chunks
  vfio/mlx5: Refactor migration file state
  vfio/mlx5: Refactor MKEY usage
  vfio/mlx5: Refactor PD usage
  vfio/mlx5: Enforce a single SAVE command at a time
  vfio: Extend the device migration protocol with PRE_COPY
  ...
--- 785d21ba2f447fb26df4b22f45653763beb767ea diff --cc drivers/s390/cio/vfio_ccw_fsm.c index 0a5e8b4a67434,e67fad897af3a..2784a4e4d2bef --- a/drivers/s390/cio/vfio_ccw_fsm.c +++ b/drivers/s390/cio/vfio_ccw_fsm.c @@@ -25,11 -25,9 +25,9 @@@ static int fsm_io_helper(struct vfio_cc unsigned long flags; int ret; - sch = private->sch; - spin_lock_irqsave(sch->lock, flags); - orb = cp_get_orb(&private->cp, (u32)(addr_t)sch, sch->lpm); + orb = cp_get_orb(&private->cp, (u32)virt_to_phys(sch), sch->lpm); if (!orb) { ret = -EIO; goto out; diff --cc drivers/vfio/Kconfig index 286c1663bd756,0b8d53f63c7e5..a8f5446294674 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@@ -46,17 -38,6 +46,12 @@@ config VFIO_NOIOMM this mode since there is no IOMMU to provide DMA translation. If you don't know what to do here, say N. +endif + - config VFIO_SPAPR_EEH - tristate - depends on EEH && VFIO_IOMMU_SPAPR_TCE - default VFIO - +config VFIO_VIRQFD - tristate ++ bool + select EVENTFD + default n source "drivers/vfio/pci/Kconfig" source "drivers/vfio/platform/Kconfig" diff --cc drivers/vfio/Makefile index 3783db7e8082c,0721ed4831c92..70e7dcb302efd --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@@ -2,17 -2,12 +2,14 @@@ obj-$(CONFIG_VFIO) += vfio.o vfio-y += vfio_main.o \ - iova_bitmap.o \ - container.o + group.o \ + iova_bitmap.o +vfio-$(CONFIG_IOMMUFD) += iommufd.o +vfio-$(CONFIG_VFIO_CONTAINER) += container.o + vfio-$(CONFIG_VFIO_VIRQFD) += virqfd.o - obj-$(CONFIG_VFIO_VIRQFD) += vfio_virqfd.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o - obj-$(CONFIG_VFIO_SPAPR_EEH) += vfio_spapr_eeh.o obj-$(CONFIG_VFIO_PCI) += pci/ obj-$(CONFIG_VFIO_PLATFORM) += platform/ obj-$(CONFIG_VFIO_MDEV) += mdev/ diff --cc drivers/vfio/vfio.h index 2e05418fd18df,a7113b4baaa24..f8219a438bfbf --- a/drivers/vfio/vfio.h +++ b/drivers/vfio/vfio.h @@@ -144,94 -119,24 +144,107 @@@ int vfio_container_attach_group(struct void 
vfio_group_detach_container(struct vfio_group *group); void vfio_device_container_register(struct vfio_device *device); void vfio_device_container_unregister(struct vfio_device *device); -long vfio_container_ioctl_check_extension(struct vfio_container *container, - unsigned long arg); +int vfio_device_container_pin_pages(struct vfio_device *device, + dma_addr_t iova, int npage, + int prot, struct page **pages); +void vfio_device_container_unpin_pages(struct vfio_device *device, + dma_addr_t iova, int npage); +int vfio_device_container_dma_rw(struct vfio_device *device, + dma_addr_t iova, void *data, + size_t len, bool write); + int __init vfio_container_init(void); void vfio_container_cleanup(void); +#else +static inline struct vfio_container * +vfio_container_from_file(struct file *filep) +{ + return NULL; +} + +static inline int vfio_group_use_container(struct vfio_group *group) +{ + return -EOPNOTSUPP; +} + +static inline void vfio_group_unuse_container(struct vfio_group *group) +{ +} + +static inline int vfio_container_attach_group(struct vfio_container *container, + struct vfio_group *group) +{ + return -EOPNOTSUPP; +} + +static inline void vfio_group_detach_container(struct vfio_group *group) +{ +} + +static inline void vfio_device_container_register(struct vfio_device *device) +{ +} + +static inline void vfio_device_container_unregister(struct vfio_device *device) +{ +} + +static inline int vfio_device_container_pin_pages(struct vfio_device *device, + dma_addr_t iova, int npage, + int prot, struct page **pages) +{ + return -EOPNOTSUPP; +} + +static inline void vfio_device_container_unpin_pages(struct vfio_device *device, + dma_addr_t iova, int npage) +{ +} + +static inline int vfio_device_container_dma_rw(struct vfio_device *device, + dma_addr_t iova, void *data, + size_t len, bool write) +{ + return -EOPNOTSUPP; +} + +static inline int vfio_container_init(void) +{ + return 0; +} +static inline void vfio_container_cleanup(void) +{ +} +#endif + +#if 
IS_ENABLED(CONFIG_IOMMUFD) +int vfio_iommufd_bind(struct vfio_device *device, struct iommufd_ctx *ictx); +void vfio_iommufd_unbind(struct vfio_device *device); +#else +static inline int vfio_iommufd_bind(struct vfio_device *device, + struct iommufd_ctx *ictx) +{ + return -EOPNOTSUPP; +} + +static inline void vfio_iommufd_unbind(struct vfio_device *device) +{ +} +#endif + #if IS_ENABLED(CONFIG_VFIO_VIRQFD) + int __init vfio_virqfd_init(void); + void vfio_virqfd_exit(void); + #else + static inline int __init vfio_virqfd_init(void) + { + return 0; + } + static inline void vfio_virqfd_exit(void) + { + } + #endif + #ifdef CONFIG_VFIO_NOIOMMU extern bool vfio_noiommu __read_mostly; #else diff --cc drivers/vfio/vfio_main.c index e21ff965141e6,03dbcd3d96f0e..5177bb061b17b --- a/drivers/vfio/vfio_main.c +++ b/drivers/vfio/vfio_main.c @@@ -241,28 -420,95 +239,16 @@@ out_uninit ida_free(&vfio.device_ida, device->index); return ret; } - EXPORT_SYMBOL_GPL(vfio_init_device); - - /* - * The helper called by driver @release callback to free the device - * structure. Drivers which don't have private data to clean can - * simply use this helper as its @release. 
- */ - void vfio_free_device(struct vfio_device *device) - { - kvfree(device); - } - EXPORT_SYMBOL_GPL(vfio_free_device); -static struct vfio_group *vfio_noiommu_group_alloc(struct device *dev, - enum vfio_group_type type) -{ - struct iommu_group *iommu_group; - struct vfio_group *group; - int ret; - - iommu_group = iommu_group_alloc(); - if (IS_ERR(iommu_group)) - return ERR_CAST(iommu_group); - - ret = iommu_group_set_name(iommu_group, "vfio-noiommu"); - if (ret) - goto out_put_group; - ret = iommu_group_add_device(iommu_group, dev); - if (ret) - goto out_put_group; - - group = vfio_create_group(iommu_group, type); - if (IS_ERR(group)) { - ret = PTR_ERR(group); - goto out_remove_device; - } - iommu_group_put(iommu_group); - return group; - -out_remove_device: - iommu_group_remove_device(dev); -out_put_group: - iommu_group_put(iommu_group); - return ERR_PTR(ret); -} - -static struct vfio_group *vfio_group_find_or_alloc(struct device *dev) -{ - struct iommu_group *iommu_group; - struct vfio_group *group; - - iommu_group = iommu_group_get(dev); - if (!iommu_group && vfio_noiommu) { - /* - * With noiommu enabled, create an IOMMU group for devices that - * don't already have one, implying no IOMMU hardware/driver - * exists. Taint the kernel because we're about to give a DMA - * capable device to a user without IOMMU protection. - */ - group = vfio_noiommu_group_alloc(dev, VFIO_NO_IOMMU); - if (!IS_ERR(group)) { - add_taint(TAINT_USER, LOCKDEP_STILL_OK); - dev_warn(dev, "Adding kernel taint for vfio-noiommu group on device\n"); - } - return group; - } - - if (!iommu_group) - return ERR_PTR(-EINVAL); - - /* - * VFIO always sets IOMMU_CACHE because we offer no way for userspace to - * restore cache coherency. It has to be checked here because it is only - * valid for cases where we are using iommu groups. 
- */ - if (!device_iommu_capable(dev, IOMMU_CAP_CACHE_COHERENCY)) { - iommu_group_put(iommu_group); - return ERR_PTR(-EINVAL); - } - - group = vfio_group_get_from_iommu(iommu_group); - if (!group) - group = vfio_create_group(iommu_group, VFIO_IOMMU); - - /* The vfio_group holds a reference to the iommu_group */ - iommu_group_put(iommu_group); - return group; -} - static int __vfio_register_dev(struct vfio_device *device, - struct vfio_group *group) + enum vfio_group_type type) { - struct vfio_device *existing_device; int ret; - /* - * In all cases group is the output of one of the group allocation - * functions and we have group->drivers incremented for us. - */ - if (IS_ERR(group)) - return PTR_ERR(group); + if (WARN_ON(device->ops->bind_iommufd && + (!device->ops->unbind_iommufd || + !device->ops->attach_ioas))) + return -EINVAL; /* * If the driver doesn't specify a set then the device is added to a @@@ -1260,6 -1902,19 +1348,10 @@@ static int __init vfio_init(void if (ret) return ret; + ret = vfio_virqfd_init(); + if (ret) + goto err_virqfd; + - /* /dev/vfio/$GROUP */ - vfio.class = class_create(THIS_MODULE, "vfio"); - if (IS_ERR(vfio.class)) { - ret = PTR_ERR(vfio.class); - goto err_group_class; - } - - vfio.class->devnode = vfio_devnode; - /* /sys/class/vfio-dev/vfioX */ vfio.device_class = class_create(THIS_MODULE, "vfio-dev"); if (IS_ERR(vfio.device_class)) { @@@ -1270,17 -1925,36 +1362,20 @@@ pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); return 0; -err_alloc_chrdev: - class_destroy(vfio.device_class); - vfio.device_class = NULL; err_dev_class: - class_destroy(vfio.class); - vfio.class = NULL; -err_group_class: + vfio_virqfd_exit(); + err_virqfd: - vfio_container_cleanup(); + vfio_group_cleanup(); return ret; } static void __exit vfio_cleanup(void) { - WARN_ON(!list_empty(&vfio.group_list)); - ida_destroy(&vfio.device_ida); - ida_destroy(&vfio.group_ida); - unregister_chrdev_region(vfio.group_devt, MINORMASK + 1); class_destroy(vfio.device_class); 
vfio.device_class = NULL; - class_destroy(vfio.class); + vfio_virqfd_exit(); - vfio_container_cleanup(); - vfio.class = NULL; + vfio_group_cleanup(); xa_destroy(&vfio_device_set_xa); }