vfio: remove VFIO_GROUP_NOTIFY_SET_KVM
author    Matthew Rosato <mjrosato@linux.ibm.com>
Thu, 19 May 2022 18:33:11 +0000 (14:33 -0400)
committer    Alex Williamson <alex.williamson@redhat.com>
Tue, 24 May 2022 14:41:18 +0000 (08:41 -0600)
Rather than relying on a notifier for associating the KVM with
the group, let's assume that the association has already been
made prior to device_open.  The first time a device is opened,
associate the group's KVM with the device.

This fixes a user-triggerable oops in GVT.

Reviewed-by: Tony Krowiak <akrowiak@linux.ibm.com>
Reviewed-by: Kevin Tian <kevin.tian@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
Signed-off-by: Matthew Rosato <mjrosato@linux.ibm.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Acked-by: Zhi Wang <zhi.a.wang@intel.com>
Link: https://lore.kernel.org/r/20220519183311.582380-2-mjrosato@linux.ibm.com
Signed-off-by: Alex Williamson <alex.williamson@redhat.com>
drivers/gpu/drm/i915/gvt/gtt.c
drivers/gpu/drm/i915/gvt/gvt.h
drivers/gpu/drm/i915/gvt/kvmgt.c
drivers/s390/crypto/vfio_ap_ops.c
drivers/s390/crypto/vfio_ap_private.h
drivers/vfio/vfio.c
include/linux/vfio.h

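Before the per-file diffs, a minimal sketch of the driver-side pattern this
change establishes.  The my_device/my_open_device/my_close_device names are
hypothetical; the kvm member of struct vfio_device, kvm_get_kvm() and
kvm_put_kvm() are the real interfaces exercised by the GVT and vfio_ap hunks
below.

#include <linux/vfio.h>
#include <linux/kvm_host.h>

struct my_device {
        struct vfio_device vdev;        /* embedded, as in GVT and vfio_ap */
};

static int my_open_device(struct vfio_device *vdev)
{
        /*
         * The vfio core copies the group's KVM into vdev->kvm (under
         * group_rwsem) before calling open_device, so no notifier is
         * needed: check the pointer and take a reference if it is used.
         */
        if (!vdev->kvm)
                return -EINVAL;

        kvm_get_kvm(vdev->kvm);         /* hold the KVM for the open lifetime */
        return 0;
}

static void my_close_device(struct vfio_device *vdev)
{
        kvm_put_kvm(vdev->kvm);         /* drop the reference taken at open */
}

static const struct vfio_device_ops my_dev_ops = {
        .open_device    = my_open_device,
        .close_device   = my_close_device,
};

This replaces the removed pattern of registering a VFIO_GROUP_NOTIFY_SET_KVM
notifier and caching the KVM pointer inside the driver.
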
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 9c5cc28009758df7477242753a85d2c46c45110b..b4f69364f9a1392ea27dc5f5267075a0fa5e0497 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -51,7 +51,7 @@ static int preallocated_oos_pages = 8192;
 
 static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn)
 {
-       struct kvm *kvm = vgpu->kvm;
+       struct kvm *kvm = vgpu->vfio_device.kvm;
        int idx;
        bool ret;
 
@@ -1185,7 +1185,7 @@ static int is_2MB_gtt_possible(struct intel_vgpu *vgpu,
 
        if (!vgpu->attached)
                return -EINVAL;
-       pfn = gfn_to_pfn(vgpu->kvm, ops->get_pfn(entry));
+       pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry));
        if (is_error_noslot_pfn(pfn))
                return -EINVAL;
        return PageTransHuge(pfn_to_page(pfn));
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 2af4c83e733c6cc697b173ae33c472b69df975ac..aee1a45da74bcb717c325b80ef36d2305af4f175 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -227,9 +227,6 @@ struct intel_vgpu {
        struct mutex cache_lock;
 
        struct notifier_block iommu_notifier;
-       struct notifier_block group_notifier;
-       struct kvm *kvm;
-       struct work_struct release_work;
        atomic_t released;
 
        struct kvm_page_track_notifier_node track_node;
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 7655ffa97d511668939885a099e7e83f99fa23ed..e2f6c56ab3420cb7cacb0238ecd6eb51073af8fa 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -228,8 +228,6 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt)
        }
 }
 
-static void intel_vgpu_release_work(struct work_struct *work);
-
 static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn,
                unsigned long size)
 {
@@ -761,23 +759,6 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb,
        return NOTIFY_OK;
 }
 
-static int intel_vgpu_group_notifier(struct notifier_block *nb,
-                                    unsigned long action, void *data)
-{
-       struct intel_vgpu *vgpu =
-               container_of(nb, struct intel_vgpu, group_notifier);
-
-       /* the only action we care about */
-       if (action == VFIO_GROUP_NOTIFY_SET_KVM) {
-               vgpu->kvm = data;
-
-               if (!data)
-                       schedule_work(&vgpu->release_work);
-       }
-
-       return NOTIFY_OK;
-}
-
 static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
 {
        struct intel_vgpu *itr;
@@ -789,7 +770,7 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu)
                if (!itr->attached)
                        continue;
 
-               if (vgpu->kvm == itr->kvm) {
+               if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) {
                        ret = true;
                        goto out;
                }
@@ -806,7 +787,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
        int ret;
 
        vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier;
-       vgpu->group_notifier.notifier_call = intel_vgpu_group_notifier;
 
        events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
        ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events,
@@ -817,38 +797,32 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
                goto out;
        }
 
-       events = VFIO_GROUP_NOTIFY_SET_KVM;
-       ret = vfio_register_notifier(vfio_dev, VFIO_GROUP_NOTIFY, &events,
-                                    &vgpu->group_notifier);
-       if (ret != 0) {
-               gvt_vgpu_err("vfio_register_notifier for group failed: %d\n",
-                       ret);
-               goto undo_iommu;
-       }
-
        ret = -EEXIST;
        if (vgpu->attached)
-               goto undo_register;
+               goto undo_iommu;
 
        ret = -ESRCH;
-       if (!vgpu->kvm || vgpu->kvm->mm != current->mm) {
+       if (!vgpu->vfio_device.kvm ||
+           vgpu->vfio_device.kvm->mm != current->mm) {
                gvt_vgpu_err("KVM is required to use Intel vGPU\n");
-               goto undo_register;
+               goto undo_iommu;
        }
 
+       kvm_get_kvm(vgpu->vfio_device.kvm);
+
        ret = -EEXIST;
        if (__kvmgt_vgpu_exist(vgpu))
-               goto undo_register;
+               goto undo_iommu;
 
        vgpu->attached = true;
-       kvm_get_kvm(vgpu->kvm);
 
        kvmgt_protect_table_init(vgpu);
        gvt_cache_init(vgpu);
 
        vgpu->track_node.track_write = kvmgt_page_track_write;
        vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
-       kvm_page_track_register_notifier(vgpu->kvm, &vgpu->track_node);
+       kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
+                                        &vgpu->track_node);
 
        debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs,
                             &vgpu->nr_cache_entries);
@@ -858,10 +832,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
        atomic_set(&vgpu->released, 0);
        return 0;
 
-undo_register:
-       vfio_unregister_notifier(vfio_dev, VFIO_GROUP_NOTIFY,
-                                &vgpu->group_notifier);
-
 undo_iommu:
        vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY,
                                 &vgpu->iommu_notifier);
@@ -880,8 +850,9 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu)
        }
 }
 
-static void __intel_vgpu_release(struct intel_vgpu *vgpu)
+static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
 {
+       struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
        struct drm_i915_private *i915 = vgpu->gvt->gt->i915;
        int ret;
 
@@ -898,35 +869,19 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
        drm_WARN(&i915->drm, ret,
                 "vfio_unregister_notifier for iommu failed: %d\n", ret);
 
-       ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_GROUP_NOTIFY,
-                                      &vgpu->group_notifier);
-       drm_WARN(&i915->drm, ret,
-                "vfio_unregister_notifier for group failed: %d\n", ret);
-
        debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
 
-       kvm_page_track_unregister_notifier(vgpu->kvm, &vgpu->track_node);
-       kvm_put_kvm(vgpu->kvm);
+       kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
+                                          &vgpu->track_node);
        kvmgt_protect_table_destroy(vgpu);
        gvt_cache_destroy(vgpu);
 
        intel_vgpu_release_msi_eventfd_ctx(vgpu);
 
-       vgpu->kvm = NULL;
        vgpu->attached = false;
-}
-
-static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
-{
-       __intel_vgpu_release(vfio_dev_to_vgpu(vfio_dev));
-}
-
-static void intel_vgpu_release_work(struct work_struct *work)
-{
-       struct intel_vgpu *vgpu =
-               container_of(work, struct intel_vgpu, release_work);
 
-       __intel_vgpu_release(vgpu);
+       if (vgpu->vfio_device.kvm)
+               kvm_put_kvm(vgpu->vfio_device.kvm);
 }
 
 static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
@@ -1675,7 +1630,6 @@ static int intel_vgpu_probe(struct mdev_device *mdev)
                return PTR_ERR(vgpu);
        }
 
-       INIT_WORK(&vgpu->release_work, intel_vgpu_release_work);
        vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev,
                            &intel_vgpu_dev_ops);
 
@@ -1713,7 +1667,7 @@ static struct mdev_driver intel_vgpu_mdev_driver = {
 
 int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn)
 {
-       struct kvm *kvm = info->kvm;
+       struct kvm *kvm = info->vfio_device.kvm;
        struct kvm_memory_slot *slot;
        int idx;
 
@@ -1743,7 +1697,7 @@ out:
 
 int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn)
 {
-       struct kvm *kvm = info->kvm;
+       struct kvm *kvm = info->vfio_device.kvm;
        struct kvm_memory_slot *slot;
        int idx;
 
diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c
index e8914024f5b1af6606f50b901d8f5b4dec0555e0..a7d2a95796d36039463b968000a24893b1084da3 100644
--- a/drivers/s390/crypto/vfio_ap_ops.c
+++ b/drivers/s390/crypto/vfio_ap_ops.c
@@ -1284,25 +1284,6 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev)
        }
 }
 
-static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
-                                      unsigned long action, void *data)
-{
-       int notify_rc = NOTIFY_OK;
-       struct ap_matrix_mdev *matrix_mdev;
-
-       if (action != VFIO_GROUP_NOTIFY_SET_KVM)
-               return NOTIFY_OK;
-
-       matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier);
-
-       if (!data)
-               vfio_ap_mdev_unset_kvm(matrix_mdev);
-       else if (vfio_ap_mdev_set_kvm(matrix_mdev, data))
-               notify_rc = NOTIFY_DONE;
-
-       return notify_rc;
-}
-
 static struct vfio_ap_queue *vfio_ap_find_queue(int apqn)
 {
        struct device *dev;
@@ -1402,11 +1383,10 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
        unsigned long events;
        int ret;
 
-       matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier;
-       events = VFIO_GROUP_NOTIFY_SET_KVM;
+       if (!vdev->kvm)
+               return -EINVAL;
 
-       ret = vfio_register_notifier(vdev, VFIO_GROUP_NOTIFY, &events,
-                                    &matrix_mdev->group_notifier);
+       ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm);
        if (ret)
                return ret;
 
@@ -1415,12 +1395,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev)
        ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events,
                                     &matrix_mdev->iommu_notifier);
        if (ret)
-               goto out_unregister_group;
+               goto err_kvm;
        return 0;
 
-out_unregister_group:
-       vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
-                                &matrix_mdev->group_notifier);
+err_kvm:
+       vfio_ap_mdev_unset_kvm(matrix_mdev);
        return ret;
 }
 
@@ -1431,8 +1410,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev)
 
        vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY,
                                 &matrix_mdev->iommu_notifier);
-       vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY,
-                                &matrix_mdev->group_notifier);
        vfio_ap_mdev_unset_kvm(matrix_mdev);
 }
 
diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h
index 648fcaf8104abb7e2a0cd342fc041b564c61b8d9..a26efd804d0df3a6fe616716d336349bacaf2bd0 100644
--- a/drivers/s390/crypto/vfio_ap_private.h
+++ b/drivers/s390/crypto/vfio_ap_private.h
@@ -81,8 +81,6 @@ struct ap_matrix {
  * @node:      allows the ap_matrix_mdev struct to be added to a list
  * @matrix:    the adapters, usage domains and control domains assigned to the
  *             mediated matrix device.
- * @group_notifier: notifier block used for specifying callback function for
- *                 handling the VFIO_GROUP_NOTIFY_SET_KVM event
  * @iommu_notifier: notifier block used for specifying callback function for
 *                 handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP event
  * @kvm:       the struct holding guest's state
@@ -94,7 +92,6 @@ struct ap_matrix_mdev {
        struct vfio_device vdev;
        struct list_head node;
        struct ap_matrix matrix;
-       struct notifier_block group_notifier;
        struct notifier_block iommu_notifier;
        struct kvm *kvm;
        crypto_hook pqap_hook;
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index f5ed03897210c3d18f9a0d39d85e16b18c0cbfde..e22be13e67711039516a51f9e703ac05951ef268 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -1088,10 +1088,21 @@ static struct file *vfio_device_open(struct vfio_device *device)
 
        mutex_lock(&device->dev_set->lock);
        device->open_count++;
-       if (device->open_count == 1 && device->ops->open_device) {
-               ret = device->ops->open_device(device);
-               if (ret)
-                       goto err_undo_count;
+       if (device->open_count == 1) {
+               /*
+                * Here we pass the KVM pointer with the group under the read
+                * lock.  If the device driver will use it, it must obtain a
+                * reference and release it during close_device.
+                */
+               down_read(&device->group->group_rwsem);
+               device->kvm = device->group->kvm;
+
+               if (device->ops->open_device) {
+                       ret = device->ops->open_device(device);
+                       if (ret)
+                               goto err_undo_count;
+               }
+               up_read(&device->group->group_rwsem);
        }
        mutex_unlock(&device->dev_set->lock);
 
@@ -1124,10 +1135,14 @@ static struct file *vfio_device_open(struct vfio_device *device)
 
 err_close_device:
        mutex_lock(&device->dev_set->lock);
+       down_read(&device->group->group_rwsem);
        if (device->open_count == 1 && device->ops->close_device)
                device->ops->close_device(device);
 err_undo_count:
        device->open_count--;
+       if (device->open_count == 0 && device->kvm)
+               device->kvm = NULL;
+       up_read(&device->group->group_rwsem);
        mutex_unlock(&device->dev_set->lock);
        module_put(device->dev->driver->owner);
 err_unassign_container:
@@ -1320,9 +1335,13 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep)
 
        mutex_lock(&device->dev_set->lock);
        vfio_assert_device_open(device);
+       down_read(&device->group->group_rwsem);
        if (device->open_count == 1 && device->ops->close_device)
                device->ops->close_device(device);
+       up_read(&device->group->group_rwsem);
        device->open_count--;
+       if (device->open_count == 0)
+               device->kvm = NULL;
        mutex_unlock(&device->dev_set->lock);
 
        module_put(device->dev->driver->owner);
@@ -1731,8 +1750,8 @@ EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
  * @file: VFIO group file
  * @kvm: KVM to link
  *
- * The kvm pointer will be forwarded to all the vfio_device's attached to the
- * VFIO file via the VFIO_GROUP_NOTIFY_SET_KVM notifier.
+ * When a VFIO device is first opened the KVM will be available in
+ * device->kvm if one was associated with the group.
  */
 void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
 {
@@ -1743,8 +1762,6 @@ void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
 
        down_write(&group->group_rwsem);
        group->kvm = kvm;
-       blocking_notifier_call_chain(&group->notifier,
-                                    VFIO_GROUP_NOTIFY_SET_KVM, kvm);
        up_write(&group->group_rwsem);
 }
 EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
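
A hedged sketch of the ordering the new model relies on: the KVM must be bound
to the group file before any device fd in the group is opened (in-tree this
association is made via the kvm-vfio pseudo device).  attach_kvm_to_group()
and detach_kvm_from_group() are hypothetical wrappers around the real
vfio_file_set_kvm() export documented above.

#include <linux/fs.h>
#include <linux/vfio.h>
#include <linux/kvm_host.h>

static void attach_kvm_to_group(struct file *group_file, struct kvm *kvm)
{
        /*
         * Must happen before a device fd is obtained from the group: with
         * this patch, drivers such as vfio_ap fail open_device when
         * vdev->kvm is still NULL.
         */
        vfio_file_set_kvm(group_file, kvm);
}

static void detach_kvm_from_group(struct file *group_file)
{
        /*
         * Clearing the association only affects future opens; devices that
         * are already open keep whatever reference they took in open_device
         * until close_device runs.
         */
        vfio_file_set_kvm(group_file, NULL);
}
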
@@ -2011,7 +2028,8 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
        struct vfio_iommu_driver *driver;
        int ret;
 
-       down_read(&group->group_rwsem);
+       lockdep_assert_held_read(&group->group_rwsem);
+
        container = group->container;
        driver = container->iommu_driver;
        if (likely(driver && driver->ops->register_notifier))
@@ -2019,7 +2037,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group,
                                                     events, nb);
        else
                ret = -ENOTTY;
-       up_read(&group->group_rwsem);
 
        return ret;
 }
@@ -2031,7 +2048,8 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
        struct vfio_iommu_driver *driver;
        int ret;
 
-       down_read(&group->group_rwsem);
+       lockdep_assert_held_read(&group->group_rwsem);
+
        container = group->container;
        driver = container->iommu_driver;
        if (likely(driver && driver->ops->unregister_notifier))
@@ -2039,47 +2057,10 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group,
                                                       nb);
        else
                ret = -ENOTTY;
-       up_read(&group->group_rwsem);
 
        return ret;
 }
 
-static int vfio_register_group_notifier(struct vfio_group *group,
-                                       unsigned long *events,
-                                       struct notifier_block *nb)
-{
-       int ret;
-       bool set_kvm = false;
-
-       if (*events & VFIO_GROUP_NOTIFY_SET_KVM)
-               set_kvm = true;
-
-       /* clear known events */
-       *events &= ~VFIO_GROUP_NOTIFY_SET_KVM;
-
-       /* refuse to continue if still events remaining */
-       if (*events)
-               return -EINVAL;
-
-       ret = blocking_notifier_chain_register(&group->notifier, nb);
-       if (ret)
-               return ret;
-
-       /*
-        * The attaching of kvm and vfio_group might already happen, so
-        * here we replay once upon registration.
-        */
-       if (set_kvm) {
-               down_read(&group->group_rwsem);
-               if (group->kvm)
-                       blocking_notifier_call_chain(&group->notifier,
-                                                    VFIO_GROUP_NOTIFY_SET_KVM,
-                                                    group->kvm);
-               up_read(&group->group_rwsem);
-       }
-       return 0;
-}
-
 int vfio_register_notifier(struct vfio_device *device,
                           enum vfio_notify_type type, unsigned long *events,
                           struct notifier_block *nb)
@@ -2095,9 +2076,6 @@ int vfio_register_notifier(struct vfio_device *device,
        case VFIO_IOMMU_NOTIFY:
                ret = vfio_register_iommu_notifier(group, events, nb);
                break;
-       case VFIO_GROUP_NOTIFY:
-               ret = vfio_register_group_notifier(group, events, nb);
-               break;
        default:
                ret = -EINVAL;
        }
@@ -2119,9 +2097,6 @@ int vfio_unregister_notifier(struct vfio_device *device,
        case VFIO_IOMMU_NOTIFY:
                ret = vfio_unregister_iommu_notifier(group, nb);
                break;
-       case VFIO_GROUP_NOTIFY:
-               ret = blocking_notifier_chain_unregister(&group->notifier, nb);
-               break;
        default:
                ret = -EINVAL;
        }
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 45b287826ce686a7e5c736234620323b6fb3afd5..aa888cc517578e7031ef946a2c11e99ce9352ad0 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -36,6 +36,8 @@ struct vfio_device {
        struct vfio_device_set *dev_set;
        struct list_head dev_set_list;
        unsigned int migration_flags;
+       /* Driver must reference the kvm during open_device or never touch it */
+       struct kvm *kvm;
 
        /* Members below here are private, not for driver use */
        refcount_t refcount;
@@ -155,15 +157,11 @@ extern int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova,
 /* each type has independent events */
 enum vfio_notify_type {
        VFIO_IOMMU_NOTIFY = 0,
-       VFIO_GROUP_NOTIFY = 1,
 };
 
 /* events for VFIO_IOMMU_NOTIFY */
 #define VFIO_IOMMU_NOTIFY_DMA_UNMAP    BIT(0)
 
-/* events for VFIO_GROUP_NOTIFY */
-#define VFIO_GROUP_NOTIFY_SET_KVM      BIT(0)
-
 extern int vfio_register_notifier(struct vfio_device *device,
                                  enum vfio_notify_type type,
                                  unsigned long *required_events,