iommu/vt-d: Use rbtree to track iommu probed devices
author Lu Baolu <baolu.lu@linux.intel.com>
Tue, 27 Feb 2024 02:14:40 +0000 (10:14 +0800)
committer Joerg Roedel <jroedel@suse.de>
Fri, 1 Mar 2024 12:51:20 +0000 (13:51 +0100)
Use a red-black tree (rbtree) to track devices probed by the driver's
probe_device callback. These devices need to be looked up quickly by
a source ID when the hardware reports a fault, either recoverable or
unrecoverable.

Fault reporting paths are performance-critical. Searching a list in
this scenario is inefficient, with a lookup time complexity of O(n).
An rbtree is a self-balancing binary search tree, offering an average
search time complexity of O(log(n)). This significant improvement
makes the rbtree a better choice.
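As a rough illustration, with 1024 probed devices a linear scan
averages around 512 comparisons per lookup, while a balanced tree
needs on the order of log2(1024) = 10.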

Furthermore, the rbtree is maintained per IOMMU, eliminating the need
for global searches and further improving performance on the critical
fault paths. The rbtree is protected by a spin lock with interrupts
disabled to ensure thread-safe access even from interrupt context.
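
As a rough sketch of the intended use on the fault path (handle_fault()
below is a hypothetical caller for illustration only; the actual
fault-path wiring lands in follow-up patches):

	static void handle_fault(struct intel_iommu *iommu, u16 source_id)
	{
		struct device *dev;

		/*
		 * O(log(n)) lookup; the helper takes device_rbtree_lock
		 * with interrupts disabled, so this is safe even when
		 * called from the fault interrupt handler.
		 */
		dev = device_rbtree_find(iommu, source_id);
		if (dev)
			dev_warn(dev, "fault for source ID 0x%04x\n",
				 source_id);
	}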

Co-developed-by: Huang Jiaqing <jiaqing.huang@intel.com>
Signed-off-by: Huang Jiaqing <jiaqing.huang@intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Reviewed-by: Jason Gunthorpe <jgg@nvidia.com>
Link: https://lore.kernel.org/r/20240220065939.121116-2-baolu.lu@linux.intel.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
drivers/iommu/intel/dmar.c
drivers/iommu/intel/iommu.c
drivers/iommu/intel/iommu.h

index 23cb80d62a9ab4e814bcf62728f9a5a0570a2107..f9b63c2875f715c3b1818bf4d265f19b428492ad 100644
@@ -1095,7 +1095,8 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd)
        iommu->agaw = agaw;
        iommu->msagaw = msagaw;
        iommu->segment = drhd->segment;
-
+       iommu->device_rbtree = RB_ROOT;
+       spin_lock_init(&iommu->device_rbtree_lock);
        iommu->node = NUMA_NO_NODE;
 
        ver = readl(iommu->reg + DMAR_VER_REG);
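
(The hunk above seeds the per-IOMMU tree in alloc_iommu(): RB_ROOT is
the static initializer for an empty rbtree, and the lock is set up
before any device can be probed against this IOMMU.)
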
index fc52fcd786aa9932de54b426db062d9c1f821808..025d7385cf581324d5ba289e3617b51ffbfda4f5 100644
@@ -97,6 +97,81 @@ static phys_addr_t root_entry_uctp(struct root_entry *re)
        return re->hi & VTD_PAGE_MASK;
 }
 
+static int device_rid_cmp_key(const void *key, const struct rb_node *node)
+{
+       struct device_domain_info *info =
+               rb_entry(node, struct device_domain_info, node);
+       const u16 *rid_lhs = key;
+
+       if (*rid_lhs < PCI_DEVID(info->bus, info->devfn))
+               return -1;
+
+       if (*rid_lhs > PCI_DEVID(info->bus, info->devfn))
+               return 1;
+
+       return 0;
+}
+
+static int device_rid_cmp(struct rb_node *lhs, const struct rb_node *rhs)
+{
+       struct device_domain_info *info =
+               rb_entry(lhs, struct device_domain_info, node);
+       u16 key = PCI_DEVID(info->bus, info->devfn);
+
+       return device_rid_cmp_key(&key, rhs);
+}
+
+/*
+ * Looks up an IOMMU-probed device using its source ID.
+ *
+ * Returns the pointer to the device if there is a match. Otherwise,
+ * returns NULL.
+ *
+ * Note that this helper doesn't guarantee that the device won't be
+ * released by the iommu subsystem after being returned. The caller
+ * should use its own synchronization mechanism to avoid the device
+ * being released during its use if that is possibly the case.
+ */
+struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid)
+{
+       struct device_domain_info *info = NULL;
+       struct rb_node *node;
+       unsigned long flags;
+
+       spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
+       node = rb_find(&rid, &iommu->device_rbtree, device_rid_cmp_key);
+       if (node)
+               info = rb_entry(node, struct device_domain_info, node);
+       spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
+
+       return info ? info->dev : NULL;
+}
+
+static int device_rbtree_insert(struct intel_iommu *iommu,
+                               struct device_domain_info *info)
+{
+       struct rb_node *curr;
+       unsigned long flags;
+
+       spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
+       curr = rb_find_add(&info->node, &iommu->device_rbtree, device_rid_cmp);
+       spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
+       if (WARN_ON(curr))
+               return -EEXIST;
+
+       return 0;
+}
+
+static void device_rbtree_remove(struct device_domain_info *info)
+{
+       struct intel_iommu *iommu = info->iommu;
+       unsigned long flags;
+
+       spin_lock_irqsave(&iommu->device_rbtree_lock, flags);
+       rb_erase(&info->node, &iommu->device_rbtree);
+       spin_unlock_irqrestore(&iommu->device_rbtree_lock, flags);
+}
+
 /*
  * This domain is a statically identity mapping domain.
  *     1. This domain creates a static 1:1 mapping to all usable memory.
@@ -4261,25 +4336,34 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
        }
 
        dev_iommu_priv_set(dev, info);
+       ret = device_rbtree_insert(iommu, info);
+       if (ret)
+               goto free;
 
        if (sm_supported(iommu) && !dev_is_real_dma_subdevice(dev)) {
                ret = intel_pasid_alloc_table(dev);
                if (ret) {
                        dev_err(dev, "PASID table allocation failed\n");
-                       kfree(info);
-                       return ERR_PTR(ret);
+                       goto clear_rbtree;
                }
        }
 
        intel_iommu_debugfs_create_dev(info);
 
        return &iommu->iommu;
+clear_rbtree:
+       device_rbtree_remove(info);
+free:
+       kfree(info);
+
+       return ERR_PTR(ret);
 }
 
 static void intel_iommu_release_device(struct device *dev)
 {
        struct device_domain_info *info = dev_iommu_priv_get(dev);
 
+       device_rbtree_remove(info);
        dmar_remove_one_dev_info(dev);
        intel_pasid_free_table(dev);
        intel_iommu_debugfs_remove_dev(info);
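
A note on the comparator pair above: rb_find() takes a key-to-node
comparator (device_rid_cmp_key), while rb_find_add() compares two
nodes (device_rid_cmp) and returns the already-present node on a key
collision, which is why device_rbtree_insert() maps a non-NULL return
to -EEXIST. The key is the 16-bit PCI requester ID, e.g.
PCI_DEVID(0x3a, 0x10) == 0x3a10 (bus number in the high byte, devfn
in the low byte).
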
index d02f916d8e59a914d2441fa2b81af9ac31dfbf86..50d1e196db52ca749c63ea2706a02d0bb197aa13 100644
@@ -716,6 +716,11 @@ struct intel_iommu {
        struct q_inval  *qi;            /* Queued invalidation info */
        u32 iommu_state[MAX_SR_DMAR_REGS]; /* Store iommu states between suspend and resume.*/
 
+       /* rb tree for all probed devices */
+       struct rb_root device_rbtree;
+       /* protect the device_rbtree */
+       spinlock_t device_rbtree_lock;
+
 #ifdef CONFIG_IRQ_REMAP
        struct ir_table *ir_table;      /* Interrupt remapping info */
        struct irq_domain *ir_domain;
@@ -749,6 +754,8 @@ struct device_domain_info {
        struct intel_iommu *iommu; /* IOMMU used by this device */
        struct dmar_domain *domain; /* pointer to domain */
        struct pasid_table *pasid_table; /* pasid table */
+       /* device tracking node (lookup by PCI RID) */
+       struct rb_node node;
 #ifdef CONFIG_INTEL_IOMMU_DEBUGFS
        struct dentry *debugfs_dentry; /* pointer to device directory dentry */
 #endif
@@ -1074,6 +1081,7 @@ void free_pgtable_page(void *vaddr);
 void iommu_flush_write_buffer(struct intel_iommu *iommu);
 struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent,
                                               const struct iommu_user_data *user_data);
+struct device *device_rbtree_find(struct intel_iommu *iommu, u16 rid);
 
 #ifdef CONFIG_INTEL_IOMMU_SVM
 void intel_svm_check(struct intel_iommu *iommu);