iommu/amd: Introduce Disable IRTE Caching Support
authorSuravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Tue, 30 May 2023 14:11:35 +0000 (10:11 -0400)
committerJoerg Roedel <jroedel@suse.de>
Fri, 9 Jun 2023 12:47:09 +0000 (14:47 +0200)
An Interrupt Remapping Table (IRT) stores interrupt remapping configuration
for each device. In a normal operation, the AMD IOMMU caches the table
to optimize subsequent data accesses. This requires the IOMMU driver to
invalidate IRT whenever it updates the table. The invalidation process
includes issuing an INVALIDATE_INTERRUPT_TABLE command followed by
a COMPLETION_WAIT command.

However, there are cases in which the IRT is updated at a high rate.
For example, for IOMMU AVIC, the IRTE[IsRun] bit is updated on every
vcpu scheduling (i.e. amd_iommu_update_ga()). On systems with a large
number of vcpus and VFIO PCI pass-through devices, the invalidation
process could potentially become a performance bottleneck.

Introducing a new kernel boot option:

    amd_iommu=irtcachedis

which disables IRTE caching by setting the IRTCacheDis bit in each IOMMU
Control register, and bypasses the IRT invalidation process.

Reviewed-by: Jerry Snitselaar <jsnitsel@redhat.com>
Co-developed-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Signed-off-by: Alejandro Jimenez <alejandro.j.jimenez@oracle.com>
Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
Link: https://lore.kernel.org/r/20230530141137.14376-4-suravee.suthikulpanit@amd.com
Signed-off-by: Joerg Roedel <jroedel@suse.de>
Documentation/admin-guide/kernel-parameters.txt
drivers/iommu/amd/amd_iommu_types.h
drivers/iommu/amd/init.c

index 9e5bab29685ff0534fe82714dda5e20804c38ead..986ac2b73ea22be02375b56db66990c74aae5876 100644 (file)
                                       option with care.
                        pgtbl_v1     - Use v1 page table for DMA-API (Default).
                        pgtbl_v2     - Use v2 page table for DMA-API.
+                       irtcachedis  - Disable Interrupt Remapping Table (IRT) caching.
 
        amd_iommu_dump= [HW,X86-64]
                        Enable AMD IOMMU driver option to dump the ACPI table
index 8c072be68875c28896f70fc8fae59449cb7689c0..8eeea1f25e6978362f7179041815b4b2e12e6c81 100644 (file)
 #define CONTROL_GAINT_EN       29
 #define CONTROL_XT_EN          50
 #define CONTROL_INTCAPXT_EN    51
+#define CONTROL_IRTCACHEDIS    59
 #define CONTROL_SNPAVIC_EN     61
 
 #define CTRL_INV_TO_MASK       (7 << CONTROL_INV_TIMEOUT)
@@ -716,6 +717,9 @@ struct amd_iommu {
        /* if one, we need to send a completion wait command */
        bool need_sync;
 
+       /* true if IRTE caching is disabled */
+       bool irtcachedis_enabled;
+
        /* Handle for IOMMU core code */
        struct iommu_device iommu;
 
index 329a406cc37de8f84e88acde441eea20ce65599b..418da641ee3da844cfa11a329632120eb4eb77c8 100644 (file)
@@ -162,6 +162,7 @@ static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
 static bool amd_iommu_detected;
 static bool amd_iommu_disabled __initdata;
 static bool amd_iommu_force_enable __initdata;
+static bool amd_iommu_irtcachedis;
 static int amd_iommu_target_ivhd_type;
 
 /* Global EFR and EFR2 registers */
@@ -484,6 +485,9 @@ static void iommu_disable(struct amd_iommu *iommu)
 
        /* Disable IOMMU hardware itself */
        iommu_feature_disable(iommu, CONTROL_IOMMU_EN);
+
+       /* Clear IRTE cache disabling bit */
+       iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
 }
 
 /*
@@ -2686,6 +2690,33 @@ static void iommu_enable_ga(struct amd_iommu *iommu)
 #endif
 }
 
+static void iommu_disable_irtcachedis(struct amd_iommu *iommu)
+{
+       iommu_feature_disable(iommu, CONTROL_IRTCACHEDIS);
+}
+
+static void iommu_enable_irtcachedis(struct amd_iommu *iommu)
+{
+       u64 ctrl;
+
+       if (!amd_iommu_irtcachedis)
+               return;
+
+       /*
+        * Note:
+        * The support for IRTCacheDis feature is determined by
+        * checking if the bit is writable.
+        */
+       iommu_feature_enable(iommu, CONTROL_IRTCACHEDIS);
+       ctrl = readq(iommu->mmio_base +  MMIO_CONTROL_OFFSET);
+       ctrl &= (1ULL << CONTROL_IRTCACHEDIS);
+       if (ctrl)
+               iommu->irtcachedis_enabled = true;
+       pr_info("iommu%d (%#06x) : IRT cache is %s\n",
+               iommu->index, iommu->devid,
+               iommu->irtcachedis_enabled ? "disabled" : "enabled");
+}
+
 static void early_enable_iommu(struct amd_iommu *iommu)
 {
        iommu_disable(iommu);
@@ -2696,6 +2727,7 @@ static void early_enable_iommu(struct amd_iommu *iommu)
        iommu_set_exclusion_range(iommu);
        iommu_enable_ga(iommu);
        iommu_enable_xt(iommu);
+       iommu_enable_irtcachedis(iommu);
        iommu_enable(iommu);
        iommu_flush_all_caches(iommu);
 }
@@ -2746,10 +2778,12 @@ static void early_enable_iommus(void)
                for_each_iommu(iommu) {
                        iommu_disable_command_buffer(iommu);
                        iommu_disable_event_buffer(iommu);
+                       iommu_disable_irtcachedis(iommu);
                        iommu_enable_command_buffer(iommu);
                        iommu_enable_event_buffer(iommu);
                        iommu_enable_ga(iommu);
                        iommu_enable_xt(iommu);
+                       iommu_enable_irtcachedis(iommu);
                        iommu_set_device_table(iommu);
                        iommu_flush_all_caches(iommu);
                }
@@ -3402,6 +3436,8 @@ static int __init parse_amd_iommu_options(char *str)
                        amd_iommu_pgtable = AMD_IOMMU_V1;
                } else if (strncmp(str, "pgtbl_v2", 8) == 0) {
                        amd_iommu_pgtable = AMD_IOMMU_V2;
+               } else if (strncmp(str, "irtcachedis", 11) == 0) {
+                       amd_iommu_irtcachedis = true;
                } else {
                        pr_notice("Unknown option - '%s'\n", str);
                }