target/i386: Support up to 32768 CPUs without IRQ remapping
author David Woodhouse <dwmw2@infradead.org>
Mon, 5 Oct 2020 14:18:19 +0000 (15:18 +0100)
committer Paolo Bonzini <pbonzini@redhat.com>
Thu, 10 Dec 2020 17:15:00 +0000 (12:15 -0500)
The IOAPIC has an 'Extended Destination ID' field in its RTE, which maps
to bits 11-4 of the MSI address. Since those address bits fall within a
given 4KiB page they were historically non-trivial to use on real hardware.

The Intel IOMMU uses the lowest bit to indicate a remappable format MSI,
and then the remaining 7 bits are part of the index.

Where the remappable format bit isn't set, we can actually use the other
seven to allow external (IOAPIC and MSI) interrupts to reach up to 32768
CPUs instead of just the 255 permitted on bare metal.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Message-Id: <78097f9218300e63e751e077a0a5ca029b56ba46.camel@infradead.org>
[Fix UBSAN warning. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: David Woodhouse <dwmw2@infradead.org>
hw/i386/kvm/apic.c
hw/i386/pc.c
include/standard-headers/asm-x86/kvm_para.h
target/i386/cpu.c
target/i386/kvm.c
target/i386/kvm_i386.h

index dd29906061ce1a8149d7d14b8854bd87f241c265..b226b674e808b803f2534ab516d45181b3fa6036 100644 (file)
@@ -183,6 +183,13 @@ static void kvm_send_msi(MSIMessage *msg)
 {
     int ret;
 
+    /*
+     * The message has already passed through interrupt remapping if enabled,
+     * but the legacy extended destination ID in low bits still needs to be
+     * handled.
+     */
+    msg->address = kvm_swizzle_msi_ext_dest_id(msg->address);
+
     ret = kvm_irqchip_send_msi(kvm_state, *msg);
     if (ret < 0) {
         fprintf(stderr, "KVM: injection failed, MSI lost (%s)\n",
index 9e29f3792b2a660f7046750b3f8569302cac8690..640fb5b0b70997748c4468800d3bcb2501d3d0f0 100644 (file)
@@ -104,6 +104,7 @@ const size_t pc_compat_5_2_len = G_N_ELEMENTS(pc_compat_5_2);
 
 GlobalProperty pc_compat_5_1[] = {
     { "ICH9-LPC", "x-smi-cpu-hotplug", "off" },
+    { TYPE_X86_CPU, "kvm-msi-ext-dest-id", "off" },
 };
 const size_t pc_compat_5_1_len = G_N_ELEMENTS(pc_compat_5_1);
 
@@ -796,17 +797,12 @@ void pc_machine_done(Notifier *notifier, void *data)
         fw_cfg_modify_i16(x86ms->fw_cfg, FW_CFG_NB_CPUS, x86ms->boot_cpus);
     }
 
-    if (x86ms->apic_id_limit > 255 && !xen_enabled()) {
-        IntelIOMMUState *iommu = INTEL_IOMMU_DEVICE(x86_iommu_get_default());
 
-        if (!iommu || !x86_iommu_ir_supported(X86_IOMMU_DEVICE(iommu)) ||
-            iommu->intr_eim != ON_OFF_AUTO_ON) {
-            error_report("current -smp configuration requires "
-                         "Extended Interrupt Mode enabled. "
-                         "You can add an IOMMU using: "
-                         "-device intel-iommu,intremap=on,eim=on");
-            exit(EXIT_FAILURE);
-        }
+    if (x86ms->apic_id_limit > 255 && !xen_enabled() &&
+        !kvm_irqchip_in_kernel()) {
+        error_report("current -smp configuration requires kernel "
+                     "irqchip support.");
+        exit(EXIT_FAILURE);
     }
 }
 
index 07877d3295f265760c6eddec2b5e77bc11c14221..215d01b4eca8195aca3a6abce9676d2f1a680bb6 100644 (file)
@@ -32,6 +32,7 @@
 #define KVM_FEATURE_POLL_CONTROL       12
 #define KVM_FEATURE_PV_SCHED_YIELD     13
 #define KVM_FEATURE_ASYNC_PF_INT       14
+#define KVM_FEATURE_MSI_EXT_DEST_ID    15
 
 #define KVM_HINTS_REALTIME      0
 
index 5a8c96072e4125b0822f16d421099721722bf343..b90ed05897d872a556bf2d6cd6df2bcf18c123f5 100644 (file)
@@ -799,7 +799,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = {
             "kvmclock", "kvm-nopiodelay", "kvm-mmu", "kvmclock",
             "kvm-asyncpf", "kvm-steal-time", "kvm-pv-eoi", "kvm-pv-unhalt",
             NULL, "kvm-pv-tlb-flush", NULL, "kvm-pv-ipi",
-            "kvm-poll-control", "kvm-pv-sched-yield", "kvm-asyncpf-int", NULL,
+            "kvm-poll-control", "kvm-pv-sched-yield", "kvm-asyncpf-int", "kvm-msi-ext-dest-id",
             NULL, NULL, NULL, NULL,
             NULL, NULL, NULL, NULL,
             "kvmclock-stable-bit", NULL, NULL, NULL,
@@ -4114,6 +4114,7 @@ static PropValue kvm_default_props[] = {
     { "kvm-pv-eoi", "on" },
     { "kvmclock-stable-bit", "on" },
     { "x2apic", "on" },
+    { "kvm-msi-ext-dest-id", "off" },
     { "acpi", "off" },
     { "monitor", "off" },
     { "svm", "off" },
@@ -5140,6 +5141,8 @@ static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model)
     if (kvm_enabled()) {
         if (!kvm_irqchip_in_kernel()) {
             x86_cpu_change_kvm_default("x2apic", "off");
+        } else if (kvm_irqchip_is_split() && kvm_enable_x2apic()) {
+            x86_cpu_change_kvm_default("kvm-msi-ext-dest-id", "on");
         }
 
         x86_cpu_apply_props(cpu, kvm_default_props);
index a2934dda027c66b634637986b58926468910ea67..bcfa4b03e077bb8c3c48b842a7a991023f64e617 100644 (file)
@@ -416,6 +416,9 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function,
         if (!kvm_irqchip_in_kernel()) {
             ret &= ~(1U << KVM_FEATURE_PV_UNHALT);
         }
+        if (kvm_irqchip_is_split()) {
+            ret |= 1U << KVM_FEATURE_MSI_EXT_DEST_ID;
+        }
     } else if (function == KVM_CPUID_FEATURES && reg == R_EDX) {
         ret |= 1U << KVM_HINTS_REALTIME;
     }
@@ -4589,38 +4592,74 @@ int kvm_arch_irqchip_create(KVMState *s)
     }
 }
 
+uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address)
+{
+    CPUX86State *env;
+    uint64_t ext_id;
+
+    if (!first_cpu) {
+        return address;
+    }
+    env = &X86_CPU(first_cpu)->env;
+    if (!(env->features[FEAT_KVM] & (1 << KVM_FEATURE_MSI_EXT_DEST_ID))) {
+        return address;
+    }
+
+    /*
+     * If the remappable format bit is set, or the upper bits are
+     * already set in address_hi, or the low extended bits aren't
+     * there anyway, do nothing.
+     */
+    ext_id = address & (0xff << MSI_ADDR_DEST_IDX_SHIFT);
+    if (!ext_id || (ext_id & (1 << MSI_ADDR_DEST_IDX_SHIFT)) || (address >> 32)) {
+        return address;
+    }
+
+    address &= ~ext_id;
+    address |= ext_id << 35;
+    return address;
+}
+
 int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
                              uint64_t address, uint32_t data, PCIDevice *dev)
 {
     X86IOMMUState *iommu = x86_iommu_get_default();
 
     if (iommu) {
-        int ret;
-        MSIMessage src, dst;
         X86IOMMUClass *class = X86_IOMMU_DEVICE_GET_CLASS(iommu);
 
-        if (!class->int_remap) {
-            return 0;
-        }
+        if (class->int_remap) {
+            int ret;
+            MSIMessage src, dst;
 
-        src.address = route->u.msi.address_hi;
-        src.address <<= VTD_MSI_ADDR_HI_SHIFT;
-        src.address |= route->u.msi.address_lo;
-        src.data = route->u.msi.data;
+            src.address = route->u.msi.address_hi;
+            src.address <<= VTD_MSI_ADDR_HI_SHIFT;
+            src.address |= route->u.msi.address_lo;
+            src.data = route->u.msi.data;
 
-        ret = class->int_remap(iommu, &src, &dst, dev ? \
-                               pci_requester_id(dev) : \
-                               X86_IOMMU_SID_INVALID);
-        if (ret) {
-            trace_kvm_x86_fixup_msi_error(route->gsi);
-            return 1;
-        }
+            ret = class->int_remap(iommu, &src, &dst, dev ?     \
+                                   pci_requester_id(dev) :      \
+                                   X86_IOMMU_SID_INVALID);
+            if (ret) {
+                trace_kvm_x86_fixup_msi_error(route->gsi);
+                return 1;
+            }
+
+            /*
+             * Handle untranslated compatibility format interrupt with
+             * extended destination ID in the low bits 11-5. */
+            dst.address = kvm_swizzle_msi_ext_dest_id(dst.address);
 
-        route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT;
-        route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK;
-        route->u.msi.data = dst.data;
+            route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT;
+            route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK;
+            route->u.msi.data = dst.data;
+            return 0;
+        }
     }
 
+    address = kvm_swizzle_msi_ext_dest_id(address);
+    route->u.msi.address_hi = address >> VTD_MSI_ADDR_HI_SHIFT;
+    route->u.msi.address_lo = address & VTD_MSI_ADDR_LO_MASK;
     return 0;
 }
 
index a4a619cebb441b71d62ca9598bb51ff2e83f9582..dc725083891ceed4da8b26f8bc81a1f45d449588 100644 (file)
@@ -48,4 +48,6 @@ bool kvm_has_waitpkg(void);
 
 bool kvm_hv_vpindex_settable(void);
 
+uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address);
+
 #endif