KVM: X86: Implement "send IPI" hypercall
authorWanpeng Li <wanpengli@tencent.com>
Mon, 23 Jul 2018 06:39:54 +0000 (14:39 +0800)
committerPaolo Bonzini <pbonzini@redhat.com>
Mon, 6 Aug 2018 15:59:20 +0000 (17:59 +0200)
Use a hypercall to send IPIs with a single vmexit, instead of one vmexit
per destination in xAPIC/x2APIC physical mode and one vmexit per cluster
in x2APIC cluster mode. An Intel guest can enter x2APIC cluster mode when
interrupt remapping is enabled in QEMU; however, the latest AMD EPYC
still supports only xAPIC mode, which benefits greatly from exit-less
IPIs. This patchset lets a guest send multicast IPIs, with at most 128
destinations per hypercall in 64-bit mode and 64 destinations per
hypercall in 32-bit mode.

Hardware: Xeon Skylake 2.5GHz, 2 sockets, 40 cores, 80 threads, the VM
is 80 vCPUs, IPI microbenchmark(https://lkml.org/lkml/2017/12/19/141):

x2apic cluster mode, vanilla

 Dry-run:                         0,            2392199 ns
 Self-IPI:                  6907514,           15027589 ns
 Normal IPI:              223910476,          251301666 ns
 Broadcast IPI:                   0,         9282161150 ns
 Broadcast lock:                  0,         8812934104 ns

x2apic cluster mode, pv-ipi

 Dry-run:                         0,            2449341 ns
 Self-IPI:                  6720360,           15028732 ns
 Normal IPI:              228643307,          255708477 ns
 Broadcast IPI:                   0,         7572293590 ns  => 22% performance boost
 Broadcast lock:                  0,         8316124651 ns

x2apic physical mode, vanilla

 Dry-run:                         0,            3135933 ns
 Self-IPI:                  8572670,           17901757 ns
 Normal IPI:              226444334,          255421709 ns
 Broadcast IPI:                   0,        19845070887 ns
 Broadcast lock:                  0,        19827383656 ns

x2apic physical mode, pv-ipi

 Dry-run:                         0,            2446381 ns
 Self-IPI:                  6788217,           15021056 ns
 Normal IPI:              219454441,          249583458 ns
 Broadcast IPI:                   0,         7806540019 ns  => 154% performance boost
 Broadcast lock:                  0,         9143618799 ns

Cc: Paolo Bonzini <pbonzini@redhat.com>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Cc: Vitaly Kuznetsov <vkuznets@redhat.com>
Signed-off-by: Wanpeng Li <wanpengli@tencent.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Documentation/virtual/kvm/cpuid.txt
Documentation/virtual/kvm/hypercalls.txt
arch/x86/include/asm/kvm_host.h
arch/x86/kvm/cpuid.c
arch/x86/kvm/lapic.c
arch/x86/kvm/x86.c
include/uapi/linux/kvm_para.h

index ab022dcd09117571d215294ed51c5675543165f1..97ca1940a0dc95645c05431017f1f06b7150905c 100644 (file)
@@ -62,6 +62,10 @@ KVM_FEATURE_ASYNC_PF_VMEXIT        ||    10 || paravirtualized async PF VM exit
                                    ||       || can be enabled by setting bit 2
                                    ||       || when writing to msr 0x4b564d02
 ------------------------------------------------------------------------------
+KVM_FEATURE_PV_SEND_IPI            ||    11 || guest checks this feature bit
+                                   ||       || before using paravirtualized
+                                   ||       || send IPIs.
+------------------------------------------------------------------------------
 KVM_FEATURE_CLOCKSOURCE_STABLE_BIT ||    24 || host will warn if no guest-side
                                    ||       || per-cpu warps are expected in
                                    ||       || kvmclock.
index a890529c63ed6a3be4e2c38eb739377b37c6bc4c..da24c138c8d131bea63aeb652af3d6fc373514ef 100644 (file)
@@ -121,3 +121,23 @@ compute the CLOCK_REALTIME for its clock, at the same instant.
 
 Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource,
 or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK.
+
+6. KVM_HC_SEND_IPI
+------------------------
+Architecture: x86
+Status: active
+Purpose: Send IPIs to multiple vCPUs.
+
+a0: lower part of the bitmap of destination APIC IDs
+a1: higher part of the bitmap of destination APIC IDs
+a2: the lowest APIC ID in bitmap
+a3: APIC ICR
+
+The hypercall lets a guest send multicast IPIs, with at most 128
+destinations per hypercall in 64-bit mode and 64 vCPUs per
+hypercall in 32-bit mode.  The destinations are represented by a
+bitmap contained in the first two arguments (a0 and a1). Bit 0 of
+a0 corresponds to the APIC ID in the third argument (a2), bit 1
+corresponds to the APIC ID a2+1, and so on.
+
+Returns the number of CPUs to which the IPIs were delivered successfully.
index 150937e64f6372d2b209f2384d391c399547ffed..c18958ef17d2cdc4ccfc6ac80a4752198bded1b6 100644 (file)
@@ -1457,6 +1457,10 @@ int kvm_cpu_get_interrupt(struct kvm_vcpu *v);
 void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event);
 void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu);
 
+int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
+                   unsigned long ipi_bitmap_high, int min,
+                   unsigned long icr, int op_64_bit);
+
 void kvm_define_shared_msr(unsigned index, u32 msr);
 int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
index 7e042e3d47fd5a007cd70ad20d2f5d6ee525c28e..7bcfa61375c097fa71e0d6b168f8493c8a9f4ad9 100644 (file)
@@ -621,7 +621,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
                             (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT) |
                             (1 << KVM_FEATURE_PV_UNHALT) |
                             (1 << KVM_FEATURE_PV_TLB_FLUSH) |
-                            (1 << KVM_FEATURE_ASYNC_PF_VMEXIT);
+                            (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
+                            (1 << KVM_FEATURE_PV_SEND_IPI);
 
                if (sched_info_on())
                        entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
index b5cd8465d44f6cb99a9ae705cf2f44f3c310a1ac..f0d693122c24898b3525e384492b335fbf2cfe11 100644 (file)
@@ -547,6 +547,46 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
                        irq->level, irq->trig_mode, dest_map);
 }
 
+/*
+ * Deliver @irq to each vCPU selected by one word of a PV IPI bitmap.
+ * Bit i of @ipi_bitmap selects physical APIC ID @base + i.  Bits that
+ * would index past map->max_apic_id, or that name an APIC ID with no
+ * entry in map->phys_map, are skipped rather than dereferenced.
+ *
+ * Called by kvm_pv_send_ipi() inside its rcu_read_lock() section.
+ * Returns the sum of the kvm_apic_set_irq() results.
+ */
+static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
+                        struct kvm_lapic_irq *irq, u32 base)
+{
+       struct kvm_lapic *apic;
+       u32 span, nbits;
+       int i, count = 0;
+
+       if (base > map->max_apic_id)
+               return 0;
+
+       /* Clamp the scan so that base + i never exceeds max_apic_id. */
+       span = map->max_apic_id - base;
+       nbits = span < BITS_PER_LONG - 1 ? span + 1 : BITS_PER_LONG;
+
+       for_each_set_bit(i, ipi_bitmap, nbits) {
+               apic = map->phys_map[base + i];
+               if (apic)
+                       count += kvm_apic_set_irq(apic->vcpu, irq, NULL);
+       }
+
+       return count;
+}
+
+/*
+ * KVM_HC_SEND_IPI backend: send the interrupt described by @icr to the
+ * vCPUs named by a two-word bitmap of physical APIC IDs.
+ *
+ * @ipi_bitmap_low:  bit i selects APIC ID @min + i
+ * @ipi_bitmap_high: bit i selects APIC ID @min + cluster_size + i, where
+ *                   cluster_size is 64 if @op_64_bit is set, else 32
+ * @min:             APIC ID that bit 0 of @ipi_bitmap_low stands for
+ * @icr:             ICR-format value supplying vector, delivery mode,
+ *                   level and trigger mode
+ *
+ * All arguments originate from the guest, so none of them is trusted:
+ * APIC IDs outside the current map or without a local APIC are ignored.
+ *
+ * Returns the accumulated kvm_apic_set_irq() results on success,
+ * -KVM_EINVAL if @icr has APIC_DEST_MASK or APIC_SHORT_MASK bits set,
+ * or -KVM_EOPNOTSUPP if the VM has no APIC map.
+ */
+int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
+                   unsigned long ipi_bitmap_high, int min,
+                   unsigned long icr, int op_64_bit)
+{
+       struct kvm_apic_map *map;
+       struct kvm_lapic_irq irq = {0};
+       u32 cluster_size = op_64_bit ? 64 : 32;
+       /*
+        * Do the index arithmetic unsigned: a negative @min becomes a huge
+        * APIC ID that fails the range check instead of indexing backwards.
+        */
+       u32 base = min;
+       int count = 0;
+
+       irq.vector = icr & APIC_VECTOR_MASK;
+       irq.delivery_mode = icr & APIC_MODE_MASK;
+       irq.level = (icr & APIC_INT_ASSERT) != 0;
+       irq.trig_mode = icr & APIC_INT_LEVELTRIG;
+
+       if (icr & APIC_DEST_MASK)
+               return -KVM_EINVAL;
+       if (icr & APIC_SHORT_MASK)
+               return -KVM_EINVAL;
+
+       rcu_read_lock();
+       map = rcu_dereference(kvm->arch.apic_map);
+
+       /* The map pointer can be NULL; do not dereference it blindly. */
+       if (unlikely(!map)) {
+               count = -KVM_EOPNOTSUPP;
+               goto out;
+       }
+
+       if (base > map->max_apic_id)
+               goto out;
+
+       /* Bits above cluster_size are masked in the caller.  */
+       count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, base);
+
+       /*
+        * The high word starts cluster_size IDs later.  Skip it when that
+        * start lies beyond the map; the subtraction form also keeps
+        * base + cluster_size from wrapping around.
+        */
+       if (map->max_apic_id - base >= cluster_size)
+               count += __pv_send_ipi(&ipi_bitmap_high, map, &irq,
+                                      base + cluster_size);
+
+out:
+       rcu_read_unlock();
+       return count;
+}
+
 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
 {
 
index 6b974802cadbafd89818ef67b69f30715006f7d8..3c83711c0ebe147e4251db1c3a30ea1799d5042f 100644 (file)
@@ -6802,6 +6802,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
        case KVM_HC_CLOCK_PAIRING:
                ret = kvm_pv_clock_pairing(vcpu, a0, a1);
                break;
+       case KVM_HC_SEND_IPI:
+               ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
+               break;
 #endif
        default:
                ret = -KVM_ENOSYS;
index dcf629dd28896b2010f3fcd4fc8516130a71e80a..f3893ef82b653a259e6450526e26d0a3309e513e 100644 (file)
@@ -13,6 +13,7 @@
 /* Return values for hypercalls */
 #define KVM_ENOSYS             1000
 #define KVM_EFAULT             EFAULT
+#define KVM_EINVAL             EINVAL
 #define KVM_E2BIG              E2BIG
 #define KVM_EPERM              EPERM
 #define KVM_EOPNOTSUPP         95