x86/hyperv: provide a bunch of helper functions
authorWei Liu <wei.liu@kernel.org>
Wed, 3 Feb 2021 15:04:28 +0000 (15:04 +0000)
committerWei Liu <wei.liu@kernel.org>
Thu, 11 Feb 2021 08:47:06 +0000 (08:47 +0000)
They are used to deposit pages into Microsoft Hypervisor and bring up
logical and virtual processors.

Signed-off-by: Lillian Grassin-Drake <ligrassi@microsoft.com>
Signed-off-by: Sunil Muthuswamy <sunilmut@microsoft.com>
Signed-off-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
Co-developed-by: Lillian Grassin-Drake <ligrassi@microsoft.com>
Co-developed-by: Sunil Muthuswamy <sunilmut@microsoft.com>
Co-developed-by: Nuno Das Neves <nunodasneves@linux.microsoft.com>
Signed-off-by: Wei Liu <wei.liu@kernel.org>
Reviewed-by: Michael Kelley <mikelley@microsoft.com>
Link: https://lore.kernel.org/r/20210203150435.27941-10-wei.liu@kernel.org
arch/x86/hyperv/Makefile
arch/x86/hyperv/hv_proc.c [new file with mode: 0644]
arch/x86/include/asm/mshyperv.h
include/asm-generic/hyperv-tlfs.h

index 89b1f74d32253b4c091f69b586d6f4201d66e7d4..565358020921c0530a80b37c0fa27d7ce10caabe 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-y                  := hv_init.o mmu.o nested.o
-obj-$(CONFIG_X86_64)   += hv_apic.o
+obj-$(CONFIG_X86_64)   += hv_apic.o hv_proc.o
 
 ifdef CONFIG_X86_64
 obj-$(CONFIG_PARAVIRT_SPINLOCKS)       += hv_spinlock.o
diff --git a/arch/x86/hyperv/hv_proc.c b/arch/x86/hyperv/hv_proc.c
new file mode 100644 (file)
index 0000000..60461e5
--- /dev/null
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/types.h>
+#include <linux/version.h>
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+#include <linux/clockchips.h>
+#include <linux/acpi.h>
+#include <linux/hyperv.h>
+#include <linux/slab.h>
+#include <linux/cpuhotplug.h>
+#include <linux/minmax.h>
+#include <asm/hypervisor.h>
+#include <asm/mshyperv.h>
+#include <asm/apic.h>
+
+#include <asm/trace/hyperv.h>
+
+/*
+ * See struct hv_deposit_memory. The first u64 is partition ID, the rest
+ * are GPAs.
+ */
+#define HV_DEPOSIT_MAX (HV_HYP_PAGE_SIZE / sizeof(u64) - 1)
+
+/* Deposits exact number of pages. Must be called with interrupts enabled.  */
+int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages)
+{
+       struct page **pages, *page;
+       int *counts;
+       int num_allocations;
+       int i, j, page_count;
+       int order;
+       u64 status;
+       int ret;
+       u64 base_pfn;
+       struct hv_deposit_memory *input_page;
+       unsigned long flags;
+
+       /* All GPAs must fit on one hypercall input page alongside the ID */
+       if (num_pages > HV_DEPOSIT_MAX)
+               return -E2BIG;
+       if (!num_pages)
+               return 0;
+
+       /* One buffer for page pointers and counts */
+       page = alloc_page(GFP_KERNEL);
+       if (!page)
+               return -ENOMEM;
+       pages = page_address(page);
+
+       counts = kcalloc(HV_DEPOSIT_MAX, sizeof(int), GFP_KERNEL);
+       if (!counts) {
+               free_page((unsigned long)pages);
+               return -ENOMEM;
+       }
+
+       /* Allocate all the pages before disabling interrupts */
+       i = 0;
+
+       while (num_pages) {
+               /* Find highest order we can actually allocate */
+               order = 31 - __builtin_clz(num_pages);
+
+               /* Fall back to progressively smaller orders on failure */
+               while (1) {
+                       pages[i] = alloc_pages_node(node, GFP_KERNEL, order);
+                       if (pages[i])
+                               break;
+                       if (!order) {
+                               ret = -ENOMEM;
+                               num_allocations = i;
+                               goto err_free_allocations;
+                       }
+                       --order;
+               }
+
+               /* Split so the error path can free each 4K page individually */
+               split_page(pages[i], order);
+               counts[i] = 1 << order;
+               num_pages -= counts[i];
+               i++;
+       }
+       num_allocations = i;
+
+       /*
+        * The per-cpu hypercall input page is used below; keep interrupts
+        * disabled until the hypercall completes so it is not clobbered.
+        */
+       local_irq_save(flags);
+
+       input_page = *this_cpu_ptr(hyperv_pcpu_input_arg);
+
+       input_page->partition_id = partition_id;
+
+       /* Populate gpa_page_list - these will fit on the input page */
+       for (i = 0, page_count = 0; i < num_allocations; ++i) {
+               base_pfn = page_to_pfn(pages[i]);
+               for (j = 0; j < counts[i]; ++j, ++page_count)
+                       input_page->gpa_page_list[page_count] = base_pfn + j;
+       }
+       status = hv_do_rep_hypercall(HVCALL_DEPOSIT_MEMORY,
+                                    page_count, 0, input_page, NULL);
+       local_irq_restore(flags);
+
+       if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) {
+               /* NOTE(review): %lld formats a u64; prefer %llu or a cast */
+               pr_err("Failed to deposit pages: %lld\n", status);
+               /*
+                * NOTE(review): returns the positive Hyper-V status code, not
+                * a negative errno - confirm callers only test for nonzero.
+                */
+               ret = status;
+               goto err_free_allocations;
+       }
+
+       ret = 0;
+       goto free_buf;
+
+err_free_allocations:
+       /*
+        * NOTE(review): assumes the hypervisor consumed no pages on failure;
+        * confirm against the TLFS rep-hypercall partial-completion semantics.
+        */
+       for (i = 0; i < num_allocations; ++i) {
+               base_pfn = page_to_pfn(pages[i]);
+               for (j = 0; j < counts[i]; ++j)
+                       __free_page(pfn_to_page(base_pfn + j));
+       }
+
+free_buf:
+       free_page((unsigned long)pages);
+       kfree(counts);
+       return ret;
+}
+
+/*
+ * Bring up a logical processor via HvAddLogicalProcessor, retrying with a
+ * one-page deposit into the current (root) partition each time the
+ * hypervisor reports HV_STATUS_INSUFFICIENT_MEMORY.
+ */
+int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id)
+{
+       struct hv_add_logical_processor_in *input;
+       struct hv_add_logical_processor_out *output;
+       u64 status;
+       unsigned long flags;
+       int ret = 0;
+       int pxm = node_to_pxm(node);
+
+       /*
+        * When adding a logical processor, the hypervisor may return
+        * HV_STATUS_INSUFFICIENT_MEMORY. When that happens, we deposit more
+        * pages and retry.
+        */
+       do {
+               /* Interrupts off while the per-cpu hypercall pages are in use */
+               local_irq_save(flags);
+
+               input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+               /* We don't do anything with the output right now */
+               output = *this_cpu_ptr(hyperv_pcpu_output_arg);
+
+               input->lp_index = lp_index;
+               input->apic_id = apic_id;
+               input->flags = 0;
+               input->proximity_domain_info.domain_id = pxm;
+               input->proximity_domain_info.flags.reserved = 0;
+               input->proximity_domain_info.flags.proximity_info_valid = 1;
+               input->proximity_domain_info.flags.proximity_preferred = 1;
+               status = hv_do_hypercall(HVCALL_ADD_LOGICAL_PROCESSOR,
+                                        input, output);
+               local_irq_restore(flags);
+
+               status &= HV_HYPERCALL_RESULT_MASK;
+
+               if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
+                       if (status != HV_STATUS_SUCCESS) {
+                               pr_err("%s: cpu %u apic ID %u, %lld\n", __func__,
+                                      lp_index, apic_id, status);
+                               /*
+                                * NOTE(review): positive Hyper-V status code
+                                * returned as ret, not a negative errno.
+                                */
+                               ret = status;
+                       }
+                       break;
+               }
+               ret = hv_call_deposit_pages(node, hv_current_partition_id, 1);
+       } while (!ret);
+
+       return ret;
+}
+
+/*
+ * Create a virtual processor in @partition_id via HvCreateVp. For non-root
+ * partitions a chunk of pages is deposited up front; afterwards single-page
+ * deposits are made whenever HV_STATUS_INSUFFICIENT_MEMORY is reported.
+ */
+int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags)
+{
+       struct hv_create_vp *input;
+       u64 status;
+       unsigned long irq_flags;
+       int ret = 0;
+       int pxm = node_to_pxm(node);
+
+       /* Root VPs don't seem to need pages deposited */
+       if (partition_id != hv_current_partition_id) {
+               /* The value 90 is empirically determined. It may change. */
+               ret = hv_call_deposit_pages(node, partition_id, 90);
+               if (ret)
+                       return ret;
+       }
+
+       do {
+               /* Interrupts off while the per-cpu hypercall input is in use */
+               local_irq_save(irq_flags);
+
+               input = *this_cpu_ptr(hyperv_pcpu_input_arg);
+
+               input->partition_id = partition_id;
+               input->vp_index = vp_index;
+               input->flags = flags;
+               input->subnode_type = HvSubnodeAny;
+               if (node != NUMA_NO_NODE) {
+                       input->proximity_domain_info.domain_id = pxm;
+                       input->proximity_domain_info.flags.reserved = 0;
+                       input->proximity_domain_info.flags.proximity_info_valid = 1;
+                       input->proximity_domain_info.flags.proximity_preferred = 1;
+               } else {
+                       /* No NUMA hint: zero the whole proximity info union */
+                       input->proximity_domain_info.as_uint64 = 0;
+               }
+               status = hv_do_hypercall(HVCALL_CREATE_VP, input, NULL);
+               local_irq_restore(irq_flags);
+
+               status &= HV_HYPERCALL_RESULT_MASK;
+
+               if (status != HV_STATUS_INSUFFICIENT_MEMORY) {
+                       if (status != HV_STATUS_SUCCESS) {
+                               /*
+                                * NOTE(review): the "lp %u" field prints the
+                                * flags argument - confirm this is intended.
+                                */
+                               pr_err("%s: vcpu %u, lp %u, %lld\n", __func__,
+                                      vp_index, flags, status);
+                               ret = status;
+                       }
+                       break;
+               }
+               ret = hv_call_deposit_pages(node, partition_id, 1);
+
+       } while (!ret);
+
+       return ret;
+}
+
index b8324202d85012ef96ff9cb7bd488512738b44a6..f9119781f2bbbeae1a3006ed4d0b7fb0201e232e 100644 (file)
@@ -82,6 +82,10 @@ extern void  __percpu  **hyperv_pcpu_output_arg;
 
 extern u64 hv_current_partition_id;
 
+/* Deposit pages into Hyper-V and bring up logical/virtual processors */
+int hv_call_deposit_pages(int node, u64 partition_id, u32 num_pages);
+int hv_call_add_logical_proc(int node, u32 lp_index, u32 apic_id);
+int hv_call_create_vp(int node, u64 partition_id, u32 vp_index, u32 flags);
+
 static inline u64 hv_do_hypercall(u64 control, void *input, void *output)
 {
        u64 input_address = input ? virt_to_phys(input) : 0;
index ba96bcb5c657e1de87c558d97f4bd41798107896..562a29981632ea46778c50c660d13d573275f824 100644 (file)
@@ -143,6 +143,8 @@ struct ms_hyperv_tsc_page {
 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX   0x0014
 #define HVCALL_SEND_IPI_EX                     0x0015
 #define HVCALL_GET_PARTITION_ID                        0x0046
+#define HVCALL_DEPOSIT_MEMORY                  0x0048
+#define HVCALL_CREATE_VP                       0x004e
 #define HVCALL_GET_VP_REGISTERS                        0x0050
 #define HVCALL_SET_VP_REGISTERS                        0x0051
 #define HVCALL_POST_MESSAGE                    0x005c
@@ -150,6 +152,7 @@ struct ms_hyperv_tsc_page {
 #define HVCALL_POST_DEBUG_DATA                 0x0069
 #define HVCALL_RETRIEVE_DEBUG_DATA             0x006a
 #define HVCALL_RESET_DEBUG_SESSION             0x006b
+#define HVCALL_ADD_LOGICAL_PROCESSOR           0x0076
 #define HVCALL_RETARGET_INTERRUPT              0x007e
 #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
 #define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
@@ -414,6 +417,70 @@ struct hv_get_partition_id {
        u64 partition_id;
 } __packed;
 
+/* HvDepositMemory hypercall */
+struct hv_deposit_memory {
+       u64 partition_id;
+       /* GPAs of the pages being deposited (flexible array member) */
+       u64 gpa_page_list[];
+} __packed;
+
+/*
+ * NOTE(review): relies on the compiler's little-endian bit-field layout,
+ * consistent with the other bit-fields in this header.
+ */
+struct hv_proximity_domain_flags {
+       u32 proximity_preferred : 1;
+       u32 reserved : 30;
+       u32 proximity_info_valid : 1;
+} __packed;
+
+/* Not a union in windows but useful for zeroing */
+union hv_proximity_domain_info {
+       struct {
+               u32 domain_id;
+               struct hv_proximity_domain_flags flags;
+       };
+       u64 as_uint64;
+} __packed;
+
+struct hv_lp_startup_status {
+       u64 hv_status;
+       u64 substatus1;
+       u64 substatus2;
+       u64 substatus3;
+       u64 substatus4;
+       u64 substatus5;
+       u64 substatus6;
+} __packed;
+
+/* HvAddLogicalProcessor hypercall */
+struct hv_add_logical_processor_in {
+       u32 lp_index;
+       u32 apic_id;
+       union hv_proximity_domain_info proximity_domain_info;
+       u64 flags;
+} __packed;
+
+struct hv_add_logical_processor_out {
+       struct hv_lp_startup_status startup_status;
+} __packed;
+
+/* Naming and values follow the TLFS, hence the non-kernel style */
+enum HV_SUBNODE_TYPE
+{
+    HvSubnodeAny = 0,
+    HvSubnodeSocket = 1,
+    HvSubnodeAmdNode = 2,
+    HvSubnodeL3 = 3,
+    HvSubnodeCount = 4,
+    HvSubnodeInvalid = -1
+};
+
+/* HvCreateVp hypercall */
+struct hv_create_vp {
+       u64 partition_id;
+       u32 vp_index;
+       /* Explicit padding keeps subnode_type at the TLFS-defined offset */
+       u8 padding[3];
+       u8 subnode_type;
+       u64 subnode_id;
+       union hv_proximity_domain_info proximity_domain_info;
+       u64 flags;
+} __packed;
+
 /* HvRetargetDeviceInterrupt hypercall */
 union hv_msi_entry {
        u64 as_uint64;