drm/amdkfd: implement the dGPU fallback path for apu (v6)
author Huang Rui <ray.huang@amd.com>
Tue, 18 Aug 2020 06:54:23 +0000 (14:54 +0800)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 26 Aug 2020 20:40:17 +0000 (16:40 -0400)
We still have a few IOMMU issues that need to be addressed, so force raven
onto the "dgpu" path for the moment.

This adds the fallback path to bypass the IOMMU if IOMMU v2 is disabled
or the ACPI CRAT table is not correct.

v2: Use ignore_crat parameter to decide whether it will go with IOMMUv2.
v3: Align with the existing thunk: don't change the behavior of raven; only
    renoir will use the "dgpu" path by default.
v4: don't update global ignore_crat in the driver, and revise fallback
    function if CRAT is broken.
v5: refine the case where the ACPI CRAT is good but there is no IOMMU
    support, and rename the title.
v6: fix the issue that occurs when a dGPU is initialized first; just modify
    the reported value in node_show().

Signed-off-by: Huang Rui <ray.huang@amd.com>
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
drivers/gpu/drm/amd/amdkfd/kfd_device.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/amdkfd/kfd_topology.c

index ed29f0ce007cbb39803318152287bf5a0c427411..dfc1a9721867e85afd12e1980958d6bd0343a725 100644 (file)
@@ -680,11 +680,14 @@ MODULE_PARM_DESC(debug_largebar,
  * Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT
  * table to get information about AMD APUs. This option can serve as a workaround on
  * systems with a broken CRAT table.
+ *
+ * Default is auto (decide whether to use CRAT according to asic type, iommu_v2,
+ * and the crat table)
  */
 int ignore_crat;
 module_param(ignore_crat, int, 0444);
 MODULE_PARM_DESC(ignore_crat,
-       "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)");
+       "Ignore CRAT table during KFD initialization (0 = auto (default), 1 = ignore CRAT)");
 
 /**
  * DOC: halt_if_hws_hang (int)
index e9b96ad3d9a525a4f0f01adc85180f6651e2945d..b7b16adb0615d6feb2b5b9ec30c8a312c5d879d4 100644 (file)
@@ -1254,7 +1254,7 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev)
                return true;
        }
 
-       if (dev->device_info->needs_iommu_device)
+       if (dev->use_iommu_v2)
                return false;
 
        amdgpu_amdkfd_get_local_mem_info(dev->kgd, &mem_info);
index 6a250f8fcfb818a45a3893d6659f669625c7a051..3fac06b281ce0b21e8dd90565cdee81b6b82c87b 100644 (file)
@@ -742,6 +742,22 @@ static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
        return 0;
 }
 
+static bool kfd_ignore_crat(void)
+{
+       bool ret;
+
+       if (ignore_crat) /* ignore_crat module parameter is non-zero: always skip the CRAT table */
+               return true;
+
+#ifndef KFD_SUPPORT_IOMMU_V2
+       ret = true; /* KFD_SUPPORT_IOMMU_V2 not defined at build time: skip the CRAT table */
+#else
+       ret = false; /* KFD_SUPPORT_IOMMU_V2 defined: do not skip the CRAT table */
+#endif
+
+       return ret;
+}
+
 /*
  * kfd_create_crat_image_acpi - Allocates memory for CRAT image and
  * copies CRAT from ACPI (if available).
@@ -776,7 +792,7 @@ int kfd_create_crat_image_acpi(void **crat_image, size_t *size)
                return -EINVAL;
        }
 
-       if (ignore_crat) {
+       if (kfd_ignore_crat()) {
                pr_info("CRAT table disabled by module option\n");
                return -ENODATA;
        }
index d5e790f046b4bc297a2256f1f2861ae847b3a6f5..b15b620e731b7f8ab14e9da3d13f67748d824781 100644 (file)
@@ -116,6 +116,7 @@ static const struct kfd_device_info carrizo_device_info = {
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
+#endif
 
 static const struct kfd_device_info raven_device_info = {
        .asic_family = CHIP_RAVEN,
@@ -134,7 +135,6 @@ static const struct kfd_device_info raven_device_info = {
        .num_xgmi_sdma_engines = 0,
        .num_sdma_queues_per_engine = 2,
 };
-#endif
 
 static const struct kfd_device_info hawaii_device_info = {
        .asic_family = CHIP_HAWAII,
@@ -738,6 +738,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
                goto gws_error;
        }
 
+       /* If CRAT is broken, won't set iommu enabled */
+       kfd_double_confirm_iommu_support(kfd);
+
        if (kfd_iommu_device_init(kfd)) {
                dev_err(kfd_device, "Error initializing iommuv2\n");
                goto device_iommu_error;
index 95a82ac455f2ba1609426ecc797231d865b77ec5..309f63a0b34a15e6442488bbc6fbddcd15544a03 100644 (file)
@@ -62,7 +62,7 @@ static int update_qpd_v9(struct device_queue_manager *dqm,
                                SH_MEM_ALIGNMENT_MODE_UNALIGNED <<
                                        SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT;
                if (amdgpu_noretry &&
-                   !dqm->dev->device_info->needs_iommu_device)
+                   !dqm->dev->use_iommu_v2)
                        qpd->sh_mem_config |=
                                1 << SH_MEM_CONFIG__RETRY_DISABLE__SHIFT;
 
index c1166c40ac15e6ee894c8862907b23992871618a..3c22909470f2078343ccda6bd0e7722ae664e70d 100644 (file)
@@ -321,7 +321,7 @@ static void kfd_init_apertures_vi(struct kfd_process_device *pdd, uint8_t id)
        pdd->lds_base = MAKE_LDS_APP_BASE_VI();
        pdd->lds_limit = MAKE_LDS_APP_LIMIT(pdd->lds_base);
 
-       if (!pdd->dev->device_info->needs_iommu_device) {
+       if (!pdd->dev->use_iommu_v2) {
                /* dGPUs: SVM aperture starting at 0
                 * with small reserved space for kernel.
                 * Set them to CANONICAL addresses.
@@ -425,7 +425,7 @@ int kfd_init_apertures(struct kfd_process *process)
                                return -EINVAL;
                        }
 
-                       if (!dev->device_info->needs_iommu_device) {
+                       if (!dev->use_iommu_v2) {
                                /* dGPUs: the reserved space for kernel
                                 * before SVM
                                 */
index 7c8786b9eb0aaad65571d876e138d45aad8a7f13..5a64915abaf7f6330aad2a049af570351a1b08a8 100644 (file)
@@ -41,7 +41,7 @@ int kfd_iommu_check_device(struct kfd_dev *kfd)
        struct amd_iommu_device_info iommu_info;
        int err;
 
-       if (!kfd->device_info->needs_iommu_device)
+       if (!kfd->use_iommu_v2)
                return -ENODEV;
 
        iommu_info.flags = 0;
@@ -63,7 +63,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)
        unsigned int pasid_limit;
        int err;
 
-       if (!kfd->device_info->needs_iommu_device)
+       if (!kfd->use_iommu_v2)
                return 0;
 
        iommu_info.flags = 0;
@@ -109,7 +109,7 @@ int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
        struct kfd_process *p = pdd->process;
        int err;
 
-       if (!dev->device_info->needs_iommu_device || pdd->bound == PDD_BOUND)
+       if (!dev->use_iommu_v2 || pdd->bound == PDD_BOUND)
                return 0;
 
        if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) {
@@ -284,7 +284,7 @@ static void kfd_unbind_processes_from_device(struct kfd_dev *kfd)
  */
 void kfd_iommu_suspend(struct kfd_dev *kfd)
 {
-       if (!kfd->device_info->needs_iommu_device)
+       if (!kfd->use_iommu_v2)
                return;
 
        kfd_unbind_processes_from_device(kfd);
@@ -304,7 +304,7 @@ int kfd_iommu_resume(struct kfd_dev *kfd)
        unsigned int pasid_limit;
        int err;
 
-       if (!kfd->device_info->needs_iommu_device)
+       if (!kfd->use_iommu_v2)
                return 0;
 
        pasid_limit = kfd_get_pasid_limit();
index 6727e9de5b8b069fc8bdd0faa9da56ea064b227d..f14beb93acb43da57248f90859d2d132059b9db9 100644 (file)
@@ -297,6 +297,9 @@ struct kfd_dev {
 
        bool pci_atomic_requested;
 
+       /* Use IOMMU v2 flag */
+       bool use_iommu_v2;
+
        /* SRAM ECC flag */
        atomic_t sram_ecc_flag;
 
@@ -892,6 +895,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
 struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd);
 int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
 int kfd_numa_node_to_apic_id(int numa_node_id);
+void kfd_double_confirm_iommu_support(struct kfd_dev *gpu);
 
 /* Interrupts */
 int kfd_interrupt_init(struct kfd_dev *dev);
index f185f6cbc05c66291f92aa123cd4ddb14ee4b3e3..5e8eb783d2d52b5726558fbfb5e1bbb66474ff0e 100644 (file)
@@ -446,7 +446,7 @@ static ssize_t node_show(struct kobject *kobj, struct attribute *attr,
        sysfs_show_32bit_prop(buffer, offs, "cpu_cores_count",
                              dev->node_props.cpu_cores_count);
        sysfs_show_32bit_prop(buffer, offs, "simd_count",
-                             dev->node_props.simd_count);
+                             dev->gpu ? dev->node_props.simd_count : 0);
        sysfs_show_32bit_prop(buffer, offs, "mem_banks_count",
                              dev->node_props.mem_banks_count);
        sysfs_show_32bit_prop(buffer, offs, "caches_count",
@@ -1139,7 +1139,7 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
                /* Discrete GPUs need their own topology device list
                 * entries. Don't assign them to CPU/APU nodes.
                 */
-               if (!gpu->device_info->needs_iommu_device &&
+               if (!gpu->use_iommu_v2 &&
                    dev->node_props.cpu_cores_count)
                        continue;
 
@@ -1388,7 +1388,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
        * Overwrite ATS capability according to needs_iommu_device to fix
        * potential missing corresponding bit in CRAT of BIOS.
        */
-       if (dev->gpu->device_info->needs_iommu_device)
+       if (dev->gpu->use_iommu_v2)
                dev->node_props.capability |= HSA_CAP_ATS_PRESENT;
        else
                dev->node_props.capability &= ~HSA_CAP_ATS_PRESENT;
@@ -1515,6 +1515,29 @@ int kfd_numa_node_to_apic_id(int numa_node_id)
        return kfd_cpumask_to_apic_id(cpumask_of_node(numa_node_id));
 }
 
+void kfd_double_confirm_iommu_support(struct kfd_dev *gpu)
+{
+       struct kfd_topology_device *dev;
+
+       gpu->use_iommu_v2 = false; /* default; only the scan below can set it to true */
+
+       if (!gpu->device_info->needs_iommu_device) /* nothing to confirm: flag stays false */
+               return;
+
+       down_read(&topology_lock); /* read lock held while walking topology_device_list */
+
+       /* Only use IOMMUv2 if there is an APU topology node with no GPU
+        * assigned yet. This GPU will be assigned to it.
+        */
+       list_for_each_entry(dev, &topology_device_list, list)
+               if (dev->node_props.cpu_cores_count &&
+                   dev->node_props.simd_count &&
+                   !dev->gpu)
+                       gpu->use_iommu_v2 = true; /* no break: remaining entries are still visited */
+
+       up_read(&topology_lock);
+}
+
 #if defined(CONFIG_DEBUG_FS)
 
 int kfd_debugfs_hqds_by_device(struct seq_file *m, void *data)