drm/amdgpu: Handle VRAM dependencies on GFXIP9.4.3
author Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Sat, 28 Jan 2023 02:57:00 +0000 (21:57 -0500)
committer Alex Deucher <alexander.deucher@amd.com>
Fri, 9 Jun 2023 13:53:52 +0000 (09:53 -0400)
[For 1P NPS1 mode driver bringup]

These changes are required to initialize the amdgpu driver with frontdoor
firmware loading and discovery=2 on a native-mode SBIOS that enables
unified, interleaved CPU-GPU memory.

sudo modprobe amdgpu discovery=2

Once the PSP TMR region is reported via the ACPI interface, the dependency
on ip_discovery.bin will be removed.

The choice of where to allocate the driver table is left to each IP
version. In general, both the GTT and VRAM domains are considered. If any
one of the tables is strictly restricted to the VRAM domain, then only the
VRAM domain is considered.

Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
(lijo: Modified the handling for SMU Tables)
Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
drivers/gpu/drm/amd/amdkfd/kfd_crat.c
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c

index af37f2ef4438b7ea331084682689812d30b67050..4e179e50de25ca350d6bf2fbd3ccac3d72fb4fdc 100644 (file)
@@ -2292,8 +2292,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
        (*mem)->dmabuf = dma_buf;
        (*mem)->bo = bo;
        (*mem)->va = va;
-       (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+       (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ?
                AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+
        (*mem)->mapped_to_gpu_memory = 0;
        (*mem)->process_info = avm->process_info;
        add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
index 9f0d5f02119ed252af480027029de6bfa2474199..f431205e1077f4b8a5760441499c2d74dfd40196 100644 (file)
@@ -1044,7 +1044,7 @@ static const char * const amdgpu_vram_names[] = {
 int amdgpu_bo_init(struct amdgpu_device *adev)
 {
        /* On A+A platform, VRAM can be mapped as WB */
-       if (!adev->gmc.xgmi.connected_to_cpu) {
+       if (!adev->gmc.xgmi.connected_to_cpu && !adev->gmc.is_app_apu) {
                /* reserve PAT memory space to WC for VRAM */
                int r = arch_io_reserve_memtype_wc(adev->gmc.aper_base,
                                adev->gmc.aper_size);
index 863fa331e6ff3c2750510000f30f003d72f8cbae..4395c53d09d83e60aa2b45f226d641dbeebef5c2 100644 (file)
@@ -476,7 +476,8 @@ static int psp_sw_init(void *handle)
                return ret;
 
        ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
-                                     AMDGPU_GEM_DOMAIN_VRAM,
+                                     AMDGPU_GEM_DOMAIN_VRAM |
+                                     AMDGPU_GEM_DOMAIN_GTT,
                                      &psp->fence_buf_bo,
                                      &psp->fence_buf_mc_addr,
                                      &psp->fence_buf);
@@ -484,7 +485,8 @@ static int psp_sw_init(void *handle)
                goto failed1;
 
        ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
-                                     AMDGPU_GEM_DOMAIN_VRAM,
+                                     AMDGPU_GEM_DOMAIN_VRAM |
+                                     AMDGPU_GEM_DOMAIN_GTT,
                                      &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
                                      (void **)&psp->cmd_buf_mem);
        if (ret)
index 6bbe3b89aef5d608f6c8e9463d732e9446f15856..bc11ae56bba5b9fab771fea4940fb03298ab605c 100644 (file)
@@ -1708,15 +1708,20 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
                ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
        }
 
-       ret = amdgpu_bo_create_kernel_at(adev,
-                                        adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
-                                        adev->mman.discovery_tmr_size,
-                                        &adev->mman.discovery_memory,
-                                        NULL);
-       if (ret) {
-               DRM_ERROR("alloc tmr failed(%d)!\n", ret);
-               amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
-               return ret;
+       if (!adev->gmc.is_app_apu) {
+               ret = amdgpu_bo_create_kernel_at(adev,
+                                                adev->gmc.real_vram_size -
+                                                adev->mman.discovery_tmr_size,
+                                                adev->mman.discovery_tmr_size,
+                                                &adev->mman.discovery_memory,
+                                                NULL);
+               if (ret) {
+                       DRM_ERROR("alloc tmr failed(%d)!\n", ret);
+                       amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
+                       return ret;
+               }
+       } else {
+               DRM_DEBUG_DRIVER("backdoor fw loading path for PSP TMR, no reservation needed\n");
        }
 
        return 0;
@@ -1765,10 +1770,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
                adev->mman.aper_base_kaddr = ioremap_cache(adev->gmc.aper_base,
                                adev->gmc.visible_vram_size);
 
-       else
+       else if (!adev->gmc.is_app_apu)
 #endif
                adev->mman.aper_base_kaddr = ioremap_wc(adev->gmc.aper_base,
                                adev->gmc.visible_vram_size);
+       else
+               DRM_DEBUG_DRIVER("No need to ioremap when real vram size is 0\n");
 #endif
 
        /*
@@ -1803,23 +1810,32 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
         * This is used for VGA emulation and pre-OS scanout buffers to
         * avoid display artifacts while transitioning between pre-OS
         * and driver.  */
-       r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
-                                      &adev->mman.stolen_vga_memory,
-                                      NULL);
-       if (r)
-               return r;
-       r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
-                                      adev->mman.stolen_extended_size,
-                                      &adev->mman.stolen_extended_memory,
-                                      NULL);
-       if (r)
-               return r;
-       r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
-                                      adev->mman.stolen_reserved_size,
-                                      &adev->mman.stolen_reserved_memory,
-                                      NULL);
-       if (r)
-               return r;
+       if (!adev->gmc.is_app_apu) {
+               r = amdgpu_bo_create_kernel_at(adev, 0,
+                                              adev->mman.stolen_vga_size,
+                                              &adev->mman.stolen_vga_memory,
+                                              NULL);
+               if (r)
+                       return r;
+
+               r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
+                                              adev->mman.stolen_extended_size,
+                                              &adev->mman.stolen_extended_memory,
+                                              NULL);
+
+               if (r)
+                       return r;
+
+               r = amdgpu_bo_create_kernel_at(adev,
+                                              adev->mman.stolen_reserved_offset,
+                                              adev->mman.stolen_reserved_size,
+                                              &adev->mman.stolen_reserved_memory,
+                                              NULL);
+               if (r)
+                       return r;
+       } else {
+               DRM_DEBUG_DRIVER("Skipped stolen memory reservation\n");
+       }
 
        DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
                 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
@@ -1866,7 +1882,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
                DRM_ERROR("Failed initializing oa heap.\n");
                return r;
        }
-
        if (amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
                                AMDGPU_GEM_DOMAIN_GTT,
                                &adev->mman.sdma_access_bo, NULL,
@@ -1887,13 +1902,15 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 
        amdgpu_ttm_training_reserve_vram_fini(adev);
        /* return the stolen vga memory back to VRAM */
-       amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
-       amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
-       /* return the IP Discovery TMR memory back to VRAM */
-       amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
-       if (adev->mman.stolen_reserved_size)
-               amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
-                                     NULL, NULL);
+       if (!adev->gmc.is_app_apu) {
+               amdgpu_bo_free_kernel(&adev->mman.stolen_vga_memory, NULL, NULL);
+               amdgpu_bo_free_kernel(&adev->mman.stolen_extended_memory, NULL, NULL);
+               /* return the IP Discovery TMR memory back to VRAM */
+               amdgpu_bo_free_kernel(&adev->mman.discovery_memory, NULL, NULL);
+               if (adev->mman.stolen_reserved_size)
+                       amdgpu_bo_free_kernel(&adev->mman.stolen_reserved_memory,
+                                             NULL, NULL);
+       }
        amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
                                        &adev->mman.sdma_access_ptr);
        amdgpu_ttm_fw_reserve_vram_fini(adev);
@@ -1935,7 +1952,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
        int r;
 
        if (!adev->mman.initialized || amdgpu_in_reset(adev) ||
-           adev->mman.buffer_funcs_enabled == enable)
+           adev->mman.buffer_funcs_enabled == enable || adev->gmc.is_app_apu)
                return;
 
        if (enable) {
index df63dc3bca18cd2829575ef6a592e21dcaee0db0..bc5d126b600b4ed0f00fef2c1768c62aeda4e9cc 100644 (file)
@@ -512,7 +512,12 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 
        bp.size = amdgpu_vm_pt_size(adev, level);
        bp.byte_align = AMDGPU_GPU_PAGE_SIZE;
-       bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+
+       if (!adev->gmc.is_app_apu)
+               bp.domain = AMDGPU_GEM_DOMAIN_VRAM;
+       else
+               bp.domain = AMDGPU_GEM_DOMAIN_GTT;
+
        bp.domain = amdgpu_bo_get_preferred_domain(adev, bp.domain);
        bp.flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
                AMDGPU_GEM_CREATE_CPU_GTT_USWC;
index 1f1268cd5e09f9b067a487190400b059f67e7cd2..42877c4505f1e46928c5507b2cb47bb22c31ee85 100644 (file)
@@ -459,7 +459,8 @@ static int gfx_v9_4_3_mec_init(struct amdgpu_device *adev)
                adev->gfx.num_compute_rings * num_xcc * GFX9_MEC_HPD_SIZE;
        if (mec_hpd_size) {
                r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE,
-                                             AMDGPU_GEM_DOMAIN_VRAM,
+                                             AMDGPU_GEM_DOMAIN_VRAM |
+                                             AMDGPU_GEM_DOMAIN_GTT,
                                              &adev->gfx.mec.hpd_eop_obj,
                                              &adev->gfx.mec.hpd_eop_gpu_addr,
                                              (void **)&hpd);
index 16634a791e10c9f28747c695d66b995e262fbbc6..245de27c7540296a15bab3455d942e7e6597049e 100644 (file)
@@ -1593,8 +1593,13 @@ static int gmc_v9_0_mc_init(struct amdgpu_device *adev)
        int r;
 
        /* size in MB on si */
-       adev->gmc.mc_vram_size =
-               adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+       if (!adev->gmc.is_app_apu) {
+               adev->gmc.mc_vram_size =
+                       adev->nbio.funcs->get_memsize(adev) * 1024ULL * 1024ULL;
+       } else {
+               DRM_DEBUG("Set mc_vram_size = 0 for APP APU\n");
+               adev->gmc.mc_vram_size = 0;
+       }
        adev->gmc.real_vram_size = adev->gmc.mc_vram_size;
 
        if (!(adev->flags & AMD_IS_APU) &&
index 8b9accecf49b873604b53134d8cd4abcb0f06aef..f85ac4dbc6737ffb7360b800e5c85fdb05412a67 100644 (file)
@@ -1026,6 +1026,12 @@ bool kfd_dev_is_large_bar(struct kfd_node *dev)
        if (dev->kfd->local_mem_info.local_mem_size_private == 0 &&
            dev->kfd->local_mem_info.local_mem_size_public > 0)
                return true;
+
+       if (dev->kfd->local_mem_info.local_mem_size_public == 0 && dev->kfd->adev->gmc.is_app_apu) {
+               pr_debug("APP APU, Consider like a large bar system\n");
+               return true;
+       }
+
        return false;
 }
 
index 16475921587bf225046b527723ad756d8d24c116..1aaf933f9f488936cd73c6b7f057b2e56a20bc63 100644 (file)
@@ -30,6 +30,9 @@
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
 
+/* Fixme: Fake 32GB for 1PNPS1 mode bringup */
+#define DUMMY_VRAM_SIZE 31138512896
+
 /* GPU Processor ID base for dGPUs for which VCRAT needs to be created.
  * GPU processor ID are expressed with Bit[31]=1.
  * The base is set to 0x8000_0000 + 0x1000 to avoid collision with GPU IDs
@@ -1053,6 +1056,8 @@ static int kfd_parse_subtype_mem(struct crat_subtype_memory *mem,
 
                        props->heap_type = heap_type;
                        props->flags = flags;
+                       if (size_in_bytes == 0)
+                               size_in_bytes = DUMMY_VRAM_SIZE; /* Fixme: TBD */
                        props->size_in_bytes = size_in_bytes;
                        props->width = width;
 
index 2ddf5198e5c4860f86db5ce824f35ae051fb6bed..4dea79a0c5b5e182f178cf058406d83e5f778685 100644 (file)
@@ -822,11 +822,20 @@ static int smu_init_fb_allocations(struct smu_context *smu)
                }
        }
 
+       driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT;
        /* VRAM allocation for driver table */
        for (i = 0; i < SMU_TABLE_COUNT; i++) {
                if (tables[i].size == 0)
                        continue;
 
+               /* If one of the tables has VRAM domain restriction, keep it in
+                * VRAM
+                */
+               if ((tables[i].domain &
+                   (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) ==
+                           AMDGPU_GEM_DOMAIN_VRAM)
+                       driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM;
+
                if (i == SMU_TABLE_PMSTATUSLOG)
                        continue;
 
@@ -836,7 +845,6 @@ static int smu_init_fb_allocations(struct smu_context *smu)
 
        driver_table->size = max_table_size;
        driver_table->align = PAGE_SIZE;
-       driver_table->domain = AMDGPU_GEM_DOMAIN_VRAM;
 
        ret = amdgpu_bo_create_kernel(adev,
                                      driver_table->size,
index ea8f3d6fb98b3fdbd5614b34a94082a4eea9bfea..8969b3ff5c8fcf3a251f83f822c811c1bd1f25b1 100644 (file)
@@ -220,10 +220,12 @@ static int smu_v13_0_6_tables_init(struct smu_context *smu)
                               PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
 
        SMU_TABLE_INIT(tables, SMU_TABLE_SMU_METRICS, sizeof(MetricsTable_t),
-                      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+                      PAGE_SIZE,
+                      AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);
 
        SMU_TABLE_INIT(tables, SMU_TABLE_I2C_COMMANDS, sizeof(SwI2cRequest_t),
-                      PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM);
+                      PAGE_SIZE,
+                      AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT);
 
        smu_table->metrics_table = kzalloc(sizeof(MetricsTable_t), GFP_KERNEL);
        if (!smu_table->metrics_table)