drm/xe: Size GT device memory correctly
author: Michael J. Ruhl <michael.j.ruhl@intel.com>
Thu, 25 May 2023 19:43:25 +0000 (15:43 -0400)
committer: Rodrigo Vivi <rodrigo.vivi@intel.com>
Tue, 19 Dec 2023 23:34:10 +0000 (18:34 -0500)
The current method of sizing GT device memory assumes that VRAM is
equally partitioned across tiles, which is incorrect.

Update the algorithm to query each tile's VRAM size and offset from the
hardware, and use them to set up the per-GT sizing correctly.

Update the stolen memory sizing to reflect the changes, and to be
GT specific.

Reviewed-by: Matthew Auld <matthew.auld@intel.com>
Signed-off-by: Michael J. Ruhl <michael.j.ruhl@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
drivers/gpu/drm/xe/xe_device_types.h
drivers/gpu/drm/xe/xe_gt_types.h
drivers/gpu/drm/xe/xe_mmio.c

index 0c31b341162a0f604bc729e41e1064cd4cea2389..5b3f270bf7906ecb356953589fbacdf590a66ee2 100644 (file)
@@ -144,6 +144,8 @@ struct xe_device {
                        resource_size_t io_size;
                        /** @size: Total size of VRAM */
                        resource_size_t size;
+                       /** @base: Offset to apply for Device Physical Address control */
+                       resource_size_t base;
                        /** @mapping: pointer to VRAM mappable space */
                        void *__iomem mapping;
                } vram;
index 993f855025fd5dcee83efb4f2f95851558aea767..093d650c35f4d27dae080aa5875f4bfdb32e2e00 100644 (file)
@@ -155,6 +155,8 @@ struct xe_gt {
                         * the first 256M). This configuration is known as small-bar.
                         */
                        resource_size_t io_size;
+                       /** @base: offset of VRAM starting base */
+                       resource_size_t base;
                        /** @size: size of VRAM. */
                        resource_size_t size;
                        /** @mapping: pointer to VRAM mappable space */
index 665fcb23bbbb0c1810168594d9969b64cac11a0e..d3b57669c9a7639a89b24fa080bdd263ed3f3ac9 100644 (file)
@@ -179,6 +179,8 @@ static int xe_determine_lmem_bar_size(struct xe_device *xe)
        if (!xe->mem.vram.io_size)
                return -EIO;
 
+       xe->mem.vram.base = 0; /* DPA offset */
+
        /* set up a map to the total memory area. */
        xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);
 
@@ -240,6 +242,9 @@ int xe_mmio_tile_vram_size(struct xe_gt *gt, u64 *vram_size, u64 *tile_size, u64
 
 int xe_mmio_probe_vram(struct xe_device *xe)
 {
+       resource_size_t io_size;
+       u64 available_size = 0;
+       u64 total_size = 0;
        struct xe_gt *gt;
        u64 tile_offset;
        u64 tile_size;
@@ -265,64 +270,60 @@ int xe_mmio_probe_vram(struct xe_device *xe)
                drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n",
                         vram_size, (u64)xe->mem.vram.io_size);
 
-       /* Limit size to available memory to account for the current memory algorithm */
-       xe->mem.vram.io_size = min_t(u64, xe->mem.vram.io_size, vram_size);
-       xe->mem.vram.size = xe->mem.vram.io_size;
-
        drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
                 &xe->mem.vram.io_size);
 
-       /* FIXME: Assuming equally partitioned VRAM, incorrect */
-       if (xe->info.tile_count > 1) {
-               u8 adj_tile_count = xe->info.tile_count;
-               resource_size_t size, io_start, io_size;
+       io_size = xe->mem.vram.io_size;
 
-               for_each_gt(gt, xe, id)
-                       if (xe_gt_is_media_type(gt))
-                               --adj_tile_count;
+       /* gt specific ranges */
+       for_each_gt(gt, xe, id) {
+               if (xe_gt_is_media_type(gt))
+                       continue;
 
-               XE_BUG_ON(!adj_tile_count);
+               err = xe_mmio_tile_vram_size(gt, &vram_size, &tile_size, &tile_offset);
+               if (err)
+                       return err;
 
-               size = xe->mem.vram.size / adj_tile_count;
-               io_start = xe->mem.vram.io_start;
-               io_size = xe->mem.vram.io_size;
+               gt->mem.vram.io_start = xe->mem.vram.io_start + tile_offset;
+               gt->mem.vram.io_size = min_t(u64, vram_size, io_size);
 
-               for_each_gt(gt, xe, id) {
-                       if (id && !xe_gt_is_media_type(gt)) {
-                               io_size -= min(io_size, size);
-                               io_start += io_size;
-                       }
+               if (!gt->mem.vram.io_size) {
+                       drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n");
+                       return -ENODEV;
+               }
 
-                       gt->mem.vram.size = size;
-
-                       /*
-                        * XXX: multi-tile small-bar might be wild. Hopefully
-                        * full tile without any mappable vram is not something
-                        * we care about.
-                        */
-
-                       gt->mem.vram.io_size = min(size, io_size);
-                       if (io_size) {
-                               gt->mem.vram.io_start = io_start;
-                               gt->mem.vram.mapping = xe->mem.vram.mapping +
-                                       (io_start - xe->mem.vram.io_start);
-                       } else {
-                               drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n");
-                               return -ENODEV;
-                       }
+               gt->mem.vram.base = tile_offset;
+
+               /* small-bar can limit the CPU-visible size; clamp the size accordingly */
+               gt->mem.vram.size = min_t(u64, vram_size, io_size);
+               gt->mem.vram.mapping = xe->mem.vram.mapping + tile_offset;
 
-                       drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n",
-                                id, gt->info.vram_id, &gt->mem.vram.io_start,
-                                &gt->mem.vram.size);
+               drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n", id, gt->info.vram_id,
+                        &gt->mem.vram.io_start, &gt->mem.vram.size);
+
+               if (gt->mem.vram.io_size < gt->mem.vram.size)
+                       drm_info(&xe->drm, "VRAM[%u, %u]: CPU access limited to %pa\n", id,
+                                gt->info.vram_id, &gt->mem.vram.io_size);
+
+               /* calculate total size using tile size to get the correct HW sizing */
+               total_size += tile_size;
+               available_size += vram_size;
+
+               if (total_size > xe->mem.vram.io_size) {
+                       drm_warn(&xe->drm, "VRAM: %pa is larger than resource %pa\n",
+                                &total_size, &xe->mem.vram.io_size);
                }
-       } else {
-               gt->mem.vram.size = xe->mem.vram.size;
-               gt->mem.vram.io_start = xe->mem.vram.io_start;
-               gt->mem.vram.io_size = xe->mem.vram.io_size;
-               gt->mem.vram.mapping = xe->mem.vram.mapping;
 
-               drm_info(&xe->drm, "VRAM: %pa\n", &gt->mem.vram.size);
+               io_size -= min_t(u64, tile_size, io_size);
        }
+
+       xe->mem.vram.size = total_size;
+
+       drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
+                &xe->mem.vram.size);
+       drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
+                &available_size);
+
        return 0;
 }