habanalabs: mmu map wrapper for sizes larger than a page
author Ofir Bitton <obitton@habana.ai>
Thu, 22 Oct 2020 12:13:10 +0000 (15:13 +0300)
committer Oded Gabbay <ogabbay@kernel.org>
Mon, 30 Nov 2020 08:47:36 +0000 (10:47 +0200)
We introduce a new wrapper which allows us to mmu map any size
to any host va_range available. In addition, we remove duplicated
code from various places in the driver and use this new wrapper
instead.
This wrapper supports mapping only contiguous physical
memory blocks and will be used for mappings that are done to the
driver ASID.

Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
drivers/misc/habanalabs/common/command_buffer.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/memory.c
drivers/misc/habanalabs/common/mmu.c
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/goya/goya.c

index 0c482358f35001ed0fa90379d853884a7d800f6a..2856bb3423ee05cc1cd3257d24238c38ca68fa4c 100644 (file)
@@ -67,9 +67,9 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
        bus_addr = cb->bus_address;
        offset = 0;
        list_for_each_entry(va_block, &cb->va_block_list, node) {
-               rc = hl_mmu_map(ctx, va_block->start, bus_addr, va_block->size,
-                               list_is_last(&va_block->node,
-                                               &cb->va_block_list));
+               rc = hl_mmu_map_page(ctx, va_block->start, bus_addr,
+                               va_block->size, list_is_last(&va_block->node,
+                                                       &cb->va_block_list));
                if (rc) {
                        dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
                                va_block->start);
@@ -92,7 +92,7 @@ err_va_umap:
        list_for_each_entry(va_block, &cb->va_block_list, node) {
                if (offset <= 0)
                        break;
-               hl_mmu_unmap(ctx, va_block->start, va_block->size,
+               hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
                                offset <= va_block->size);
                offset -= va_block->size;
        }
@@ -119,7 +119,7 @@ static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
        mutex_lock(&ctx->mmu_lock);
 
        list_for_each_entry(va_block, &cb->va_block_list, node)
-               if (hl_mmu_unmap(ctx, va_block->start, va_block->size,
+               if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
                                list_is_last(&va_block->node,
                                                &cb->va_block_list)))
                        dev_warn_ratelimited(hdev->dev,
index 43aa8cbd89694a2ceb1ab683272389f8ad3f01c3..e1db8301ecbd28125092856cbd3e466b8efed430 100644 (file)
@@ -2162,10 +2162,13 @@ int hl_mmu_init(struct hl_device *hdev);
 void hl_mmu_fini(struct hl_device *hdev);
 int hl_mmu_ctx_init(struct hl_ctx *ctx);
 void hl_mmu_ctx_fini(struct hl_ctx *ctx);
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
                u32 page_size, bool flush_pte);
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
                bool flush_pte);
+int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
+                                       u64 phys_addr, u32 size);
+int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size);
 void hl_mmu_swap_out(struct hl_ctx *ctx);
 void hl_mmu_swap_in(struct hl_ctx *ctx);
 int hl_mmu_if_set_funcs(struct hl_device *hdev);
index 351c9927151f0015c65aba8796cc042167a26faf..744275dd64109e6d4ae70c9f5500a64450829182 100644 (file)
@@ -843,7 +843,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
        for (i = 0 ; i < phys_pg_pack->npages ; i++) {
                paddr = phys_pg_pack->pages[i];
 
-               rc = hl_mmu_map(ctx, next_vaddr, paddr, page_size,
+               rc = hl_mmu_map_page(ctx, next_vaddr, paddr, page_size,
                                (i + 1) == phys_pg_pack->npages);
                if (rc) {
                        dev_err(hdev->dev,
@@ -862,7 +862,7 @@ static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 err:
        next_vaddr = vaddr;
        for (i = 0 ; i < mapped_pg_cnt ; i++) {
-               if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+               if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
                                        (i + 1) == mapped_pg_cnt))
                        dev_warn_ratelimited(hdev->dev,
                                "failed to unmap handle %u, va: 0x%llx, pa: 0x%llx, page size: %u\n",
@@ -892,7 +892,7 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
        next_vaddr = vaddr;
 
        for (i = 0 ; i < phys_pg_pack->npages ; i++, next_vaddr += page_size) {
-               if (hl_mmu_unmap(ctx, next_vaddr, page_size,
+               if (hl_mmu_unmap_page(ctx, next_vaddr, page_size,
                                       (i + 1) == phys_pg_pack->npages))
                        dev_warn_ratelimited(hdev->dev,
                        "unmap failed for vaddr: 0x%llx\n", next_vaddr);
index 7279c83cc0815677b7738ba6461518818c688aeb..33ae953d3a3680126cbe42090ea677fb25b9f582 100644 (file)
@@ -122,7 +122,7 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
 }
 
 /*
- * hl_mmu_unmap - unmaps a virtual addr
+ * hl_mmu_unmap_page - unmaps a virtual addr
  *
  * @ctx: pointer to the context structure
  * @virt_addr: virt addr to map from
@@ -142,7 +142,7 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
  * For optimization reasons PCI flush may be requested once after unmapping of
  * large area.
  */
-int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
+int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
                bool flush_pte)
 {
        struct hl_device *hdev = ctx->hdev;
@@ -200,7 +200,7 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
 }
 
 /*
- * hl_mmu_map - maps a virtual addr to physical addr
+ * hl_mmu_map_page - maps a virtual addr to physical addr
  *
  * @ctx: pointer to the context structure
  * @virt_addr: virt addr to map from
@@ -221,8 +221,8 @@ int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
  * For optimization reasons PCI flush may be requested once after mapping of
  * large area.
  */
-int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size,
-               bool flush_pte)
+int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
+               u32 page_size, bool flush_pte)
 {
        struct hl_device *hdev = ctx->hdev;
        struct asic_fixed_properties *prop = &hdev->asic_prop;
@@ -302,6 +302,108 @@ err:
        return rc;
 }
 
+/*
+ * hl_mmu_map_contiguous - implements a wrapper for hl_mmu_map_page
+ *                         for mapping contiguous physical memory
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to map from
+ * @phys_addr: phys addr to map to
+ * @size: size to map
+ *
+ * The area is checked with hl_mem_area_inside_range() against the dmmu, pmmu
+ * and pmmu_huge ranges (in that order) and the page size of the first
+ * matching range is used as the mapping granularity. The area is then mapped
+ * page after page, and the PTE flush is requested only for the last page.
+ *
+ * Return: 0 on success, -EINVAL if the area is not inside any of the ranges,
+ * or the error returned by hl_mmu_map_page(). On a mapping failure, the pages
+ * that were already mapped by this call are unmapped before returning.
+ */
+int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
+                                       u64 phys_addr, u32 size)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       u64 curr_va, curr_pa, off;
+       u32 page_size;
+       bool flush_pte;
+       int rc = 0;
+
+       if (hl_mem_area_inside_range(virt_addr, size,
+                       prop->dmmu.start_addr, prop->dmmu.end_addr))
+               page_size = prop->dmmu.page_size;
+       else if (hl_mem_area_inside_range(virt_addr, size,
+                       prop->pmmu.start_addr, prop->pmmu.end_addr))
+               page_size = prop->pmmu.page_size;
+       else if (hl_mem_area_inside_range(virt_addr, size,
+                       prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr))
+               page_size = prop->pmmu_huge.page_size;
+       else
+               return -EINVAL;
+
+       /*
+        * The offset is 64-bit so that it is neither sign-extended when added
+        * to the 64-bit addresses nor wrapped around for sizes close to 4GB.
+        */
+       for (off = 0 ; off < size ; off += page_size) {
+               curr_va = virt_addr + off;
+               curr_pa = phys_addr + off;
+               /* request the PTE flush only once, with the last page */
+               flush_pte = (off + page_size) >= size;
+               rc = hl_mmu_map_page(ctx, curr_va, curr_pa, page_size,
+                                                               flush_pte);
+               if (rc) {
+                       dev_err(hdev->dev,
+                               "Map failed for va 0x%llx to pa 0x%llx\n",
+                               curr_va, curr_pa);
+                       goto unmap;
+               }
+       }
+
+       return rc;
+
+unmap:
+       /*
+        * The page at 'off' is the one that failed to map, so don't try to
+        * unmap it. Roll back only the pages below it, from the highest one
+        * down to the first one, and flush with the last page unmapped.
+        */
+       while (off) {
+               off -= page_size;
+               curr_va = virt_addr + off;
+               flush_pte = !off;
+               if (hl_mmu_unmap_page(ctx, curr_va, page_size, flush_pte))
+                       dev_warn_ratelimited(hdev->dev,
+                               "failed to unmap va 0x%llx\n", curr_va);
+       }
+
+       return rc;
+}
+
+/*
+ * hl_mmu_unmap_contiguous - implements a wrapper for hl_mmu_unmap_page
+ *                           for unmapping contiguous physical memory
+ *
+ * @ctx: pointer to the context structure
+ * @virt_addr: virt addr to unmap
+ * @size: size to unmap
+ *
+ * The area is checked with hl_mem_area_inside_range() against the dmmu, pmmu
+ * and pmmu_huge ranges (in that order) and the page size of the first
+ * matching range is used as the unmapping granularity. A failure to unmap a
+ * page is reported but does not stop the unmapping of the following pages.
+ * The PTE flush is requested only for the last page.
+ *
+ * Return: 0 on success, -EINVAL if the area is not inside any of the ranges,
+ * or the error of the first page that hl_mmu_unmap_page() failed to unmap.
+ */
+int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       u64 curr_va, off;
+       u32 page_size;
+       bool flush_pte;
+       int rc = 0, page_rc;
+
+       if (hl_mem_area_inside_range(virt_addr, size,
+                       prop->dmmu.start_addr, prop->dmmu.end_addr))
+               page_size = prop->dmmu.page_size;
+       else if (hl_mem_area_inside_range(virt_addr, size,
+                       prop->pmmu.start_addr, prop->pmmu.end_addr))
+               page_size = prop->pmmu.page_size;
+       else if (hl_mem_area_inside_range(virt_addr, size,
+                       prop->pmmu_huge.start_addr, prop->pmmu_huge.end_addr))
+               page_size = prop->pmmu_huge.page_size;
+       else
+               return -EINVAL;
+
+       /*
+        * The offset is 64-bit so that it is neither sign-extended when added
+        * to the 64-bit address nor wrapped around for sizes close to 4GB.
+        */
+       for (off = 0 ; off < size ; off += page_size) {
+               curr_va = virt_addr + off;
+               /* request the PTE flush only once, with the last page */
+               flush_pte = (off + page_size) >= size;
+               page_rc = hl_mmu_unmap_page(ctx, curr_va, page_size,
+                                                               flush_pte);
+               if (page_rc) {
+                       dev_warn_ratelimited(hdev->dev,
+                               "Unmap failed for va 0x%llx\n", curr_va);
+                       /* keep unmapping, but remember the first failure */
+                       if (!rc)
+                               rc = page_rc;
+               }
+       }
+
+       return rc;
+}
+
 /*
  * hl_mmu_swap_out - marks all mapping of the given ctx as swapped out
  *
index fda3d8a85adac1134bc71126a6826f824abc13ad..49d4b5dda1151c9684e733851758334a32cc939a 100644 (file)
@@ -7755,9 +7755,6 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
                struct hl_ctx *ctx)
 {
        struct gaudi_device *gaudi = hdev->asic_specific;
-       bool flush_pte;
-       u64 va, pa;
-       s64 off;
        int min_alloc_order, rc, collective_cb_size;
 
        if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
@@ -7802,48 +7799,23 @@ static int gaudi_internal_cb_pool_init(struct hl_device *hdev,
                goto destroy_internal_cb_pool;
 
        mutex_lock(&ctx->mmu_lock);
-
-       /* The mapping is done page by page since we can't assure allocated ptr
-        * is aligned to HOST_SPACE_INTERNAL_CB_SZ
-        */
-       for (off = 0 ; off < HOST_SPACE_INTERNAL_CB_SZ ; off += PAGE_SIZE_4KB) {
-               va = hdev->internal_cb_va_base + off;
-               pa = hdev->internal_cb_pool_dma_addr + off;
-               flush_pte = (off + PAGE_SIZE_4KB) >= HOST_SPACE_INTERNAL_CB_SZ;
-               rc = hl_mmu_map(ctx, va, pa, PAGE_SIZE_4KB, flush_pte);
-               if (rc) {
-                       dev_err(hdev->dev,
-                               "Map failed for va 0x%llx to pa 0x%llx\n",
-                               va, pa);
-                       goto unmap;
-               }
-       }
+       rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base,
+                       hdev->internal_cb_pool_dma_addr,
+                       HOST_SPACE_INTERNAL_CB_SZ);
 
        hdev->asic_funcs->mmu_invalidate_cache(hdev, false, VM_TYPE_USERPTR);
-
        mutex_unlock(&ctx->mmu_lock);
 
-       return 0;
+       if (rc)
+               goto unreserve_internal_cb_pool;
 
-unmap:
-       for (; off >= 0 ; off -= PAGE_SIZE_4KB) {
-               va = hdev->internal_cb_va_base + off;
-               flush_pte = (off - (s32) PAGE_SIZE_4KB) < 0;
-               if (hl_mmu_unmap(ctx, va, PAGE_SIZE_4KB, flush_pte))
-                       dev_warn_ratelimited(hdev->dev,
-                                       "failed to unmap va 0x%llx\n", va);
-       }
+       return 0;
 
+unreserve_internal_cb_pool:
        hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
                        HOST_SPACE_INTERNAL_CB_SZ);
-
-       hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
-
-       mutex_unlock(&ctx->mmu_lock);
-
 destroy_internal_cb_pool:
        gen_pool_destroy(hdev->internal_cb_pool);
-
 free_internal_cb_pool:
        hdev->asic_funcs->asic_dma_free_coherent(hdev,
                        HOST_SPACE_INTERNAL_CB_SZ,
@@ -7857,30 +7829,16 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
                struct hl_ctx *ctx)
 {
        struct gaudi_device *gaudi = hdev->asic_specific;
-       bool flush_pte = false;
-       u64 va, off;
 
        if (!(gaudi->hw_cap_initialized & HW_CAP_MMU))
                return;
 
        mutex_lock(&ctx->mmu_lock);
-
-       for (off = 0 ; off < HOST_SPACE_INTERNAL_CB_SZ ; off += PAGE_SIZE_4KB) {
-               va = hdev->internal_cb_va_base + off;
-
-               if (off + PAGE_SIZE_4KB >= HOST_SPACE_INTERNAL_CB_SZ)
-                       flush_pte = true;
-
-               if (hl_mmu_unmap(ctx, va, PAGE_SIZE_4KB, flush_pte))
-                       dev_warn_ratelimited(hdev->dev,
-                                       "failed to unmap va 0x%llx\n", va);
-       }
-
+       hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base,
+                       HOST_SPACE_INTERNAL_CB_SZ);
        hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base,
                        HOST_SPACE_INTERNAL_CB_SZ);
-
        hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
-
        mutex_unlock(&ctx->mmu_lock);
 
        gen_pool_destroy(hdev->internal_cb_pool);
index 55d174d3cac87acf6e3504eabb3eef91da8c973b..342227b93778d4d817742542f59624de9c40675d 100644 (file)
@@ -4906,9 +4906,10 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
                return 0;
 
        for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB) {
-               rc = hl_mmu_map(hdev->kernel_ctx, prop->dram_base_address + off,
-                               prop->dram_base_address + off, PAGE_SIZE_2MB,
-                               (off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
+               rc = hl_mmu_map_page(hdev->kernel_ctx,
+                       prop->dram_base_address + off,
+                       prop->dram_base_address + off, PAGE_SIZE_2MB,
+                       (off + PAGE_SIZE_2MB) == CPU_FW_IMAGE_SIZE);
                if (rc) {
                        dev_err(hdev->dev, "Map failed for address 0x%llx\n",
                                prop->dram_base_address + off);
@@ -4917,8 +4918,10 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
        }
 
        if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
-               rc = hl_mmu_map(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
-                       hdev->cpu_accessible_dma_address, PAGE_SIZE_2MB, true);
+               rc = hl_mmu_map_page(hdev->kernel_ctx,
+                       VA_CPU_ACCESSIBLE_MEM_ADDR,
+                       hdev->cpu_accessible_dma_address,
+                       PAGE_SIZE_2MB, true);
 
                if (rc) {
                        dev_err(hdev->dev,
@@ -4928,7 +4931,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
                }
        } else {
                for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB) {
-                       rc = hl_mmu_map(hdev->kernel_ctx,
+                       rc = hl_mmu_map_page(hdev->kernel_ctx,
                                VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
                                hdev->cpu_accessible_dma_address + cpu_off,
                                PAGE_SIZE_4KB, true);
@@ -4955,7 +4958,7 @@ static int goya_mmu_add_mappings_for_device_cpu(struct hl_device *hdev)
 
 unmap_cpu:
        for (; cpu_off >= 0 ; cpu_off -= PAGE_SIZE_4KB)
-               if (hl_mmu_unmap(hdev->kernel_ctx,
+               if (hl_mmu_unmap_page(hdev->kernel_ctx,
                                VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
                                PAGE_SIZE_4KB, true))
                        dev_warn_ratelimited(hdev->dev,
@@ -4963,7 +4966,7 @@ unmap_cpu:
                                VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off);
 unmap:
        for (; off >= 0 ; off -= PAGE_SIZE_2MB)
-               if (hl_mmu_unmap(hdev->kernel_ctx,
+               if (hl_mmu_unmap_page(hdev->kernel_ctx,
                                prop->dram_base_address + off, PAGE_SIZE_2MB,
                                true))
                        dev_warn_ratelimited(hdev->dev,
@@ -4989,13 +4992,14 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
        WREG32(mmCPU_IF_AWUSER_OVR_EN, 0);
 
        if (!(hdev->cpu_accessible_dma_address & (PAGE_SIZE_2MB - 1))) {
-               if (hl_mmu_unmap(hdev->kernel_ctx, VA_CPU_ACCESSIBLE_MEM_ADDR,
+               if (hl_mmu_unmap_page(hdev->kernel_ctx,
+                               VA_CPU_ACCESSIBLE_MEM_ADDR,
                                PAGE_SIZE_2MB, true))
                        dev_warn(hdev->dev,
                                "Failed to unmap CPU accessible memory\n");
        } else {
                for (cpu_off = 0 ; cpu_off < SZ_2M ; cpu_off += PAGE_SIZE_4KB)
-                       if (hl_mmu_unmap(hdev->kernel_ctx,
+                       if (hl_mmu_unmap_page(hdev->kernel_ctx,
                                        VA_CPU_ACCESSIBLE_MEM_ADDR + cpu_off,
                                        PAGE_SIZE_4KB,
                                        (cpu_off + PAGE_SIZE_4KB) >= SZ_2M))
@@ -5005,7 +5009,7 @@ void goya_mmu_remove_device_cpu_mappings(struct hl_device *hdev)
        }
 
        for (off = 0 ; off < CPU_FW_IMAGE_SIZE ; off += PAGE_SIZE_2MB)
-               if (hl_mmu_unmap(hdev->kernel_ctx,
+               if (hl_mmu_unmap_page(hdev->kernel_ctx,
                                prop->dram_base_address + off, PAGE_SIZE_2MB,
                                (off + PAGE_SIZE_2MB) >= CPU_FW_IMAGE_SIZE))
                        dev_warn_ratelimited(hdev->dev,