RDMA/siw: Use ib_umem_get() to pin user pages
authorBernard Metzler <bmt@zurich.ibm.com>
Sat, 4 Nov 2023 07:56:43 +0000 (08:56 +0100)
committerLeon Romanovsky <leon@kernel.org>
Mon, 13 Nov 2023 08:14:00 +0000 (10:14 +0200)
Abandon siw private code to pin user pages during user
memory registration, and use ib_umem_get() instead.
This will help maintain the driver in case of changes
to the memory subsystem.

Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>
Link: https://lore.kernel.org/r/20231104075643.195186-1-bmt@zurich.ibm.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/infiniband/sw/siw/siw.h
drivers/infiniband/sw/siw/siw_mem.c
drivers/infiniband/sw/siw/siw_mem.h
drivers/infiniband/sw/siw/siw_verbs.c

index cec5cccd2e7523d94bad3c4cae64a764b7fcb22d..849e496c7e6790f839936b730e3e2c21f426f453 100644 (file)
@@ -121,11 +121,10 @@ struct siw_page_chunk {
 };
 
 struct siw_umem {
+       struct ib_umem *base_mem;
        struct siw_page_chunk *page_chunk;
        int num_pages;
-       bool writable;
        u64 fp_addr; /* First page base address */
-       struct mm_struct *owning_mm;
 };
 
 struct siw_pble {
index c5f7f1669d09a31d7352305fc3e2f864f8fafedf..2110ceb0603cb48ba4bf535f5dc25eef92967aaf 100644 (file)
@@ -5,6 +5,7 @@
 
 #include <linux/gfp.h>
 #include <rdma/ib_verbs.h>
+#include <rdma/ib_umem.h>
 #include <linux/dma-mapping.h>
 #include <linux/slab.h>
 #include <linux/sched/mm.h>
@@ -60,28 +61,17 @@ struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index)
        return NULL;
 }
 
-static void siw_free_plist(struct siw_page_chunk *chunk, int num_pages,
-                          bool dirty)
+void siw_umem_release(struct siw_umem *umem)
 {
-       unpin_user_pages_dirty_lock(chunk->plist, num_pages, dirty);
-}
-
-void siw_umem_release(struct siw_umem *umem, bool dirty)
-{
-       struct mm_struct *mm_s = umem->owning_mm;
        int i, num_pages = umem->num_pages;
 
-       for (i = 0; num_pages; i++) {
-               int to_free = min_t(int, PAGES_PER_CHUNK, num_pages);
+       if (umem->base_mem)
+               ib_umem_release(umem->base_mem);
 
-               siw_free_plist(&umem->page_chunk[i], to_free,
-                              umem->writable && dirty);
+       for (i = 0; num_pages > 0; i++) {
                kfree(umem->page_chunk[i].plist);
-               num_pages -= to_free;
+               num_pages -= PAGES_PER_CHUNK;
        }
-       atomic64_sub(umem->num_pages, &mm_s->pinned_vm);
-
-       mmdrop(mm_s);
        kfree(umem->page_chunk);
        kfree(umem);
 }
@@ -145,7 +135,7 @@ void siw_free_mem(struct kref *ref)
 
        if (!mem->is_mw && mem->mem_obj) {
                if (mem->is_pbl == 0)
-                       siw_umem_release(mem->umem, true);
+                       siw_umem_release(mem->umem);
                else
                        kfree(mem->pbl);
        }
@@ -362,18 +352,16 @@ struct siw_pbl *siw_pbl_alloc(u32 num_buf)
        return pbl;
 }
 
-struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
+struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start,
+                             u64 len, int rights)
 {
        struct siw_umem *umem;
-       struct mm_struct *mm_s;
+       struct ib_umem *base_mem;
+       struct sg_page_iter sg_iter;
+       struct sg_table *sgt;
        u64 first_page_va;
-       unsigned long mlock_limit;
-       unsigned int foll_flags = FOLL_LONGTERM;
        int num_pages, num_chunks, i, rv = 0;
 
-       if (!can_do_mlock())
-               return ERR_PTR(-EPERM);
-
        if (!len)
                return ERR_PTR(-EINVAL);
 
@@ -385,65 +373,50 @@ struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable)
        if (!umem)
                return ERR_PTR(-ENOMEM);
 
-       mm_s = current->mm;
-       umem->owning_mm = mm_s;
-       umem->writable = writable;
-
-       mmgrab(mm_s);
-
-       if (writable)
-               foll_flags |= FOLL_WRITE;
-
-       mmap_read_lock(mm_s);
-
-       mlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
-
-       if (atomic64_add_return(num_pages, &mm_s->pinned_vm) > mlock_limit) {
-               rv = -ENOMEM;
-               goto out_sem_up;
-       }
-       umem->fp_addr = first_page_va;
-
        umem->page_chunk =
                kcalloc(num_chunks, sizeof(struct siw_page_chunk), GFP_KERNEL);
        if (!umem->page_chunk) {
                rv = -ENOMEM;
-               goto out_sem_up;
+               goto err_out;
        }
-       for (i = 0; num_pages; i++) {
+       base_mem = ib_umem_get(base_dev, start, len, rights);
+       if (IS_ERR(base_mem)) {
+               rv = PTR_ERR(base_mem);
+               siw_dbg(base_dev, "Cannot pin user memory: %d\n", rv);
+               goto err_out;
+       }
+       umem->fp_addr = first_page_va;
+       umem->base_mem = base_mem;
+
+       sgt = &base_mem->sgt_append.sgt;
+       __sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);
+
+       if (!__sg_page_iter_next(&sg_iter)) {
+               rv = -EINVAL;
+               goto err_out;
+       }
+       for (i = 0; num_pages > 0; i++) {
                int nents = min_t(int, num_pages, PAGES_PER_CHUNK);
                struct page **plist =
                        kcalloc(nents, sizeof(struct page *), GFP_KERNEL);
 
                if (!plist) {
                        rv = -ENOMEM;
-                       goto out_sem_up;
+                       goto err_out;
                }
                umem->page_chunk[i].plist = plist;
-               while (nents) {
-                       rv = pin_user_pages(first_page_va, nents, foll_flags,
-                                           plist);
-                       if (rv < 0)
-                               goto out_sem_up;
-
-                       umem->num_pages += rv;
-                       first_page_va += rv * PAGE_SIZE;
-                       plist += rv;
-                       nents -= rv;
-                       num_pages -= rv;
+               while (nents--) {
+                       *plist = sg_page_iter_page(&sg_iter);
+                       umem->num_pages++;
+                       num_pages--;
+                       plist++;
+                       if (!__sg_page_iter_next(&sg_iter))
+                               break;
                }
        }
-out_sem_up:
-       mmap_read_unlock(mm_s);
-
-       if (rv > 0)
-               return umem;
-
-       /* Adjust accounting for pages not pinned */
-       if (num_pages)
-               atomic64_sub(num_pages, &mm_s->pinned_vm);
-
-       siw_umem_release(umem, false);
+       return umem;
+err_out:
+       siw_umem_release(umem);
 
        return ERR_PTR(rv);
 }
index a2835284fe5b4831cfa19090c0a1a5415ad80ef0..e74cfcd6dbc1ef7abadc9b67eb40d170848c4cc4 100644 (file)
@@ -6,8 +6,9 @@
 #ifndef _SIW_MEM_H
 #define _SIW_MEM_H
 
-struct siw_umem *siw_umem_get(u64 start, u64 len, bool writable);
-void siw_umem_release(struct siw_umem *umem, bool dirty);
+struct siw_umem *siw_umem_get(struct ib_device *base_dev, u64 start,
+                             u64 len, int rights);
+void siw_umem_release(struct siw_umem *umem);
 struct siw_pbl *siw_pbl_alloc(u32 num_buf);
 dma_addr_t siw_pbl_get_buffer(struct siw_pbl *pbl, u64 off, int *len, int *idx);
 struct siw_mem *siw_mem_id2obj(struct siw_device *sdev, int stag_index);
index c5c27db9c2fe65799f32c092f789aeec4ca24ec3..27f7dda89e49fb0b1f4f70c3d33f1a594439f4d0 100644 (file)
@@ -1321,8 +1321,6 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
        struct siw_umem *umem = NULL;
        struct siw_ureq_reg_mr ureq;
        struct siw_device *sdev = to_siw_dev(pd->device);
-
-       unsigned long mem_limit = rlimit(RLIMIT_MEMLOCK);
        int rv;
 
        siw_dbg_pd(pd, "start: 0x%pK, va: 0x%pK, len: %llu\n",
@@ -1338,20 +1336,7 @@ struct ib_mr *siw_reg_user_mr(struct ib_pd *pd, u64 start, u64 len,
                rv = -EINVAL;
                goto err_out;
        }
-       if (mem_limit != RLIM_INFINITY) {
-               unsigned long num_pages =
-                       (PAGE_ALIGN(len + (start & ~PAGE_MASK))) >> PAGE_SHIFT;
-               mem_limit >>= PAGE_SHIFT;
-
-               if (num_pages > mem_limit - current->mm->locked_vm) {
-                       siw_dbg_pd(pd, "pages req %lu, max %lu, lock %lu\n",
-                                  num_pages, mem_limit,
-                                  current->mm->locked_vm);
-                       rv = -ENOMEM;
-                       goto err_out;
-               }
-       }
-       umem = siw_umem_get(start, len, ib_access_writable(rights));
+       umem = siw_umem_get(pd->device, start, len, rights);
        if (IS_ERR(umem)) {
                rv = PTR_ERR(umem);
                siw_dbg_pd(pd, "getting user memory failed: %d\n", rv);
@@ -1404,7 +1389,7 @@ err_out:
                kfree_rcu(mr, rcu);
        } else {
                if (umem)
-                       siw_umem_release(umem, false);
+                       siw_umem_release(umem);
        }
        return ERR_PTR(rv);
 }