RDMA/mlx5: Use ib_umem_find_best_pgsz() for mkc's
author: Jason Gunthorpe <jgg@nvidia.com>
Mon, 26 Oct 2020 13:23:14 +0000 (15:23 +0200)
committer: Jason Gunthorpe <jgg@nvidia.com>
Mon, 2 Nov 2020 19:10:50 +0000 (15:10 -0400)
Now that all the PAS arrays or UMR XLT's for mkcs are filled using
rdma_for_each_block() we can use the common ib_umem_find_best_pgsz()
algorithm.

Link: https://lore.kernel.org/r/20201026132314.1336717-6-leon@kernel.org
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/core/umem.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c

index e9fecbdf391bcc543f3c8c710694bb71a0011c01..f1fc7e39c782fb843f96497b18ad2caef214f974 100644 (file)
@@ -84,6 +84,15 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
        dma_addr_t mask;
        int i;
 
+       if (umem->is_odp) {
+               unsigned int page_size = BIT(to_ib_umem_odp(umem)->page_shift);
+
+               /* ODP must always be self consistent. */
+               if (!(pgsz_bitmap & page_size))
+                       return 0;
+               return page_size;
+       }
+
        /* rdma_for_each_block() has a bug if the page size is smaller than the
         * page size used to build the umem. For now prevent smaller page sizes
         * from being returned.
index aadd43425a587c097737b3c2ddf9271bdca6e21d..bb44080170be8c4c8ffec362c4bd0f6408579552 100644 (file)
 
 #define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size)
 
+static __always_inline unsigned long
+__mlx5_log_page_size_to_bitmap(unsigned int log_pgsz_bits,
+                              unsigned int pgsz_shift)
+{
+       unsigned int largest_pg_shift =
+               min_t(unsigned long, (1ULL << log_pgsz_bits) - 1 + pgsz_shift,
+                     BITS_PER_LONG - 1);
+
+       /*
+        * Despite a command allowing it, the device does not support lower than
+        * 4k page size.
+        */
+       pgsz_shift = max_t(unsigned int, MLX5_ADAPTER_PAGE_SHIFT, pgsz_shift);
+       return GENMASK(largest_pg_shift, pgsz_shift);
+}
+
+/*
+ * For mkc users, instead of a page_offset the command has a start_iova which
+ * specifies both the page_offset and the on-the-wire IOVA
+ */
+#define mlx5_umem_find_best_pgsz(umem, typ, log_pgsz_fld, pgsz_shift, iova)    \
+       ib_umem_find_best_pgsz(umem,                                           \
+                              __mlx5_log_page_size_to_bitmap(                 \
+                                      __mlx5_bit_sz(typ, log_pgsz_fld),       \
+                                      pgsz_shift),                            \
+                              iova)
+
 enum {
        MLX5_IB_MMAP_OFFSET_START = 9,
        MLX5_IB_MMAP_OFFSET_END = 255,
index 3fa3809c26605764781b907c292f7c4c3ea81b96..b091d84ba4359aa7c574ac5eeb9fedb236a6af17 100644 (file)
@@ -964,11 +964,13 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
        struct mlx5_cache_ent *ent;
        struct mlx5_ib_mr *mr;
-       int page_shift;
+       unsigned int page_size;
 
-       mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &page_shift);
-       ent = mr_cache_ent_from_order(dev, order_base_2(ib_umem_num_dma_blocks(
-                                                  umem, 1UL << page_shift)));
+       page_size = mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
+       if (WARN_ON(!page_size))
+               return ERR_PTR(-EINVAL);
+       ent = mr_cache_ent_from_order(
+               dev, order_base_2(ib_umem_num_dma_blocks(umem, page_size)));
        if (!ent)
                return ERR_PTR(-E2BIG);
 
@@ -990,7 +992,7 @@ static struct mlx5_ib_mr *alloc_mr_from_cache(struct ib_pd *pd,
        mr->mmkey.iova = iova;
        mr->mmkey.size = umem->length;
        mr->mmkey.pd = to_mpd(pd)->pdn;
-       mr->page_shift = page_shift;
+       mr->page_shift = order_base_2(page_size);
 
        return mr;
 }
@@ -1280,8 +1282,8 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
                                     int access_flags, bool populate)
 {
        struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       unsigned int page_size;
        struct mlx5_ib_mr *mr;
-       int page_shift;
        __be64 *pas;
        void *mkc;
        int inlen;
@@ -1289,22 +1291,23 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
        int err;
        bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
 
+       page_size =
+               mlx5_umem_find_best_pgsz(umem, mkc, log_page_size, 0, iova);
+       if (WARN_ON(!page_size))
+               return ERR_PTR(-EINVAL);
+
        mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr)
                return ERR_PTR(-ENOMEM);
 
-       mlx5_ib_cont_pages(umem, iova, MLX5_MKEY_PAGE_SHIFT_MASK, &page_shift);
-
-       mr->page_shift = page_shift;
        mr->ibmr.pd = pd;
        mr->access_flags = access_flags;
+       mr->page_shift = order_base_2(page_size);
 
        inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        if (populate)
-               inlen +=
-                       sizeof(*pas) *
-                       roundup(ib_umem_num_dma_blocks(umem, 1UL << page_shift),
-                               2);
+               inlen += sizeof(*pas) *
+                        roundup(ib_umem_num_dma_blocks(umem, page_size), 2);
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in) {
                err = -ENOMEM;
@@ -1316,7 +1319,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
                        err = -EINVAL;
                        goto err_2;
                }
-               mlx5_ib_populate_pas(umem, 1ULL << page_shift, pas,
+               mlx5_ib_populate_pas(umem, 1UL << mr->page_shift, pas,
                                     pg_cap ? MLX5_IB_MTT_PRESENT : 0);
        }
 
@@ -1334,11 +1337,11 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
        MLX5_SET64(mkc, mkc, len, umem->length);
        MLX5_SET(mkc, mkc, bsf_octword_size, 0);
        MLX5_SET(mkc, mkc, translations_octword_size,
-                get_octo_len(iova, umem->length, page_shift));
-       MLX5_SET(mkc, mkc, log_page_size, page_shift);
+                get_octo_len(iova, umem->length, mr->page_shift));
+       MLX5_SET(mkc, mkc, log_page_size, mr->page_shift);
        if (populate) {
                MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
-                        get_octo_len(iova, umem->length, page_shift));
+                        get_octo_len(iova, umem->length, mr->page_shift));
        }
 
        err = mlx5_ib_create_mkey(dev, &mr->mmkey, in, inlen);