RDMA/core: remove use of dma_virt_ops
author Christoph Hellwig <hch@lst.de>
Fri, 6 Nov 2020 18:19:38 +0000 (19:19 +0100)
committer Jason Gunthorpe <jgg@nvidia.com>
Tue, 17 Nov 2020 19:22:07 +0000 (15:22 -0400)
Use the ib_dma_* helpers to skip the DMA translation instead.  This
removes the last user of dma_virt_ops and keeps the weird layering
violation inside the RDMA core instead of burdening the DMA mapping
subsystems with it.  This also means the software RDMA drivers now don't
have to mess with DMA parameters that are not relevant to them at all, and
that in the future we can use PCI P2P transfers even for software RDMA, as
there is no longer a first fake layer of DMA mapping that the P2P DMA support
would have to be stacked on top of.

Link: https://lore.kernel.org/r/20201106181941.1878556-8-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Mike Marciniszyn <mike.marciniszyn@cornelisnetworks.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
13 files changed:
drivers/infiniband/core/device.c
drivers/infiniband/core/rw.c
drivers/infiniband/sw/rdmavt/Kconfig
drivers/infiniband/sw/rdmavt/mr.c
drivers/infiniband/sw/rdmavt/vt.c
drivers/infiniband/sw/rxe/Kconfig
drivers/infiniband/sw/rxe/rxe_verbs.c
drivers/infiniband/sw/rxe/rxe_verbs.h
drivers/infiniband/sw/siw/Kconfig
drivers/infiniband/sw/siw/siw.h
drivers/infiniband/sw/siw/siw_main.c
drivers/nvme/target/rdma.c
include/rdma/ib_verbs.h

index ce26564d4edfed67323af82281cf05666ca2369e..3ab1edea6acbe9b2172d20b85da52dcc66279e8f 100644 (file)
@@ -1209,25 +1209,6 @@ out:
        return ret;
 }
 
-static void setup_dma_device(struct ib_device *device,
-                            struct device *dma_device)
-{
-       /*
-        * If the caller does not provide a DMA capable device then the IB
-        * device will be used. In this case the caller should fully setup the
-        * ibdev for DMA. This usually means using dma_virt_ops.
-        */
-#ifdef CONFIG_DMA_VIRT_OPS
-       if (!dma_device) {
-               device->dev.dma_ops = &dma_virt_ops;
-               dma_device = &device->dev;
-       }
-#endif
-       WARN_ON(!dma_device);
-       device->dma_device = dma_device;
-       WARN_ON(!device->dma_device->dma_parms);
-}
-
 /*
  * setup_device() allocates memory and sets up data that requires calling the
  * device ops, this is the only reason these actions are not done during
@@ -1373,7 +1354,14 @@ int ib_register_device(struct ib_device *device, const char *name,
        if (ret)
                return ret;
 
-       setup_dma_device(device, dma_device);
+       /*
+        * If the caller does not provide a DMA capable device then the IB core
+        * will set up ib_sge and scatterlist structures that stash the kernel
+        * virtual address into the address field.
+        */
+       WARN_ON(dma_device && !dma_device->dma_parms);
+       device->dma_device = dma_device;
+
        ret = setup_device(device);
        if (ret)
                return ret;
@@ -2708,6 +2696,21 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
 }
 EXPORT_SYMBOL(ib_set_device_ops);
 
+#ifdef CONFIG_INFINIBAND_VIRT_DMA
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents)
+{
+       struct scatterlist *s;
+       int i;
+
+       for_each_sg(sg, s, nents, i) {
+               sg_dma_address(s) = (uintptr_t)sg_virt(s);
+               sg_dma_len(s) = s->length;
+       }
+       return nents;
+}
+EXPORT_SYMBOL(ib_dma_virt_map_sg);
+#endif /* CONFIG_INFINIBAND_VIRT_DMA */
+
 static const struct rdma_nl_cbs ibnl_ls_cb_table[RDMA_NL_LS_NUM_OPS] = {
        [RDMA_NL_LS_OP_RESOLVE] = {
                .doit = ib_nl_handle_resolve_resp,
index 13f43ab7220b05ac3438869810a4ddf99e723294..a96030b784eb21cf5f3885002d162fcc1ed70414 100644 (file)
@@ -285,8 +285,11 @@ static void rdma_rw_unmap_sg(struct ib_device *dev, struct scatterlist *sg,
 static int rdma_rw_map_sg(struct ib_device *dev, struct scatterlist *sg,
                          u32 sg_cnt, enum dma_data_direction dir)
 {
-       if (is_pci_p2pdma_page(sg_page(sg)))
+       if (is_pci_p2pdma_page(sg_page(sg))) {
+               if (WARN_ON_ONCE(ib_uses_virt_dma(dev)))
+                       return 0;
                return pci_p2pdma_map_sg(dev->dma_device, sg, sg_cnt, dir);
+       }
        return ib_dma_map_sg(dev, sg, sg_cnt, dir);
 }
 
index c8e268082952b002d7671512d29cd1486406c152..0df48b3a6b56c5d6b0629f4806a1feb0bde8a2ea 100644 (file)
@@ -4,6 +4,5 @@ config INFINIBAND_RDMAVT
        depends on INFINIBAND_VIRT_DMA
        depends on X86_64
        depends on PCI
-       select DMA_VIRT_OPS
        help
        This is a common software verbs provider for RDMA networks.
index 8490fdb9c91e50a36aa6247c1b6390490b14e84f..90fc234f489acd5efc05ea9aa355179a8318c0eb 100644 (file)
@@ -324,8 +324,6 @@ static void __rvt_free_mr(struct rvt_mr *mr)
  * @acc: access flags
  *
  * Return: the memory region on success, otherwise returns an errno.
- * Note that all DMA addresses should be created via the functions in
- * struct dma_virt_ops.
  */
 struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
 {
@@ -766,7 +764,7 @@ int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
 
        /*
         * We use LKEY == zero for kernel virtual addresses
-        * (see rvt_get_dma_mr() and dma_virt_ops).
+        * (see rvt_get_dma_mr()).
         */
        if (sge->lkey == 0) {
                struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);
@@ -877,7 +875,7 @@ int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
 
        /*
         * We use RKEY == zero for kernel virtual addresses
-        * (see rvt_get_dma_mr() and dma_virt_ops).
+        * (see rvt_get_dma_mr()).
         */
        rcu_read_lock();
        if (rkey == 0) {
index 5bd817490b1f412f88b2fa719e3c6692d71f300c..49cec85a372a98be551d6181b9bb78e793fa8251 100644 (file)
@@ -525,7 +525,6 @@ static noinline int check_support(struct rvt_dev_info *rdi, int verb)
 int rvt_register_device(struct rvt_dev_info *rdi)
 {
        int ret = 0, i;
-       u64 dma_mask;
 
        if (!rdi)
                return -EINVAL;
@@ -580,13 +579,6 @@ int rvt_register_device(struct rvt_dev_info *rdi)
        /* Completion queues */
        spin_lock_init(&rdi->n_cqs_lock);
 
-       /* DMA Operations */
-       rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms;
-       dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32);
-       ret = dma_coerce_mask_and_coherent(&rdi->ibdev.dev, dma_mask);
-       if (ret)
-               goto bail_wss;
-
        /* Protection Domain */
        spin_lock_init(&rdi->n_pds_lock);
        rdi->n_pds_allocated = 0;
index 8810bfa680495a136a7fb8561488b1646f5f1a64..4521490667925ffc277a23119d9d875144a07c5f 100644 (file)
@@ -5,7 +5,6 @@ config RDMA_RXE
        depends on INFINIBAND_VIRT_DMA
        select NET_UDP_TUNNEL
        select CRYPTO_CRC32
-       select DMA_VIRT_OPS
        help
        This driver implements the InfiniBand RDMA transport over
        the Linux network stack. It enables a system with a
index bbc89002c4567853f532d08f4833f08285e5a5e1..a2bd91aaa5de956cc1cb777340ff475dc6aff168 100644 (file)
@@ -1128,7 +1128,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
        int err;
        struct ib_device *dev = &rxe->ib_dev;
        struct crypto_shash *tfm;
-       u64 dma_mask;
 
        strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc));
 
@@ -1139,12 +1138,6 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
        dev->local_dma_lkey = 0;
        addrconf_addr_eui48((unsigned char *)&dev->node_guid,
                            rxe->ndev->dev_addr);
-       dev->dev.dma_parms = &rxe->dma_parms;
-       dma_set_max_seg_size(&dev->dev, UINT_MAX);
-       dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32);
-       err = dma_coerce_mask_and_coherent(&dev->dev, dma_mask);
-       if (err)
-               return err;
 
        dev->uverbs_cmd_mask |= BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) |
                                BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ);
index 57967fc39c045cd9d50716c2d3a30db5f4c43ae3..79e0a5a878da35de5e5e4c0830aadd8a7ab1ce6d 100644 (file)
@@ -351,7 +351,6 @@ struct rxe_port {
 struct rxe_dev {
        struct ib_device        ib_dev;
        struct ib_device_attr   attr;
-       struct device_dma_parameters dma_parms;
        int                     max_ucontext;
        int                     max_inline_data;
        struct mutex    usdev_lock;
index 3450ba5081df51a2c717746e6419ba750cd8d8d5..1b5105cbabaeedde957394f8548d1db549ee0a02 100644 (file)
@@ -2,7 +2,6 @@ config RDMA_SIW
        tristate "Software RDMA over TCP/IP (iWARP) driver"
        depends on INET && INFINIBAND && LIBCRC32C
        depends on INFINIBAND_VIRT_DMA
-       select DMA_VIRT_OPS
        help
        This driver implements the iWARP RDMA transport over
        the Linux TCP/IP network stack. It enables a system with a
index e9753831ac3f33b885cd565f351cdde5b6cf6b71..adda7899621962f01094b32928afe54650a46e5d 100644 (file)
@@ -69,7 +69,6 @@ struct siw_pd {
 
 struct siw_device {
        struct ib_device base_dev;
-       struct device_dma_parameters dma_parms;
        struct net_device *netdev;
        struct siw_dev_cap attrs;
 
index ac675622767764d00d06cdfa1b5dd04158f054df..d9de062852c4f92b902246b6574580449b0a7ed9 100644 (file)
@@ -306,7 +306,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
        struct siw_device *sdev = NULL;
        struct ib_device *base_dev;
        struct device *parent = netdev->dev.parent;
-       u64 dma_mask;
        int rv;
 
        if (!parent) {
@@ -361,12 +360,6 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
         */
        base_dev->phys_port_cnt = 1;
        base_dev->dev.parent = parent;
-       base_dev->dev.dma_parms = &sdev->dma_parms;
-       dma_set_max_seg_size(&base_dev->dev, UINT_MAX);
-       dma_mask = IS_ENABLED(CONFIG_64BIT) ? DMA_BIT_MASK(64) : DMA_BIT_MASK(32);
-       if (dma_coerce_mask_and_coherent(&base_dev->dev, dma_mask))
-               goto error;
-
        base_dev->num_comp_vectors = num_possible_cpus();
 
        xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1);
index ae6620489457d66c6f9074e616905d49bcea920d..5c1e7cb7fe0deec61fcc66f1575c8526b02fcd6d 100644 (file)
@@ -414,7 +414,8 @@ static int nvmet_rdma_alloc_rsp(struct nvmet_rdma_device *ndev,
        if (ib_dma_mapping_error(ndev->device, r->send_sge.addr))
                goto out_free_rsp;
 
-       r->req.p2p_client = &ndev->device->dev;
+       if (!ib_uses_virt_dma(ndev->device))
+               r->req.p2p_client = &ndev->device->dev;
        r->send_sge.length = sizeof(*r->req.cqe);
        r->send_sge.lkey = ndev->pd->local_dma_lkey;
 
index 3feb42ef82dc2f0df442696d44ffeffab8efc6a7..174c1bffa00c8b39d1137edf4e8c6fac9116119e 100644 (file)
@@ -3906,6 +3906,16 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
                -ENOSYS;
 }
 
+/*
+ * Drivers that don't need a DMA mapping at the RDMA layer, set dma_device to
+ * NULL. This causes the ib_dma* helpers to just stash the kernel virtual
+ * address into the dma address.
+ */
+static inline bool ib_uses_virt_dma(struct ib_device *dev)
+{
+       return IS_ENABLED(CONFIG_INFINIBAND_VIRT_DMA) && !dev->dma_device;
+}
+
 /**
  * ib_dma_mapping_error - check a DMA addr for error
  * @dev: The device for which the dma_addr was created
@@ -3913,6 +3923,8 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt)
  */
 static inline int ib_dma_mapping_error(struct ib_device *dev, u64 dma_addr)
 {
+       if (ib_uses_virt_dma(dev))
+               return 0;
        return dma_mapping_error(dev->dma_device, dma_addr);
 }
 
@@ -3927,6 +3939,8 @@ static inline u64 ib_dma_map_single(struct ib_device *dev,
                                    void *cpu_addr, size_t size,
                                    enum dma_data_direction direction)
 {
+       if (ib_uses_virt_dma(dev))
+               return (uintptr_t)cpu_addr;
        return dma_map_single(dev->dma_device, cpu_addr, size, direction);
 }
 
@@ -3941,7 +3955,8 @@ static inline void ib_dma_unmap_single(struct ib_device *dev,
                                       u64 addr, size_t size,
                                       enum dma_data_direction direction)
 {
-       dma_unmap_single(dev->dma_device, addr, size, direction);
+       if (!ib_uses_virt_dma(dev))
+               dma_unmap_single(dev->dma_device, addr, size, direction);
 }
 
 /**
@@ -3958,6 +3973,8 @@ static inline u64 ib_dma_map_page(struct ib_device *dev,
                                  size_t size,
                                         enum dma_data_direction direction)
 {
+       if (ib_uses_virt_dma(dev))
+               return (uintptr_t)(page_address(page) + offset);
        return dma_map_page(dev->dma_device, page, offset, size, direction);
 }
 
@@ -3972,7 +3989,30 @@ static inline void ib_dma_unmap_page(struct ib_device *dev,
                                     u64 addr, size_t size,
                                     enum dma_data_direction direction)
 {
-       dma_unmap_page(dev->dma_device, addr, size, direction);
+       if (!ib_uses_virt_dma(dev))
+               dma_unmap_page(dev->dma_device, addr, size, direction);
+}
+
+int ib_dma_virt_map_sg(struct ib_device *dev, struct scatterlist *sg, int nents);
+static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
+                                     struct scatterlist *sg, int nents,
+                                     enum dma_data_direction direction,
+                                     unsigned long dma_attrs)
+{
+       if (ib_uses_virt_dma(dev))
+               return ib_dma_virt_map_sg(dev, sg, nents);
+       return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
+                               dma_attrs);
+}
+
+static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
+                                        struct scatterlist *sg, int nents,
+                                        enum dma_data_direction direction,
+                                        unsigned long dma_attrs)
+{
+       if (!ib_uses_virt_dma(dev))
+               dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction,
+                                  dma_attrs);
 }
 
 /**
@@ -3986,7 +4026,7 @@ static inline int ib_dma_map_sg(struct ib_device *dev,
                                struct scatterlist *sg, int nents,
                                enum dma_data_direction direction)
 {
-       return dma_map_sg(dev->dma_device, sg, nents, direction);
+       return ib_dma_map_sg_attrs(dev, sg, nents, direction, 0);
 }
 
 /**
@@ -4000,24 +4040,7 @@ static inline void ib_dma_unmap_sg(struct ib_device *dev,
                                   struct scatterlist *sg, int nents,
                                   enum dma_data_direction direction)
 {
-       dma_unmap_sg(dev->dma_device, sg, nents, direction);
-}
-
-static inline int ib_dma_map_sg_attrs(struct ib_device *dev,
-                                     struct scatterlist *sg, int nents,
-                                     enum dma_data_direction direction,
-                                     unsigned long dma_attrs)
-{
-       return dma_map_sg_attrs(dev->dma_device, sg, nents, direction,
-                               dma_attrs);
-}
-
-static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
-                                        struct scatterlist *sg, int nents,
-                                        enum dma_data_direction direction,
-                                        unsigned long dma_attrs)
-{
-       dma_unmap_sg_attrs(dev->dma_device, sg, nents, direction, dma_attrs);
+       ib_dma_unmap_sg_attrs(dev, sg, nents, direction, 0);
 }
 
 /**
@@ -4028,6 +4051,8 @@ static inline void ib_dma_unmap_sg_attrs(struct ib_device *dev,
  */
 static inline unsigned int ib_dma_max_seg_size(struct ib_device *dev)
 {
+       if (ib_uses_virt_dma(dev))
+               return UINT_MAX;
        return dma_get_max_seg_size(dev->dma_device);
 }
 
@@ -4043,7 +4068,8 @@ static inline void ib_dma_sync_single_for_cpu(struct ib_device *dev,
                                              size_t size,
                                              enum dma_data_direction dir)
 {
-       dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
+       if (!ib_uses_virt_dma(dev))
+               dma_sync_single_for_cpu(dev->dma_device, addr, size, dir);
 }
 
 /**
@@ -4058,7 +4084,8 @@ static inline void ib_dma_sync_single_for_device(struct ib_device *dev,
                                                 size_t size,
                                                 enum dma_data_direction dir)
 {
-       dma_sync_single_for_device(dev->dma_device, addr, size, dir);
+       if (!ib_uses_virt_dma(dev))
+               dma_sync_single_for_device(dev->dma_device, addr, size, dir);
 }
 
 /* ib_reg_user_mr - register a memory region for virtual addresses from kernel