RDMA: Explicitly pass in the dma_device to ib_register_device
authorJason Gunthorpe <jgg@nvidia.com>
Thu, 8 Oct 2020 08:27:52 +0000 (11:27 +0300)
committerJason Gunthorpe <jgg@nvidia.com>
Fri, 16 Oct 2020 16:53:46 +0000 (13:53 -0300)
The code in setup_dma_device has become rather convoluted; move all of
this to the drivers. Drivers now pass in a DMA capable struct device which
will be used to set up DMA, or drivers must fully configure the ibdev for
DMA themselves and pass in NULL.

Other than setting the masks in rvt, all drivers were doing this already
anyhow.
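
As a minimal sketch of the new contract (not part of this patch; the
foo_* names are made up for illustration), a PCI-backed driver now does
roughly:

  #include <linux/dma-mapping.h>
  #include <rdma/ib_verbs.h>

  static int foo_register_ib(struct foo_dev *fdev)
  {
          struct pci_dev *pdev = fdev->pdev;

          /* Widen the 64K segment size the PCI core defaults to */
          dma_set_max_seg_size(&pdev->dev, UINT_MAX);

          /* The core stores &pdev->dev as ibdev->dma_device */
          return ib_register_device(&fdev->ibdev, "foo_%d", &pdev->dev);
  }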

mthca, mlx4 and mlx5 were already setting the maximum DMA segment size
based on their hardware limits in:
__mthca_init_one()
  dma_set_max_seg_size (1G)

__mlx4_init_one()
  dma_set_max_seg_size (1G)

mlx5_pci_init()
  set_dma_caps()
    dma_set_max_seg_size (2G)

The other non-software drivers (except usnic) were extended to UINT_MAX
[1, 2] instead of the 2G used before.
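
For comparison, a software driver (the rxe/siw style NULL case) is now
expected to configure the ibdev for DMA itself before registering; a
rough sketch (bar_* names are again made up):

  static int bar_register_ib(struct bar_dev *bdev)
  {
          struct ib_device *ibdev = &bdev->ibdev;

          /* Software devices supply their own dma_parms and masks */
          ibdev->dev.dma_parms = &bdev->dma_parms;
          dma_set_max_seg_size(&ibdev->dev, UINT_MAX);
          dma_set_coherent_mask(&ibdev->dev,
                                dma_get_required_mask(&ibdev->dev));

          /* NULL: the core falls back to dma_virt_ops on &ibdev->dev */
          return ib_register_device(ibdev, "bar_%d", NULL);
  }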

[1] https://lore.kernel.org/linux-rdma/20200924114940.GE9475@nvidia.com/
[2] https://lore.kernel.org/linux-rdma/20200924114940.GE9475@nvidia.com/

Link: https://lore.kernel.org/r/20201008082752.275846-1-leon@kernel.org
Suggested-by: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Parav Pandit <parav@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
17 files changed:
drivers/infiniband/core/device.c
drivers/infiniband/hw/bnxt_re/main.c
drivers/infiniband/hw/cxgb4/provider.c
drivers/infiniband/hw/efa/efa_main.c
drivers/infiniband/hw/hns/hns_roce_main.c
drivers/infiniband/hw/i40iw/i40iw_verbs.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/qedr/main.c
drivers/infiniband/hw/usnic/usnic_ib_main.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
drivers/infiniband/sw/rdmavt/vt.c
drivers/infiniband/sw/rxe/rxe_verbs.c
drivers/infiniband/sw/siw/siw_main.c
include/rdma/ib_verbs.h

index dab1f9d658802964a07ef38f1ed46dab2ba6cf43..a3b1fc84cdcab9978ded22068cff5d01531f51e6 100644 (file)
@@ -1177,58 +1177,23 @@ out:
        return ret;
 }
 
-static void setup_dma_device(struct ib_device *device)
+static void setup_dma_device(struct ib_device *device,
+                            struct device *dma_device)
 {
-       struct device *parent = device->dev.parent;
-
-       WARN_ON_ONCE(device->dma_device);
-
-#ifdef CONFIG_DMA_OPS
-       if (device->dev.dma_ops) {
-               /*
-                * The caller provided custom DMA operations. Copy the
-                * DMA-related fields that are used by e.g. dma_alloc_coherent()
-                * into device->dev.
-                */
-               device->dma_device = &device->dev;
-               if (!device->dev.dma_mask) {
-                       if (parent)
-                               device->dev.dma_mask = parent->dma_mask;
-                       else
-                               WARN_ON_ONCE(true);
-               }
-               if (!device->dev.coherent_dma_mask) {
-                       if (parent)
-                               device->dev.coherent_dma_mask =
-                                       parent->coherent_dma_mask;
-                       else
-                               WARN_ON_ONCE(true);
-               }
-       } else
-#endif /* CONFIG_DMA_OPS */
-       {
-               /*
-                * The caller did not provide custom DMA operations. Use the
-                * DMA mapping operations of the parent device.
-                */
-               WARN_ON_ONCE(!parent);
-               device->dma_device = parent;
-       }
-
-       if (!device->dev.dma_parms) {
-               if (parent) {
-                       /*
-                        * The caller did not provide DMA parameters, so
-                        * 'parent' probably represents a PCI device. The PCI
-                        * core sets the maximum segment size to 64
-                        * KB. Increase this parameter to 2 GB.
-                        */
-                       device->dev.dma_parms = parent->dma_parms;
-                       dma_set_max_seg_size(device->dma_device, SZ_2G);
-               } else {
-                       WARN_ON_ONCE(true);
-               }
+       /*
+        * If the caller does not provide a DMA capable device then the IB
+        * device will be used. In this case the caller should fully setup the
+        * ibdev for DMA. This usually means using dma_virt_ops.
+        */
+#ifdef CONFIG_DMA_VIRT_OPS
+       if (!dma_device) {
+               device->dev.dma_ops = &dma_virt_ops;
+               dma_device = &device->dev;
        }
+#endif
+       WARN_ON(!dma_device);
+       device->dma_device = dma_device;
+       WARN_ON(!device->dma_device->dma_parms);
 }
 
 /*
@@ -1241,7 +1206,6 @@ static int setup_device(struct ib_device *device)
        struct ib_udata uhw = {.outlen = 0, .inlen = 0};
        int ret;
 
-       setup_dma_device(device);
        ib_device_check_mandatory(device);
 
        ret = setup_port_data(device);
@@ -1354,7 +1318,10 @@ static void prevent_dealloc_device(struct ib_device *ib_dev)
  * ib_register_device - Register an IB device with IB core
  * @device: Device to register
  * @name: unique string device name. This may include a '%' which will
- * cause a unique index to be added to the passed device name.
+ *       cause a unique index to be added to the passed device name.
+ * @dma_device: pointer to a DMA-capable device. If %NULL, then the IB
+ *             device will be used. In this case the caller should fully
+ *             setup the ibdev for DMA. This usually means using dma_virt_ops.
  *
  * Low-level drivers use ib_register_device() to register their
  * devices with the IB core.  All registered clients will receive a
@@ -1365,7 +1332,8 @@ static void prevent_dealloc_device(struct ib_device *ib_dev)
  * asynchronously then the device pointer may become freed as soon as this
  * function returns.
  */
-int ib_register_device(struct ib_device *device, const char *name)
+int ib_register_device(struct ib_device *device, const char *name,
+                      struct device *dma_device)
 {
        int ret;
 
@@ -1373,6 +1341,7 @@ int ib_register_device(struct ib_device *device, const char *name)
        if (ret)
                return ret;
 
+       setup_dma_device(device, dma_device);
        ret = setup_device(device);
        if (ret)
                return ret;
index 53aee5a42ab85074d356d9f4aa7ee86a421a8687..04621ba8fa76112c7fd2bf00c22ba05f0b602d38 100644 (file)
@@ -736,7 +736,8 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev)
        if (ret)
                return ret;
 
-       return ib_register_device(ibdev, "bnxt_re%d");
+       dma_set_max_seg_size(&rdev->en_dev->pdev->dev, UINT_MAX);
+       return ib_register_device(ibdev, "bnxt_re%d", &rdev->en_dev->pdev->dev);
 }
 
 static void bnxt_re_dev_remove(struct bnxt_re_dev *rdev)
index 4b76f2f3f4e483e237aac474231e567d74119367..8138c57a1e43bc7acb01e57f042f1bff74eb2843 100644 (file)
@@ -570,7 +570,9 @@ void c4iw_register_device(struct work_struct *work)
        ret = set_netdevs(&dev->ibdev, &dev->rdev);
        if (ret)
                goto err_dealloc_ctx;
-       ret = ib_register_device(&dev->ibdev, "cxgb4_%d");
+       dma_set_max_seg_size(&dev->rdev.lldi.pdev->dev, UINT_MAX);
+       ret = ib_register_device(&dev->ibdev, "cxgb4_%d",
+                                &dev->rdev.lldi.pdev->dev);
        if (ret)
                goto err_dealloc_ctx;
        return;
index 92d7011463203cef205f7f4b7ca2e1bd1ddc6b96..6faed3a81e0878e3413f011e2ed79879586d1bd9 100644 (file)
@@ -331,7 +331,7 @@ static int efa_ib_device_add(struct efa_dev *dev)
 
        ib_set_device_ops(&dev->ibdev, &efa_dev_ops);
 
-       err = ib_register_device(&dev->ibdev, "efa_%d");
+       err = ib_register_device(&dev->ibdev, "efa_%d", &pdev->dev);
        if (err)
                goto err_release_doorbell_bar;
 
@@ -418,7 +418,7 @@ static int efa_device_init(struct efa_com_dev *edev, struct pci_dev *pdev)
                        err);
                return err;
        }
-
+       dma_set_max_seg_size(&pdev->dev, UINT_MAX);
        return 0;
 }
 
index 467c829000190b68964ac70635c9c372f82903fb..afeffafc59f9067a72b63e454994009a9f60b6a9 100644 (file)
@@ -549,7 +549,8 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev)
                if (ret)
                        return ret;
        }
-       ret = ib_register_device(ib_dev, "hns_%d");
+       dma_set_max_seg_size(dev, UINT_MAX);
+       ret = ib_register_device(ib_dev, "hns_%d", dev);
        if (ret) {
                dev_err(dev, "ib_register_device failed!\n");
                return ret;
index 747b4de6faca007c201a52d5bc3806a07944bacd..581ecbadf5861fb9bcf643ba5d9a5c5d921fd43f 100644 (file)
@@ -2761,7 +2761,8 @@ int i40iw_register_rdma_device(struct i40iw_device *iwdev)
        if (ret)
                goto error;
 
-       ret = ib_register_device(&iwibdev->ibdev, "i40iw%d");
+       dma_set_max_seg_size(&iwdev->hw.pcidev->dev, UINT_MAX);
+       ret = ib_register_device(&iwibdev->ibdev, "i40iw%d", &iwdev->hw.pcidev->dev);
        if (ret)
                goto error;
 
index 753c7040249887a048def8b290ec77ba6cd431bb..cd0fba6b096428b9c85f3d6763e58565860c7e0e 100644 (file)
@@ -2841,7 +2841,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev)
                goto err_steer_free_bitmap;
 
        rdma_set_device_sysfs_group(&ibdev->ib_dev, &mlx4_attr_group);
-       if (ib_register_device(&ibdev->ib_dev, "mlx4_%d"))
+       if (ib_register_device(&ibdev->ib_dev, "mlx4_%d",
+                              &dev->persist->pdev->dev))
                goto err_diag_counters;
 
        if (mlx4_ib_mad_init(ibdev))
index 7082172b5b61a12248cd286458ee7a9223630554..89e04ca62ae0fa4965cdaba51dd7fb68a76a3012 100644 (file)
@@ -4380,7 +4380,7 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
                name = "mlx5_%d";
        else
                name = "mlx5_bond_%d";
-       return ib_register_device(&dev->ib_dev, name);
+       return ib_register_device(&dev->ib_dev, name, &dev->mdev->pdev->dev);
 }
 
 static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
index 31b558ff8218523c943d67d7929139efa6da6b79..c4d9cdc4ee97e8b6ad1665d300de91e4611a807c 100644 (file)
@@ -1206,7 +1206,7 @@ int mthca_register_device(struct mthca_dev *dev)
        mutex_init(&dev->cap_mask_mutex);
 
        rdma_set_device_sysfs_group(&dev->ib_dev, &mthca_attr_group);
-       ret = ib_register_device(&dev->ib_dev, "mthca%d");
+       ret = ib_register_device(&dev->ib_dev, "mthca%d", &dev->pdev->dev);
        if (ret)
                return ret;
 
index d8c47d24d6d66bee21900962f2181a20403c5b7f..9b96661a71435b84c982018adcfd1cf94b294220 100644 (file)
@@ -255,7 +255,9 @@ static int ocrdma_register_device(struct ocrdma_dev *dev)
        if (ret)
                return ret;
 
-       return ib_register_device(&dev->ibdev, "ocrdma%d");
+       dma_set_max_seg_size(&dev->nic_info.pdev->dev, UINT_MAX);
+       return ib_register_device(&dev->ibdev, "ocrdma%d",
+                                 &dev->nic_info.pdev->dev);
 }
 
 static int ocrdma_alloc_resources(struct ocrdma_dev *dev)
index 7c0aac3e635bcb148fe2c8a6eed524448ae190f0..967641662b24a75c37dd73e5a4dc2cdd891a91e3 100644 (file)
@@ -293,7 +293,8 @@ static int qedr_register_device(struct qedr_dev *dev)
        if (rc)
                return rc;
 
-       return ib_register_device(&dev->ibdev, "qedr%d");
+       dma_set_max_seg_size(&dev->pdev->dev, UINT_MAX);
+       return ib_register_device(&dev->ibdev, "qedr%d", &dev->pdev->dev);
 }
 
 /* This function allocates fast-path status block memory */
index 462ed71abf531899256abf753c8cbdf65a36f028..aa2e65fc5cd652996049ead63c3e123e5a3848a8 100644 (file)
@@ -425,7 +425,8 @@ static void *usnic_ib_device_add(struct pci_dev *dev)
        if (ret)
                goto err_fwd_dealloc;
 
-       if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d"))
+       dma_set_max_seg_size(&dev->dev, SZ_2G);
+       if (ib_register_device(&us_ibdev->ib_dev, "usnic_%d", &dev->dev))
                goto err_fwd_dealloc;
 
        usnic_fwd_set_mtu(us_ibdev->ufdev, us_ibdev->netdev->mtu);
index 780fd2dfc07eb03200fede1ad914ab3900af2c87..fa2a3fa0c3e4e116125c6a122bd6126103ecf7a4 100644 (file)
@@ -270,7 +270,7 @@ static int pvrdma_register_device(struct pvrdma_dev *dev)
        spin_lock_init(&dev->srq_tbl_lock);
        rdma_set_device_sysfs_group(&dev->ib_dev, &pvrdma_attr_group);
 
-       ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d");
+       ret = ib_register_device(&dev->ib_dev, "vmw_pvrdma%d", &dev->pdev->dev);
        if (ret)
                goto err_srq_free;
 
@@ -854,7 +854,7 @@ static int pvrdma_pci_probe(struct pci_dev *pdev,
                        goto err_free_resource;
                }
        }
-
+       dma_set_max_seg_size(&pdev->dev, UINT_MAX);
        pci_set_master(pdev);
 
        /* Map register space */
index 2d534c450f3c861781df8a19a8ab512fe8150b62..52218684ad4ab0f77ceb0d1bc4a0c2e81a550c8b 100644 (file)
@@ -579,7 +579,9 @@ int rvt_register_device(struct rvt_dev_info *rdi)
        spin_lock_init(&rdi->n_cqs_lock);
 
        /* DMA Operations */
-       rdi->ibdev.dev.dma_ops = rdi->ibdev.dev.dma_ops ? : &dma_virt_ops;
+       rdi->ibdev.dev.dma_parms = rdi->ibdev.dev.parent->dma_parms;
+       dma_set_coherent_mask(&rdi->ibdev.dev,
+                             rdi->ibdev.dev.parent->coherent_dma_mask);
 
        /* Protection Domain */
        spin_lock_init(&rdi->n_pds_lock);
@@ -627,7 +629,7 @@ int rvt_register_device(struct rvt_dev_info *rdi)
                rdi->ibdev.num_comp_vectors = 1;
 
        /* We are now good to announce we exist */
-       ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev));
+       ret = ib_register_device(&rdi->ibdev, dev_name(&rdi->ibdev.dev), NULL);
        if (ret) {
                rvt_pr_err(rdi, "Failed to register driver with ib core.\n");
                goto bail_wss;
index ba8faa34969b99ea9ad3ebe2e0e3f3c930c01575..1fc022362fbe4973298623e164a4394f03a93378 100644 (file)
@@ -1128,12 +1128,9 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
        dev->local_dma_lkey = 0;
        addrconf_addr_eui48((unsigned char *)&dev->node_guid,
                            rxe->ndev->dev_addr);
-       dev->dev.dma_ops = &dma_virt_ops;
        dev->dev.dma_parms = &rxe->dma_parms;
-       rxe->dma_parms = (struct device_dma_parameters)
-               { .max_segment_size = SZ_2G };
-       dma_coerce_mask_and_coherent(&dev->dev,
-                                    dma_get_required_mask(&dev->dev));
+       dma_set_max_seg_size(&dev->dev, UINT_MAX);
+       dma_set_coherent_mask(&dev->dev, dma_get_required_mask(&dev->dev));
 
        dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT)
            | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL)
@@ -1182,7 +1179,7 @@ int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name)
        rxe->tfm = tfm;
 
        rdma_set_device_sysfs_group(dev, &rxe_attr_group);
-       err = ib_register_device(dev, ibdev_name);
+       err = ib_register_device(dev, ibdev_name, NULL);
        if (err)
                pr_warn("%s failed with error %d\n", __func__, err);
 
index d862bec843766fdb2282799e8fd372ceaa3b2b3c..ca8bc72968672f7cbcc3563db5db47fcaf3dc28f 100644 (file)
@@ -69,7 +69,7 @@ static int siw_device_register(struct siw_device *sdev, const char *name)
 
        sdev->vendor_part_id = dev_id++;
 
-       rv = ib_register_device(base_dev, name);
+       rv = ib_register_device(base_dev, name, NULL);
        if (rv) {
                pr_warn("siw: device registration error %d\n", rv);
                return rv;
@@ -382,10 +382,10 @@ static struct siw_device *siw_device_create(struct net_device *netdev)
         */
        base_dev->phys_port_cnt = 1;
        base_dev->dev.parent = parent;
-       base_dev->dev.dma_ops = &dma_virt_ops;
        base_dev->dev.dma_parms = &sdev->dma_parms;
-       sdev->dma_parms = (struct device_dma_parameters)
-               { .max_segment_size = SZ_2G };
+       dma_set_max_seg_size(&base_dev->dev, UINT_MAX);
+       dma_set_coherent_mask(&base_dev->dev,
+                             dma_get_required_mask(&base_dev->dev));
        base_dev->num_comp_vectors = num_possible_cpus();
 
        xa_init_flags(&sdev->qp_xa, XA_FLAGS_ALLOC1);
index ce935d70fdc87940cd65ba1eca84be3ef170bbf9..9bf6c319a670e2472dc0fcf3fbc55324b51a5cfb 100644 (file)
@@ -2782,7 +2782,8 @@ void ib_dealloc_device(struct ib_device *device);
 
 void ib_get_device_fw_str(struct ib_device *device, char *str);
 
-int ib_register_device(struct ib_device *device, const char *name);
+int ib_register_device(struct ib_device *device, const char *name,
+                      struct device *dma_device);
 void ib_unregister_device(struct ib_device *device);
 void ib_unregister_driver(enum rdma_driver_id driver_id);
 void ib_unregister_device_and_put(struct ib_device *device);