RDMA/rxe: Use kzalloc/kfree for mca
authorBob Pearson <rpearsonhpe@gmail.com>
Tue, 8 Feb 2022 21:16:36 +0000 (15:16 -0600)
committerJason Gunthorpe <jgg@nvidia.com>
Wed, 16 Feb 2022 15:59:11 +0000 (11:59 -0400)
Remove rxe_mca (was rxe_mc_elem) from rxe pools and use kzalloc and kfree
to allocate and free in rxe_mcast.c. Call kzalloc outside of spinlocks to
avoid having to use GFP_ATOMIC.

Link: https://lore.kernel.org/r/20220208211644.123457-3-rpearsonhpe@gmail.com
Signed-off-by: Bob Pearson <rpearsonhpe@gmail.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/sw/rxe/rxe.c
drivers/infiniband/sw/rxe/rxe_mcast.c
drivers/infiniband/sw/rxe/rxe_pool.c
drivers/infiniband/sw/rxe/rxe_pool.h
drivers/infiniband/sw/rxe/rxe_verbs.h

index e74c4216b314aaeced9fb12ae5d493bfb1f9bb47..7386a51b953de41b69c9ab38041ddc9967f89d33 100644 (file)
@@ -29,7 +29,6 @@ void rxe_dealloc(struct ib_device *ib_dev)
        rxe_pool_cleanup(&rxe->mr_pool);
        rxe_pool_cleanup(&rxe->mw_pool);
        rxe_pool_cleanup(&rxe->mc_grp_pool);
-       rxe_pool_cleanup(&rxe->mc_elem_pool);
 
        if (rxe->tfm)
                crypto_free_shash(rxe->tfm);
@@ -163,15 +162,8 @@ static int rxe_init_pools(struct rxe_dev *rxe)
        if (err)
                goto err9;
 
-       err = rxe_pool_init(rxe, &rxe->mc_elem_pool, RXE_TYPE_MC_ELEM,
-                           rxe->attr.max_total_mcast_qp_attach);
-       if (err)
-               goto err10;
-
        return 0;
 
-err10:
-       rxe_pool_cleanup(&rxe->mc_grp_pool);
 err9:
        rxe_pool_cleanup(&rxe->mw_pool);
 err8:
index fae04497cf2b9cc069cffa07e0763f42cb928b58..2c0a2d3cc93e536ebc237eb9e802ec41117dc4b2 100644 (file)
@@ -26,96 +26,104 @@ static int rxe_mcast_delete(struct rxe_dev *rxe, union ib_gid *mgid)
 }
 
 /* caller should hold rxe->mcg_lock */
-static struct rxe_mcg *create_grp(struct rxe_dev *rxe,
-                                    struct rxe_pool *pool,
-                                    union ib_gid *mgid)
+static struct rxe_mcg *__rxe_create_grp(struct rxe_dev *rxe,
+                                       struct rxe_pool *pool,
+                                       union ib_gid *mgid)
 {
-       int err;
        struct rxe_mcg *grp;
+       int err;
 
-       grp = rxe_alloc_locked(&rxe->mc_grp_pool);
+       grp = rxe_alloc_locked(pool);
        if (!grp)
                return ERR_PTR(-ENOMEM);
 
-       INIT_LIST_HEAD(&grp->qp_list);
-       grp->rxe = rxe;
-       rxe_add_key_locked(grp, mgid);
-
        err = rxe_mcast_add(rxe, mgid);
        if (unlikely(err)) {
-               rxe_drop_key_locked(grp);
                rxe_drop_ref(grp);
                return ERR_PTR(err);
        }
 
+       INIT_LIST_HEAD(&grp->qp_list);
+       grp->rxe = rxe;
+
+       /* rxe_alloc_locked takes a ref on grp but that will be
+        * dropped when grp goes out of scope. We need to take a ref
+        * on the pointer that will be saved in the red-black tree
+        * by rxe_add_key and used to lookup grp from mgid later.
+        * Adding key makes object visible to outside so this should
+        * be done last after the object is ready.
+        */
+       rxe_add_ref(grp);
+       rxe_add_key_locked(grp, mgid);
+
        return grp;
 }
 
-static int rxe_mcast_get_grp(struct rxe_dev *rxe, union ib_gid *mgid,
-                            struct rxe_mcg **grp_p)
+static struct rxe_mcg *rxe_mcast_get_grp(struct rxe_dev *rxe,
+                                        union ib_gid *mgid)
 {
-       int err;
        struct rxe_mcg *grp;
        struct rxe_pool *pool = &rxe->mc_grp_pool;
        unsigned long flags;
 
        if (rxe->attr.max_mcast_qp_attach == 0)
-               return -EINVAL;
+               return ERR_PTR(-EINVAL);
 
        spin_lock_irqsave(&rxe->mcg_lock, flags);
-
        grp = rxe_pool_get_key_locked(pool, mgid);
-       if (grp)
-               goto done;
-
-       grp = create_grp(rxe, pool, mgid);
-       if (IS_ERR(grp)) {
-               spin_unlock_irqrestore(&rxe->mcg_lock, flags);
-               err = PTR_ERR(grp);
-               return err;
-       }
-
-done:
+       if (!grp)
+               grp = __rxe_create_grp(rxe, pool, mgid);
        spin_unlock_irqrestore(&rxe->mcg_lock, flags);
-       *grp_p = grp;
-       return 0;
+
+       return grp;
 }
 
 static int rxe_mcast_add_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
-                          struct rxe_mcg *grp)
+                                 struct rxe_mcg *grp)
 {
-       int err;
-       struct rxe_mca *elem;
+       struct rxe_mca *mca, *tmp;
        unsigned long flags;
+       int err;
+
+       /* check to see if the qp is already a member of the group */
+       spin_lock_irqsave(&rxe->mcg_lock, flags);
+       list_for_each_entry(mca, &grp->qp_list, qp_list) {
+               if (mca->qp == qp) {
+                       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+                       return 0;
+               }
+       }
+       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+
+       /* speculative alloc new mca without using GFP_ATOMIC */
+       mca = kzalloc(sizeof(*mca), GFP_KERNEL);
+       if (!mca)
+               return -ENOMEM;
 
-       /* check to see of the qp is already a member of the group */
        spin_lock_irqsave(&rxe->mcg_lock, flags);
-       list_for_each_entry(elem, &grp->qp_list, qp_list) {
-               if (elem->qp == qp) {
+       /* re-check to see if someone else just attached qp */
+       list_for_each_entry(tmp, &grp->qp_list, qp_list) {
+               if (tmp->qp == qp) {
+                       kfree(mca);
                        err = 0;
                        goto out;
                }
        }
 
+       /* check limits after checking if already attached */
        if (grp->num_qp >= rxe->attr.max_mcast_qp_attach) {
+               kfree(mca);
                err = -ENOMEM;
                goto out;
        }
 
-       elem = rxe_alloc_locked(&rxe->mc_elem_pool);
-       if (!elem) {
-               err = -ENOMEM;
-               goto out;
-       }
+       /* protect pointer to qp in mca */
+       rxe_add_ref(qp);
+       mca->qp = qp;
 
-       /* each qp holds a ref on the grp */
-       rxe_add_ref(grp);
-
-       grp->num_qp++;
-       elem->qp = qp;
        atomic_inc(&qp->mcg_num);
-
-       list_add(&elem->qp_list, &grp->qp_list);
+       grp->num_qp++;
+       list_add(&mca->qp_list, &grp->qp_list);
 
        err = 0;
 out:
@@ -123,46 +131,80 @@ out:
        return err;
 }
 
+/* caller should be holding rxe->mcg_lock */
+static void __rxe_destroy_grp(struct rxe_mcg *grp)
+{
+       /* first remove grp from red-black tree then drop ref */
+       rxe_drop_key_locked(grp);
+       rxe_drop_ref(grp);
+
+       rxe_mcast_delete(grp->rxe, &grp->mgid);
+}
+
+static void rxe_destroy_grp(struct rxe_mcg *grp)
+{
+       struct rxe_dev *rxe = grp->rxe;
+       unsigned long flags;
+
+       spin_lock_irqsave(&rxe->mcg_lock, flags);
+       __rxe_destroy_grp(grp);
+       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+}
+
+void rxe_mc_cleanup(struct rxe_pool_elem *elem)
+{
+       /* nothing left to do for now */
+}
+
 static int rxe_mcast_drop_grp_elem(struct rxe_dev *rxe, struct rxe_qp *qp,
                                   union ib_gid *mgid)
 {
        struct rxe_mcg *grp;
-       struct rxe_mca *elem, *tmp;
+       struct rxe_mca *mca, *tmp;
        unsigned long flags;
-
-       grp = rxe_pool_get_key(&rxe->mc_grp_pool, mgid);
-       if (!grp)
-               goto err1;
+       int err;
 
        spin_lock_irqsave(&rxe->mcg_lock, flags);
+       grp = rxe_pool_get_key_locked(&rxe->mc_grp_pool, mgid);
+       if (!grp) {
+               /* we didn't find the mcast group for mgid */
+               err = -EINVAL;
+               goto out_unlock;
+       }
 
-       list_for_each_entry_safe(elem, tmp, &grp->qp_list, qp_list) {
-               if (elem->qp == qp) {
-                       list_del(&elem->qp_list);
+       list_for_each_entry_safe(mca, tmp, &grp->qp_list, qp_list) {
+               if (mca->qp == qp) {
+                       list_del(&mca->qp_list);
+
+                       /* if the number of qp's attached to the
+                        * mcast group falls to zero go ahead and
+                        * tear it down. This will not free the
+                        * object since we are still holding a ref
+                        * from the get key above.
+                        */
                        grp->num_qp--;
+                       if (grp->num_qp <= 0)
+                               __rxe_destroy_grp(grp);
+
                        atomic_dec(&qp->mcg_num);
 
-                       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
-                       rxe_drop_ref(elem);
-                       rxe_drop_ref(grp);      /* ref held by QP */
-                       rxe_drop_ref(grp);      /* ref from get_key */
-                       return 0;
+                       /* drop the ref from get key. This will free the
+                        * object if num_qp is zero.
+                        */
+                       rxe_drop_ref(grp);
+                       kfree(mca);
+                       err = 0;
+                       goto out_unlock;
                }
        }
 
-       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
-       rxe_drop_ref(grp);                      /* ref from get_key */
-err1:
-       return -EINVAL;
-}
-
-void rxe_mc_cleanup(struct rxe_pool_elem *elem)
-{
-       struct rxe_mcg *grp = container_of(elem, typeof(*grp), elem);
-       struct rxe_dev *rxe = grp->rxe;
+       /* we didn't find the qp on the list */
+       rxe_drop_ref(grp);
+       err = -EINVAL;
 
-       rxe_drop_key(grp);
-       rxe_mcast_delete(rxe, &grp->mgid);
+out_unlock:
+       spin_unlock_irqrestore(&rxe->mcg_lock, flags);
+       return err;
 }
 
 int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
@@ -173,12 +215,16 @@ int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid)
        struct rxe_mcg *grp;
 
        /* takes a ref on grp if successful */
-       err = rxe_mcast_get_grp(rxe, mgid, &grp);
-       if (err)
-               return err;
+       grp = rxe_mcast_get_grp(rxe, mgid);
+       if (IS_ERR(grp))
+               return PTR_ERR(grp);
 
        err = rxe_mcast_add_grp_elem(rxe, qp, grp);
 
+       /* if we failed to attach the first qp to grp tear it down */
+       if (grp->num_qp == 0)
+               rxe_destroy_grp(grp);
+
        rxe_drop_ref(grp);
        return err;
 }
index a11dab13c192fa81cfbeaeac77dc66731b1fb1e2..eaf4bfc9b8561dab26a50ae85499979ba56f4013 100644 (file)
@@ -90,11 +90,6 @@ static const struct rxe_type_info {
                .key_offset     = offsetof(struct rxe_mcg, mgid),
                .key_size       = sizeof(union ib_gid),
        },
-       [RXE_TYPE_MC_ELEM] = {
-               .name           = "rxe-mc_elem",
-               .size           = sizeof(struct rxe_mca),
-               .elem_offset    = offsetof(struct rxe_mca, elem),
-       },
 };
 
 static int rxe_pool_init_index(struct rxe_pool *pool, u32 max, u32 min)
index 214279310f4df91acc821550649b5dfebcf7db69..9201bb6b8b07bc451f5c43168a0ff094fe3564e0 100644 (file)
@@ -23,7 +23,6 @@ enum rxe_elem_type {
        RXE_TYPE_MR,
        RXE_TYPE_MW,
        RXE_TYPE_MC_GRP,
-       RXE_TYPE_MC_ELEM,
        RXE_NUM_TYPES,          /* keep me last */
 };
 
@@ -156,4 +155,6 @@ void rxe_elem_release(struct kref *kref);
 /* drop a reference on an object */
 #define rxe_drop_ref(obj) kref_put(&(obj)->elem.ref_cnt, rxe_elem_release)
 
+#define rxe_read_ref(obj) kref_read(&(obj)->elem.ref_cnt)
+
 #endif /* RXE_POOL_H */
index 9940c69cbb635022758b3a4246d0847ff5a351b1..1b0f4088189540ebe6cfd2c829b7d2c84395476a 100644 (file)
@@ -362,7 +362,6 @@ struct rxe_mcg {
 };
 
 struct rxe_mca {
-       struct rxe_pool_elem    elem;
        struct list_head        qp_list;
        struct rxe_qp           *qp;
 };
@@ -396,7 +395,6 @@ struct rxe_dev {
        struct rxe_pool         mr_pool;
        struct rxe_pool         mw_pool;
        struct rxe_pool         mc_grp_pool;
-       struct rxe_pool         mc_elem_pool;
 
        spinlock_t              mcg_lock;