RDMA/hns: Support userspace configuring congestion control algorithm with QP granularity
author Junxian Huang <huangjunxian6@hisilicon.com>
Fri, 1 Mar 2024 10:48:45 +0000 (18:48 +0800)
committer Leon Romanovsky <leon@kernel.org>
Sun, 3 Mar 2024 13:01:33 +0000 (15:01 +0200)
Currently, the congestion control algorithm is statically configured in
FW, and all QPs use the same algorithm (except UD, which is fixed to
DCQCN). This is not flexible enough.

Support configuring the congestion control algorithm from userspace
with QP granularity at QP creation time. If userspace does not specify
an algorithm, fall back to the default one.

Signed-off-by: Junxian Huang <huangjunxian6@hisilicon.com>
Link: https://lore.kernel.org/r/20240301104845.1141083-1-huangjunxian6@hisilicon.com
Signed-off-by: Leon Romanovsky <leon@kernel.org>
drivers/infiniband/hw/hns/hns_roce_device.h
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.h
drivers/infiniband/hw/hns/hns_roce_main.c
drivers/infiniband/hw/hns/hns_roce_qp.c
include/uapi/rdma/hns-abi.h

diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h
index 1d062c522d690f1f6261580e29c41cae69165a88..bc015901a7d37b5d64e165573faed9d4e83b2b80 100644
--- a/drivers/infiniband/hw/hns/hns_roce_device.h
+++ b/drivers/infiniband/hw/hns/hns_roce_device.h
@@ -845,7 +845,8 @@ struct hns_roce_caps {
        u16             default_aeq_period;
        u16             default_aeq_arm_st;
        u16             default_ceq_arm_st;
-       enum hns_roce_cong_type cong_type;
+       u8              cong_cap;
+       enum hns_roce_cong_type default_cong_type;
 };
 
 enum hns_roce_device_state {
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
index 42e28586cefacfffcbc1a9c54135e1ab94846652..38e426f4afb5c111f196e6105fd3c8386e51dbe6 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.c
@@ -2209,11 +2209,12 @@ static int hns_roce_query_caps(struct hns_roce_dev *hr_dev)
        caps->max_wqes = 1 << le16_to_cpu(resp_c->sq_depth);
 
        caps->num_srqs = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_SRQS);
-       caps->cong_type = hr_reg_read(resp_d, PF_CAPS_D_CONG_TYPE);
+       caps->cong_cap = hr_reg_read(resp_d, PF_CAPS_D_CONG_CAP);
        caps->max_srq_wrs = 1 << le16_to_cpu(resp_d->srq_depth);
        caps->ceqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_CEQ_DEPTH);
        caps->num_comp_vectors = hr_reg_read(resp_d, PF_CAPS_D_NUM_CEQS);
        caps->aeqe_depth = 1 << hr_reg_read(resp_d, PF_CAPS_D_AEQ_DEPTH);
+       caps->default_cong_type = hr_reg_read(resp_d, PF_CAPS_D_DEFAULT_ALG);
        caps->reserved_pds = hr_reg_read(resp_d, PF_CAPS_D_RSV_PDS);
        caps->num_uars = 1 << hr_reg_read(resp_d, PF_CAPS_D_NUM_UARS);
        caps->reserved_qps = hr_reg_read(resp_d, PF_CAPS_D_RSV_QPS);
@@ -4737,14 +4738,8 @@ enum {
 static int check_cong_type(struct ib_qp *ibqp,
                           struct hns_roce_congestion_algorithm *cong_alg)
 {
-       struct hns_roce_dev *hr_dev = to_hr_dev(ibqp->device);
        struct hns_roce_qp *hr_qp = to_hr_qp(ibqp);
 
-       if (ibqp->qp_type == IB_QPT_UD || ibqp->qp_type == IB_QPT_GSI)
-               hr_qp->cong_type = CONG_TYPE_DCQCN;
-       else
-               hr_qp->cong_type = hr_dev->caps.cong_type;
-
        /* different congestion types match different configurations */
        switch (hr_qp->cong_type) {
        case CONG_TYPE_DCQCN:
@@ -4772,9 +4767,6 @@ static int check_cong_type(struct ib_qp *ibqp,
                cong_alg->wnd_mode_sel = WND_LIMIT;
                break;
        default:
-               ibdev_warn(&hr_dev->ib_dev,
-                          "invalid type(%u) for congestion selection.\n",
-                          hr_qp->cong_type);
                hr_qp->cong_type = CONG_TYPE_DCQCN;
                cong_alg->alg_sel = CONG_DCQCN;
                cong_alg->alg_sub_sel = UNSUPPORT_CONG_LEVEL;
diff --git a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
index cd97cbee682a6a51a4290720881a7a3b89d26a42..359a74672ba1d9d393667403049147639eb7b3dc 100644
--- a/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
+++ b/drivers/infiniband/hw/hns/hns_roce_hw_v2.h
@@ -1214,12 +1214,13 @@ struct hns_roce_query_pf_caps_d {
 #define PF_CAPS_D_RQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(21, 20)
 #define PF_CAPS_D_EX_SGE_HOP_NUM PF_CAPS_D_FIELD_LOC(23, 22)
 #define PF_CAPS_D_SQWQE_HOP_NUM PF_CAPS_D_FIELD_LOC(25, 24)
-#define PF_CAPS_D_CONG_TYPE PF_CAPS_D_FIELD_LOC(29, 26)
+#define PF_CAPS_D_CONG_CAP PF_CAPS_D_FIELD_LOC(29, 26)
 #define PF_CAPS_D_CEQ_DEPTH PF_CAPS_D_FIELD_LOC(85, 64)
 #define PF_CAPS_D_NUM_CEQS PF_CAPS_D_FIELD_LOC(95, 86)
 #define PF_CAPS_D_AEQ_DEPTH PF_CAPS_D_FIELD_LOC(117, 96)
 #define PF_CAPS_D_AEQ_ARM_ST PF_CAPS_D_FIELD_LOC(119, 118)
 #define PF_CAPS_D_CEQ_ARM_ST PF_CAPS_D_FIELD_LOC(121, 120)
+#define PF_CAPS_D_DEFAULT_ALG PF_CAPS_D_FIELD_LOC(127, 122)
 #define PF_CAPS_D_RSV_PDS PF_CAPS_D_FIELD_LOC(147, 128)
 #define PF_CAPS_D_NUM_UARS PF_CAPS_D_FIELD_LOC(155, 148)
 #define PF_CAPS_D_RSV_QPS PF_CAPS_D_FIELD_LOC(179, 160)
diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c
index b55fe6911f9f1a8230ace74475f2b0d2cd8937e6..1dc60c2b2b7ab841266a4e25da2af3a4ce8218b9 100644
--- a/drivers/infiniband/hw/hns/hns_roce_main.c
+++ b/drivers/infiniband/hw/hns/hns_roce_main.c
@@ -394,6 +394,9 @@ static int hns_roce_alloc_ucontext(struct ib_ucontext *uctx,
                        resp.config |= HNS_ROCE_RSP_CQE_INLINE_FLAGS;
        }
 
+       if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
+               resp.congest_type = hr_dev->caps.cong_cap;
+
        ret = hns_roce_uar_alloc(hr_dev, &context->uar);
        if (ret)
                goto error_out;
diff --git a/drivers/infiniband/hw/hns/hns_roce_qp.c b/drivers/infiniband/hw/hns/hns_roce_qp.c
index 31b147210688ab6600d68124d063d78356b73fd0..f35a66325d9a6596b3425bac9676644fac675d4f 100644
--- a/drivers/infiniband/hw/hns/hns_roce_qp.c
+++ b/drivers/infiniband/hw/hns/hns_roce_qp.c
@@ -1004,6 +1004,60 @@ static void free_kernel_wrid(struct hns_roce_qp *hr_qp)
        kfree(hr_qp->sq.wrid);
 }
 
+static void default_congest_type(struct hns_roce_dev *hr_dev,
+                                struct hns_roce_qp *hr_qp)
+{
+       if (hr_qp->ibqp.qp_type == IB_QPT_UD ||
+           hr_qp->ibqp.qp_type == IB_QPT_GSI)
+               hr_qp->cong_type = CONG_TYPE_DCQCN;
+       else
+               hr_qp->cong_type = hr_dev->caps.default_cong_type;
+}
+
+static int set_congest_type(struct hns_roce_qp *hr_qp,
+                           struct hns_roce_ib_create_qp *ucmd)
+{
+       struct hns_roce_dev *hr_dev = to_hr_dev(hr_qp->ibqp.device);
+
+       switch (ucmd->cong_type_flags) {
+       case HNS_ROCE_CREATE_QP_FLAGS_DCQCN:
+               hr_qp->cong_type = CONG_TYPE_DCQCN;
+               break;
+       case HNS_ROCE_CREATE_QP_FLAGS_LDCP:
+               hr_qp->cong_type = CONG_TYPE_LDCP;
+               break;
+       case HNS_ROCE_CREATE_QP_FLAGS_HC3:
+               hr_qp->cong_type = CONG_TYPE_HC3;
+               break;
+       case HNS_ROCE_CREATE_QP_FLAGS_DIP:
+               hr_qp->cong_type = CONG_TYPE_DIP;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (!test_bit(hr_qp->cong_type, (unsigned long *)&hr_dev->caps.cong_cap))
+               return -EOPNOTSUPP;
+
+       if (hr_qp->ibqp.qp_type == IB_QPT_UD &&
+           hr_qp->cong_type != CONG_TYPE_DCQCN)
+               return -EOPNOTSUPP;
+
+       return 0;
+}
+
+static int set_congest_param(struct hns_roce_dev *hr_dev,
+                            struct hns_roce_qp *hr_qp,
+                            struct hns_roce_ib_create_qp *ucmd)
+{
+       if (ucmd->comp_mask & HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE)
+               return set_congest_type(hr_qp, ucmd);
+
+       default_congest_type(hr_dev, hr_qp);
+
+       return 0;
+}
+
 static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
                        struct ib_qp_init_attr *init_attr,
                        struct ib_udata *udata,
@@ -1043,6 +1097,10 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
                        ibdev_err(ibdev,
                                  "failed to set user SQ size, ret = %d.\n",
                                  ret);
+
+               ret = set_congest_param(hr_dev, hr_qp, ucmd);
+               if (ret)
+                       return ret;
        } else {
                if (hr_dev->pci_dev->revision >= PCI_REVISION_ID_HIP09)
                        hr_qp->config = HNS_ROCE_EXSGE_FLAGS;
@@ -1051,6 +1109,8 @@ static int set_qp_param(struct hns_roce_dev *hr_dev, struct hns_roce_qp *hr_qp,
                        ibdev_err(ibdev,
                                  "failed to set kernel SQ size, ret = %d.\n",
                                  ret);
+
+               default_congest_type(hr_dev, hr_qp);
        }
 
        return ret;
diff --git a/include/uapi/rdma/hns-abi.h b/include/uapi/rdma/hns-abi.h
index c996e151081eb845b42734df878d1216d4418db8..158670da2b2a56d6217cb8f3022fecd3a1acaf38 100644
--- a/include/uapi/rdma/hns-abi.h
+++ b/include/uapi/rdma/hns-abi.h
@@ -73,6 +73,17 @@ struct hns_roce_ib_create_srq_resp {
        __u32   cap_flags; /* Use enum hns_roce_srq_cap_flags */
 };
 
+enum hns_roce_congest_type_flags {
+       HNS_ROCE_CREATE_QP_FLAGS_DCQCN,
+       HNS_ROCE_CREATE_QP_FLAGS_LDCP,
+       HNS_ROCE_CREATE_QP_FLAGS_HC3,
+       HNS_ROCE_CREATE_QP_FLAGS_DIP,
+};
+
+enum hns_roce_create_qp_comp_mask {
+       HNS_ROCE_CREATE_QP_MASK_CONGEST_TYPE = 1 << 0,
+};
+
 struct hns_roce_ib_create_qp {
        __aligned_u64 buf_addr;
        __aligned_u64 db_addr;
@@ -81,6 +92,9 @@ struct hns_roce_ib_create_qp {
        __u8    sq_no_prefetch;
        __u8    reserved[5];
        __aligned_u64 sdb_addr;
+       __aligned_u64 comp_mask; /* Use enum hns_roce_create_qp_comp_mask */
+       __aligned_u64 create_flags;
+       __aligned_u64 cong_type_flags;
 };
 
 enum hns_roce_qp_cap_flags {
@@ -114,6 +128,8 @@ struct hns_roce_ib_alloc_ucontext_resp {
        __u32   reserved;
        __u32   config;
        __u32   max_inline_data;
+       __u8    congest_type;
+       __u8    reserved0[7];
 };
 
 struct hns_roce_ib_alloc_ucontext {