RDMA/counter: Add optional counter support
author Aharon Landau <aharonl@nvidia.com>
Fri, 8 Oct 2021 12:24:32 +0000 (15:24 +0300)
committer Jason Gunthorpe <jgg@nvidia.com>
Tue, 12 Oct 2021 15:48:05 +0000 (12:48 -0300)
An optional counter is a driver-specific counter that may be dynamically
enabled/disabled.  This enhancement allows drivers to expose counters
which are, for example, mutually exclusive and cannot be enabled at the
same time, counters that might degrade performance, optional debug
counters, etc.

Optional counters are marked with IB_STAT_FLAG_OPTIONAL flag. They are not
exported in sysfs, and must be at the end of all stats, otherwise the
attr->show() in sysfs would get wrong indexes for hwcounters that are
behind optional counters.

Link: https://lore.kernel.org/r/20211008122439.166063-7-markzhang@nvidia.com
Signed-off-by: Aharon Landau <aharonl@nvidia.com>
Signed-off-by: Neta Ostrovsky <netao@nvidia.com>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Mark Zhang <markzhang@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
drivers/infiniband/core/counters.c
drivers/infiniband/core/device.c
drivers/infiniband/core/sysfs.c
include/rdma/ib_verbs.h
include/rdma/rdma_counter.h

index 331cd29f0d61c2bc2a0ba283eb06b3f6fbeb19cd..af59486fe41852533fe2463fb161884ef303d2d1 100644 (file)
@@ -106,6 +106,38 @@ static int __rdma_counter_bind_qp(struct rdma_counter *counter,
        return ret;
 }
 
+int rdma_counter_modify(struct ib_device *dev, u32 port,
+                       unsigned int index, bool enable) /* enable or disable optional counter @index on @port; returns 0, -EOPNOTSUPP, -EINVAL, or the driver's error code */
+{
+       struct rdma_hw_stats *stats;
+       int ret = 0; /* stays 0 when the counter is already in the requested state */
+
+       if (!dev->ops.modify_hw_stat) /* driver did not register a modify_hw_stat op */
+               return -EOPNOTSUPP;
+
+       stats = ib_get_hw_stats_port(dev, port);
+       if (!stats || index >= stats->num_counters ||
+           !(stats->descs[index].flags & IB_STAT_FLAG_OPTIONAL)) /* only in-range counters flagged optional may be toggled */
+               return -EINVAL;
+
+       mutex_lock(&stats->lock); /* held across the state check, the driver call and the bitmap update */
+
+       if (enable != test_bit(index, stats->is_disabled)) /* a set bit means "disabled", so inequality means nothing needs to change */
+               goto out;
+
+       ret = dev->ops.modify_hw_stat(dev, port, index, enable);
+       if (ret) /* driver call failed: leave is_disabled untouched and report the error */
+               goto out;
+
+       if (enable) /* record the new state only after the driver call succeeded */
+               clear_bit(index, stats->is_disabled);
+       else
+               set_bit(index, stats->is_disabled);
+out:
+       mutex_unlock(&stats->lock);
+       return ret;
+}
+
 static struct rdma_counter *alloc_and_bind(struct ib_device *dev, u32 port,
                                           struct ib_qp *qp,
                                           enum rdma_nl_counter_mode mode)
index f4814bb7f082f16edb8484bfefaf70d6d7409231..22a4adda7981d25f848ece2a97e0ae761f92b816 100644 (file)
@@ -2676,6 +2676,7 @@ void ib_set_device_ops(struct ib_device *dev, const struct ib_device_ops *ops)
        SET_DEVICE_OP(dev_ops, modify_cq);
        SET_DEVICE_OP(dev_ops, modify_device);
        SET_DEVICE_OP(dev_ops, modify_flow_action_esp);
+       SET_DEVICE_OP(dev_ops, modify_hw_stat);
        SET_DEVICE_OP(dev_ops, modify_port);
        SET_DEVICE_OP(dev_ops, modify_qp);
        SET_DEVICE_OP(dev_ops, modify_srq);
index 8d831d4fd2adfe0d354b84a8762de624bb958d48..1bf3aea4b71eaa948375f9176fa1187baef247f7 100644 (file)
@@ -934,7 +934,8 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
 {
        struct hw_stats_device_attribute *attr;
        struct hw_stats_device_data *data;
-       int i, ret;
+       bool opstat_skipped = false;
+       int i, ret, pos = 0;
 
        data = alloc_hw_stats_device(ibdev);
        if (IS_ERR(data)) {
@@ -955,16 +956,23 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
        data->stats->timestamp = jiffies;
 
        for (i = 0; i < data->stats->num_counters; i++) {
-               attr = &data->attrs[i];
+               if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
+                       opstat_skipped = true;
+                       continue;
+               }
+
+               WARN_ON(opstat_skipped);
+               attr = &data->attrs[pos];
                sysfs_attr_init(&attr->attr.attr);
                attr->attr.attr.name = data->stats->descs[i].name;
                attr->attr.attr.mode = 0444;
                attr->attr.show = hw_stat_device_show;
                attr->show = show_hw_stats;
-               data->group.attrs[i] = &attr->attr.attr;
+               data->group.attrs[pos] = &attr->attr.attr;
+               pos++;
        }
 
-       attr = &data->attrs[i];
+       attr = &data->attrs[pos];
        sysfs_attr_init(&attr->attr.attr);
        attr->attr.attr.name = "lifespan";
        attr->attr.attr.mode = 0644;
@@ -972,7 +980,7 @@ int ib_setup_device_attrs(struct ib_device *ibdev)
        attr->show = show_stats_lifespan;
        attr->attr.store = hw_stat_device_store;
        attr->store = set_stats_lifespan;
-       data->group.attrs[i] = &attr->attr.attr;
+       data->group.attrs[pos] = &attr->attr.attr;
        for (i = 0; i != ARRAY_SIZE(ibdev->groups); i++)
                if (!ibdev->groups[i]) {
                        ibdev->groups[i] = &data->group;
@@ -1027,7 +1035,8 @@ static int setup_hw_port_stats(struct ib_port *port,
 {
        struct hw_stats_port_attribute *attr;
        struct hw_stats_port_data *data;
-       int i, ret;
+       bool opstat_skipped = false;
+       int i, ret, pos = 0;
 
        data = alloc_hw_stats_port(port, group);
        if (IS_ERR(data))
@@ -1045,16 +1054,23 @@ static int setup_hw_port_stats(struct ib_port *port,
        data->stats->timestamp = jiffies;
 
        for (i = 0; i < data->stats->num_counters; i++) {
-               attr = &data->attrs[i];
+               if (data->stats->descs[i].flags & IB_STAT_FLAG_OPTIONAL) {
+                       opstat_skipped = true;
+                       continue;
+               }
+
+               WARN_ON(opstat_skipped);
+               attr = &data->attrs[pos];
                sysfs_attr_init(&attr->attr.attr);
                attr->attr.attr.name = data->stats->descs[i].name;
                attr->attr.attr.mode = 0444;
                attr->attr.show = hw_stat_port_show;
                attr->show = show_hw_stats;
-               group->attrs[i] = &attr->attr.attr;
+               group->attrs[pos] = &attr->attr.attr;
+               pos++;
        }
 
-       attr = &data->attrs[i];
+       attr = &data->attrs[pos];
        sysfs_attr_init(&attr->attr.attr);
        attr->attr.attr.name = "lifespan";
        attr->attr.attr.mode = 0644;
@@ -1062,7 +1078,7 @@ static int setup_hw_port_stats(struct ib_port *port,
        attr->show = show_stats_lifespan;
        attr->attr.store = hw_stat_port_store;
        attr->store = set_stats_lifespan;
-       group->attrs[i] = &attr->attr.attr;
+       group->attrs[pos] = &attr->attr.attr;
 
        port->hw_stats_data = data;
        return 0;
index ae467365706bbeec7831eb0b5615808784c0b7ed..2207f60b002f772d71aed05a22a83de40e5388df 100644 (file)
@@ -545,12 +545,18 @@ enum ib_port_speed {
        IB_SPEED_NDR    = 128,
 };
 
+enum ib_stat_flag {
+       IB_STAT_FLAG_OPTIONAL = 1 << 0, /* counter may be enabled/disabled at runtime; such counters get no sysfs attribute and must come after all non-optional counters */
+};
+
 /**
  * struct rdma_stat_desc
  * @name - The name of the counter
+ * @flags - Flags of the counter; For example, IB_STAT_FLAG_OPTIONAL
  */
 struct rdma_stat_desc {
        const char *name;
+       unsigned int flags; /* tested with '&' against enum ib_stat_flag bits such as IB_STAT_FLAG_OPTIONAL */
 };
 
 /**
@@ -2562,6 +2568,13 @@ struct ib_device_ops {
        int (*get_hw_stats)(struct ib_device *device,
                            struct rdma_hw_stats *stats, u32 port, int index);
 
+       /**
+        * modify_hw_stat - Modify the counter configuration
+        * @enable: true/false when enable/disable a counter
+        * Return codes - 0 on success or error code otherwise.
+        */
+       int (*modify_hw_stat)(struct ib_device *device, u32 port,
+                             unsigned int counter_index, bool enable);
        /**
         * Allows rdma drivers to add their own restrack attributes.
         */
index 0295b22cd1cd8e2fcab9b4f19ef896917277fea8..45d5481a7846abc566a31ecae789d3037ab8af06 100644 (file)
@@ -63,4 +63,6 @@ int rdma_counter_get_mode(struct ib_device *dev, u32 port,
                          enum rdma_nl_counter_mode *mode,
                          enum rdma_nl_counter_mask *mask);
 
+int rdma_counter_modify(struct ib_device *dev, u32 port,
+                       unsigned int index, bool enable);
 #endif /* _RDMA_COUNTER_H_ */