net/mlx5: Bridge, add per-port multicast replication tables
authorVlad Buslov <vladbu@nvidia.com>
Wed, 22 Feb 2023 12:10:02 +0000 (13:10 +0100)
committerSaeed Mahameed <saeedm@nvidia.com>
Wed, 12 Apr 2023 03:57:36 +0000 (20:57 -0700)
Multicast replication requires adding one more level of FDB_BR_OFFLOAD
priority flow tables. The new level is used for per-port multicast-specific
tables that have following flow groups structure (flow highest to lowest
priority):

- Flow group of size one that matches on source port metadata. This will
have a static single rule that prevent packets from being replicated to
their source port.

- Flow group of size one that matches all packets and forwards them to the
port that owns the table.

Initialize the table dynamically on all bridge ports when adding a port to
the bridge that has multicast enabled and on all existing bridge ports when
receiving multicast enable notification.

Signed-off-by: Vlad Buslov <vladbu@nvidia.com>
Reviewed-by: Maor Dickman <maord@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_mcast.c
drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c

index 35436aa9548da2a55d3c932d37a0a4c5b978441e..4bc8c6fc394b248c974e0a3a2e3bb4b1036ac537 100644 (file)
@@ -61,7 +61,7 @@ mlx5_esw_bridge_pkt_reformat_vlan_pop_create(struct mlx5_eswitch *esw)
        return mlx5_packet_reformat_alloc(esw->dev, &reformat_params, MLX5_FLOW_NAMESPACE_FDB);
 }
 
-static struct mlx5_flow_table *
+struct mlx5_flow_table *
 mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw)
 {
        struct mlx5_flow_table_attr ft_attr = {};
@@ -1506,6 +1506,15 @@ static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16
        port->bridge = bridge;
        port->flags |= flags;
        xa_init(&port->vlans);
+
+       err = mlx5_esw_bridge_port_mcast_init(port);
+       if (err) {
+               esw_warn(esw->dev,
+                        "Failed to initialize port multicast (vport=%u,esw_owner_vhca_id=%u,err=%d)\n",
+                        port->vport_num, port->esw_owner_vhca_id, err);
+               goto err_port_mcast;
+       }
+
        err = mlx5_esw_bridge_port_insert(port, br_offloads);
        if (err) {
                esw_warn(esw->dev,
@@ -1518,6 +1527,8 @@ static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16
        return 0;
 
 err_port_insert:
+       mlx5_esw_bridge_port_mcast_cleanup(port);
+err_port_mcast:
        kvfree(port);
        return err;
 }
@@ -1535,6 +1546,7 @@ static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_off
 
        trace_mlx5_esw_bridge_vport_cleanup(port);
        mlx5_esw_bridge_port_vlans_flush(port, bridge);
+       mlx5_esw_bridge_port_mcast_cleanup(port);
        mlx5_esw_bridge_port_erase(port, br_offloads);
        kvfree(port);
        mlx5_esw_bridge_put(br_offloads, bridge);
index d5a89a86c9e84fa7adb576dfc51fc381f6bcc396..4f54cb41ed198699f5c38371af03d7802ad6e3b0 100644 (file)
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
 
+#include "lib/devcom.h"
 #include "bridge.h"
 #include "eswitch.h"
 #include "bridge_priv.h"
 
+static int mlx5_esw_bridge_port_mcast_fts_init(struct mlx5_esw_bridge_port *port,
+                                              struct mlx5_esw_bridge *bridge)
+{
+       struct mlx5_eswitch *esw = bridge->br_offloads->esw;
+       struct mlx5_flow_table *mcast_ft;
+
+       mcast_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE,
+                                               MLX5_ESW_BRIDGE_LEVEL_MCAST_TABLE,
+                                               esw);
+       if (IS_ERR(mcast_ft))
+               return PTR_ERR(mcast_ft);
+
+       port->mcast.ft = mcast_ft;
+       return 0;
+}
+
+static void mlx5_esw_bridge_port_mcast_fts_cleanup(struct mlx5_esw_bridge_port *port)
+{
+       if (port->mcast.ft)
+               mlx5_destroy_flow_table(port->mcast.ft);
+       port->mcast.ft = NULL;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_mcast_filter_fg_create(struct mlx5_eswitch *esw,
+                                      struct mlx5_flow_table *mcast_ft)
+{
+       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+       struct mlx5_flow_group *fg;
+       u32 *in, *match;
+
+       in = kvzalloc(inlen, GFP_KERNEL);
+       if (!in)
+               return ERR_PTR(-ENOMEM);
+
+       MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_MISC_PARAMETERS_2);
+       match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria);
+
+       MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0,
+                mlx5_eswitch_get_vport_metadata_mask());
+
+       MLX5_SET(create_flow_group_in, in, start_flow_index,
+                MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_FROM);
+       MLX5_SET(create_flow_group_in, in, end_flow_index,
+                MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO);
+
+       fg = mlx5_create_flow_group(mcast_ft, in);
+       kvfree(in);
+       if (IS_ERR(fg))
+               esw_warn(esw->dev,
+                        "Failed to create filter flow group for bridge mcast table (err=%pe)\n",
+                        fg);
+
+       return fg;
+}
+
+static struct mlx5_flow_group *
+mlx5_esw_bridge_mcast_fwd_fg_create(struct mlx5_eswitch *esw,
+                                   struct mlx5_flow_table *mcast_ft)
+{
+       int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+       struct mlx5_flow_group *fg;
+       u32 *in;
+
+       in = kvzalloc(inlen, GFP_KERNEL);
+       if (!in)
+               return ERR_PTR(-ENOMEM);
+
+       MLX5_SET(create_flow_group_in, in, start_flow_index,
+                MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM);
+       MLX5_SET(create_flow_group_in, in, end_flow_index,
+                MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO);
+
+       fg = mlx5_create_flow_group(mcast_ft, in);
+       kvfree(in);
+       if (IS_ERR(fg))
+               esw_warn(esw->dev,
+                        "Failed to create forward flow group for bridge mcast table (err=%pe)\n",
+                        fg);
+
+       return fg;
+}
+
+static int mlx5_esw_bridge_port_mcast_fgs_init(struct mlx5_esw_bridge_port *port)
+{
+       struct mlx5_eswitch *esw = port->bridge->br_offloads->esw;
+       struct mlx5_flow_table *mcast_ft = port->mcast.ft;
+       struct mlx5_flow_group *fwd_fg, *filter_fg;
+       int err;
+
+       filter_fg = mlx5_esw_bridge_mcast_filter_fg_create(esw, mcast_ft);
+       if (IS_ERR(filter_fg))
+               return PTR_ERR(filter_fg);
+
+       fwd_fg = mlx5_esw_bridge_mcast_fwd_fg_create(esw, mcast_ft);
+       if (IS_ERR(fwd_fg)) {
+               err = PTR_ERR(fwd_fg);
+               goto err_fwd_fg;
+       }
+
+       port->mcast.filter_fg = filter_fg;
+       port->mcast.fwd_fg = fwd_fg;
+
+       return 0;
+
+err_fwd_fg:
+       mlx5_destroy_flow_group(filter_fg);
+       return err;
+}
+
+static void mlx5_esw_bridge_port_mcast_fgs_cleanup(struct mlx5_esw_bridge_port *port)
+{
+       if (port->mcast.fwd_fg)
+               mlx5_destroy_flow_group(port->mcast.fwd_fg);
+       port->mcast.fwd_fg = NULL;
+       if (port->mcast.filter_fg)
+               mlx5_destroy_flow_group(port->mcast.filter_fg);
+       port->mcast.filter_fg = NULL;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mcast_flow_with_esw_create(struct mlx5_esw_bridge_port *port,
+                                          struct mlx5_eswitch *esw)
+{
+       struct mlx5_flow_act flow_act = {
+               .action = MLX5_FLOW_CONTEXT_ACTION_DROP,
+               .flags = FLOW_ACT_NO_APPEND,
+       };
+       struct mlx5_flow_spec *rule_spec;
+       struct mlx5_flow_handle *handle;
+
+       rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+       if (!rule_spec)
+               return ERR_PTR(-ENOMEM);
+
+       rule_spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS_2;
+
+       MLX5_SET(fte_match_param, rule_spec->match_criteria,
+                misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask());
+       MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0,
+                mlx5_eswitch_get_vport_metadata_for_match(esw, port->vport_num));
+
+       handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, NULL, 0);
+
+       kvfree(rule_spec);
+       return handle;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mcast_filter_flow_create(struct mlx5_esw_bridge_port *port)
+{
+       return mlx5_esw_bridge_mcast_flow_with_esw_create(port, port->bridge->br_offloads->esw);
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mcast_filter_flow_peer_create(struct mlx5_esw_bridge_port *port)
+{
+       struct mlx5_devcom *devcom = port->bridge->br_offloads->esw->dev->priv.devcom;
+       static struct mlx5_flow_handle *handle;
+       struct mlx5_eswitch *peer_esw;
+
+       peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       if (!peer_esw)
+               return ERR_PTR(-ENODEV);
+
+       handle = mlx5_esw_bridge_mcast_flow_with_esw_create(port, peer_esw);
+
+       mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
+       return handle;
+}
+
+static struct mlx5_flow_handle *
+mlx5_esw_bridge_mcast_fwd_flow_create(struct mlx5_esw_bridge_port *port)
+{
+       struct mlx5_flow_act flow_act = {
+               .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
+               .flags = FLOW_ACT_NO_APPEND,
+       };
+       struct mlx5_flow_destination dest = {
+               .type = MLX5_FLOW_DESTINATION_TYPE_VPORT,
+               .vport.num = port->vport_num,
+       };
+       struct mlx5_esw_bridge *bridge = port->bridge;
+       struct mlx5_flow_spec *rule_spec;
+       struct mlx5_flow_handle *handle;
+
+       rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL);
+       if (!rule_spec)
+               return ERR_PTR(-ENOMEM);
+
+       if (MLX5_CAP_ESW_FLOWTABLE(bridge->br_offloads->esw->dev, flow_source) &&
+           port->vport_num == MLX5_VPORT_UPLINK)
+               rule_spec->flow_context.flow_source =
+                       MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
+
+       if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) {
+               dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+               dest.vport.vhca_id = port->esw_owner_vhca_id;
+       }
+       handle = mlx5_add_flow_rules(port->mcast.ft, rule_spec, &flow_act, &dest, 1);
+
+       kvfree(rule_spec);
+       return handle;
+}
+
+static int mlx5_esw_bridge_port_mcast_fhs_init(struct mlx5_esw_bridge_port *port)
+{
+       struct mlx5_flow_handle *filter_handle, *fwd_handle;
+
+       filter_handle = (port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER) ?
+               mlx5_esw_bridge_mcast_filter_flow_peer_create(port) :
+               mlx5_esw_bridge_mcast_filter_flow_create(port);
+       if (IS_ERR(filter_handle))
+               return PTR_ERR(filter_handle);
+
+       fwd_handle = mlx5_esw_bridge_mcast_fwd_flow_create(port);
+       if (IS_ERR(fwd_handle)) {
+               mlx5_del_flow_rules(filter_handle);
+               return PTR_ERR(fwd_handle);
+       }
+
+       port->mcast.filter_handle = filter_handle;
+       port->mcast.fwd_handle = fwd_handle;
+
+       return 0;
+}
+
+static void mlx5_esw_bridge_port_mcast_fhs_cleanup(struct mlx5_esw_bridge_port *port)
+{
+       if (port->mcast.fwd_handle)
+               mlx5_del_flow_rules(port->mcast.fwd_handle);
+       port->mcast.fwd_handle = NULL;
+       if (port->mcast.filter_handle)
+               mlx5_del_flow_rules(port->mcast.filter_handle);
+       port->mcast.filter_handle = NULL;
+}
+
+int mlx5_esw_bridge_port_mcast_init(struct mlx5_esw_bridge_port *port)
+{
+       struct mlx5_esw_bridge *bridge = port->bridge;
+       int err;
+
+       if (!(bridge->flags & MLX5_ESW_BRIDGE_MCAST_FLAG))
+               return 0;
+
+       err = mlx5_esw_bridge_port_mcast_fts_init(port, bridge);
+       if (err)
+               return err;
+
+       err = mlx5_esw_bridge_port_mcast_fgs_init(port);
+       if (err)
+               goto err_fgs;
+
+       err = mlx5_esw_bridge_port_mcast_fhs_init(port);
+       if (err)
+               goto err_fhs;
+       return err;
+
+err_fhs:
+       mlx5_esw_bridge_port_mcast_fgs_cleanup(port);
+err_fgs:
+       mlx5_esw_bridge_port_mcast_fts_cleanup(port);
+       return err;
+}
+
+void mlx5_esw_bridge_port_mcast_cleanup(struct mlx5_esw_bridge_port *port)
+{
+       mlx5_esw_bridge_port_mcast_fhs_cleanup(port);
+       mlx5_esw_bridge_port_mcast_fgs_cleanup(port);
+       mlx5_esw_bridge_port_mcast_fts_cleanup(port);
+}
+
 static struct mlx5_flow_group *
 mlx5_esw_bridge_ingress_igmp_fg_create(struct mlx5_eswitch *esw,
                                       struct mlx5_flow_table *ingress_ft)
@@ -251,6 +524,51 @@ mlx5_esw_bridge_ingress_mcast_fhs_cleanup(struct mlx5_esw_bridge_offloads *br_of
        br_offloads->igmp_handle = NULL;
 }
 
+static int mlx5_esw_brige_mcast_init(struct mlx5_esw_bridge *bridge)
+{
+       struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+       struct mlx5_esw_bridge_port *port, *failed;
+       unsigned long i;
+       int err;
+
+       xa_for_each(&br_offloads->ports, i, port) {
+               if (port->bridge != bridge)
+                       continue;
+
+               err = mlx5_esw_bridge_port_mcast_init(port);
+               if (err) {
+                       failed = port;
+                       goto err_port;
+               }
+       }
+       return 0;
+
+err_port:
+       xa_for_each(&br_offloads->ports, i, port) {
+               if (port == failed)
+                       break;
+               if (port->bridge != bridge)
+                       continue;
+
+               mlx5_esw_bridge_port_mcast_cleanup(port);
+       }
+       return err;
+}
+
+static void mlx5_esw_brige_mcast_cleanup(struct mlx5_esw_bridge *bridge)
+{
+       struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads;
+       struct mlx5_esw_bridge_port *port;
+       unsigned long i;
+
+       xa_for_each(&br_offloads->ports, i, port) {
+               if (port->bridge != bridge)
+                       continue;
+
+               mlx5_esw_bridge_port_mcast_cleanup(port);
+       }
+}
+
 static int mlx5_esw_brige_mcast_global_enable(struct mlx5_esw_bridge_offloads *br_offloads)
 {
        int err;
@@ -306,11 +624,20 @@ int mlx5_esw_bridge_mcast_enable(struct mlx5_esw_bridge *bridge)
                return err;
 
        bridge->flags |= MLX5_ESW_BRIDGE_MCAST_FLAG;
-       return 0;
+
+       err = mlx5_esw_brige_mcast_init(bridge);
+       if (err) {
+               esw_warn(bridge->br_offloads->esw->dev, "Failed to enable multicast (err=%d)\n",
+                        err);
+               bridge->flags &= ~MLX5_ESW_BRIDGE_MCAST_FLAG;
+               mlx5_esw_brige_mcast_global_disable(bridge->br_offloads);
+       }
+       return err;
 }
 
 void mlx5_esw_bridge_mcast_disable(struct mlx5_esw_bridge *bridge)
 {
+       mlx5_esw_brige_mcast_cleanup(bridge);
        bridge->flags &= ~MLX5_ESW_BRIDGE_MCAST_FLAG;
        mlx5_esw_brige_mcast_global_disable(bridge->br_offloads);
 }
index dbb935db1b3c750f238fae59942cd8788e377146..7fdd719f363c44c10d45ed97572dfb7b8094b973 100644 (file)
@@ -81,9 +81,23 @@ static_assert(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE == 524288);
 
 #define MLX5_ESW_BRIDGE_SKIP_TABLE_SIZE 0
 
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_SIZE 1
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_SIZE 1
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_FROM 0
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO          \
+       (MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_SIZE - 1)
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM           \
+       (MLX5_ESW_BRIDGE_MCAST_TABLE_FILTER_GRP_IDX_TO + 1)
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO                     \
+       (MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_FROM +                 \
+        MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_SIZE - 1)
+
+#define MLX5_ESW_BRIDGE_MCAST_TABLE_SIZE                       \
+       (MLX5_ESW_BRIDGE_MCAST_TABLE_FWD_GRP_IDX_TO + 1)
 enum {
        MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE,
        MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE,
+       MLX5_ESW_BRIDGE_LEVEL_MCAST_TABLE,
        MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE,
 };
 
@@ -138,6 +152,14 @@ struct mlx5_esw_bridge_port {
        u16 flags;
        struct mlx5_esw_bridge *bridge;
        struct xarray vlans;
+       struct {
+               struct mlx5_flow_table *ft;
+               struct mlx5_flow_group *filter_fg;
+               struct mlx5_flow_group *fwd_fg;
+
+               struct mlx5_flow_handle *filter_handle;
+               struct mlx5_flow_handle *fwd_handle;
+       } mcast;
 };
 
 struct mlx5_esw_bridge {
@@ -161,6 +183,12 @@ struct mlx5_esw_bridge {
        u16 vlan_proto;
 };
 
+struct mlx5_flow_table *mlx5_esw_bridge_table_create(int max_fte, u32 level,
+                                                    struct mlx5_eswitch *esw);
+
+int mlx5_esw_bridge_port_mcast_init(struct mlx5_esw_bridge_port *port);
+void mlx5_esw_bridge_port_mcast_cleanup(struct mlx5_esw_bridge_port *port);
+
 int mlx5_esw_bridge_mcast_enable(struct mlx5_esw_bridge *bridge);
 void mlx5_esw_bridge_mcast_disable(struct mlx5_esw_bridge *bridge);
 
index 8e3da9d4fe1c6ad3dec2c4c0a81a46cb3ee04bea..19da02c41616182ca26f8ca1d28eb3fd3e93d1ee 100644 (file)
@@ -2997,7 +2997,7 @@ static int init_fdb_root_ns(struct mlx5_flow_steering *steering)
                goto out_err;
        }
 
-       maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 3);
+       maj_prio = fs_create_prio(&steering->fdb_root_ns->ns, FDB_BR_OFFLOAD, 4);
        if (IS_ERR(maj_prio)) {
                err = PTR_ERR(maj_prio);
                goto out_err;