From: Maor Gottlieb
Date: Mon, 5 Jul 2021 12:34:00 +0000 (+0300)
Subject: net/mlx5: Lag, move lag files into directory
X-Git-Url: http://git.maquefel.me/?a=commitdiff_plain;h=3d677735d3b7f00f42cfd57a51d3d2109cf65f87;p=linux.git

net/mlx5: Lag, move lag files into directory

Downstream patches add another lag-related file, so it makes sense
to have all the lag files in a dedicated directory.

Signed-off-by: Maor Gottlieb
Reviewed-by: Mark Bloch
Signed-off-by: Saeed Mahameed
---

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index a151575be51f2..fb123e26927d7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -14,7 +14,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
 mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
 		health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
 		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
-		fs_counters.o fs_ft_pool.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
+		fs_counters.o fs_ft_pool.o rl.o lag/lag.o dev.o events.o wq.o lib/gid.o \
 		lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \
 		diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \
 		fw_reset.o qos.o lib/tout.o
@@ -37,7 +37,7 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS) += en_arfs.o
 mlx5_core-$(CONFIG_MLX5_EN_RXNFC) += en_fs_ethtool.o
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
 mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag_mp.o lib/geneve.o lib/port_tun.o \
+mlx5_core-$(CONFIG_MLX5_ESWITCH) += lag/mp.o lib/geneve.o lib/port_tun.o \
 					en_rep.o en/rep/bond.o en/mod_hdr.o \
 					en/mapping.o
 mlx5_core-$(CONFIG_MLX5_CLS_ACT) += en_tc.o en/rep/tc.o en/rep/neigh.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
deleted file mode 100644
index f35c8ba48aac8..0000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ /dev/null
@@ -1,1082 +0,0 @@
-/*
- * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- * Redistribution and use in source and binary forms, with or
- * without modification, are permitted provided that the following
- * conditions are met:
- *
- *  - Redistributions of source code must retain the above
- *    copyright notice, this list of conditions and the following
- *    disclaimer.
- *
- *  - Redistributions in binary form must reproduce the above
- *    copyright notice, this list of conditions and the following
- *    disclaimer in the documentation and/or other materials
- *    provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */ - -#include -#include -#include -#include -#include "lib/devcom.h" -#include "mlx5_core.h" -#include "eswitch.h" -#include "lag.h" -#include "lag_mp.h" - -/* General purpose, use for short periods of time. - * Beware of lock dependencies (preferably, no locks should be acquired - * under it). - */ -static DEFINE_SPINLOCK(lag_lock); - -static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, - u8 remap_port2, bool shared_fdb) -{ - u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; - void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); - - MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); - - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); - MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb); - - return mlx5_cmd_exec_in(dev, create_lag, in); -} - -static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1, - u8 remap_port2) -{ - u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; - void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); - - MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); - MLX5_SET(modify_lag_in, in, field_select, 0x1); - - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); - MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); - - return mlx5_cmd_exec_in(dev, modify_lag, in); -} - -int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev) -{ - u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {}; - - MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG); - - return mlx5_cmd_exec_in(dev, create_vport_lag, in); -} -EXPORT_SYMBOL(mlx5_cmd_create_vport_lag); - -int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) -{ - u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {}; - - MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG); - - return mlx5_cmd_exec_in(dev, destroy_vport_lag, in); -} -EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); - -static int mlx5_lag_netdev_event(struct notifier_block *this, - unsigned long event, void *ptr); -static void mlx5_do_bond_work(struct work_struct *work); - -static void mlx5_ldev_free(struct kref *ref) -{ - struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref); - - if (ldev->nb.notifier_call) - unregister_netdevice_notifier_net(&init_net, &ldev->nb); - mlx5_lag_mp_cleanup(ldev); - cancel_delayed_work_sync(&ldev->bond_work); - destroy_workqueue(ldev->wq); - kfree(ldev); -} - -static void mlx5_ldev_put(struct mlx5_lag *ldev) -{ - kref_put(&ldev->ref, mlx5_ldev_free); -} - -static void mlx5_ldev_get(struct mlx5_lag *ldev) -{ - kref_get(&ldev->ref); -} - -static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) -{ - struct mlx5_lag *ldev; - int err; - - ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); - if (!ldev) - return NULL; - - ldev->wq = create_singlethread_workqueue("mlx5_lag"); - if (!ldev->wq) { - kfree(ldev); - return NULL; - } - - kref_init(&ldev->ref); - INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); - - ldev->nb.notifier_call = mlx5_lag_netdev_event; - if (register_netdevice_notifier_net(&init_net, &ldev->nb)) { - ldev->nb.notifier_call = NULL; - mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); - } - - err = mlx5_lag_mp_init(ldev); - if (err) - mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", - err); - - return ldev; -} - -int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, - struct net_device *ndev) -{ - int i; - - for (i = 0; i < MLX5_MAX_PORTS; i++) - if (ldev->pf[i].netdev == ndev) - return i; - - return -ENOENT; -} - 
-static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) -{ - return !!(ldev->flags & MLX5_LAG_FLAG_ROCE); -} - -static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) -{ - return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV); -} - -static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, - u8 *port1, u8 *port2) -{ - bool p1en; - bool p2en; - - p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled && - tracker->netdev_state[MLX5_LAG_P1].link_up; - - p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled && - tracker->netdev_state[MLX5_LAG_P2].link_up; - - *port1 = 1; - *port2 = 2; - if ((!p1en && !p2en) || (p1en && p2en)) - return; - - if (p1en) - *port2 = 1; - else - *port1 = 2; -} - -void mlx5_modify_lag(struct mlx5_lag *ldev, - struct lag_tracker *tracker) -{ - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - u8 v2p_port1, v2p_port2; - int err; - - mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, - &v2p_port2); - - if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] || - v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) { - ldev->v2p_map[MLX5_LAG_P1] = v2p_port1; - ldev->v2p_map[MLX5_LAG_P2] = v2p_port2; - - mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d", - ldev->v2p_map[MLX5_LAG_P1], - ldev->v2p_map[MLX5_LAG_P2]); - - err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); - if (err) - mlx5_core_err(dev0, - "Failed to modify LAG (%d)\n", - err); - } -} - -static int mlx5_create_lag(struct mlx5_lag *ldev, - struct lag_tracker *tracker, - bool shared_fdb) -{ - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; - u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; - int err; - - mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1], - &ldev->v2p_map[MLX5_LAG_P2]); - - mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d", - ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2], - shared_fdb); - - err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1], - ldev->v2p_map[MLX5_LAG_P2], shared_fdb); - if (err) { - mlx5_core_err(dev0, - "Failed to create LAG (%d)\n", - err); - return err; - } - - if (shared_fdb) { - err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch, - dev1->priv.eswitch); - if (err) - mlx5_core_err(dev0, "Can't enable single FDB mode\n"); - else - mlx5_core_info(dev0, "Operation mode is single FDB\n"); - } - - if (err) { - MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); - if (mlx5_cmd_exec_in(dev0, destroy_lag, in)) - mlx5_core_err(dev0, - "Failed to deactivate RoCE LAG; driver restart required\n"); - } - - return err; -} - -int mlx5_activate_lag(struct mlx5_lag *ldev, - struct lag_tracker *tracker, - u8 flags, - bool shared_fdb) -{ - bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE); - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - int err; - - err = mlx5_create_lag(ldev, tracker, shared_fdb); - if (err) { - if (roce_lag) { - mlx5_core_err(dev0, - "Failed to activate RoCE LAG\n"); - } else { - mlx5_core_err(dev0, - "Failed to activate VF LAG\n" - "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); - } - return err; - } - - ldev->flags |= flags; - ldev->shared_fdb = shared_fdb; - return 0; -} - -static int mlx5_deactivate_lag(struct mlx5_lag *ldev) -{ - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; - bool roce_lag = __mlx5_lag_is_roce(ldev); - int err; - - ldev->flags &= ~MLX5_LAG_MODE_FLAGS; - mlx5_lag_mp_reset(ldev); - - if (ldev->shared_fdb) { - 
mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch, - ldev->pf[MLX5_LAG_P2].dev->priv.eswitch); - ldev->shared_fdb = false; - } - - MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); - err = mlx5_cmd_exec_in(dev0, destroy_lag, in); - if (err) { - if (roce_lag) { - mlx5_core_err(dev0, - "Failed to deactivate RoCE LAG; driver restart required\n"); - } else { - mlx5_core_err(dev0, - "Failed to deactivate VF LAG; driver restart required\n" - "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); - } - } - - return err; -} - -static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) -{ - if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev) - return false; - -#ifdef CONFIG_MLX5_ESWITCH - return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev, - ldev->pf[MLX5_LAG_P2].dev); -#else - return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) && - !mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev)); -#endif -} - -static void mlx5_lag_add_devices(struct mlx5_lag *ldev) -{ - int i; - - for (i = 0; i < MLX5_MAX_PORTS; i++) { - if (!ldev->pf[i].dev) - continue; - - if (ldev->pf[i].dev->priv.flags & - MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) - continue; - - ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; - mlx5_rescan_drivers_locked(ldev->pf[i].dev); - } -} - -static void mlx5_lag_remove_devices(struct mlx5_lag *ldev) -{ - int i; - - for (i = 0; i < MLX5_MAX_PORTS; i++) { - if (!ldev->pf[i].dev) - continue; - - if (ldev->pf[i].dev->priv.flags & - MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) - continue; - - ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; - mlx5_rescan_drivers_locked(ldev->pf[i].dev); - } -} - -static void mlx5_disable_lag(struct mlx5_lag *ldev) -{ - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; - bool shared_fdb = ldev->shared_fdb; - bool roce_lag; - int err; - - roce_lag = __mlx5_lag_is_roce(ldev); - - if (shared_fdb) { - mlx5_lag_remove_devices(ldev); - } else if (roce_lag) { - if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) { - dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; - mlx5_rescan_drivers_locked(dev0); - } - mlx5_nic_vport_disable_roce(dev1); - } - - err = mlx5_deactivate_lag(ldev); - if (err) - return; - - if (shared_fdb || roce_lag) - mlx5_lag_add_devices(ldev); - - if (shared_fdb) { - if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) - mlx5_eswitch_reload_reps(dev0->priv.eswitch); - if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) - mlx5_eswitch_reload_reps(dev1->priv.eswitch); - } -} - -static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) -{ - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; - - if (is_mdev_switchdev_mode(dev0) && - is_mdev_switchdev_mode(dev1) && - mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) && - mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) && - mlx5_devcom_is_paired(dev0->priv.devcom, - MLX5_DEVCOM_ESW_OFFLOADS) && - MLX5_CAP_GEN(dev1, lag_native_fdb_selection) && - MLX5_CAP_ESW(dev1, root_ft_on_other_esw) && - MLX5_CAP_ESW(dev0, esw_shared_ingress_acl)) - return true; - - return false; -} - -static void mlx5_do_bond(struct mlx5_lag *ldev) -{ - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; - struct lag_tracker tracker; - bool do_bond, roce_lag; - int err; - - if (!mlx5_lag_is_ready(ldev)) { - 
do_bond = false; - } else { - tracker = ldev->tracker; - - do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); - } - - if (do_bond && !__mlx5_lag_is_active(ldev)) { - bool shared_fdb = mlx5_shared_fdb_supported(ldev); - - roce_lag = !mlx5_sriov_is_enabled(dev0) && - !mlx5_sriov_is_enabled(dev1); - -#ifdef CONFIG_MLX5_ESWITCH - roce_lag = roce_lag && - dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE && - dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE; -#endif - - if (shared_fdb || roce_lag) - mlx5_lag_remove_devices(ldev); - - err = mlx5_activate_lag(ldev, &tracker, - roce_lag ? MLX5_LAG_FLAG_ROCE : - MLX5_LAG_FLAG_SRIOV, - shared_fdb); - if (err) { - if (shared_fdb || roce_lag) - mlx5_lag_add_devices(ldev); - - return; - } else if (roce_lag) { - dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; - mlx5_rescan_drivers_locked(dev0); - mlx5_nic_vport_enable_roce(dev1); - } else if (shared_fdb) { - dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; - mlx5_rescan_drivers_locked(dev0); - - err = mlx5_eswitch_reload_reps(dev0->priv.eswitch); - if (!err) - err = mlx5_eswitch_reload_reps(dev1->priv.eswitch); - - if (err) { - dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; - mlx5_rescan_drivers_locked(dev0); - mlx5_deactivate_lag(ldev); - mlx5_lag_add_devices(ldev); - mlx5_eswitch_reload_reps(dev0->priv.eswitch); - mlx5_eswitch_reload_reps(dev1->priv.eswitch); - mlx5_core_err(dev0, "Failed to enable lag\n"); - return; - } - } - } else if (do_bond && __mlx5_lag_is_active(ldev)) { - mlx5_modify_lag(ldev, &tracker); - } else if (!do_bond && __mlx5_lag_is_active(ldev)) { - mlx5_disable_lag(ldev); - } -} - -static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) -{ - queue_delayed_work(ldev->wq, &ldev->bond_work, delay); -} - -static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0, - struct mlx5_core_dev *dev1) -{ - if (dev0) - mlx5_esw_lock(dev0->priv.eswitch); - if (dev1) - mlx5_esw_lock(dev1->priv.eswitch); -} - -static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0, - struct mlx5_core_dev *dev1) -{ - if (dev1) - mlx5_esw_unlock(dev1->priv.eswitch); - if (dev0) - mlx5_esw_unlock(dev0->priv.eswitch); -} - -static void mlx5_do_bond_work(struct work_struct *work) -{ - struct delayed_work *delayed_work = to_delayed_work(work); - struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag, - bond_work); - struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; - struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; - int status; - - status = mlx5_dev_list_trylock(); - if (!status) { - mlx5_queue_bond_work(ldev, HZ); - return; - } - - if (ldev->mode_changes_in_progress) { - mlx5_dev_list_unlock(); - mlx5_queue_bond_work(ldev, HZ); - return; - } - - mlx5_lag_lock_eswitches(dev0, dev1); - mlx5_do_bond(ldev); - mlx5_lag_unlock_eswitches(dev0, dev1); - mlx5_dev_list_unlock(); -} - -static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, - struct lag_tracker *tracker, - struct net_device *ndev, - struct netdev_notifier_changeupper_info *info) -{ - struct net_device *upper = info->upper_dev, *ndev_tmp; - struct netdev_lag_upper_info *lag_upper_info = NULL; - bool is_bonded, is_in_lag, mode_supported; - int bond_status = 0; - int num_slaves = 0; - int idx; - - if (!netif_is_lag_master(upper)) - return 0; - - if (info->linking) - lag_upper_info = info->upper_info; - - /* The event may still be of interest if the slave does not belong to - * us, but is enslaved to a master which has one or more of our netdevs - * as slaves (e.g., if a new slave 
is added to a master that bonds two - * of our netdevs, we should unbond). - */ - rcu_read_lock(); - for_each_netdev_in_bond_rcu(upper, ndev_tmp) { - idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); - if (idx >= 0) - bond_status |= (1 << idx); - - num_slaves++; - } - rcu_read_unlock(); - - /* None of this lagdev's netdevs are slaves of this master. */ - if (!(bond_status & 0x3)) - return 0; - - if (lag_upper_info) - tracker->tx_type = lag_upper_info->tx_type; - - /* Determine bonding status: - * A device is considered bonded if both its physical ports are slaves - * of the same lag master, and only them. - */ - is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3; - - if (!mlx5_lag_is_ready(ldev) && is_in_lag) { - NL_SET_ERR_MSG_MOD(info->info.extack, - "Can't activate LAG offload, PF is configured with more than 64 VFs"); - return 0; - } - - /* Lag mode must be activebackup or hash. */ - mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP || - tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH; - - if (is_in_lag && !mode_supported) - NL_SET_ERR_MSG_MOD(info->info.extack, - "Can't activate LAG offload, TX type isn't supported"); - - is_bonded = is_in_lag && mode_supported; - if (tracker->is_bonded != is_bonded) { - tracker->is_bonded = is_bonded; - return 1; - } - - return 0; -} - -static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, - struct lag_tracker *tracker, - struct net_device *ndev, - struct netdev_notifier_changelowerstate_info *info) -{ - struct netdev_lag_lower_state_info *lag_lower_info; - int idx; - - if (!netif_is_lag_port(ndev)) - return 0; - - idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev); - if (idx < 0) - return 0; - - /* This information is used to determine virtual to physical - * port mapping. 
- */ - lag_lower_info = info->lower_state_info; - if (!lag_lower_info) - return 0; - - tracker->netdev_state[idx] = *lag_lower_info; - - return 1; -} - -static int mlx5_lag_netdev_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct net_device *ndev = netdev_notifier_info_to_dev(ptr); - struct lag_tracker tracker; - struct mlx5_lag *ldev; - int changed = 0; - - if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE)) - return NOTIFY_DONE; - - ldev = container_of(this, struct mlx5_lag, nb); - - if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE) - return NOTIFY_DONE; - - tracker = ldev->tracker; - - switch (event) { - case NETDEV_CHANGEUPPER: - changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev, - ptr); - break; - case NETDEV_CHANGELOWERSTATE: - changed = mlx5_handle_changelowerstate_event(ldev, &tracker, - ndev, ptr); - break; - } - - ldev->tracker = tracker; - - if (changed) - mlx5_queue_bond_work(ldev, 0); - - return NOTIFY_DONE; -} - -static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, - struct mlx5_core_dev *dev, - struct net_device *netdev) -{ - unsigned int fn = mlx5_get_dev_index(dev); - - if (fn >= MLX5_MAX_PORTS) - return; - - spin_lock(&lag_lock); - ldev->pf[fn].netdev = netdev; - ldev->tracker.netdev_state[fn].link_up = 0; - ldev->tracker.netdev_state[fn].tx_enabled = 0; - spin_unlock(&lag_lock); -} - -static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, - struct net_device *netdev) -{ - int i; - - spin_lock(&lag_lock); - for (i = 0; i < MLX5_MAX_PORTS; i++) { - if (ldev->pf[i].netdev == netdev) { - ldev->pf[i].netdev = NULL; - break; - } - } - spin_unlock(&lag_lock); -} - -static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, - struct mlx5_core_dev *dev) -{ - unsigned int fn = mlx5_get_dev_index(dev); - - if (fn >= MLX5_MAX_PORTS) - return; - - ldev->pf[fn].dev = dev; - dev->priv.lag = ldev; -} - -/* Must be called with intf_mutex held */ -static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, - struct mlx5_core_dev *dev) -{ - int i; - - for (i = 0; i < MLX5_MAX_PORTS; i++) - if (ldev->pf[i].dev == dev) - break; - - if (i == MLX5_MAX_PORTS) - return; - - ldev->pf[i].dev = NULL; - dev->priv.lag = NULL; -} - -/* Must be called with intf_mutex held */ -static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) -{ - struct mlx5_lag *ldev = NULL; - struct mlx5_core_dev *tmp_dev; - - if (!MLX5_CAP_GEN(dev, vport_group_manager) || - !MLX5_CAP_GEN(dev, lag_master) || - MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS) - return 0; - - tmp_dev = mlx5_get_next_phys_dev(dev); - if (tmp_dev) - ldev = tmp_dev->priv.lag; - - if (!ldev) { - ldev = mlx5_lag_dev_alloc(dev); - if (!ldev) { - mlx5_core_err(dev, "Failed to alloc lag dev\n"); - return 0; - } - } else { - if (ldev->mode_changes_in_progress) - return -EAGAIN; - mlx5_ldev_get(ldev); - } - - mlx5_ldev_add_mdev(ldev, dev); - - return 0; -} - -void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) -{ - struct mlx5_lag *ldev; - - ldev = mlx5_lag_dev(dev); - if (!ldev) - return; - -recheck: - mlx5_dev_list_lock(); - if (ldev->mode_changes_in_progress) { - mlx5_dev_list_unlock(); - msleep(100); - goto recheck; - } - mlx5_ldev_remove_mdev(ldev, dev); - mlx5_dev_list_unlock(); - mlx5_ldev_put(ldev); -} - -void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) -{ - int err; - -recheck: - mlx5_dev_list_lock(); - err = __mlx5_lag_dev_add_mdev(dev); - if (err) { - mlx5_dev_list_unlock(); - msleep(100); - goto recheck; - } - mlx5_dev_list_unlock(); -} - -/* Must be called 
with intf_mutex held */ -void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, - struct net_device *netdev) -{ - struct mlx5_lag *ldev; - - ldev = mlx5_lag_dev(dev); - if (!ldev) - return; - - mlx5_ldev_remove_netdev(ldev, netdev); - ldev->flags &= ~MLX5_LAG_FLAG_READY; - - if (__mlx5_lag_is_active(ldev)) - mlx5_queue_bond_work(ldev, 0); -} - -/* Must be called with intf_mutex held */ -void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, - struct net_device *netdev) -{ - struct mlx5_lag *ldev; - int i; - - ldev = mlx5_lag_dev(dev); - if (!ldev) - return; - - mlx5_ldev_add_netdev(ldev, dev, netdev); - - for (i = 0; i < MLX5_MAX_PORTS; i++) - if (!ldev->pf[i].dev) - break; - - if (i >= MLX5_MAX_PORTS) - ldev->flags |= MLX5_LAG_FLAG_READY; - mlx5_queue_bond_work(ldev, 0); -} - -bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) -{ - struct mlx5_lag *ldev; - bool res; - - spin_lock(&lag_lock); - ldev = mlx5_lag_dev(dev); - res = ldev && __mlx5_lag_is_roce(ldev); - spin_unlock(&lag_lock); - - return res; -} -EXPORT_SYMBOL(mlx5_lag_is_roce); - -bool mlx5_lag_is_active(struct mlx5_core_dev *dev) -{ - struct mlx5_lag *ldev; - bool res; - - spin_lock(&lag_lock); - ldev = mlx5_lag_dev(dev); - res = ldev && __mlx5_lag_is_active(ldev); - spin_unlock(&lag_lock); - - return res; -} -EXPORT_SYMBOL(mlx5_lag_is_active); - -bool mlx5_lag_is_master(struct mlx5_core_dev *dev) -{ - struct mlx5_lag *ldev; - bool res; - - spin_lock(&lag_lock); - ldev = mlx5_lag_dev(dev); - res = ldev && __mlx5_lag_is_active(ldev) && - dev == ldev->pf[MLX5_LAG_P1].dev; - spin_unlock(&lag_lock); - - return res; -} -EXPORT_SYMBOL(mlx5_lag_is_master); - -bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) -{ - struct mlx5_lag *ldev; - bool res; - - spin_lock(&lag_lock); - ldev = mlx5_lag_dev(dev); - res = ldev && __mlx5_lag_is_sriov(ldev); - spin_unlock(&lag_lock); - - return res; -} -EXPORT_SYMBOL(mlx5_lag_is_sriov); - -bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) -{ - struct mlx5_lag *ldev; - bool res; - - spin_lock(&lag_lock); - ldev = mlx5_lag_dev(dev); - res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb; - spin_unlock(&lag_lock); - - return res; -} -EXPORT_SYMBOL(mlx5_lag_is_shared_fdb); - -void mlx5_lag_disable_change(struct mlx5_core_dev *dev) -{ - struct mlx5_core_dev *dev0; - struct mlx5_core_dev *dev1; - struct mlx5_lag *ldev; - - ldev = mlx5_lag_dev(dev); - if (!ldev) - return; - - mlx5_dev_list_lock(); - - dev0 = ldev->pf[MLX5_LAG_P1].dev; - dev1 = ldev->pf[MLX5_LAG_P2].dev; - - ldev->mode_changes_in_progress++; - if (__mlx5_lag_is_active(ldev)) { - mlx5_lag_lock_eswitches(dev0, dev1); - mlx5_disable_lag(ldev); - mlx5_lag_unlock_eswitches(dev0, dev1); - } - mlx5_dev_list_unlock(); -} - -void mlx5_lag_enable_change(struct mlx5_core_dev *dev) -{ - struct mlx5_lag *ldev; - - ldev = mlx5_lag_dev(dev); - if (!ldev) - return; - - mlx5_dev_list_lock(); - ldev->mode_changes_in_progress--; - mlx5_dev_list_unlock(); - mlx5_queue_bond_work(ldev, 0); -} - -struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) -{ - struct net_device *ndev = NULL; - struct mlx5_lag *ldev; - - spin_lock(&lag_lock); - ldev = mlx5_lag_dev(dev); - - if (!(ldev && __mlx5_lag_is_roce(ldev))) - goto unlock; - - if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { - ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ? 
- ldev->pf[MLX5_LAG_P1].netdev : - ldev->pf[MLX5_LAG_P2].netdev; - } else { - ndev = ldev->pf[MLX5_LAG_P1].netdev; - } - if (ndev) - dev_hold(ndev); - -unlock: - spin_unlock(&lag_lock); - - return ndev; -} -EXPORT_SYMBOL(mlx5_lag_get_roce_netdev); - -u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, - struct net_device *slave) -{ - struct mlx5_lag *ldev; - u8 port = 0; - - spin_lock(&lag_lock); - ldev = mlx5_lag_dev(dev); - if (!(ldev && __mlx5_lag_is_roce(ldev))) - goto unlock; - - if (ldev->pf[MLX5_LAG_P1].netdev == slave) - port = MLX5_LAG_P1; - else - port = MLX5_LAG_P2; - - port = ldev->v2p_map[port]; - -unlock: - spin_unlock(&lag_lock); - return port; -} -EXPORT_SYMBOL(mlx5_lag_get_slave_port); - -struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev) -{ - struct mlx5_core_dev *peer_dev = NULL; - struct mlx5_lag *ldev; - - spin_lock(&lag_lock); - ldev = mlx5_lag_dev(dev); - if (!ldev) - goto unlock; - - peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ? - ldev->pf[MLX5_LAG_P2].dev : - ldev->pf[MLX5_LAG_P1].dev; - -unlock: - spin_unlock(&lag_lock); - return peer_dev; -} -EXPORT_SYMBOL(mlx5_lag_get_peer_mdev); - -int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, - u64 *values, - int num_counters, - size_t *offsets) -{ - int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); - struct mlx5_core_dev *mdev[MLX5_MAX_PORTS]; - struct mlx5_lag *ldev; - int num_ports; - int ret, i, j; - void *out; - - out = kvzalloc(outlen, GFP_KERNEL); - if (!out) - return -ENOMEM; - - memset(values, 0, sizeof(*values) * num_counters); - - spin_lock(&lag_lock); - ldev = mlx5_lag_dev(dev); - if (ldev && __mlx5_lag_is_active(ldev)) { - num_ports = MLX5_MAX_PORTS; - mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev; - mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev; - } else { - num_ports = 1; - mdev[MLX5_LAG_P1] = dev; - } - spin_unlock(&lag_lock); - - for (i = 0; i < num_ports; ++i) { - u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {}; - - MLX5_SET(query_cong_statistics_in, in, opcode, - MLX5_CMD_OP_QUERY_CONG_STATISTICS); - ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in, - out); - if (ret) - goto free; - - for (j = 0; j < num_counters; ++j) - values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); - } - -free: - kvfree(out); - return ret; -} -EXPORT_SYMBOL(mlx5_lag_query_cong_counters); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag.h deleted file mode 100644 index d4bae528954e6..0000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.h +++ /dev/null @@ -1,81 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* Copyright (c) 2019 Mellanox Technologies. */ - -#ifndef __MLX5_LAG_H__ -#define __MLX5_LAG_H__ - -#include "mlx5_core.h" -#include "lag_mp.h" - -enum { - MLX5_LAG_P1, - MLX5_LAG_P2, -}; - -enum { - MLX5_LAG_FLAG_ROCE = 1 << 0, - MLX5_LAG_FLAG_SRIOV = 1 << 1, - MLX5_LAG_FLAG_MULTIPATH = 1 << 2, - MLX5_LAG_FLAG_READY = 1 << 3, -}; - -#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\ - MLX5_LAG_FLAG_MULTIPATH) - -struct lag_func { - struct mlx5_core_dev *dev; - struct net_device *netdev; -}; - -/* Used for collection of netdev event info. */ -struct lag_tracker { - enum netdev_lag_tx_type tx_type; - struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS]; - unsigned int is_bonded:1; -}; - -/* LAG data of a ConnectX card. - * It serves both its phys functions. 
- */ -struct mlx5_lag { - u8 flags; - int mode_changes_in_progress; - bool shared_fdb; - u8 v2p_map[MLX5_MAX_PORTS]; - struct kref ref; - struct lag_func pf[MLX5_MAX_PORTS]; - struct lag_tracker tracker; - struct workqueue_struct *wq; - struct delayed_work bond_work; - struct notifier_block nb; - struct lag_mp lag_mp; -}; - -static inline struct mlx5_lag * -mlx5_lag_dev(struct mlx5_core_dev *dev) -{ - return dev->priv.lag; -} - -static inline bool -__mlx5_lag_is_active(struct mlx5_lag *ldev) -{ - return !!(ldev->flags & MLX5_LAG_MODE_FLAGS); -} - -static inline bool -mlx5_lag_is_ready(struct mlx5_lag *ldev) -{ - return ldev->flags & MLX5_LAG_FLAG_READY; -} - -void mlx5_modify_lag(struct mlx5_lag *ldev, - struct lag_tracker *tracker); -int mlx5_activate_lag(struct mlx5_lag *ldev, - struct lag_tracker *tracker, - u8 flags, - bool shared_fdb); -int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, - struct net_device *ndev); - -#endif /* __MLX5_LAG_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c new file mode 100644 index 0000000000000..b37724fc53873 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.c @@ -0,0 +1,1082 @@ +/* + * Copyright (c) 2016, Mellanox Technologies. All rights reserved. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * OpenIB.org BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include +#include +#include +#include +#include "lib/devcom.h" +#include "mlx5_core.h" +#include "eswitch.h" +#include "lag.h" +#include "mp.h" + +/* General purpose, use for short periods of time. + * Beware of lock dependencies (preferably, no locks should be acquired + * under it). 
+ */ +static DEFINE_SPINLOCK(lag_lock); + +static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1, + u8 remap_port2, bool shared_fdb) +{ + u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {}; + void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx); + + MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG); + + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb); + + return mlx5_cmd_exec_in(dev, create_lag, in); +} + +static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1, + u8 remap_port2) +{ + u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {}; + void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx); + + MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG); + MLX5_SET(modify_lag_in, in, field_select, 0x1); + + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1); + MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2); + + return mlx5_cmd_exec_in(dev, modify_lag, in); +} + +int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {}; + + MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG); + + return mlx5_cmd_exec_in(dev, create_vport_lag, in); +} +EXPORT_SYMBOL(mlx5_cmd_create_vport_lag); + +int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev) +{ + u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {}; + + MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG); + + return mlx5_cmd_exec_in(dev, destroy_vport_lag, in); +} +EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag); + +static int mlx5_lag_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr); +static void mlx5_do_bond_work(struct work_struct *work); + +static void mlx5_ldev_free(struct kref *ref) +{ + struct mlx5_lag *ldev = container_of(ref, struct mlx5_lag, ref); + + if (ldev->nb.notifier_call) + unregister_netdevice_notifier_net(&init_net, &ldev->nb); + mlx5_lag_mp_cleanup(ldev); + cancel_delayed_work_sync(&ldev->bond_work); + destroy_workqueue(ldev->wq); + kfree(ldev); +} + +static void mlx5_ldev_put(struct mlx5_lag *ldev) +{ + kref_put(&ldev->ref, mlx5_ldev_free); +} + +static void mlx5_ldev_get(struct mlx5_lag *ldev) +{ + kref_get(&ldev->ref); +} + +static struct mlx5_lag *mlx5_lag_dev_alloc(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + int err; + + ldev = kzalloc(sizeof(*ldev), GFP_KERNEL); + if (!ldev) + return NULL; + + ldev->wq = create_singlethread_workqueue("mlx5_lag"); + if (!ldev->wq) { + kfree(ldev); + return NULL; + } + + kref_init(&ldev->ref); + INIT_DELAYED_WORK(&ldev->bond_work, mlx5_do_bond_work); + + ldev->nb.notifier_call = mlx5_lag_netdev_event; + if (register_netdevice_notifier_net(&init_net, &ldev->nb)) { + ldev->nb.notifier_call = NULL; + mlx5_core_err(dev, "Failed to register LAG netdev notifier\n"); + } + + err = mlx5_lag_mp_init(ldev); + if (err) + mlx5_core_err(dev, "Failed to init multipath lag err=%d\n", + err); + + return ldev; +} + +int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev, + struct net_device *ndev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].netdev == ndev) + return i; + + return -ENOENT; +} + +static bool __mlx5_lag_is_roce(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_ROCE); +} + +static bool __mlx5_lag_is_sriov(struct mlx5_lag *ldev) +{ + return !!(ldev->flags & MLX5_LAG_FLAG_SRIOV); +} + +static void mlx5_infer_tx_affinity_mapping(struct lag_tracker *tracker, + 
u8 *port1, u8 *port2) +{ + bool p1en; + bool p2en; + + p1en = tracker->netdev_state[MLX5_LAG_P1].tx_enabled && + tracker->netdev_state[MLX5_LAG_P1].link_up; + + p2en = tracker->netdev_state[MLX5_LAG_P2].tx_enabled && + tracker->netdev_state[MLX5_LAG_P2].link_up; + + *port1 = 1; + *port2 = 2; + if ((!p1en && !p2en) || (p1en && p2en)) + return; + + if (p1en) + *port2 = 1; + else + *port1 = 2; +} + +void mlx5_modify_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + u8 v2p_port1, v2p_port2; + int err; + + mlx5_infer_tx_affinity_mapping(tracker, &v2p_port1, + &v2p_port2); + + if (v2p_port1 != ldev->v2p_map[MLX5_LAG_P1] || + v2p_port2 != ldev->v2p_map[MLX5_LAG_P2]) { + ldev->v2p_map[MLX5_LAG_P1] = v2p_port1; + ldev->v2p_map[MLX5_LAG_P2] = v2p_port2; + + mlx5_core_info(dev0, "modify lag map port 1:%d port 2:%d", + ldev->v2p_map[MLX5_LAG_P1], + ldev->v2p_map[MLX5_LAG_P2]); + + err = mlx5_cmd_modify_lag(dev0, v2p_port1, v2p_port2); + if (err) + mlx5_core_err(dev0, + "Failed to modify LAG (%d)\n", + err); + } +} + +static int mlx5_create_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + bool shared_fdb) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; + int err; + + mlx5_infer_tx_affinity_mapping(tracker, &ldev->v2p_map[MLX5_LAG_P1], + &ldev->v2p_map[MLX5_LAG_P2]); + + mlx5_core_info(dev0, "lag map port 1:%d port 2:%d shared_fdb:%d", + ldev->v2p_map[MLX5_LAG_P1], ldev->v2p_map[MLX5_LAG_P2], + shared_fdb); + + err = mlx5_cmd_create_lag(dev0, ldev->v2p_map[MLX5_LAG_P1], + ldev->v2p_map[MLX5_LAG_P2], shared_fdb); + if (err) { + mlx5_core_err(dev0, + "Failed to create LAG (%d)\n", + err); + return err; + } + + if (shared_fdb) { + err = mlx5_eswitch_offloads_config_single_fdb(dev0->priv.eswitch, + dev1->priv.eswitch); + if (err) + mlx5_core_err(dev0, "Can't enable single FDB mode\n"); + else + mlx5_core_info(dev0, "Operation mode is single FDB\n"); + } + + if (err) { + MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); + if (mlx5_cmd_exec_in(dev0, destroy_lag, in)) + mlx5_core_err(dev0, + "Failed to deactivate RoCE LAG; driver restart required\n"); + } + + return err; +} + +int mlx5_activate_lag(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + u8 flags, + bool shared_fdb) +{ + bool roce_lag = !!(flags & MLX5_LAG_FLAG_ROCE); + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + int err; + + err = mlx5_create_lag(ldev, tracker, shared_fdb); + if (err) { + if (roce_lag) { + mlx5_core_err(dev0, + "Failed to activate RoCE LAG\n"); + } else { + mlx5_core_err(dev0, + "Failed to activate VF LAG\n" + "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); + } + return err; + } + + ldev->flags |= flags; + ldev->shared_fdb = shared_fdb; + return 0; +} + +static int mlx5_deactivate_lag(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {}; + bool roce_lag = __mlx5_lag_is_roce(ldev); + int err; + + ldev->flags &= ~MLX5_LAG_MODE_FLAGS; + mlx5_lag_mp_reset(ldev); + + if (ldev->shared_fdb) { + mlx5_eswitch_offloads_destroy_single_fdb(ldev->pf[MLX5_LAG_P1].dev->priv.eswitch, + ldev->pf[MLX5_LAG_P2].dev->priv.eswitch); + ldev->shared_fdb = false; + } + + MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG); + err = mlx5_cmd_exec_in(dev0, destroy_lag, in); + if (err) { + if (roce_lag) { + 
mlx5_core_err(dev0, + "Failed to deactivate RoCE LAG; driver restart required\n"); + } else { + mlx5_core_err(dev0, + "Failed to deactivate VF LAG; driver restart required\n" + "Make sure all VFs are unbound prior to VF LAG activation or deactivation\n"); + } + } + + return err; +} + +static bool mlx5_lag_check_prereq(struct mlx5_lag *ldev) +{ + if (!ldev->pf[MLX5_LAG_P1].dev || !ldev->pf[MLX5_LAG_P2].dev) + return false; + +#ifdef CONFIG_MLX5_ESWITCH + return mlx5_esw_lag_prereq(ldev->pf[MLX5_LAG_P1].dev, + ldev->pf[MLX5_LAG_P2].dev); +#else + return (!mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P1].dev) && + !mlx5_sriov_is_enabled(ldev->pf[MLX5_LAG_P2].dev)); +#endif +} + +static void mlx5_lag_add_devices(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) { + if (!ldev->pf[i].dev) + continue; + + if (ldev->pf[i].dev->priv.flags & + MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) + continue; + + ldev->pf[i].dev->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(ldev->pf[i].dev); + } +} + +static void mlx5_lag_remove_devices(struct mlx5_lag *ldev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) { + if (!ldev->pf[i].dev) + continue; + + if (ldev->pf[i].dev->priv.flags & + MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV) + continue; + + ldev->pf[i].dev->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(ldev->pf[i].dev); + } +} + +static void mlx5_disable_lag(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + bool shared_fdb = ldev->shared_fdb; + bool roce_lag; + int err; + + roce_lag = __mlx5_lag_is_roce(ldev); + + if (shared_fdb) { + mlx5_lag_remove_devices(ldev); + } else if (roce_lag) { + if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) { + dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + } + mlx5_nic_vport_disable_roce(dev1); + } + + err = mlx5_deactivate_lag(ldev); + if (err) + return; + + if (shared_fdb || roce_lag) + mlx5_lag_add_devices(ldev); + + if (shared_fdb) { + if (!(dev0->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) + mlx5_eswitch_reload_reps(dev0->priv.eswitch); + if (!(dev1->priv.flags & MLX5_PRIV_FLAGS_DISABLE_ALL_ADEV)) + mlx5_eswitch_reload_reps(dev1->priv.eswitch); + } +} + +static bool mlx5_shared_fdb_supported(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + + if (is_mdev_switchdev_mode(dev0) && + is_mdev_switchdev_mode(dev1) && + mlx5_eswitch_vport_match_metadata_enabled(dev0->priv.eswitch) && + mlx5_eswitch_vport_match_metadata_enabled(dev1->priv.eswitch) && + mlx5_devcom_is_paired(dev0->priv.devcom, + MLX5_DEVCOM_ESW_OFFLOADS) && + MLX5_CAP_GEN(dev1, lag_native_fdb_selection) && + MLX5_CAP_ESW(dev1, root_ft_on_other_esw) && + MLX5_CAP_ESW(dev0, esw_shared_ingress_acl)) + return true; + + return false; +} + +static void mlx5_do_bond(struct mlx5_lag *ldev) +{ + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + struct lag_tracker tracker; + bool do_bond, roce_lag; + int err; + + if (!mlx5_lag_is_ready(ldev)) { + do_bond = false; + } else { + tracker = ldev->tracker; + + do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev); + } + + if (do_bond && !__mlx5_lag_is_active(ldev)) { + bool shared_fdb = mlx5_shared_fdb_supported(ldev); + + roce_lag = !mlx5_sriov_is_enabled(dev0) && + !mlx5_sriov_is_enabled(dev1); + 
+#ifdef CONFIG_MLX5_ESWITCH + roce_lag = roce_lag && + dev0->priv.eswitch->mode == MLX5_ESWITCH_NONE && + dev1->priv.eswitch->mode == MLX5_ESWITCH_NONE; +#endif + + if (shared_fdb || roce_lag) + mlx5_lag_remove_devices(ldev); + + err = mlx5_activate_lag(ldev, &tracker, + roce_lag ? MLX5_LAG_FLAG_ROCE : + MLX5_LAG_FLAG_SRIOV, + shared_fdb); + if (err) { + if (shared_fdb || roce_lag) + mlx5_lag_add_devices(ldev); + + return; + } else if (roce_lag) { + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + mlx5_nic_vport_enable_roce(dev1); + } else if (shared_fdb) { + dev0->priv.flags &= ~MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + + err = mlx5_eswitch_reload_reps(dev0->priv.eswitch); + if (!err) + err = mlx5_eswitch_reload_reps(dev1->priv.eswitch); + + if (err) { + dev0->priv.flags |= MLX5_PRIV_FLAGS_DISABLE_IB_ADEV; + mlx5_rescan_drivers_locked(dev0); + mlx5_deactivate_lag(ldev); + mlx5_lag_add_devices(ldev); + mlx5_eswitch_reload_reps(dev0->priv.eswitch); + mlx5_eswitch_reload_reps(dev1->priv.eswitch); + mlx5_core_err(dev0, "Failed to enable lag\n"); + return; + } + } + } else if (do_bond && __mlx5_lag_is_active(ldev)) { + mlx5_modify_lag(ldev, &tracker); + } else if (!do_bond && __mlx5_lag_is_active(ldev)) { + mlx5_disable_lag(ldev); + } +} + +static void mlx5_queue_bond_work(struct mlx5_lag *ldev, unsigned long delay) +{ + queue_delayed_work(ldev->wq, &ldev->bond_work, delay); +} + +static void mlx5_lag_lock_eswitches(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1) +{ + if (dev0) + mlx5_esw_lock(dev0->priv.eswitch); + if (dev1) + mlx5_esw_lock(dev1->priv.eswitch); +} + +static void mlx5_lag_unlock_eswitches(struct mlx5_core_dev *dev0, + struct mlx5_core_dev *dev1) +{ + if (dev1) + mlx5_esw_unlock(dev1->priv.eswitch); + if (dev0) + mlx5_esw_unlock(dev0->priv.eswitch); +} + +static void mlx5_do_bond_work(struct work_struct *work) +{ + struct delayed_work *delayed_work = to_delayed_work(work); + struct mlx5_lag *ldev = container_of(delayed_work, struct mlx5_lag, + bond_work); + struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev; + struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev; + int status; + + status = mlx5_dev_list_trylock(); + if (!status) { + mlx5_queue_bond_work(ldev, HZ); + return; + } + + if (ldev->mode_changes_in_progress) { + mlx5_dev_list_unlock(); + mlx5_queue_bond_work(ldev, HZ); + return; + } + + mlx5_lag_lock_eswitches(dev0, dev1); + mlx5_do_bond(ldev); + mlx5_lag_unlock_eswitches(dev0, dev1); + mlx5_dev_list_unlock(); +} + +static int mlx5_handle_changeupper_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct net_device *ndev, + struct netdev_notifier_changeupper_info *info) +{ + struct net_device *upper = info->upper_dev, *ndev_tmp; + struct netdev_lag_upper_info *lag_upper_info = NULL; + bool is_bonded, is_in_lag, mode_supported; + int bond_status = 0; + int num_slaves = 0; + int idx; + + if (!netif_is_lag_master(upper)) + return 0; + + if (info->linking) + lag_upper_info = info->upper_info; + + /* The event may still be of interest if the slave does not belong to + * us, but is enslaved to a master which has one or more of our netdevs + * as slaves (e.g., if a new slave is added to a master that bonds two + * of our netdevs, we should unbond). 
+ */ + rcu_read_lock(); + for_each_netdev_in_bond_rcu(upper, ndev_tmp) { + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev_tmp); + if (idx >= 0) + bond_status |= (1 << idx); + + num_slaves++; + } + rcu_read_unlock(); + + /* None of this lagdev's netdevs are slaves of this master. */ + if (!(bond_status & 0x3)) + return 0; + + if (lag_upper_info) + tracker->tx_type = lag_upper_info->tx_type; + + /* Determine bonding status: + * A device is considered bonded if both its physical ports are slaves + * of the same lag master, and only them. + */ + is_in_lag = num_slaves == MLX5_MAX_PORTS && bond_status == 0x3; + + if (!mlx5_lag_is_ready(ldev) && is_in_lag) { + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, PF is configured with more than 64 VFs"); + return 0; + } + + /* Lag mode must be activebackup or hash. */ + mode_supported = tracker->tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP || + tracker->tx_type == NETDEV_LAG_TX_TYPE_HASH; + + if (is_in_lag && !mode_supported) + NL_SET_ERR_MSG_MOD(info->info.extack, + "Can't activate LAG offload, TX type isn't supported"); + + is_bonded = is_in_lag && mode_supported; + if (tracker->is_bonded != is_bonded) { + tracker->is_bonded = is_bonded; + return 1; + } + + return 0; +} + +static int mlx5_handle_changelowerstate_event(struct mlx5_lag *ldev, + struct lag_tracker *tracker, + struct net_device *ndev, + struct netdev_notifier_changelowerstate_info *info) +{ + struct netdev_lag_lower_state_info *lag_lower_info; + int idx; + + if (!netif_is_lag_port(ndev)) + return 0; + + idx = mlx5_lag_dev_get_netdev_idx(ldev, ndev); + if (idx < 0) + return 0; + + /* This information is used to determine virtual to physical + * port mapping. + */ + lag_lower_info = info->lower_state_info; + if (!lag_lower_info) + return 0; + + tracker->netdev_state[idx] = *lag_lower_info; + + return 1; +} + +static int mlx5_lag_netdev_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct net_device *ndev = netdev_notifier_info_to_dev(ptr); + struct lag_tracker tracker; + struct mlx5_lag *ldev; + int changed = 0; + + if ((event != NETDEV_CHANGEUPPER) && (event != NETDEV_CHANGELOWERSTATE)) + return NOTIFY_DONE; + + ldev = container_of(this, struct mlx5_lag, nb); + + if (!mlx5_lag_is_ready(ldev) && event == NETDEV_CHANGELOWERSTATE) + return NOTIFY_DONE; + + tracker = ldev->tracker; + + switch (event) { + case NETDEV_CHANGEUPPER: + changed = mlx5_handle_changeupper_event(ldev, &tracker, ndev, + ptr); + break; + case NETDEV_CHANGELOWERSTATE: + changed = mlx5_handle_changelowerstate_event(ldev, &tracker, + ndev, ptr); + break; + } + + ldev->tracker = tracker; + + if (changed) + mlx5_queue_bond_work(ldev, 0); + + return NOTIFY_DONE; +} + +static void mlx5_ldev_add_netdev(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev, + struct net_device *netdev) +{ + unsigned int fn = mlx5_get_dev_index(dev); + + if (fn >= MLX5_MAX_PORTS) + return; + + spin_lock(&lag_lock); + ldev->pf[fn].netdev = netdev; + ldev->tracker.netdev_state[fn].link_up = 0; + ldev->tracker.netdev_state[fn].tx_enabled = 0; + spin_unlock(&lag_lock); +} + +static void mlx5_ldev_remove_netdev(struct mlx5_lag *ldev, + struct net_device *netdev) +{ + int i; + + spin_lock(&lag_lock); + for (i = 0; i < MLX5_MAX_PORTS; i++) { + if (ldev->pf[i].netdev == netdev) { + ldev->pf[i].netdev = NULL; + break; + } + } + spin_unlock(&lag_lock); +} + +static void mlx5_ldev_add_mdev(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev) +{ + unsigned int fn = mlx5_get_dev_index(dev); + + if (fn >= 
MLX5_MAX_PORTS) + return; + + ldev->pf[fn].dev = dev; + dev->priv.lag = ldev; +} + +/* Must be called with intf_mutex held */ +static void mlx5_ldev_remove_mdev(struct mlx5_lag *ldev, + struct mlx5_core_dev *dev) +{ + int i; + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (ldev->pf[i].dev == dev) + break; + + if (i == MLX5_MAX_PORTS) + return; + + ldev->pf[i].dev = NULL; + dev->priv.lag = NULL; +} + +/* Must be called with intf_mutex held */ +static int __mlx5_lag_dev_add_mdev(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev = NULL; + struct mlx5_core_dev *tmp_dev; + + if (!MLX5_CAP_GEN(dev, vport_group_manager) || + !MLX5_CAP_GEN(dev, lag_master) || + MLX5_CAP_GEN(dev, num_lag_ports) != MLX5_MAX_PORTS) + return 0; + + tmp_dev = mlx5_get_next_phys_dev(dev); + if (tmp_dev) + ldev = tmp_dev->priv.lag; + + if (!ldev) { + ldev = mlx5_lag_dev_alloc(dev); + if (!ldev) { + mlx5_core_err(dev, "Failed to alloc lag dev\n"); + return 0; + } + } else { + if (ldev->mode_changes_in_progress) + return -EAGAIN; + mlx5_ldev_get(ldev); + } + + mlx5_ldev_add_mdev(ldev, dev); + + return 0; +} + +void mlx5_lag_remove_mdev(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + +recheck: + mlx5_dev_list_lock(); + if (ldev->mode_changes_in_progress) { + mlx5_dev_list_unlock(); + msleep(100); + goto recheck; + } + mlx5_ldev_remove_mdev(ldev, dev); + mlx5_dev_list_unlock(); + mlx5_ldev_put(ldev); +} + +void mlx5_lag_add_mdev(struct mlx5_core_dev *dev) +{ + int err; + +recheck: + mlx5_dev_list_lock(); + err = __mlx5_lag_dev_add_mdev(dev); + if (err) { + mlx5_dev_list_unlock(); + msleep(100); + goto recheck; + } + mlx5_dev_list_unlock(); +} + +/* Must be called with intf_mutex held */ +void mlx5_lag_remove_netdev(struct mlx5_core_dev *dev, + struct net_device *netdev) +{ + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + mlx5_ldev_remove_netdev(ldev, netdev); + ldev->flags &= ~MLX5_LAG_FLAG_READY; + + if (__mlx5_lag_is_active(ldev)) + mlx5_queue_bond_work(ldev, 0); +} + +/* Must be called with intf_mutex held */ +void mlx5_lag_add_netdev(struct mlx5_core_dev *dev, + struct net_device *netdev) +{ + struct mlx5_lag *ldev; + int i; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + mlx5_ldev_add_netdev(ldev, dev, netdev); + + for (i = 0; i < MLX5_MAX_PORTS; i++) + if (!ldev->pf[i].dev) + break; + + if (i >= MLX5_MAX_PORTS) + ldev->flags |= MLX5_LAG_FLAG_READY; + mlx5_queue_bond_work(ldev, 0); +} + +bool mlx5_lag_is_roce(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_roce(ldev); + spin_unlock(&lag_lock); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_roce); + +bool mlx5_lag_is_active(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_active(ldev); + spin_unlock(&lag_lock); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_active); + +bool mlx5_lag_is_master(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_active(ldev) && + dev == ldev->pf[MLX5_LAG_P1].dev; + spin_unlock(&lag_lock); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_master); + +bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_sriov(ldev); + 
spin_unlock(&lag_lock); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_sriov); + +bool mlx5_lag_is_shared_fdb(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + bool res; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + res = ldev && __mlx5_lag_is_sriov(ldev) && ldev->shared_fdb; + spin_unlock(&lag_lock); + + return res; +} +EXPORT_SYMBOL(mlx5_lag_is_shared_fdb); + +void mlx5_lag_disable_change(struct mlx5_core_dev *dev) +{ + struct mlx5_core_dev *dev0; + struct mlx5_core_dev *dev1; + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + mlx5_dev_list_lock(); + + dev0 = ldev->pf[MLX5_LAG_P1].dev; + dev1 = ldev->pf[MLX5_LAG_P2].dev; + + ldev->mode_changes_in_progress++; + if (__mlx5_lag_is_active(ldev)) { + mlx5_lag_lock_eswitches(dev0, dev1); + mlx5_disable_lag(ldev); + mlx5_lag_unlock_eswitches(dev0, dev1); + } + mlx5_dev_list_unlock(); +} + +void mlx5_lag_enable_change(struct mlx5_core_dev *dev) +{ + struct mlx5_lag *ldev; + + ldev = mlx5_lag_dev(dev); + if (!ldev) + return; + + mlx5_dev_list_lock(); + ldev->mode_changes_in_progress--; + mlx5_dev_list_unlock(); + mlx5_queue_bond_work(ldev, 0); +} + +struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev) +{ + struct net_device *ndev = NULL; + struct mlx5_lag *ldev; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + + if (!(ldev && __mlx5_lag_is_roce(ldev))) + goto unlock; + + if (ldev->tracker.tx_type == NETDEV_LAG_TX_TYPE_ACTIVEBACKUP) { + ndev = ldev->tracker.netdev_state[MLX5_LAG_P1].tx_enabled ? + ldev->pf[MLX5_LAG_P1].netdev : + ldev->pf[MLX5_LAG_P2].netdev; + } else { + ndev = ldev->pf[MLX5_LAG_P1].netdev; + } + if (ndev) + dev_hold(ndev); + +unlock: + spin_unlock(&lag_lock); + + return ndev; +} +EXPORT_SYMBOL(mlx5_lag_get_roce_netdev); + +u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev, + struct net_device *slave) +{ + struct mlx5_lag *ldev; + u8 port = 0; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + if (!(ldev && __mlx5_lag_is_roce(ldev))) + goto unlock; + + if (ldev->pf[MLX5_LAG_P1].netdev == slave) + port = MLX5_LAG_P1; + else + port = MLX5_LAG_P2; + + port = ldev->v2p_map[port]; + +unlock: + spin_unlock(&lag_lock); + return port; +} +EXPORT_SYMBOL(mlx5_lag_get_slave_port); + +struct mlx5_core_dev *mlx5_lag_get_peer_mdev(struct mlx5_core_dev *dev) +{ + struct mlx5_core_dev *peer_dev = NULL; + struct mlx5_lag *ldev; + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + if (!ldev) + goto unlock; + + peer_dev = ldev->pf[MLX5_LAG_P1].dev == dev ? 
+ ldev->pf[MLX5_LAG_P2].dev : + ldev->pf[MLX5_LAG_P1].dev; + +unlock: + spin_unlock(&lag_lock); + return peer_dev; +} +EXPORT_SYMBOL(mlx5_lag_get_peer_mdev); + +int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, + u64 *values, + int num_counters, + size_t *offsets) +{ + int outlen = MLX5_ST_SZ_BYTES(query_cong_statistics_out); + struct mlx5_core_dev *mdev[MLX5_MAX_PORTS]; + struct mlx5_lag *ldev; + int num_ports; + int ret, i, j; + void *out; + + out = kvzalloc(outlen, GFP_KERNEL); + if (!out) + return -ENOMEM; + + memset(values, 0, sizeof(*values) * num_counters); + + spin_lock(&lag_lock); + ldev = mlx5_lag_dev(dev); + if (ldev && __mlx5_lag_is_active(ldev)) { + num_ports = MLX5_MAX_PORTS; + mdev[MLX5_LAG_P1] = ldev->pf[MLX5_LAG_P1].dev; + mdev[MLX5_LAG_P2] = ldev->pf[MLX5_LAG_P2].dev; + } else { + num_ports = 1; + mdev[MLX5_LAG_P1] = dev; + } + spin_unlock(&lag_lock); + + for (i = 0; i < num_ports; ++i) { + u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {}; + + MLX5_SET(query_cong_statistics_in, in, opcode, + MLX5_CMD_OP_QUERY_CONG_STATISTICS); + ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in, + out); + if (ret) + goto free; + + for (j = 0; j < num_counters; ++j) + values[j] += be64_to_cpup((__be64 *)(out + offsets[j])); + } + +free: + kvfree(out); + return ret; +} +EXPORT_SYMBOL(mlx5_lag_query_cong_counters); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h new file mode 100644 index 0000000000000..c268663c89b41 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/lag.h @@ -0,0 +1,81 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2019 Mellanox Technologies. */ + +#ifndef __MLX5_LAG_H__ +#define __MLX5_LAG_H__ + +#include "mlx5_core.h" +#include "mp.h" + +enum { + MLX5_LAG_P1, + MLX5_LAG_P2, +}; + +enum { + MLX5_LAG_FLAG_ROCE = 1 << 0, + MLX5_LAG_FLAG_SRIOV = 1 << 1, + MLX5_LAG_FLAG_MULTIPATH = 1 << 2, + MLX5_LAG_FLAG_READY = 1 << 3, +}; + +#define MLX5_LAG_MODE_FLAGS (MLX5_LAG_FLAG_ROCE | MLX5_LAG_FLAG_SRIOV |\ + MLX5_LAG_FLAG_MULTIPATH) + +struct lag_func { + struct mlx5_core_dev *dev; + struct net_device *netdev; +}; + +/* Used for collection of netdev event info. */ +struct lag_tracker { + enum netdev_lag_tx_type tx_type; + struct netdev_lag_lower_state_info netdev_state[MLX5_MAX_PORTS]; + unsigned int is_bonded:1; +}; + +/* LAG data of a ConnectX card. + * It serves both its phys functions. 
+ */
+struct mlx5_lag {
+	u8                        flags;
+	int                       mode_changes_in_progress;
+	bool                      shared_fdb;
+	u8                        v2p_map[MLX5_MAX_PORTS];
+	struct kref               ref;
+	struct lag_func           pf[MLX5_MAX_PORTS];
+	struct lag_tracker        tracker;
+	struct workqueue_struct   *wq;
+	struct delayed_work       bond_work;
+	struct notifier_block     nb;
+	struct lag_mp             lag_mp;
+};
+
+static inline struct mlx5_lag *
+mlx5_lag_dev(struct mlx5_core_dev *dev)
+{
+	return dev->priv.lag;
+}
+
+static inline bool
+__mlx5_lag_is_active(struct mlx5_lag *ldev)
+{
+	return !!(ldev->flags & MLX5_LAG_MODE_FLAGS);
+}
+
+static inline bool
+mlx5_lag_is_ready(struct mlx5_lag *ldev)
+{
+	return ldev->flags & MLX5_LAG_FLAG_READY;
+}
+
+void mlx5_modify_lag(struct mlx5_lag *ldev,
+		     struct lag_tracker *tracker);
+int mlx5_activate_lag(struct mlx5_lag *ldev,
+		      struct lag_tracker *tracker,
+		      u8 flags,
+		      bool shared_fdb);
+int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
+				struct net_device *ndev);
+
+#endif /* __MLX5_LAG_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
new file mode 100644
index 0000000000000..810a15b83b9f1
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#include <linux/netdevice.h>
+#include <net/nexthop.h>
+#include "lag/lag.h"
+#include "lag/mp.h"
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "lib/mlx5.h"
+
+static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
+{
+	if (!mlx5_lag_is_ready(ldev))
+		return false;
+
+	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
+					 ldev->pf[MLX5_LAG_P2].dev);
+}
+
+static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
+{
+	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
+}
+
+bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
+{
+	struct mlx5_lag *ldev;
+	bool res;
+
+	ldev = mlx5_lag_dev(dev);
+	res = ldev && __mlx5_lag_is_multipath(ldev);
+
+	return res;
+}
+
+/**
+ * mlx5_lag_set_port_affinity
+ *
+ * @ldev: lag device
+ * @port:
+ *     0 - set normal affinity.
+ *     1 - set affinity to port 1.
+ *     2 - set affinity to port 2.
+ *
+ **/
+static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
+				       enum mlx5_lag_port_affinity port)
+{
+	struct lag_tracker tracker;
+
+	if (!__mlx5_lag_is_multipath(ldev))
+		return;
+
+	switch (port) {
+	case MLX5_LAG_NORMAL_AFFINITY:
+		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
+		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
+		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
+		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
+		break;
+	case MLX5_LAG_P1_AFFINITY:
+		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
+		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
+		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
+		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
+		break;
+	case MLX5_LAG_P2_AFFINITY:
+		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
+		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
+		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
+		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
+		break;
+	default:
+		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
+			       "Invalid affinity port %d", port);
+		return;
+	}
+
+	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
+		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
+					 MLX5_DEV_EVENT_PORT_AFFINITY,
+					 (void *)0);
+
+	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
+		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
+					 MLX5_DEV_EVENT_PORT_AFFINITY,
+					 (void *)0);
+
+	mlx5_modify_lag(ldev, &tracker);
+}
+
+static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
+{
+	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
+
+	flush_workqueue(mp->wq);
+}
+
+struct mlx5_fib_event_work {
+	struct work_struct work;
+	struct mlx5_lag *ldev;
+	unsigned long event;
+	union {
+		struct fib_entry_notifier_info fen_info;
+		struct fib_nh_notifier_info fnh_info;
+	};
+};
+
+static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
+				     unsigned long event,
+				     struct fib_info *fi)
+{
+	struct lag_mp *mp = &ldev->lag_mp;
+	struct fib_nh *fib_nh0, *fib_nh1;
+	unsigned int nhs;
+
+	/* Handle delete event */
+	if (event == FIB_EVENT_ENTRY_DEL) {
+		/* stop track */
+		if (mp->mfi == fi)
+			mp->mfi = NULL;
+		return;
+	}
+
+	/* Handle add/replace event */
+	nhs = fib_info_num_path(fi);
+	if (nhs == 1) {
+		if (__mlx5_lag_is_active(ldev)) {
+			struct fib_nh *nh = fib_info_nh(fi, 0);
+			struct net_device *nh_dev = nh->fib_nh_dev;
+			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
+
+			if (i < 0)
+				i = MLX5_LAG_NORMAL_AFFINITY;
+			else
+				++i;
+
+			mlx5_lag_set_port_affinity(ldev, i);
+		}
+		return;
+	}
+
+	if (nhs != 2)
+		return;
+
+	/* Verify next hops are ports of the same hca */
+	fib_nh0 = fib_info_nh(fi, 0);
+	fib_nh1 = fib_info_nh(fi, 1);
+	if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
+	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
+	    !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
+	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
+		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
+			       "Multipath offload require two ports of the same HCA\n");
+		return;
+	}
+
+	/* First time we see multipath route */
+	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
+		struct lag_tracker tracker;
+
+		tracker = ldev->tracker;
+		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
+	}
+
+	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
+	mp->mfi = fi;
+}
+
+static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
+				       unsigned long event,
+				       struct fib_nh *fib_nh,
+				       struct fib_info *fi)
+{
+	struct lag_mp *mp = &ldev->lag_mp;
+
+	/* Check the nh event is related to the route */
+	if (!mp->mfi || mp->mfi != fi)
+		return;
+
+	/* nh added/removed */
+	if (event == FIB_EVENT_NH_DEL) {
+		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);
+
+		if (i >= 0) {
+			i = (i + 1) % 2 + 1;	/* peer port */
+			mlx5_lag_set_port_affinity(ldev, i);
+		}
+	} else if (event == FIB_EVENT_NH_ADD &&
+		   fib_info_num_path(fi) == 2) {
+		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
+	}
+}
+
+static void mlx5_lag_fib_update(struct work_struct *work)
+{
+	struct mlx5_fib_event_work *fib_work =
+		container_of(work, struct mlx5_fib_event_work, work);
+	struct mlx5_lag *ldev = fib_work->ldev;
+	struct fib_nh *fib_nh;
+
+	/* Protect internal structures from changes */
+	rtnl_lock();
+	switch (fib_work->event) {
+	case FIB_EVENT_ENTRY_REPLACE:
+	case FIB_EVENT_ENTRY_DEL:
+		mlx5_lag_fib_route_event(ldev, fib_work->event,
+					 fib_work->fen_info.fi);
+		fib_info_put(fib_work->fen_info.fi);
+		break;
+	case FIB_EVENT_NH_ADD:
+	case FIB_EVENT_NH_DEL:
+		fib_nh = fib_work->fnh_info.fib_nh;
+		mlx5_lag_fib_nexthop_event(ldev,
+					   fib_work->event,
+					   fib_work->fnh_info.fib_nh,
+					   fib_nh->nh_parent);
+		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
+		break;
+	}
+
+	rtnl_unlock();
+	kfree(fib_work);
+}
+
+static struct mlx5_fib_event_work *
+mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
+{
+	struct mlx5_fib_event_work *fib_work;
+
+	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
+	if (WARN_ON(!fib_work))
+		return NULL;
+
+	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
+	fib_work->ldev = ldev;
+	fib_work->event = event;
+
+	return fib_work;
+}
+
+static int mlx5_lag_fib_event(struct notifier_block *nb,
+			      unsigned long event,
+			      void *ptr)
+{
+	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
+	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
+	struct fib_notifier_info *info = ptr;
+	struct mlx5_fib_event_work *fib_work;
+	struct fib_entry_notifier_info *fen_info;
+	struct fib_nh_notifier_info *fnh_info;
+	struct net_device *fib_dev;
+	struct fib_info *fi;
+
+	if (info->family != AF_INET)
+		return NOTIFY_DONE;
+
+	if (!mlx5_lag_multipath_check_prereq(ldev))
+		return NOTIFY_DONE;
+
+	switch (event) {
+	case FIB_EVENT_ENTRY_REPLACE:
+	case FIB_EVENT_ENTRY_DEL:
+		fen_info = container_of(info, struct fib_entry_notifier_info,
+					info);
+		fi = fen_info->fi;
+		if (fi->nh) {
+			NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
+			return notifier_from_errno(-EINVAL);
+		}
+		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
+		if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
+		    fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
+			return NOTIFY_DONE;
+		}
+		fib_work = mlx5_lag_init_fib_work(ldev, event);
+		if (!fib_work)
+			return NOTIFY_DONE;
+		fib_work->fen_info = *fen_info;
+		/* Take reference on fib_info to prevent it from being
+		 * freed while work is queued. Release it afterwards.
+		 */
+		fib_info_hold(fib_work->fen_info.fi);
+		break;
+	case FIB_EVENT_NH_ADD:
+	case FIB_EVENT_NH_DEL:
+		fnh_info = container_of(info, struct fib_nh_notifier_info,
+					info);
+		fib_work = mlx5_lag_init_fib_work(ldev, event);
+		if (!fib_work)
+			return NOTIFY_DONE;
+		fib_work->fnh_info = *fnh_info;
+		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	queue_work(mp->wq, &fib_work->work);
+
+	return NOTIFY_DONE;
+}
+
+void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
+{
+	/* Clear mfi, as it might become stale when a route delete event
+	 * has been missed, see mlx5_lag_fib_route_event().
+	 */
+	ldev->lag_mp.mfi = NULL;
+}
+
+int mlx5_lag_mp_init(struct mlx5_lag *ldev)
+{
+	struct lag_mp *mp = &ldev->lag_mp;
+	int err;
+
+	/* always clear mfi, as it might become stale when a route delete event
+	 * has been missed
+	 */
+	mp->mfi = NULL;
+
+	if (mp->fib_nb.notifier_call)
+		return 0;
+
+	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
+	if (!mp->wq)
+		return -ENOMEM;
+
+	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
+	err = register_fib_notifier(&init_net, &mp->fib_nb,
+				    mlx5_lag_fib_event_flush, NULL);
+	if (err) {
+		destroy_workqueue(mp->wq);
+		mp->fib_nb.notifier_call = NULL;
+	}
+
+	return err;
+}
+
+void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
+{
+	struct lag_mp *mp = &ldev->lag_mp;
+
+	if (!mp->fib_nb.notifier_call)
+		return;
+
+	unregister_fib_notifier(&init_net, &mp->fib_nb);
+	destroy_workqueue(mp->wq);
+	mp->fib_nb.notifier_call = NULL;
+	mp->mfi = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
new file mode 100644
index 0000000000000..729c839397a89
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag/mp.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2019 Mellanox Technologies. */
+
+#ifndef __MLX5_LAG_MP_H__
+#define __MLX5_LAG_MP_H__
+
+#include "lag.h"
+#include "mlx5_core.h"
+
+enum mlx5_lag_port_affinity {
+	MLX5_LAG_NORMAL_AFFINITY,
+	MLX5_LAG_P1_AFFINITY,
+	MLX5_LAG_P2_AFFINITY,
+};
+
+struct lag_mp {
+	struct notifier_block     fib_nb;
+	struct fib_info           *mfi; /* used in tracking fib events */
+	struct workqueue_struct   *wq;
+};
+
+#ifdef CONFIG_MLX5_ESWITCH
+
+void mlx5_lag_mp_reset(struct mlx5_lag *ldev);
+int mlx5_lag_mp_init(struct mlx5_lag *ldev);
+void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev);
+
+#else /* CONFIG_MLX5_ESWITCH */
+
+static inline void mlx5_lag_mp_reset(struct mlx5_lag *ldev) {};
+static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; }
+static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {}
+
+#endif /* CONFIG_MLX5_ESWITCH */
+#endif /* __MLX5_LAG_MP_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
deleted file mode 100644
index f239b352a58a1..0000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.c
+++ /dev/null
@@ -1,352 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
-/* Copyright (c) 2019 Mellanox Technologies. */
-
-#include <linux/netdevice.h>
-#include <net/nexthop.h>
-#include "lag.h"
-#include "lag_mp.h"
-#include "mlx5_core.h"
-#include "eswitch.h"
-#include "lib/mlx5.h"
-
-static bool mlx5_lag_multipath_check_prereq(struct mlx5_lag *ldev)
-{
-	if (!mlx5_lag_is_ready(ldev))
-		return false;
-
-	return mlx5_esw_multipath_prereq(ldev->pf[MLX5_LAG_P1].dev,
-					 ldev->pf[MLX5_LAG_P2].dev);
-}
-
-static bool __mlx5_lag_is_multipath(struct mlx5_lag *ldev)
-{
-	return !!(ldev->flags & MLX5_LAG_FLAG_MULTIPATH);
-}
-
-bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev)
-{
-	struct mlx5_lag *ldev;
-	bool res;
-
-	ldev = mlx5_lag_dev(dev);
-	res = ldev && __mlx5_lag_is_multipath(ldev);
-
-	return res;
-}
-
-/**
- * mlx5_lag_set_port_affinity
- *
- * @ldev: lag device
- * @port:
- *     0 - set normal affinity.
- *     1 - set affinity to port 1.
- *     2 - set affinity to port 2.
- *
- **/
-static void mlx5_lag_set_port_affinity(struct mlx5_lag *ldev,
-				       enum mlx5_lag_port_affinity port)
-{
-	struct lag_tracker tracker;
-
-	if (!__mlx5_lag_is_multipath(ldev))
-		return;
-
-	switch (port) {
-	case MLX5_LAG_NORMAL_AFFINITY:
-		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
-		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
-		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
-		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
-		break;
-	case MLX5_LAG_P1_AFFINITY:
-		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = true;
-		tracker.netdev_state[MLX5_LAG_P1].link_up = true;
-		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = false;
-		tracker.netdev_state[MLX5_LAG_P2].link_up = false;
-		break;
-	case MLX5_LAG_P2_AFFINITY:
-		tracker.netdev_state[MLX5_LAG_P1].tx_enabled = false;
-		tracker.netdev_state[MLX5_LAG_P1].link_up = false;
-		tracker.netdev_state[MLX5_LAG_P2].tx_enabled = true;
-		tracker.netdev_state[MLX5_LAG_P2].link_up = true;
-		break;
-	default:
-		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
-			       "Invalid affinity port %d", port);
-		return;
-	}
-
-	if (tracker.netdev_state[MLX5_LAG_P1].tx_enabled)
-		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P1].dev->priv.events,
-					 MLX5_DEV_EVENT_PORT_AFFINITY,
-					 (void *)0);
-
-	if (tracker.netdev_state[MLX5_LAG_P2].tx_enabled)
-		mlx5_notifier_call_chain(ldev->pf[MLX5_LAG_P2].dev->priv.events,
-					 MLX5_DEV_EVENT_PORT_AFFINITY,
-					 (void *)0);
-
-	mlx5_modify_lag(ldev, &tracker);
-}
-
-static void mlx5_lag_fib_event_flush(struct notifier_block *nb)
-{
-	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
-
-	flush_workqueue(mp->wq);
-}
-
-struct mlx5_fib_event_work {
-	struct work_struct work;
-	struct mlx5_lag *ldev;
-	unsigned long event;
-	union {
-		struct fib_entry_notifier_info fen_info;
-		struct fib_nh_notifier_info fnh_info;
-	};
-};
-
-static void mlx5_lag_fib_route_event(struct mlx5_lag *ldev,
-				     unsigned long event,
-				     struct fib_info *fi)
-{
-	struct lag_mp *mp = &ldev->lag_mp;
-	struct fib_nh *fib_nh0, *fib_nh1;
-	unsigned int nhs;
-
-	/* Handle delete event */
-	if (event == FIB_EVENT_ENTRY_DEL) {
-		/* stop track */
-		if (mp->mfi == fi)
-			mp->mfi = NULL;
-		return;
-	}
-
-	/* Handle add/replace event */
-	nhs = fib_info_num_path(fi);
-	if (nhs == 1) {
-		if (__mlx5_lag_is_active(ldev)) {
-			struct fib_nh *nh = fib_info_nh(fi, 0);
-			struct net_device *nh_dev = nh->fib_nh_dev;
-			int i = mlx5_lag_dev_get_netdev_idx(ldev, nh_dev);
-
-			if (i < 0)
-				i = MLX5_LAG_NORMAL_AFFINITY;
-			else
-				++i;
-
-			mlx5_lag_set_port_affinity(ldev, i);
-		}
-		return;
-	}
-
-	if (nhs != 2)
-		return;
-
-	/* Verify next hops are ports of the same hca */
-	fib_nh0 = fib_info_nh(fi, 0);
-	fib_nh1 = fib_info_nh(fi, 1);
-	if (!(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev &&
-	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev) &&
-	    !(fib_nh0->fib_nh_dev == ldev->pf[MLX5_LAG_P2].netdev &&
-	      fib_nh1->fib_nh_dev == ldev->pf[MLX5_LAG_P1].netdev)) {
-		mlx5_core_warn(ldev->pf[MLX5_LAG_P1].dev,
-			       "Multipath offload require two ports of the same HCA\n");
-		return;
-	}
-
-	/* First time we see multipath route */
-	if (!mp->mfi && !__mlx5_lag_is_active(ldev)) {
-		struct lag_tracker tracker;
-
-		tracker = ldev->tracker;
-		mlx5_activate_lag(ldev, &tracker, MLX5_LAG_FLAG_MULTIPATH, false);
-	}
-
-	mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
-	mp->mfi = fi;
-}
-
-static void mlx5_lag_fib_nexthop_event(struct mlx5_lag *ldev,
-				       unsigned long event,
-				       struct fib_nh *fib_nh,
-				       struct fib_info *fi)
-{
-	struct lag_mp *mp = &ldev->lag_mp;
-
-	/* Check the nh event is related to the route */
-	if (!mp->mfi || mp->mfi != fi)
-		return;
-
-	/* nh added/removed */
-	if (event == FIB_EVENT_NH_DEL) {
-		int i = mlx5_lag_dev_get_netdev_idx(ldev, fib_nh->fib_nh_dev);
-
-		if (i >= 0) {
-			i = (i + 1) % 2 + 1;	/* peer port */
-			mlx5_lag_set_port_affinity(ldev, i);
-		}
-	} else if (event == FIB_EVENT_NH_ADD &&
-		   fib_info_num_path(fi) == 2) {
-		mlx5_lag_set_port_affinity(ldev, MLX5_LAG_NORMAL_AFFINITY);
-	}
-}
-
-static void mlx5_lag_fib_update(struct work_struct *work)
-{
-	struct mlx5_fib_event_work *fib_work =
-		container_of(work, struct mlx5_fib_event_work, work);
-	struct mlx5_lag *ldev = fib_work->ldev;
-	struct fib_nh *fib_nh;
-
-	/* Protect internal structures from changes */
-	rtnl_lock();
-	switch (fib_work->event) {
-	case FIB_EVENT_ENTRY_REPLACE:
-	case FIB_EVENT_ENTRY_DEL:
-		mlx5_lag_fib_route_event(ldev, fib_work->event,
-					 fib_work->fen_info.fi);
-		fib_info_put(fib_work->fen_info.fi);
-		break;
-	case FIB_EVENT_NH_ADD:
-	case FIB_EVENT_NH_DEL:
-		fib_nh = fib_work->fnh_info.fib_nh;
-		mlx5_lag_fib_nexthop_event(ldev,
-					   fib_work->event,
-					   fib_work->fnh_info.fib_nh,
-					   fib_nh->nh_parent);
-		fib_info_put(fib_work->fnh_info.fib_nh->nh_parent);
-		break;
-	}
-
-	rtnl_unlock();
-	kfree(fib_work);
-}
-
-static struct mlx5_fib_event_work *
-mlx5_lag_init_fib_work(struct mlx5_lag *ldev, unsigned long event)
-{
-	struct mlx5_fib_event_work *fib_work;
-
-	fib_work = kzalloc(sizeof(*fib_work), GFP_ATOMIC);
-	if (WARN_ON(!fib_work))
-		return NULL;
-
-	INIT_WORK(&fib_work->work, mlx5_lag_fib_update);
-	fib_work->ldev = ldev;
-	fib_work->event = event;
-
-	return fib_work;
-}
-
-static int mlx5_lag_fib_event(struct notifier_block *nb,
-			      unsigned long event,
-			      void *ptr)
-{
-	struct lag_mp *mp = container_of(nb, struct lag_mp, fib_nb);
-	struct mlx5_lag *ldev = container_of(mp, struct mlx5_lag, lag_mp);
-	struct fib_notifier_info *info = ptr;
-	struct mlx5_fib_event_work *fib_work;
-	struct fib_entry_notifier_info *fen_info;
-	struct fib_nh_notifier_info *fnh_info;
-	struct net_device *fib_dev;
-	struct fib_info *fi;
-
-	if (info->family != AF_INET)
-		return NOTIFY_DONE;
-
-	if (!mlx5_lag_multipath_check_prereq(ldev))
-		return NOTIFY_DONE;
-
-	switch (event) {
-	case FIB_EVENT_ENTRY_REPLACE:
-	case FIB_EVENT_ENTRY_DEL:
-		fen_info = container_of(info, struct fib_entry_notifier_info,
-					info);
-		fi = fen_info->fi;
-		if (fi->nh) {
-			NL_SET_ERR_MSG_MOD(info->extack, "IPv4 route with nexthop objects is not supported");
-			return notifier_from_errno(-EINVAL);
-		}
-		fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
-		if (fib_dev != ldev->pf[MLX5_LAG_P1].netdev &&
-		    fib_dev != ldev->pf[MLX5_LAG_P2].netdev) {
-			return NOTIFY_DONE;
-		}
-		fib_work = mlx5_lag_init_fib_work(ldev, event);
-		if (!fib_work)
-			return NOTIFY_DONE;
-		fib_work->fen_info = *fen_info;
-		/* Take reference on fib_info to prevent it from being
-		 * freed while work is queued. Release it afterwards.
-		 */
-		fib_info_hold(fib_work->fen_info.fi);
-		break;
-	case FIB_EVENT_NH_ADD:
-	case FIB_EVENT_NH_DEL:
-		fnh_info = container_of(info, struct fib_nh_notifier_info,
-					info);
-		fib_work = mlx5_lag_init_fib_work(ldev, event);
-		if (!fib_work)
-			return NOTIFY_DONE;
-		fib_work->fnh_info = *fnh_info;
-		fib_info_hold(fib_work->fnh_info.fib_nh->nh_parent);
-		break;
-	default:
-		return NOTIFY_DONE;
-	}
-
-	queue_work(mp->wq, &fib_work->work);
-
-	return NOTIFY_DONE;
-}
-
-void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
-{
-	/* Clear mfi, as it might become stale when a route delete event
-	 * has been missed, see mlx5_lag_fib_route_event().
-	 */
-	ldev->lag_mp.mfi = NULL;
-}
-
-int mlx5_lag_mp_init(struct mlx5_lag *ldev)
-{
-	struct lag_mp *mp = &ldev->lag_mp;
-	int err;
-
-	/* always clear mfi, as it might become stale when a route delete event
-	 * has been missed
-	 */
-	mp->mfi = NULL;
-
-	if (mp->fib_nb.notifier_call)
-		return 0;
-
-	mp->wq = create_singlethread_workqueue("mlx5_lag_mp");
-	if (!mp->wq)
-		return -ENOMEM;
-
-	mp->fib_nb.notifier_call = mlx5_lag_fib_event;
-	err = register_fib_notifier(&init_net, &mp->fib_nb,
-				    mlx5_lag_fib_event_flush, NULL);
-	if (err) {
-		destroy_workqueue(mp->wq);
-		mp->fib_nb.notifier_call = NULL;
-	}
-
-	return err;
-}
-
-void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev)
-{
-	struct lag_mp *mp = &ldev->lag_mp;
-
-	if (!mp->fib_nb.notifier_call)
-		return;
-
-	unregister_fib_notifier(&init_net, &mp->fib_nb);
-	destroy_workqueue(mp->wq);
-	mp->fib_nb.notifier_call = NULL;
-	mp->mfi = NULL;
-}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h b/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
deleted file mode 100644
index 729c839397a89..0000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag_mp.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
-/* Copyright (c) 2019 Mellanox Technologies. */
-
-#ifndef __MLX5_LAG_MP_H__
-#define __MLX5_LAG_MP_H__
-
-#include "lag.h"
-#include "mlx5_core.h"
-
-enum mlx5_lag_port_affinity {
-	MLX5_LAG_NORMAL_AFFINITY,
-	MLX5_LAG_P1_AFFINITY,
-	MLX5_LAG_P2_AFFINITY,
-};
-
-struct lag_mp {
-	struct notifier_block     fib_nb;
-	struct fib_info           *mfi; /* used in tracking fib events */
-	struct workqueue_struct   *wq;
-};
-
-#ifdef CONFIG_MLX5_ESWITCH
-
-void mlx5_lag_mp_reset(struct mlx5_lag *ldev);
-int mlx5_lag_mp_init(struct mlx5_lag *ldev);
-void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev);
-
-#else /* CONFIG_MLX5_ESWITCH */
-
-static inline void mlx5_lag_mp_reset(struct mlx5_lag *ldev) {};
-static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; }
-static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {}
-
-#endif /* CONFIG_MLX5_ESWITCH */
-#endif /* __MLX5_LAG_MP_H__ */
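
Note for readers: the LAG helpers moved above remain exported to upper-layer drivers. The snippet below is illustrative only and not part of this patch; the caller name is hypothetical, and it assumes the prototypes are picked up via include/linux/mlx5/driver.h as usual for mlx5 ULPs.

	#include <linux/netdevice.h>
	#include <linux/mlx5/driver.h>

	/* Hypothetical ULP helper: find which physical port currently carries
	 * RoCE traffic for a bonded mlx5 device. mlx5_lag_get_roce_netdev()
	 * takes a reference on the returned netdev, so drop it when done.
	 */
	static u8 example_roce_tx_port(struct mlx5_core_dev *mdev)
	{
		struct net_device *ndev;
		u8 port = 1;

		ndev = mlx5_lag_get_roce_netdev(mdev);
		if (!ndev)
			return port;	/* LAG not active in RoCE mode */

		port = mlx5_lag_get_slave_port(mdev, ndev);
		dev_put(ndev);
		return port;
	}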