When handling FIB_EVENT_ENTRY_REPLACE event for a new multipath route,
lag activation can be missed if a stale (struct lag_mp)->mfi pointer
exists, which was associated with an older multipath route that had been
removed.
Normally, when a route is removed, it triggers mlx5_lag_fib_event(),
which handles FIB_EVENT_ENTRY_DEL and clears mfi pointer. But, if
mlx5_lag_check_prereq() condition isn't met, for example when eswitch is
in legacy mode, the fib event is skipped and mfi pointer becomes stale.
Fix by resetting mfi pointer to NULL in mlx5_deactivate_lag().
Fixes: 8a66e4585979 ("net/mlx5: Change ownership model for lag")
Signed-off-by: Dima Chumak <dchumak@nvidia.com>
Reviewed-by: Roi Dayan <roid@nvidia.com>
Reviewed-by: Mark Bloch <mbloch@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
int err;
ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
+ mlx5_lag_mp_reset(ldev);
MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
return NOTIFY_DONE;
}
+void mlx5_lag_mp_reset(struct mlx5_lag *ldev)
+{
+ /* Clear mfi, as it might become stale when a route delete event
+ * has been missed, see mlx5_lag_fib_route_event().
+ */
+ ldev->lag_mp.mfi = NULL;
+}
+
int mlx5_lag_mp_init(struct mlx5_lag *ldev)
{
struct lag_mp *mp = &ldev->lag_mp;
#ifdef CONFIG_MLX5_ESWITCH
+void mlx5_lag_mp_reset(struct mlx5_lag *ldev);
int mlx5_lag_mp_init(struct mlx5_lag *ldev);
void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev);
#else /* CONFIG_MLX5_ESWITCH */
+static inline void mlx5_lag_mp_reset(struct mlx5_lag *ldev) {};
static inline int mlx5_lag_mp_init(struct mlx5_lag *ldev) { return 0; }
static inline void mlx5_lag_mp_cleanup(struct mlx5_lag *ldev) {}