net/mlx5: Fix command stats access after free
author Moshe Shemesh <moshe@nvidia.com>
Mon, 28 Nov 2022 17:05:47 +0000 (19:05 +0200)
committer Saeed Mahameed <saeedm@nvidia.com>
Tue, 10 Jan 2023 06:08:34 +0000 (22:08 -0800)
A command may fail while the driver is reloading and can't accept FW
commands until the command interface is reinitialized. Such a command
failure is logged to the command stats. This results in a NULL pointer
access, as the command stats structure is freed and reallocated during
mlx5 devlink reload (see kernel log below).

Fix it by making command stats statically allocated on driver probe.

Kernel log:
[ 2394.808802] BUG: unable to handle kernel paging request at 000000000002a9c0
[ 2394.810610] PGD 0 P4D 0
[ 2394.811811] Oops: 0002 [#1] SMP NOPTI
...
[ 2394.815482] RIP: 0010:native_queued_spin_lock_slowpath+0x183/0x1d0
...
[ 2394.829505] Call Trace:
[ 2394.830667]  _raw_spin_lock_irq+0x23/0x26
[ 2394.831858]  cmd_status_err+0x55/0x110 [mlx5_core]
[ 2394.833020]  mlx5_access_reg+0xe7/0x150 [mlx5_core]
[ 2394.834175]  mlx5_query_port_ptys+0x78/0xa0 [mlx5_core]
[ 2394.835337]  mlx5e_ethtool_get_link_ksettings+0x74/0x590 [mlx5_core]
[ 2394.836454]  ? kmem_cache_alloc_trace+0x140/0x1c0
[ 2394.837562]  __rh_call_get_link_ksettings+0x33/0x100
[ 2394.838663]  ? __rtnl_unlock+0x25/0x50
[ 2394.839755]  __ethtool_get_link_ksettings+0x72/0x150
[ 2394.840862]  duplex_show+0x6e/0xc0
[ 2394.841963]  dev_attr_show+0x1c/0x40
[ 2394.843048]  sysfs_kf_seq_show+0x9b/0x100
[ 2394.844123]  seq_read+0x153/0x410
[ 2394.845187]  vfs_read+0x91/0x140
[ 2394.846226]  ksys_read+0x4f/0xb0
[ 2394.847234]  do_syscall_64+0x5b/0x1a0
[ 2394.848228]  entry_SYSCALL_64_after_hwframe+0x65/0xca

Fixes: 34f46ae0d4b3 ("net/mlx5: Add command failures data to debugfs")
Signed-off-by: Moshe Shemesh <moshe@nvidia.com>
Reviewed-by: Shay Drory <shayd@nvidia.com>
Signed-off-by: Saeed Mahameed <saeedm@nvidia.com>
drivers/net/ethernet/mellanox/mlx5/core/cmd.c
include/linux/mlx5/driver.h

index d3ca745d107d62ca9e6cac4bb2d9ef729ed7dded..c837103a9ee33870d2a3d6a9e903d4133b51d657 100644 (file)
@@ -2176,15 +2176,9 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
                return -EINVAL;
        }
 
-       cmd->stats = kvcalloc(MLX5_CMD_OP_MAX, sizeof(*cmd->stats), GFP_KERNEL);
-       if (!cmd->stats)
-               return -ENOMEM;
-
        cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0);
-       if (!cmd->pool) {
-               err = -ENOMEM;
-               goto dma_pool_err;
-       }
+       if (!cmd->pool)
+               return -ENOMEM;
 
        err = alloc_cmd_page(dev, cmd);
        if (err)
@@ -2268,8 +2262,6 @@ err_free_page:
 
 err_free_pool:
        dma_pool_destroy(cmd->pool);
-dma_pool_err:
-       kvfree(cmd->stats);
        return err;
 }
 
@@ -2282,7 +2274,6 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
        destroy_msg_cache(dev);
        free_cmd_page(dev, cmd);
        dma_pool_destroy(cmd->pool);
-       kvfree(cmd->stats);
 }
 
 void mlx5_cmd_set_state(struct mlx5_core_dev *dev,
index d476255c9a3f0d9ea7f0dcd3c8231fbcd9e30d3e..76ef2e4fde38d6da43e62f9c5a064212f8619736 100644 (file)
@@ -315,7 +315,7 @@ struct mlx5_cmd {
        struct mlx5_cmd_debug dbg;
        struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES];
        int checksum_disabled;
-       struct mlx5_cmd_stats *stats;
+       struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX];
 };
 
 struct mlx5_cmd_mailbox {