drm/amdgpu: xgmi_fill_topology_info
author Vignesh Chander <Vignesh.Chander@amd.com>
Fri, 8 Dec 2023 18:00:26 +0000 (12:00 -0600)
committer Alex Deucher <alexander.deucher@amd.com>
Wed, 13 Dec 2023 20:09:55 +0000 (15:09 -0500)
1. Use the mirrored topology info to fill links for VF.
The new solution is required to simplify and optimize host driver logic.
Only use the new solution for VFs that support full duplex and
extended_peer_link_info; otherwise, the info would be incomplete.

2. Avoid calling extended_link_info on VF, as it's not supported.

Signed-off-by: Vignesh Chander <Vignesh.Chander@amd.com>
Reviewed-by: Zhigang Luo <zhigang.luo@amd.com>
Reviewed-by: Jonathan Kim <jonathan.kim@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c

index a21045d018f2b6efe6a281c1cec74fd25357c064..1bf975b8d083edb68a3790ac74c76b66d430bd00 100644 (file)
@@ -1433,8 +1433,8 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
                         get_extended_data) ||
                        amdgpu_ip_version(psp->adev, MP0_HWIP, 0) ==
                                IP_VERSION(13, 0, 6);
-               bool ta_port_num_support = psp->xgmi_context.xgmi_ta_caps &
-                                               EXTEND_PEER_LINK_INFO_CMD_FLAG;
+               bool ta_port_num_support = amdgpu_sriov_vf(psp->adev) ? 0 :
+                               psp->xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG;
 
                /* popluate the shared output buffer rather than the cmd input buffer
                 * with node_ids as the input for GET_PEER_LINKS command execution.
index 44d8c1a11e1b3c83f0d33effe02fbebcaeda4f07..9a95b9f226b85a5c711cd4f21e28dc36021611dd 100644 (file)
@@ -823,6 +823,28 @@ static int amdgpu_xgmi_initialize_hive_get_data_partition(struct amdgpu_hive_inf
        return 0;
 }
 
+static void amdgpu_xgmi_fill_topology_info(struct amdgpu_device *adev,
+       struct amdgpu_device *peer_adev)
+{
+       struct psp_xgmi_topology_info *top_info = &adev->psp.xgmi_context.top_info;
+       struct psp_xgmi_topology_info *peer_info = &peer_adev->psp.xgmi_context.top_info;
+
+       for (int i = 0; i < peer_info->num_nodes; i++) {
+               if (peer_info->nodes[i].node_id == adev->gmc.xgmi.node_id) {
+                       for (int j = 0; j < top_info->num_nodes; j++) {
+                               if (top_info->nodes[j].node_id == peer_adev->gmc.xgmi.node_id) {
+                                       peer_info->nodes[i].num_hops = top_info->nodes[j].num_hops;
+                                       peer_info->nodes[i].is_sharing_enabled =
+                                                       top_info->nodes[j].is_sharing_enabled;
+                                       peer_info->nodes[i].num_links =
+                                                       top_info->nodes[j].num_links;
+                                       return;
+                               }
+                       }
+               }
+       }
+}
+
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 {
        struct psp_xgmi_topology_info *top_info;
@@ -897,18 +919,38 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
                                goto exit_unlock;
                }
 
-               /* get latest topology info for each device from psp */
-               list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
-                       ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
-                                       &tmp_adev->psp.xgmi_context.top_info, false);
+               if (amdgpu_sriov_vf(adev) &&
+                       adev->psp.xgmi_context.xgmi_ta_caps & EXTEND_PEER_LINK_INFO_CMD_FLAG) {
+                       /* only get topology for VF being init if it can support full duplex */
+                       ret = psp_xgmi_get_topology_info(&adev->psp, count,
+                                               &adev->psp.xgmi_context.top_info, false);
                        if (ret) {
-                               dev_err(tmp_adev->dev,
+                               dev_err(adev->dev,
                                        "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
-                                       tmp_adev->gmc.xgmi.node_id,
-                                       tmp_adev->gmc.xgmi.hive_id, ret);
-                               /* To do : continue with some node failed or disable the whole hive */
+                                       adev->gmc.xgmi.node_id,
+                                       adev->gmc.xgmi.hive_id, ret);
+                               /* To do: continue with some node failed or disable the whole hive*/
                                goto exit_unlock;
                        }
+
+                       /* fill the topology info for peers instead of getting from PSP */
+                       list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+                               amdgpu_xgmi_fill_topology_info(adev, tmp_adev);
+                       }
+               } else {
+                       /* get latest topology info for each device from psp */
+                       list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
+                               ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count,
+                                       &tmp_adev->psp.xgmi_context.top_info, false);
+                               if (ret) {
+                                       dev_err(tmp_adev->dev,
+                                               "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
+                                               tmp_adev->gmc.xgmi.node_id,
+                                               tmp_adev->gmc.xgmi.hive_id, ret);
+                                       /* To do : continue with some node failed or disable the whole hive */
+                                       goto exit_unlock;
+                               }
+                       }
                }
 
                /* get topology again for hives that support extended data */