for (se_idx = 0; se_idx < se_cnt; se_idx++) {
                for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
 
-                       amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);
+                       amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff, 0);
                        queue_map = RREG32_SOC15(GC, 0, mmSPI_CSQ_WF_ACTIVE_STATUS);
 
                        /*
                }
        }
 
-       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        soc15_grbm_select(adev, 0, 0, 0, 0, 0);
        unlock_spi_csq_mutexes(adev);
 
 
                }
                mutex_lock(&adev->grbm_idx_mutex);
                amdgpu_gfx_select_se_sh(adev, se_bank,
-                                       sh_bank, instance_bank);
+                                       sh_bank, instance_bank, 0);
        } else if (use_ring) {
                mutex_lock(&adev->srbm_mutex);
                amdgpu_gfx_select_me_pipe_q(adev, me, pipe, queue, vmid);
 
 end:
        if (use_bank) {
-               amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+               amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
                mutex_unlock(&adev->grbm_idx_mutex);
        } else if (use_ring) {
                amdgpu_gfx_select_me_pipe_q(adev, 0, 0, 0, 0);
                mutex_lock(&adev->grbm_idx_mutex);
                amdgpu_gfx_select_se_sh(adev, rd->id.grbm.se,
                                                                rd->id.grbm.sh,
-                                                               rd->id.grbm.instance);
+                                                               rd->id.grbm.instance, 0);
        }
 
        if (rd->id.use_srbm) {
        }
 end:
        if (rd->id.use_grbm) {
-               amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+               amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
                mutex_unlock(&adev->grbm_idx_mutex);
        }
 
 
        /* switch to the specific se/sh/cu */
        mutex_lock(&adev->grbm_idx_mutex);
-       amdgpu_gfx_select_se_sh(adev, se, sh, cu);
+       amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
 
        x = 0;
        if (adev->gfx.funcs->read_wave_data)
                adev->gfx.funcs->read_wave_data(adev, simd, wave, data, &x);
 
-       amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+       amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
 
        /* switch to the specific se/sh/cu */
        mutex_lock(&adev->grbm_idx_mutex);
-       amdgpu_gfx_select_se_sh(adev, se, sh, cu);
+       amdgpu_gfx_select_se_sh(adev, se, sh, cu, 0);
 
        if (bank == 0) {
                if (adev->gfx.funcs->read_wave_vgprs)
                        adev->gfx.funcs->read_wave_sgprs(adev, simd, wave, offset, size>>2, data);
        }
 
-       amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
+       amdgpu_gfx_select_se_sh(adev, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
 
        /* get the gpu clock counter */
        uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
        void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num,
-                            u32 sh_num, u32 instance);
+                            u32 sh_num, u32 instance, int xcc_id);
        void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd,
                               uint32_t wave, uint32_t *dst, int *no_fields);
        void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd,
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
-#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))
+#define amdgpu_gfx_select_se_sh(adev, se, sh, instance, xcc_id) ((adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance), (xcc_id)))
 #define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q, vmid) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q), (vmid))
 #define amdgpu_gfx_init_spm_golden(adev) (adev)->gfx.funcs->init_spm_golden((adev))
 
 
 
                mutex_lock(&adev->grbm_idx_mutex);
                if (se_num != 0xffffffff || sh_num != 0xffffffff)
-                       amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+                       amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
 
                val = RREG32(reg_offset);
 
                if (se_num != 0xffffffff || sh_num != 0xffffffff)
-                       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+                       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
                mutex_unlock(&adev->grbm_idx_mutex);
                return val;
        } else {
 
                                 struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v10_0_get_gpu_clock_counter(struct amdgpu_device *adev);
 static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
-                                  u32 sh_num, u32 instance);
+                                  u32 sh_num, u32 instance, int xcc_id);
 static u32 gfx_v10_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
 
 static int gfx_v10_0_rlc_backdoor_autoload_buffer_init(struct amdgpu_device *adev);
 }
 
 static void gfx_v10_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
-                                  u32 sh_num, u32 instance)
+                                  u32 sh_num, u32 instance, int xcc_id)
 {
        u32 data;
 
                                (adev->ip_versions[GC_HWIP][0] == IP_VERSION(10, 3, 6))) &&
                            ((gfx_v10_3_get_disabled_sa(adev) >> bitmap) & 1))
                                continue;
-                       gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        data = gfx_v10_0_get_rb_active_bitmap(adev);
                        active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
                                               rb_bitmap_width_per_sh);
                }
        }
-       gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        adev->gfx.config.backend_enable_mask = active_rbs;
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-                       gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        wgp_active_bitmap = gfx_v10_0_get_wgp_active_bitmap_per_sh(adev);
                        /*
                         * Set corresponding TCP bits for the inactive WGPs in
                }
        }
 
-       gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 }
 
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
-                       gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v10_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        if (i < 4 && j < 2)
                                gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(
                                        adev, disable_masks[i * 2 + j]);
                        cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
                }
        }
-       gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v10_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        cu_info->number = active_cu_number;
 
                                  struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev);
 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
-                                  u32 sh_num, u32 instance);
+                                  u32 sh_num, u32 instance, int xcc_id);
 static u32 gfx_v11_0_get_wgp_active_bitmap_per_sh(struct amdgpu_device *adev);
 
 static void gfx_v11_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 }
 
 static void gfx_v11_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
-                                  u32 sh_num, u32 instance)
+                                  u32 sh_num, u32 instance, int xcc_id)
 {
        u32 data;
 
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
                        mask = 1;
                        counter = 0;
-                       gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v11_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        if (i < 8 && j < 2)
                                gfx_v11_0_set_user_wgp_inactive_bitmap_per_sh(
                                        adev, disable_masks[i * 2 + j]);
                        active_cu_number += counter;
                }
        }
-       gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v11_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        cu_info->number = active_cu_number;
 
 }
 
 static void gfx_v6_0_select_se_sh(struct amdgpu_device *adev, u32 se_num,
-                                 u32 sh_num, u32 instance)
+                                 u32 sh_num, u32 instance, int xcc_id)
 {
        u32 data;
 
                }
 
                /* GRBM_GFX_INDEX has a different offset on SI */
-               gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
+               gfx_v6_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
                WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
        }
 
        /* GRBM_GFX_INDEX has a different offset on SI */
-       gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 }
 
 static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-                       gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        data = gfx_v6_0_get_rb_active_bitmap(adev);
                        active_rbs |= data <<
                                ((i * adev->gfx.config.max_sh_per_se + j) *
                                 rb_bitmap_width_per_sh);
                }
        }
-       gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 
        adev->gfx.config.backend_enable_mask = active_rbs;
        adev->gfx.config.num_rbs = hweight32(active_rbs);
        /* cache the values for userspace */
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-                       gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        adev->gfx.config.rb_config[i][j].rb_backend_disable =
                                RREG32(mmCC_RB_BACKEND_DISABLE);
                        adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
                                RREG32(mmPA_SC_RASTER_CONFIG);
                }
        }
-       gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 }
 
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-                       gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        data = RREG32(mmSPI_STATIC_THREAD_MGMT_3);
                        active_cu = gfx_v6_0_get_cu_enabled(adev);
 
                        }
                }
        }
-       gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 }
 
        WREG32_FIELD(RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0);
 
        if (!enable) {
-               gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+               gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
                WREG32(mmSPI_LB_CU_MASK, 0x00ff);
        }
 }
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
-                       gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        if (i < 4 && j < 2)
                                gfx_v6_0_set_user_cu_inactive_bitmap(
                                        adev, disable_masks[i * 2 + j]);
                }
        }
 
-       gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        cu_info->number = active_cu_number;
 
  * Select which SE, SH combinations to address.
  */
 static void gfx_v7_0_select_se_sh(struct amdgpu_device *adev,
-                                 u32 se_num, u32 sh_num, u32 instance)
+                                 u32 se_num, u32 sh_num, u32 instance,
+                                 int xcc_id)
 {
        u32 data;
 
                }
 
                /* GRBM_GFX_INDEX has a different offset on CI+ */
-               gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
+               gfx_v7_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
                WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
                WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
        }
 
        /* GRBM_GFX_INDEX has a different offset on CI+ */
-       gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 }
 
 /**
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-                       gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        data = gfx_v7_0_get_rb_active_bitmap(adev);
                        active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
                                               rb_bitmap_width_per_sh);
                }
        }
-       gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 
        adev->gfx.config.backend_enable_mask = active_rbs;
        adev->gfx.config.num_rbs = hweight32(active_rbs);
        /* cache the values for userspace */
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-                       gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        adev->gfx.config.rb_config[i][j].rb_backend_disable =
                                RREG32(mmCC_RB_BACKEND_DISABLE);
                        adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
                                RREG32(mmPA_SC_RASTER_CONFIG_1);
                }
        }
-       gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 }
 
         * making sure that the following register writes will be broadcasted
         * to all the shaders
         */
-       gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 
        /* XXX SH_MEM regs */
        /* where to put LDS, scratch, GPUVM in FSA64 space */
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-                       gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        for (k = 0; k < adev->usec_timeout; k++) {
                                if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
                                        break;
                        }
                }
        }
-       gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
        WREG32(mmRLC_LB_CNTR_MAX, 0x00008000);
 
        mutex_lock(&adev->grbm_idx_mutex);
-       gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        WREG32(mmRLC_LB_INIT_CU_MASK, 0xffffffff);
        WREG32(mmRLC_LB_PARAMS, 0x00600408);
        WREG32(mmRLC_LB_CNTL, 0x80000004);
                tmp = gfx_v7_0_halt_rlc(adev);
 
                mutex_lock(&adev->grbm_idx_mutex);
-               gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+               gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
                WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                tmp2 = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
                tmp = gfx_v7_0_halt_rlc(adev);
 
                mutex_lock(&adev->grbm_idx_mutex);
-               gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+               gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
                WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK |
                tmp = gfx_v7_0_halt_rlc(adev);
 
                mutex_lock(&adev->grbm_idx_mutex);
-               gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+               gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
                WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
                WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                data = RLC_SERDES_WR_CTRL__BPM_ADDR_MASK | RLC_SERDES_WR_CTRL__MGCG_OVERRIDE_1_MASK;
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
-                       gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v7_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        if (i < 4 && j < 2)
                                gfx_v7_0_set_user_cu_inactive_bitmap(
                                        adev, disable_masks[i * 2 + j]);
                        cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
                }
        }
-       gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        cu_info->number = active_cu_number;
 
 }
 
 static void gfx_v8_0_select_se_sh(struct amdgpu_device *adev,
-                                 u32 se_num, u32 sh_num, u32 instance)
+                                 u32 se_num, u32 sh_num, u32 instance,
+                                 int xcc_id)
 {
        u32 data;
 
                }
 
                /* GRBM_GFX_INDEX has a different offset on VI */
-               gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff);
+               gfx_v8_0_select_se_sh(adev, se, 0xffffffff, 0xffffffff, 0);
                WREG32(mmPA_SC_RASTER_CONFIG, raster_config_se);
                WREG32(mmPA_SC_RASTER_CONFIG_1, raster_config_1);
        }
 
        /* GRBM_GFX_INDEX has a different offset on VI */
-       gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 }
 
 static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-                       gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        data = gfx_v8_0_get_rb_active_bitmap(adev);
                        active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
                                               rb_bitmap_width_per_sh);
                }
        }
-       gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 
        adev->gfx.config.backend_enable_mask = active_rbs;
        adev->gfx.config.num_rbs = hweight32(active_rbs);
        /* cache the values for userspace */
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-                       gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        adev->gfx.config.rb_config[i][j].rb_backend_disable =
                                RREG32(mmCC_RB_BACKEND_DISABLE);
                        adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
                                RREG32(mmPA_SC_RASTER_CONFIG_1);
                }
        }
-       gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 }
 
         * making sure that the following register writes will be broadcasted
         * to all the shaders
         */
-       gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 
        WREG32(mmPA_SC_FIFO_SIZE,
                   (adev->gfx.config.sc_prim_fifo_size_frontend <<
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-                       gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        for (k = 0; k < adev->usec_timeout; k++) {
                                if (RREG32(mmRLC_SERDES_CU_MASTER_BUSY) == 0)
                                        break;
                        }
                        if (k == adev->usec_timeout) {
                                gfx_v8_0_select_se_sh(adev, 0xffffffff,
-                                                     0xffffffff, 0xffffffff);
+                                                     0xffffffff, 0xffffffff, 0);
                                mutex_unlock(&adev->grbm_idx_mutex);
                                DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
                                         i, j);
                        }
                }
        }
-       gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
 {
        uint32_t data;
 
-       gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 
        WREG32(mmRLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff);
        WREG32(mmRLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff);
                         */
                        if (from_wq) {
                                mutex_lock(&adev->grbm_idx_mutex);
-                               gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id);
+                               gfx_v8_0_select_se_sh(adev, se_id, sh_id, cu_id, 0);
 
                                sq_edc_source = REG_GET_FIELD(RREG32(mmSQ_EDC_INFO), SQ_EDC_INFO, SOURCE);
 
-                               gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+                               gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
                                mutex_unlock(&adev->grbm_idx_mutex);
                        }
 
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
-                       gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v8_0_select_se_sh(adev, i, j, 0xffffffff, 0);
                        if (i < 4 && j < 2)
                                gfx_v8_0_set_user_cu_inactive_bitmap(
                                        adev, disable_masks[i * 2 + j]);
                        cu_info->ao_cu_bitmap[i][j] = ao_bitmap;
                }
        }
-       gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        cu_info->number = active_cu_number;
 
                        mask = 1;
                        cu_bitmap = 0;
                        counter = 0;
-                       amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
+                       amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
 
                        for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
                                if (cu_info->bitmap[i][j] & mask) {
                        cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
                }
        }
-       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 }
 
 
        mutex_lock(&adev->grbm_idx_mutex);
        /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
-       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
 
        /* set mmRLC_LB_PARAMS = 0x003F_1006 */
 
        mutex_lock(&adev->grbm_idx_mutex);
        /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
-       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
 
        /* set mmRLC_LB_PARAMS = 0x003F_1006 */
 }
 
 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
-                          u32 instance)
+                          u32 instance, int xcc_id)
 {
        u32 data;
 
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-                       amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
+                       amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
                        data = gfx_v9_0_get_rb_active_bitmap(adev);
                        active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
                                               rb_bitmap_width_per_sh);
                }
        }
-       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        adev->gfx.config.backend_enable_mask = active_rbs;
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-                       amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
+                       amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
                        for (k = 0; k < adev->usec_timeout; k++) {
                                if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
                                        break;
                        }
                        if (k == adev->usec_timeout) {
                                amdgpu_gfx_select_se_sh(adev, 0xffffffff,
-                                                     0xffffffff, 0xffffffff);
+                                                     0xffffffff, 0xffffffff, 0);
                                mutex_unlock(&adev->grbm_idx_mutex);
                                DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
                                         i, j);
                        }
                }
        }
-       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
        for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
                for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
                        for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
-                               amdgpu_gfx_select_se_sh(adev, j, 0x0, k);
+                               amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0);
                                RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
                        }
                }
        for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
                for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
                        for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
-                               amdgpu_gfx_select_se_sh(adev, j, 0, k);
+                               amdgpu_gfx_select_se_sh(adev, j, 0, k, 0);
                                reg_value =
                                        RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
                                if (reg_value)
        err_data->ce_count += sec_count;
        err_data->ue_count += ded_count;
 
-       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        gfx_v9_0_query_utc_edc_status(adev, err_data);
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
-                       amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
+                       amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0);
                        gfx_v9_0_set_user_cu_inactive_bitmap(
                                adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
                        bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
                        cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
                }
        }
-       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        cu_info->number = active_cu_number;
 
 extern const struct amdgpu_ip_block_version gfx_v9_0_ip_block;
 
 void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num,
-                          u32 instance);
+                          u32 instance, int xcc_id);
 
 #endif
 
 {
        u32 tmp;
 
-       gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
 
        tmp = 0;
        tmp = REG_SET_FIELD(tmp, GC_THROTTLE_CTRL, PATTERN_MODE, 1);
 
 static void gfx_v9_4_3_select_se_sh(struct amdgpu_device *adev,
                                    u32 se_num,
                                    u32 sh_num,
-                                   u32 instance)
+                                   u32 instance,
+                                   int xcc_id)
 {
        u32 data;
 
        else
                data = REG_SET_FIELD(data, GRBM_GFX_INDEX, SH_INDEX, sh_num);
 
-       WREG32_SOC15_RLC_SHADOW_EX(reg, GC, 0, regGRBM_GFX_INDEX, data);
+       WREG32_SOC15_RLC_SHADOW_EX(reg, GC, xcc_id, regGRBM_GFX_INDEX, data);
 }
 
 static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address)
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-                       gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
                        data = gfx_v9_4_3_get_rb_active_bitmap(adev);
                        active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
                                               rb_bitmap_width_per_sh);
                }
        }
-       gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        adev->gfx.config.backend_enable_mask = active_rbs;
        mutex_lock(&adev->grbm_idx_mutex);
        for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
                for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-                       gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, xcc_id);
                        for (k = 0; k < adev->usec_timeout; k++) {
                                if (RREG32_SOC15(GC, 0, regRLC_SERDES_CU_MASTER_BUSY) == 0)
                                        break;
                        }
                        if (k == adev->usec_timeout) {
                                gfx_v9_4_3_select_se_sh(adev, 0xffffffff,
-                                                     0xffffffff, 0xffffffff);
+                                                       0xffffffff, 0xffffffff,
+                                                       xcc_id);
                                mutex_unlock(&adev->grbm_idx_mutex);
                                DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
                                         i, j);
                        }
                }
        }
-       gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, xcc_id);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
                        mask = 1;
                        ao_bitmap = 0;
                        counter = 0;
-                       gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff);
+                       gfx_v9_4_3_select_se_sh(adev, i, j, 0xffffffff, 0);
                        gfx_v9_4_3_set_user_cu_inactive_bitmap(
                                adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
                        bitmap = gfx_v9_4_3_get_cu_active_bitmap(adev);
                        cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
                }
        }
-       gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+       gfx_v9_4_3_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
 
        cu_info->number = active_cu_number;
 
 
        mutex_lock(&adev->grbm_idx_mutex);
        if (se_num != 0xffffffff || sh_num != 0xffffffff)
-               amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+               amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
 
        val = RREG32(reg_offset);
 
        if (se_num != 0xffffffff || sh_num != 0xffffffff)
-               amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+               amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
        return val;
 }
 
 
                mutex_lock(&adev->grbm_idx_mutex);
                if (se_num != 0xffffffff || sh_num != 0xffffffff)
-                       amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+                       amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
 
                val = RREG32(reg_offset);
 
                if (se_num != 0xffffffff || sh_num != 0xffffffff)
-                       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+                       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
                mutex_unlock(&adev->grbm_idx_mutex);
                return val;
        } else {
 
 
        mutex_lock(&adev->grbm_idx_mutex);
        if (se_num != 0xffffffff || sh_num != 0xffffffff)
-               amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+               amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
 
        val = RREG32(reg_offset);
 
        if (se_num != 0xffffffff || sh_num != 0xffffffff)
-               amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+               amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
        return val;
 }
 
 
        mutex_lock(&adev->grbm_idx_mutex);
        if (se_num != 0xffffffff || sh_num != 0xffffffff)
-               amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+               amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
 
        val = RREG32(reg_offset);
 
        if (se_num != 0xffffffff || sh_num != 0xffffffff)
-               amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+               amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
        mutex_unlock(&adev->grbm_idx_mutex);
        return val;
 }
 
 
                mutex_lock(&adev->grbm_idx_mutex);
                if (se_num != 0xffffffff || sh_num != 0xffffffff)
-                       amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+                       amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff, 0);
 
                val = RREG32(reg_offset);
 
                if (se_num != 0xffffffff || sh_num != 0xffffffff)
-                       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+                       amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0);
                mutex_unlock(&adev->grbm_idx_mutex);
                return val;
        } else {