From 60770fa28bd7d69097d3a186fe8cfa1ec21c9c1d Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Tue, 8 May 2018 20:39:46 +1000 Subject: [PATCH] drm/nouveau/gr/gf100-: virtualise dist_skip_table + improve algorithm The algorithm for GM200 and newer matches RM for all the boards I have, but I don't have enough data to try and figure something out for earlier boards, so these will still write zeroes to the table as we did before. The code in NVGPU isn't helpful here, it appears to handle specific cases. Signed-off-by: Ben Skeggs --- .../gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c | 2 ++ .../gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h | 3 +++ .../gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c | 15 ++++++++--- .../gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c | 5 +--- .../gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c | 1 + .../drm/nouveau/nvkm/engine/gr/ctxgk110b.c | 1 + .../gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c | 1 + .../gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c | 5 +--- .../gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c | 25 +++++++++++++++++-- .../gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c | 3 +-- .../gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c | 1 + .../gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c | 1 + .../gpu/drm/nouveau/nvkm/engine/gr/gf100.c | 3 +++ .../gpu/drm/nouveau/nvkm/engine/gr/gf100.h | 1 + 14 files changed, 51 insertions(+), 16 deletions(-) diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c index cdf74f31d4be9..176be7124f290 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c @@ -1360,6 +1360,8 @@ gf100_grctx_generate_floorsweep(struct gf100_gr *gr) func->alpha_beta_tables(gr); if (func->max_ways_evict) func->max_ways_evict(gr); + if (func->dist_skip_table) + func->dist_skip_table(gr); } void diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h index 41cb875464de9..dd1c73b725cde 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h @@ -57,6 +57,7 @@ struct gf100_grctx_func { void (*rop_mapping)(struct gf100_gr *); void (*alpha_beta_tables)(struct gf100_gr *); void (*max_ways_evict)(struct gf100_gr *); + void (*dist_skip_table)(struct gf100_gr *); }; extern const struct gf100_grctx_func gf100_grctx; @@ -84,6 +85,7 @@ extern const struct gf100_grctx_func gf110_grctx; extern const struct gf100_grctx_func gf117_grctx; void gf117_grctx_generate_attrib(struct gf100_grctx *); void gf117_grctx_generate_rop_mapping(struct gf100_gr *); +void gf117_grctx_generate_dist_skip_table(struct gf100_gr *); extern const struct gf100_grctx_func gf119_grctx; @@ -112,6 +114,7 @@ void gm107_grctx_generate_pagepool(struct gf100_grctx *); void gm107_grctx_generate_attrib(struct gf100_grctx *); extern const struct gf100_grctx_func gm200_grctx; +void gm200_grctx_generate_dist_skip_table(struct gf100_gr *); void gm200_grctx_generate_405b60(struct gf100_gr *); extern const struct gf100_grctx_func gm20b_grctx; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c index 423b09753bb7d..b3f4127f7520f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c @@ -179,6 +179,16 @@ gf117_grctx_pack_ppc[] = { * PGRAPH context implementation ******************************************************************************/ +void +gf117_grctx_generate_dist_skip_table(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + int i; + + for (i = 0; i < 8; i++) + nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); +} + void gf117_grctx_generate_rop_mapping(struct gf100_gr *gr) { @@ -282,7 +292,6 @@ gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_grctx_func *grctx = gr->func->grctx; u32 idle_timeout; - int i; nvkm_mc_unk260(device, 0); @@ -301,9 +310,6 @@ gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_grctx_generate_floorsweep(gr); - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - gf100_gr_icmd(gr, grctx->icmd); nvkm_wr32(device, 0x404154, idle_timeout); gf100_gr_mthd(gr, grctx->mthd); @@ -336,4 +342,5 @@ gf117_grctx = { .rop_mapping = gf117_grctx_generate_rop_mapping, .alpha_beta_tables = gf100_grctx_generate_alpha_beta_tables, .max_ways_evict = gf100_grctx_generate_max_ways_evict, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c index 25576c1ea9cc1..12169314f3e23 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c @@ -898,7 +898,6 @@ gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_grctx_func *grctx = gr->func->grctx; u32 idle_timeout; - int i; nvkm_mc_unk260(device, 0); @@ -917,9 +916,6 @@ gk104_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_grctx_generate_floorsweep(gr); - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); - nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); nvkm_mask(device, 0x419f78, 0x00000001, 0x00000000); @@ -1006,4 +1002,5 @@ gk104_grctx = { .tpc_nr = gf100_grctx_generate_tpc_nr, .rop_mapping = gf117_grctx_generate_rop_mapping, .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c index 284570a0b5cc1..e6a54dc1a01a9 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c @@ -835,4 +835,5 @@ gk110_grctx = { .tpc_nr = gf100_grctx_generate_tpc_nr, .rop_mapping = gf117_grctx_generate_rop_mapping, .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c index ffd8cf989309b..ef82ebee82c9f 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c @@ -96,4 +96,5 @@ gk110b_grctx = { .tpc_nr = gf100_grctx_generate_tpc_nr, .rop_mapping = gf117_grctx_generate_rop_mapping, .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c index e5e4d4dce86e9..226f8aa9e7f68 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c @@ -557,4 +557,5 @@ gk208_grctx = { .tpc_nr = gf100_grctx_generate_tpc_nr, .rop_mapping = gf117_grctx_generate_rop_mapping, .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c index c209bf38b5d99..cdf9d60683e01 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c @@ -945,7 +945,6 @@ gm107_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) struct nvkm_device *device = gr->base.engine.subdev.device; const struct gf100_grctx_func *grctx = gr->func->grctx; u32 idle_timeout; - int i; gf100_gr_mmio(gr, grctx->hub); gf100_gr_mmio(gr, grctx->gpc); @@ -962,9 +961,6 @@ gm107_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_grctx_generate_floorsweep(gr); - nvkm_wr32(device, 0x4064d0, 0x00000001); - for (i = 1; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); nvkm_wr32(device, 0x406500, 0x00000001); nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); @@ -1005,4 +1001,5 @@ gm107_grctx = { .tpc_nr = gf100_grctx_generate_tpc_nr, .rop_mapping = gf117_grctx_generate_rop_mapping, .alpha_beta_tables = gk104_grctx_generate_alpha_beta_tables, + .dist_skip_table = gf117_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c index cfccd75dbc301..689120683fb47 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c @@ -78,8 +78,6 @@ gm200_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_grctx_generate_floorsweep(gr); - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); nvkm_wr32(device, 0x406500, 0x00000000); nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); @@ -98,6 +96,28 @@ gm200_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) nvkm_mask(device, 0x418e4c, 0xffffffff, 0x70000000); } +void +gm200_grctx_generate_dist_skip_table(struct gf100_gr *gr) +{ + struct nvkm_device *device = gr->base.engine.subdev.device; + u32 data[8] = {}; + int gpc, ppc, i; + + for (gpc = 0; gpc < gr->gpc_nr; gpc++) { + for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) { + u8 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc]; + u8 ppc_tpcm = gr->ppc_tpc_mask[gpc][ppc]; + while (ppc_tpcs-- > gr->ppc_tpc_min) + ppc_tpcm &= ppc_tpcm - 1; + ppc_tpcm ^= gr->ppc_tpc_mask[gpc][ppc]; + ((u8 *)data)[gpc] |= ppc_tpcm; + } + } + + for (i = 0; i < ARRAY_SIZE(data); i++) + nvkm_wr32(device, 0x4064d0 + (i * 0x04), data[i]); +} + const struct gf100_grctx_func gm200_grctx = { .main = gm200_grctx_generate_main, @@ -115,4 +135,5 @@ gm200_grctx = { .alpha_nr = 0x1000, .sm_id = gm107_grctx_generate_sm_id, .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c index e09990785cb93..1a3d0c566fea4 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c @@ -140,8 +140,6 @@ gp100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info) gf100_grctx_generate_floorsweep(gr); - for (i = 0; i < 8; i++) - nvkm_wr32(device, 0x4064d0 + (i * 0x04), 0x00000000); nvkm_wr32(device, 0x406500, 0x00000000); nvkm_wr32(device, 0x405b00, (gr->tpc_total << 8) | gr->gpc_nr); @@ -174,4 +172,5 @@ gp100_grctx = { .alpha_nr = 0x800, .sm_id = gm107_grctx_generate_sm_id, .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c index 553a609c4f98b..2aeabb3624470 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c @@ -96,4 +96,5 @@ gp102_grctx = { .alpha_nr = 0x800, .sm_id = gm107_grctx_generate_sm_id, .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c index db3fff89bc2f1..4aea2f6552cc1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c @@ -46,4 +46,5 @@ gp107_grctx = { .alpha_nr = 0x800, .sm_id = gm107_grctx_generate_sm_id, .rop_mapping = gf117_grctx_generate_rop_mapping, + .dist_skip_table = gm200_grctx_generate_dist_skip_table, }; diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c index fe3b44d18a671..dd4a4104306c1 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c @@ -1685,6 +1685,9 @@ gf100_gr_oneinit(struct nvkm_gr *base) continue; gr->ppc_mask[i] |= (1 << j); gr->ppc_tpc_nr[i][j] = hweight8(gr->ppc_tpc_mask[i][j]); + if (gr->ppc_tpc_min == 0 || + gr->ppc_tpc_min > gr->ppc_tpc_nr[i][j]) + gr->ppc_tpc_min = gr->ppc_tpc_nr[i][j]; } } diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h index 6f7a7864d66f2..c2a1b2adff360 100644 --- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h +++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h @@ -105,6 +105,7 @@ struct gf100_gr { u8 ppc_mask[GPC_MAX]; u8 ppc_tpc_mask[GPC_MAX][4]; u8 ppc_tpc_nr[GPC_MAX][4]; + u8 ppc_tpc_min; struct gf100_gr_data mmio_data[4]; struct gf100_gr_mmio mmio_list[4096/8]; -- 2.30.2