drm/nouveau/gr/gf100-: calculate and use sm mapping table
There are a number of places that require this data, so let's separate out the calculations to ensure they remain consistent. The resulting mapping is incorrect for GM200 and newer, but it will produce the same results as we did before. Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
This commit is contained in:
Родитель
d00ffc0c40
Коммит
068cae743c
|
@ -1092,23 +1092,18 @@ gf100_grctx_generate_r4060a8(struct gf100_gr *gr)
|
|||
struct nvkm_device *device = gr->base.engine.subdev.device;
|
||||
const u8 gpcmax = nvkm_rd32(device, 0x022430);
|
||||
const u8 tpcmax = nvkm_rd32(device, 0x022434) * gpcmax;
|
||||
u8 tpcnr[GPC_MAX], data[TPC_MAX];
|
||||
int gpc, tpc, i;
|
||||
int i, j, sm = 0;
|
||||
u32 data;
|
||||
|
||||
memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
|
||||
memset(data, 0x1f, sizeof(data));
|
||||
|
||||
gpc = -1;
|
||||
for (tpc = 0; tpc < gr->tpc_total; tpc++) {
|
||||
do {
|
||||
gpc = (gpc + 1) % gr->gpc_nr;
|
||||
} while (!tpcnr[gpc]);
|
||||
tpcnr[gpc]--;
|
||||
data[tpc] = gpc;
|
||||
for (i = 0; i < DIV_ROUND_UP(tpcmax, 4); i++) {
|
||||
for (data = 0, j = 0; j < 4; j++) {
|
||||
if (sm < gr->sm_nr)
|
||||
data |= gr->sm[sm++].gpc << (j * 8);
|
||||
else
|
||||
data |= 0x1f << (j * 8);
|
||||
}
|
||||
nvkm_wr32(device, 0x4060a8 + (i * 4), data);
|
||||
}
|
||||
|
||||
for (i = 0; i < DIV_ROUND_UP(tpcmax, 4); i++)
|
||||
nvkm_wr32(device, 0x4060a8 + (i * 4), ((u32 *)data)[i]);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1326,16 +1321,13 @@ gf100_grctx_generate_floorsweep(struct gf100_gr *gr)
|
|||
{
|
||||
struct nvkm_device *device = gr->base.engine.subdev.device;
|
||||
const struct gf100_grctx_func *func = gr->func->grctx;
|
||||
int tpc, gpc, sm, i, j;
|
||||
int gpc, sm, i, j;
|
||||
u32 data;
|
||||
|
||||
for (tpc = 0, sm = 0; tpc < gr->tpc_max; tpc++) {
|
||||
for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
|
||||
if (tpc < gr->tpc_nr[gpc])
|
||||
func->sm_id(gr, gpc, tpc, sm++);
|
||||
if (func->tpc_nr)
|
||||
func->tpc_nr(gr, gpc);
|
||||
}
|
||||
for (sm = 0; sm < gr->sm_nr; sm++) {
|
||||
func->sm_id(gr, gr->sm[sm].gpc, gr->sm[sm].tpc, sm);
|
||||
if (func->tpc_nr)
|
||||
func->tpc_nr(gr, gr->sm[sm].gpc);
|
||||
}
|
||||
|
||||
for (gpc = 0, i = 0; i < 4; i++) {
|
||||
|
|
|
@ -49,23 +49,13 @@ gm200_grctx_generate_smid_config(struct gf100_gr *gr)
|
|||
const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
|
||||
u32 dist[TPC_MAX / 4] = {};
|
||||
u32 gpcs[GPC_MAX] = {};
|
||||
u8 tpcnr[GPC_MAX];
|
||||
int tpc, gpc, i;
|
||||
u8 sm, i;
|
||||
|
||||
memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
|
||||
|
||||
/* won't result in the same distribution as the binary driver where
|
||||
* some of the gpcs have more tpcs than others, but this shall do
|
||||
* for the moment. the code for earlier gpus has this issue too.
|
||||
*/
|
||||
for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
|
||||
do {
|
||||
gpc = (gpc + 1) % gr->gpc_nr;
|
||||
} while(!tpcnr[gpc]);
|
||||
tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
|
||||
|
||||
dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
|
||||
gpcs[gpc] |= i << (tpc * 8);
|
||||
for (sm = 0; sm < gr->sm_nr; sm++) {
|
||||
const u8 gpc = gr->sm[sm].gpc;
|
||||
const u8 tpc = gr->sm[sm].tpc;
|
||||
dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8);
|
||||
gpcs[gpc] |= sm << (tpc * 8);
|
||||
}
|
||||
|
||||
for (i = 0; i < dist_nr; i++)
|
||||
|
|
|
@ -95,23 +95,13 @@ gp100_grctx_generate_smid_config(struct gf100_gr *gr)
|
|||
struct nvkm_device *device = gr->base.engine.subdev.device;
|
||||
const u32 dist_nr = DIV_ROUND_UP(gr->tpc_total, 4);
|
||||
u32 dist[TPC_MAX / 4] = {}, gpcs[16] = {};
|
||||
u8 tpcnr[GPC_MAX];
|
||||
int tpc, gpc, i;
|
||||
u8 sm, i;
|
||||
|
||||
memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
|
||||
|
||||
/* won't result in the same distribution as the binary driver where
|
||||
* some of the gpcs have more tpcs than others, but this shall do
|
||||
* for the moment. the code for earlier gpus has this issue too.
|
||||
*/
|
||||
for (gpc = -1, i = 0; i < gr->tpc_total; i++) {
|
||||
do {
|
||||
gpc = (gpc + 1) % gr->gpc_nr;
|
||||
} while(!tpcnr[gpc]);
|
||||
tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
|
||||
|
||||
dist[i / 4] |= ((gpc << 4) | tpc) << ((i % 4) * 8);
|
||||
gpcs[gpc + (gr->func->gpc_nr * (tpc / 4))] |= i << (tpc * 8);
|
||||
for (sm = 0; sm < gr->sm_nr; sm++) {
|
||||
const u8 gpc = gr->sm[sm].gpc;
|
||||
const u8 tpc = gr->sm[sm].tpc;
|
||||
dist[sm / 4] |= ((gpc << 4) | tpc) << ((sm % 4) * 8);
|
||||
gpcs[gpc + (gr->func->gpc_nr * (tpc / 4))] |= sm << ((tpc % 4) * 8);
|
||||
}
|
||||
|
||||
for (i = 0; i < dist_nr; i++)
|
||||
|
|
|
@ -1652,6 +1652,21 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
|
|||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Fill in the SM mapping table (gr->sm[]) and its entry count (gr->sm_nr).
 *
 * The table is built by walking TPC indices in the outer loop and GPC
 * indices in the inner loop, so consecutive entries cycle across the GPCs
 * before moving on to the next TPC index.  One { gpc, tpc } entry is
 * appended for every GPC that has a TPC at the current index.
 *
 * Entries are appended starting at the current value of gr->sm_nr; this
 * function does not reset the counter itself.
 */
void
|
||||
gf100_gr_oneinit_sm_id(struct gf100_gr *gr)
|
||||
{
|
||||
int tpc, gpc;
|
||||
for (tpc = 0; tpc < gr->tpc_max; tpc++) {
|
||||
for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
|
||||
/* Only GPCs whose TPC count exceeds this index contribute an entry. */
if (tpc < gr->tpc_nr[gpc]) {
|
||||
gr->sm[gr->sm_nr].gpc = gpc;
|
||||
gr->sm[gr->sm_nr].tpc = tpc;
|
||||
gr->sm_nr++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
gf100_gr_oneinit_tiles(struct gf100_gr *gr)
|
||||
{
|
||||
|
@ -1769,6 +1784,7 @@ gf100_gr_oneinit(struct nvkm_gr *base)
|
|||
|
||||
memset(gr->tile, 0xff, sizeof(gr->tile));
|
||||
gr->func->oneinit_tiles(gr);
|
||||
gr->func->oneinit_sm_id(gr);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -2204,6 +2220,7 @@ gf100_gr_gpccs_ucode = {
|
|||
static const struct gf100_gr_func
|
||||
gf100_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
|
||||
|
|
|
@ -110,6 +110,12 @@ struct gf100_gr {
|
|||
u8 screen_tile_row_offset;
|
||||
u8 tile[TPC_MAX];
|
||||
|
||||
struct {
|
||||
u8 gpc;
|
||||
u8 tpc;
|
||||
} sm[TPC_MAX];
|
||||
u8 sm_nr;
|
||||
|
||||
struct gf100_gr_data mmio_data[4];
|
||||
struct gf100_gr_mmio mmio_list[4096/8];
|
||||
u32 size;
|
||||
|
@ -125,6 +131,7 @@ void *gf100_gr_dtor(struct nvkm_gr *);
|
|||
struct gf100_gr_func {
|
||||
void (*dtor)(struct gf100_gr *);
|
||||
void (*oneinit_tiles)(struct gf100_gr *);
|
||||
void (*oneinit_sm_id)(struct gf100_gr *);
|
||||
int (*init)(struct gf100_gr *);
|
||||
void (*init_gpc_mmu)(struct gf100_gr *);
|
||||
void (*init_r405a14)(struct gf100_gr *);
|
||||
|
@ -167,6 +174,7 @@ struct gf100_gr_func {
|
|||
|
||||
int gf100_gr_rops(struct gf100_gr *);
|
||||
void gf100_gr_oneinit_tiles(struct gf100_gr *);
|
||||
void gf100_gr_oneinit_sm_id(struct gf100_gr *);
|
||||
int gf100_gr_init(struct gf100_gr *);
|
||||
void gf100_gr_init_vsc_stream_master(struct gf100_gr *);
|
||||
void gf100_gr_init_zcull(struct gf100_gr *);
|
||||
|
@ -195,6 +203,7 @@ void gm107_gr_init_400054(struct gf100_gr *);
|
|||
int gk20a_gr_init(struct gf100_gr *);
|
||||
|
||||
void gm200_gr_oneinit_tiles(struct gf100_gr *);
|
||||
void gm200_gr_oneinit_sm_id(struct gf100_gr *);
|
||||
int gm200_gr_rops(struct gf100_gr *);
|
||||
void gm200_gr_init_num_active_ltcs(struct gf100_gr *);
|
||||
void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *);
|
||||
|
|
|
@ -115,6 +115,7 @@ gf104_gr_pack_mmio[] = {
|
|||
static const struct gf100_gr_func
|
||||
gf104_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
|
||||
|
|
|
@ -112,6 +112,7 @@ gf108_gr_init_r405a14(struct gf100_gr *gr)
|
|||
static const struct gf100_gr_func
|
||||
gf108_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_r405a14 = gf108_gr_init_r405a14,
|
||||
|
|
|
@ -87,6 +87,7 @@ gf110_gr_pack_mmio[] = {
|
|||
static const struct gf100_gr_func
|
||||
gf110_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
|
||||
|
|
|
@ -151,6 +151,7 @@ gf117_gr_init_zcull(struct gf100_gr *gr)
|
|||
static const struct gf100_gr_func
|
||||
gf117_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
|
||||
|
|
|
@ -178,6 +178,7 @@ gf119_gr_pack_mmio[] = {
|
|||
static const struct gf100_gr_func
|
||||
gf119_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gf100_gr_init_vsc_stream_master,
|
||||
|
|
|
@ -449,6 +449,7 @@ gk104_gr_gpccs_ucode = {
|
|||
static const struct gf100_gr_func
|
||||
gk104_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
|
|
@ -351,6 +351,7 @@ gk110_gr_init_419eb4(struct gf100_gr *gr)
|
|||
static const struct gf100_gr_func
|
||||
gk110_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
|
|
@ -103,6 +103,7 @@ gk110b_gr_pack_mmio[] = {
|
|||
static const struct gf100_gr_func
|
||||
gk110b_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
|
|
@ -162,6 +162,7 @@ gk208_gr_gpccs_ucode = {
|
|||
static const struct gf100_gr_func
|
||||
gk208_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gf100_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
|
|
@ -283,6 +283,7 @@ gk20a_gr_init(struct gf100_gr *gr)
|
|||
static const struct gf100_gr_func
|
||||
gk20a_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
|
||||
.init = gk20a_gr_init,
|
||||
.init_zcull = gf117_gr_init_zcull,
|
||||
.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
|
||||
|
|
|
@ -392,6 +392,7 @@ gm107_gr_gpccs_ucode = {
|
|||
static const struct gf100_gr_func
|
||||
gm107_gr = {
|
||||
.oneinit_tiles = gf100_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gf100_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gm107_gr_init_gpc_mmu,
|
||||
.init_bios = gm107_gr_init_bios,
|
||||
|
|
|
@ -92,6 +92,13 @@ gm200_gr_tile_map_2_8[] = {
|
|||
0, 1, 1, 0, 0, 1, 1, 0,
|
||||
};
|
||||
|
||||
/*
 * GM200 variant of the SM mapping table setup.
 *
 * Currently a thin wrapper: it builds the table by calling
 * gf100_gr_oneinit_sm_id() unchanged.
 */
void
|
||||
gm200_gr_oneinit_sm_id(struct gf100_gr *gr)
|
||||
{
|
||||
/*XXX: There's a different algorithm here I've not yet figured out. */
|
||||
gf100_gr_oneinit_sm_id(gr);
|
||||
}
|
||||
|
||||
void
|
||||
gm200_gr_oneinit_tiles(struct gf100_gr *gr)
|
||||
{
|
||||
|
@ -158,6 +165,7 @@ gm200_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
|
|||
static const struct gf100_gr_func
|
||||
gm200_gr = {
|
||||
.oneinit_tiles = gm200_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gm200_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
|
||||
.init_bios = gm107_gr_init_bios,
|
||||
|
|
|
@ -65,6 +65,7 @@ gm20b_gr_set_hww_esr_report_mask(struct gf100_gr *gr)
|
|||
static const struct gf100_gr_func
|
||||
gm20b_gr = {
|
||||
.oneinit_tiles = gm200_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gm200_gr_oneinit_sm_id,
|
||||
.init = gk20a_gr_init,
|
||||
.init_zcull = gf117_gr_init_zcull,
|
||||
.init_gpc_mmu = gm20b_gr_init_gpc_mmu,
|
||||
|
|
|
@ -65,6 +65,7 @@ gp100_gr_init_rop_active_fbps(struct gf100_gr *gr)
|
|||
static const struct gf100_gr_func
|
||||
gp100_gr = {
|
||||
.oneinit_tiles = gm200_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gm200_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
|
|
@ -43,6 +43,7 @@ gp102_gr_init_swdx_pes_mask(struct gf100_gr *gr)
|
|||
static const struct gf100_gr_func
|
||||
gp102_gr = {
|
||||
.oneinit_tiles = gm200_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gm200_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
|
|
@ -27,6 +27,7 @@
|
|||
static const struct gf100_gr_func
|
||||
gp104_gr = {
|
||||
.oneinit_tiles = gm200_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gm200_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
static const struct gf100_gr_func
|
||||
gp107_gr = {
|
||||
.oneinit_tiles = gm200_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gm200_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
static const struct gf100_gr_func
|
||||
gp10b_gr = {
|
||||
.oneinit_tiles = gm200_gr_oneinit_tiles,
|
||||
.oneinit_sm_id = gm200_gr_oneinit_sm_id,
|
||||
.init = gf100_gr_init,
|
||||
.init_gpc_mmu = gm200_gr_init_gpc_mmu,
|
||||
.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
|
||||
|
|
Загрузка…
Ссылка в новой задаче