drm/amdgpu: implement CONTEXT_CONTROL (v5)
v1: for gfx8, use the CONTEXT_CONTROL packet to dynamically skip the preamble CE IB and other load_xxx commands in the sequence.
v2: support GFX7 as well; remove cntxcntl from the compute ring funcs because the CPC doesn't support this packet.
v3: fix redundant judgement in cntxcntl.
v4: some cleanups; don't change cs_submit().
v5: keep old MESA supported & bump the KMS version.

Signed-off-by: Monk Liu <Monk.Liu@amd.com>
Acked-by: Chunming Zhou <David1.Zhou@amd.com>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Parent: 1db422de72
Commit: 753ad49c9f
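For context, a userspace driver opts into this mechanism by tagging its state-setup IB as a preamble in the CS ioctl; the kernel side below then decides via CONTEXT_CONTROL whether that IB needs to be replayed. A minimal sketch, assuming the standard amdgpu uapi chunk layout from amdgpu_drm.h (illustrative only, not part of this patch):

#include <stdint.h>
#include <drm/amdgpu_drm.h>

/* Mark an IB as a preamble so the KMD may drop it when the ring
 * stays on the same context (see amdgpu_ib_schedule() below).
 */
static void fill_preamble_ib(struct drm_amdgpu_cs_chunk_ib *ib_chunk,
                             uint64_t va, uint32_t size_dw)
{
        ib_chunk->va_start = va;            /* GPU VA of the IB */
        ib_chunk->ib_bytes = size_dw * 4;   /* IB size in bytes */
        ib_chunk->ip_type  = AMDGPU_HW_IP_GFX;
        ib_chunk->flags    = AMDGPU_IB_FLAG_PREAMBLE;
}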
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -320,6 +320,7 @@ struct amdgpu_ring_funcs {
 	void (*begin_use)(struct amdgpu_ring *ring);
 	void (*end_use)(struct amdgpu_ring *ring);
 	void (*emit_switch_buffer) (struct amdgpu_ring *ring);
+	void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
 };

 /*
@@ -966,6 +967,7 @@ struct amdgpu_ctx {
 	spinlock_t		ring_lock;
 	struct fence		**fences;
 	struct amdgpu_ctx_ring	rings[AMDGPU_MAX_RINGS];
+	bool preamble_presented;
 };

 struct amdgpu_ctx_mgr {
@@ -1231,6 +1233,10 @@ struct amdgpu_cs_parser {
 	struct amdgpu_bo_list_entry	uf_entry;
 };

+#define AMDGPU_PREAMBLE_IB_PRESENT          (1 << 0) /* bit set means command submit involves a preamble IB */
+#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST    (1 << 1) /* bit set means preamble IB is first presented in belonging context */
+#define AMDGPU_HAVE_CTX_SWITCH              (1 << 2) /* bit set means context switch occurred */
+
 struct amdgpu_job {
 	struct amd_sched_job    base;
 	struct amdgpu_device	*adev;
@@ -1239,6 +1245,7 @@ struct amdgpu_job {
 	struct amdgpu_sync	sync;
 	struct amdgpu_ib	*ibs;
 	struct fence		*fence; /* the hw fence */
+	uint32_t		preamble_status;
 	uint32_t		num_ibs;
 	void			*owner;
 	uint64_t		fence_ctx; /* the fence_context this job uses */
@@ -2276,6 +2283,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
 #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
 #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
+#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
 #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
 #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r))
 #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o))
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -850,6 +850,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 		if (r)
 			return r;

+		if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) {
+			parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT;
+			if (!parser->ctx->preamble_presented) {
+				parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT_FIRST;
+				parser->ctx->preamble_presented = true;
+			}
+		}
+
 		if (parser->job->ring && parser->job->ring != ring)
 			return -EINVAL;

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -55,9 +55,10 @@
  * - 3.3.0 - Add VM support for UVD on supported hardware.
  * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS.
  * - 3.5.0 - Add support for new UVD_NO_OP register.
+ * - 3.6.0 - kmd involves use of CONTEXT_CONTROL in ring buffer.
  */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	5
+#define KMS_DRIVER_MINOR	6
 #define KMS_DRIVER_PATCHLEVEL	0

 int amdgpu_vram_limit = 0;
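Userspace that wants to rely on the new skip/replay semantics can gate on this version bump. A hedged sketch using libdrm's drmGetVersion() (illustrative, not part of this patch):

#include <stdbool.h>
#include <xf86drm.h>

/* Returns true if the running amdgpu KMD advertises CONTEXT_CONTROL
 * support, i.e. KMS version >= 3.6.
 */
static bool kmd_has_cntxcntl(int fd)
{
        drmVersionPtr v = drmGetVersion(fd);
        bool ok;

        if (!v)
                return false;
        ok = (v->version_major > 3) ||
             (v->version_major == 3 && v->version_minor >= 6);
        drmFreeVersion(v);
        return ok;
}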
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -125,6 +125,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	unsigned patch_offset = ~0;
 	struct amdgpu_vm *vm;
 	uint64_t fence_ctx;
+	uint32_t status = 0;

 	unsigned i;
 	int r = 0;
@@ -176,11 +177,20 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,

 	skip_preamble = ring->current_ctx == fence_ctx;
 	need_ctx_switch = ring->current_ctx != fence_ctx;
+	if (job && ring->funcs->emit_cntxcntl) {
+		if (need_ctx_switch)
+			status |= AMDGPU_HAVE_CTX_SWITCH;
+		status |= job->preamble_status;
+		amdgpu_ring_emit_cntxcntl(ring, status);
+	}
+
 	for (i = 0; i < num_ibs; ++i) {
 		ib = &ibs[i];

 		/* drop preamble IBs if we don't have a context switch */
-		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble)
+		if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
+			skip_preamble &&
+			!(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST))
 			continue;

 		amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0,
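Taken together with the CS-parser hunk above: preamble_status is computed once per job at submission time and folded into the CONTEXT_CONTROL flags at schedule time. A condensed, self-contained restatement of the resulting per-IB decision (drop_preamble_ib is a hypothetical helper, not in this patch; the flag values come from this patch and amdgpu_drm.h):

#include <stdbool.h>
#include <stdint.h>

#define AMDGPU_IB_FLAG_PREAMBLE          (1 << 1)  /* uapi, amdgpu_drm.h */
#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1)  /* from this patch */

/* A preamble IB may only be dropped when the ring is still on the
 * same context AND this is not the context's very first preamble,
 * which must run once so CE RAM gets primed.
 */
static bool drop_preamble_ib(uint32_t ib_flags, uint32_t status,
                             uint64_t current_ctx, uint64_t fence_ctx)
{
        bool skip_preamble = (current_ctx == fence_ctx);

        return (ib_flags & AMDGPU_IB_FLAG_PREAMBLE) &&
               skip_preamble &&
               !(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST);
}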
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
 	amdgpu_ring_write(ring, control);
 }

+static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+	uint32_t dw2 = 0;
+
+	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+		/* set load_global_config & load_global_uconfig */
+		dw2 |= 0x8001;
+		/* set load_cs_sh_regs */
+		dw2 |= 0x01000000;
+		/* set load_per_context_state & load_gfx_sh_regs */
+		dw2 |= 0x10002;
+	}
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+	amdgpu_ring_write(ring, dw2);
+	amdgpu_ring_write(ring, 0);
+}
+
 /**
  * gfx_v7_0_ring_test_ib - basic ring IB test
  *
@@ -4938,6 +4957,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
 	.test_ib = gfx_v7_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.emit_cntxcntl = gfx_v7_ring_emit_cntxcntl,
 };

 static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -6076,6 +6076,35 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, 0);
 }

+static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
+{
+	uint32_t dw2 = 0;
+
+	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
+		/* set load_global_config & load_global_uconfig */
+		dw2 |= 0x8001;
+		/* set load_cs_sh_regs */
+		dw2 |= 0x01000000;
+		/* set load_per_context_state & load_gfx_sh_regs for GFX */
+		dw2 |= 0x10002;
+
+		/* set load_ce_ram if preamble presented */
+		if (AMDGPU_PREAMBLE_IB_PRESENT & flags)
+			dw2 |= 0x10000000;
+	} else {
+		/* still load_ce_ram if this is the first time the preamble
+		 * is presented, although no context switch happens.
+		 */
+		if (AMDGPU_PREAMBLE_IB_PRESENT_FIRST & flags)
+			dw2 |= 0x10000000;
+	}
+
+	amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
+	amdgpu_ring_write(ring, dw2);
+	amdgpu_ring_write(ring, 0);
+}
+
 static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 						 enum amdgpu_interrupt_state state)
 {
@@ -6258,6 +6287,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
 	.insert_nop = amdgpu_ring_insert_nop,
 	.pad_ib = amdgpu_ring_generic_pad_ib,
 	.emit_switch_buffer = gfx_v8_ring_emit_sb,
+	.emit_cntxcntl = gfx_v8_ring_emit_cntxcntl,
 };

 static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
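For reference, the magic dw2 values in the two emit_cntxcntl() implementations decode into the CONTEXT_CONTROL packet's load-control bits. The mask names below are illustrative, derived from the in-line comments above rather than from this patch:

/* CONTEXT_CONTROL second dword (load control), as implied by the
 * comments in gfx_v7_ring_emit_cntxcntl()/gfx_v8_ring_emit_cntxcntl():
 */
#define LOAD_GLOBAL_CONFIG     (1u << 0)   /* part of 0x8001 */
#define LOAD_PER_CONTEXT_STATE (1u << 1)   /* part of 0x10002 */
#define LOAD_GLOBAL_UCONFIG    (1u << 15)  /* part of 0x8001 */
#define LOAD_GFX_SH_REGS       (1u << 16)  /* part of 0x10002 */
#define LOAD_CS_SH_REGS        (1u << 24)  /* 0x01000000 */
#define LOAD_CE_RAM            (1u << 28)  /* 0x10000000 */
#define LOAD_ENABLE            (1u << 31)  /* 0x80000000; packet is a NOP without it */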