This pull request brings in fragment shader threading and ETC1 support
for vc4.

-----BEGIN PGP SIGNATURE-----
iQIcBAABCgAGBQJYLM74AAoJELXWKTbR/J7oMhoP/iXpvkEMm5tujsiXLSJwi2yZ
Kt3h8lDIuF+p0i0lQDpUj9M5OfG8z0XJwLgmIJzh64uclG70tcfAuQ9WECC0h0ix
ZvV+g8OaLURu/kRpLf4MDwxMAVnd0zOxmJ3PugLjmdrVtMR7keF/iedJpwzxNNWi
PI0bZ8Vl2V8S5ayerDtDGTNs6yVP3/+SBKHsjPo2tj46FspMBWlzS7tI4YsyCdts
gHI+0ICtAK2Aj0imo01UEIriO5AzHxkFk/c848knUYgrK51H0zmqXxWPo2I1NHFG
wMHx/gUgasAv9EbSCjfyS/KP+YRKMeOER002/xSOuy3HFoAgOBHKZ54p3zPw9T44
17eDpLU2LUwJYoMSWYGGjeeDsmHCqTDY84K9qqkwhLx8saODtk99P5zXdoMxYgyt
a77ibarPAdJH1tC2WOo86yxQzcKaDzjzhhq4mqdBBDksZK0eVYfwDe1PssDZUQQe
QAAs+DkygXYI+80rRdKFFeu4ApZZu151zdDKviXE2J7fYT9EQUgRJRlnZJNuYnok
N7X0SVoqwGtxipZhIRlE348clEZ3abzdL6k1aUBL96O4tXu95n/BbbCXHXDYvOQf
USyhrhTWVY2tSc3OAoXkZJ6bR7Mj+iXoQxV25tZohHoXUFaBu3Bgz5lBhDWBoMeN
UK+TE4Wcfsr3EvA39plG
=dFfc
-----END PGP SIGNATURE-----

Merge tag 'drm-vc4-next-2016-11-16' of https://github.com/anholt/linux into drm-next

This pull request brings in fragment shader threading and ETC1 support for vc4.
This commit is contained in:
Коммит
b7c0e47d98
|
@ -61,23 +61,28 @@ static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
args->value = V3D_READ(V3D_IDENT0);
|
args->value = V3D_READ(V3D_IDENT0);
|
||||||
pm_runtime_put(&vc4->v3d->pdev->dev);
|
pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
|
||||||
|
pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
|
||||||
break;
|
break;
|
||||||
case DRM_VC4_PARAM_V3D_IDENT1:
|
case DRM_VC4_PARAM_V3D_IDENT1:
|
||||||
ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
|
ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
args->value = V3D_READ(V3D_IDENT1);
|
args->value = V3D_READ(V3D_IDENT1);
|
||||||
pm_runtime_put(&vc4->v3d->pdev->dev);
|
pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
|
||||||
|
pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
|
||||||
break;
|
break;
|
||||||
case DRM_VC4_PARAM_V3D_IDENT2:
|
case DRM_VC4_PARAM_V3D_IDENT2:
|
||||||
ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
|
ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
return ret;
|
return ret;
|
||||||
args->value = V3D_READ(V3D_IDENT2);
|
args->value = V3D_READ(V3D_IDENT2);
|
||||||
pm_runtime_put(&vc4->v3d->pdev->dev);
|
pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
|
||||||
|
pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
|
||||||
break;
|
break;
|
||||||
case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
|
case DRM_VC4_PARAM_SUPPORTS_BRANCHES:
|
||||||
|
case DRM_VC4_PARAM_SUPPORTS_ETC1:
|
||||||
|
case DRM_VC4_PARAM_SUPPORTS_THREADED_FS:
|
||||||
args->value = true;
|
args->value = true;
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
|
@ -381,6 +381,8 @@ struct vc4_validated_shader_info {
|
||||||
|
|
||||||
uint32_t num_uniform_addr_offsets;
|
uint32_t num_uniform_addr_offsets;
|
||||||
uint32_t *uniform_addr_offsets;
|
uint32_t *uniform_addr_offsets;
|
||||||
|
|
||||||
|
bool is_threaded;
|
||||||
};
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -544,14 +544,15 @@ vc4_cl_lookup_bos(struct drm_device *dev,
|
||||||
|
|
||||||
handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
|
handles = drm_malloc_ab(exec->bo_count, sizeof(uint32_t));
|
||||||
if (!handles) {
|
if (!handles) {
|
||||||
|
ret = -ENOMEM;
|
||||||
DRM_ERROR("Failed to allocate incoming GEM handles\n");
|
DRM_ERROR("Failed to allocate incoming GEM handles\n");
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
ret = copy_from_user(handles,
|
if (copy_from_user(handles,
|
||||||
(void __user *)(uintptr_t)args->bo_handles,
|
(void __user *)(uintptr_t)args->bo_handles,
|
||||||
exec->bo_count * sizeof(uint32_t));
|
exec->bo_count * sizeof(uint32_t))) {
|
||||||
if (ret) {
|
ret = -EFAULT;
|
||||||
DRM_ERROR("Failed to copy in GEM handles\n");
|
DRM_ERROR("Failed to copy in GEM handles\n");
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
@ -708,8 +709,10 @@ vc4_complete_exec(struct drm_device *dev, struct vc4_exec_info *exec)
|
||||||
}
|
}
|
||||||
|
|
||||||
mutex_lock(&vc4->power_lock);
|
mutex_lock(&vc4->power_lock);
|
||||||
if (--vc4->power_refcount == 0)
|
if (--vc4->power_refcount == 0) {
|
||||||
pm_runtime_put(&vc4->v3d->pdev->dev);
|
pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
|
||||||
|
pm_runtime_put_autosuspend(&vc4->v3d->pdev->dev);
|
||||||
|
}
|
||||||
mutex_unlock(&vc4->power_lock);
|
mutex_unlock(&vc4->power_lock);
|
||||||
|
|
||||||
kfree(exec);
|
kfree(exec);
|
||||||
|
|
|
@ -222,6 +222,8 @@ static int vc4_v3d_bind(struct device *dev, struct device *master, void *data)
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pm_runtime_use_autosuspend(dev);
|
||||||
|
pm_runtime_set_autosuspend_delay(dev, 40); /* a little over 2 frames. */
|
||||||
pm_runtime_enable(dev);
|
pm_runtime_enable(dev);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
@ -644,6 +644,13 @@ reloc_tex(struct vc4_exec_info *exec,
|
||||||
cpp = 1;
|
cpp = 1;
|
||||||
break;
|
break;
|
||||||
case VC4_TEXTURE_TYPE_ETC1:
|
case VC4_TEXTURE_TYPE_ETC1:
|
||||||
|
/* ETC1 is arranged as 64-bit blocks, where each block is 4x4
|
||||||
|
* pixels.
|
||||||
|
*/
|
||||||
|
cpp = 8;
|
||||||
|
width = (width + 3) >> 2;
|
||||||
|
height = (height + 3) >> 2;
|
||||||
|
break;
|
||||||
case VC4_TEXTURE_TYPE_BW1:
|
case VC4_TEXTURE_TYPE_BW1:
|
||||||
case VC4_TEXTURE_TYPE_A4:
|
case VC4_TEXTURE_TYPE_A4:
|
||||||
case VC4_TEXTURE_TYPE_A1:
|
case VC4_TEXTURE_TYPE_A1:
|
||||||
|
@ -782,11 +789,6 @@ validate_gl_shader_rec(struct drm_device *dev,
|
||||||
exec->shader_rec_v += roundup(packet_size, 16);
|
exec->shader_rec_v += roundup(packet_size, 16);
|
||||||
exec->shader_rec_size -= packet_size;
|
exec->shader_rec_size -= packet_size;
|
||||||
|
|
||||||
if (!(*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD)) {
|
|
||||||
DRM_ERROR("Multi-threaded fragment shaders not supported.\n");
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < shader_reloc_count; i++) {
|
for (i = 0; i < shader_reloc_count; i++) {
|
||||||
if (src_handles[i] > exec->bo_count) {
|
if (src_handles[i] > exec->bo_count) {
|
||||||
DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
|
DRM_ERROR("Shader handle %d too big\n", src_handles[i]);
|
||||||
|
@ -803,6 +805,18 @@ validate_gl_shader_rec(struct drm_device *dev,
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (((*(uint16_t *)pkt_u & VC4_SHADER_FLAG_FS_SINGLE_THREAD) == 0) !=
|
||||||
|
to_vc4_bo(&bo[0]->base)->validated_shader->is_threaded) {
|
||||||
|
DRM_ERROR("Thread mode of CL and FS do not match\n");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (to_vc4_bo(&bo[1]->base)->validated_shader->is_threaded ||
|
||||||
|
to_vc4_bo(&bo[2]->base)->validated_shader->is_threaded) {
|
||||||
|
DRM_ERROR("cs and vs cannot be threaded\n");
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
for (i = 0; i < shader_reloc_count; i++) {
|
for (i = 0; i < shader_reloc_count; i++) {
|
||||||
struct vc4_validated_shader_info *validated_shader;
|
struct vc4_validated_shader_info *validated_shader;
|
||||||
uint32_t o = shader_reloc_offsets[i];
|
uint32_t o = shader_reloc_offsets[i];
|
||||||
|
|
|
@ -83,6 +83,13 @@ struct vc4_shader_validation_state {
|
||||||
* basic blocks.
|
* basic blocks.
|
||||||
*/
|
*/
|
||||||
bool needs_uniform_address_for_loop;
|
bool needs_uniform_address_for_loop;
|
||||||
|
|
||||||
|
/* Set when we find an instruction writing the top half of the
|
||||||
|
* register files. If we allowed writing the unusable regs in
|
||||||
|
* a threaded shader, then the other shader running on our
|
||||||
|
* QPU's clamp validation would be invalid.
|
||||||
|
*/
|
||||||
|
bool all_registers_used;
|
||||||
};
|
};
|
||||||
|
|
||||||
static uint32_t
|
static uint32_t
|
||||||
|
@ -118,6 +125,13 @@ raddr_add_a_to_live_reg_index(uint64_t inst)
|
||||||
return ~0;
|
return ~0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
live_reg_is_upper_half(uint32_t lri)
|
||||||
|
{
|
||||||
|
return (lri >= 16 && lri < 32) ||
|
||||||
|
(lri >= 32 + 16 && lri < 32 + 32);
|
||||||
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
is_tmu_submit(uint32_t waddr)
|
is_tmu_submit(uint32_t waddr)
|
||||||
{
|
{
|
||||||
|
@ -390,6 +404,9 @@ check_reg_write(struct vc4_validated_shader_info *validated_shader,
|
||||||
} else {
|
} else {
|
||||||
validation_state->live_immediates[lri] = ~0;
|
validation_state->live_immediates[lri] = ~0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (live_reg_is_upper_half(lri))
|
||||||
|
validation_state->all_registers_used = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (waddr) {
|
switch (waddr) {
|
||||||
|
@ -598,6 +615,11 @@ check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ((raddr_a >= 16 && raddr_a < 32) ||
|
||||||
|
(raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
|
||||||
|
validation_state->all_registers_used = true;
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -608,9 +630,7 @@ static bool
|
||||||
vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
|
vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
|
||||||
{
|
{
|
||||||
uint32_t max_branch_target = 0;
|
uint32_t max_branch_target = 0;
|
||||||
bool found_shader_end = false;
|
|
||||||
int ip;
|
int ip;
|
||||||
int shader_end_ip = 0;
|
|
||||||
int last_branch = -2;
|
int last_branch = -2;
|
||||||
|
|
||||||
for (ip = 0; ip < validation_state->max_ip; ip++) {
|
for (ip = 0; ip < validation_state->max_ip; ip++) {
|
||||||
|
@ -621,8 +641,13 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
|
||||||
uint32_t branch_target_ip;
|
uint32_t branch_target_ip;
|
||||||
|
|
||||||
if (sig == QPU_SIG_PROG_END) {
|
if (sig == QPU_SIG_PROG_END) {
|
||||||
shader_end_ip = ip;
|
/* There are two delay slots after program end is
|
||||||
found_shader_end = true;
|
* signaled that are still executed, then we're
|
||||||
|
* finished. validation_state->max_ip is the
|
||||||
|
* instruction after the last valid instruction in the
|
||||||
|
* program.
|
||||||
|
*/
|
||||||
|
validation_state->max_ip = ip + 3;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -676,15 +701,9 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
|
||||||
}
|
}
|
||||||
set_bit(after_delay_ip, validation_state->branch_targets);
|
set_bit(after_delay_ip, validation_state->branch_targets);
|
||||||
max_branch_target = max(max_branch_target, after_delay_ip);
|
max_branch_target = max(max_branch_target, after_delay_ip);
|
||||||
|
|
||||||
/* There are two delay slots after program end is signaled
|
|
||||||
* that are still executed, then we're finished.
|
|
||||||
*/
|
|
||||||
if (found_shader_end && ip == shader_end_ip + 2)
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (max_branch_target > shader_end_ip) {
|
if (max_branch_target > validation_state->max_ip - 3) {
|
||||||
DRM_ERROR("Branch landed after QPU_SIG_PROG_END");
|
DRM_ERROR("Branch landed after QPU_SIG_PROG_END");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -756,6 +775,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
|
||||||
{
|
{
|
||||||
bool found_shader_end = false;
|
bool found_shader_end = false;
|
||||||
int shader_end_ip = 0;
|
int shader_end_ip = 0;
|
||||||
|
uint32_t last_thread_switch_ip = -3;
|
||||||
uint32_t ip;
|
uint32_t ip;
|
||||||
struct vc4_validated_shader_info *validated_shader = NULL;
|
struct vc4_validated_shader_info *validated_shader = NULL;
|
||||||
struct vc4_shader_validation_state validation_state;
|
struct vc4_shader_validation_state validation_state;
|
||||||
|
@ -788,6 +808,17 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
|
||||||
if (!vc4_handle_branch_target(&validation_state))
|
if (!vc4_handle_branch_target(&validation_state))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
|
if (ip == last_thread_switch_ip + 3) {
|
||||||
|
/* Reset r0-r3 live clamp data */
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 64; i < LIVE_REG_COUNT; i++) {
|
||||||
|
validation_state.live_min_clamp_offsets[i] = ~0;
|
||||||
|
validation_state.live_max_clamp_regs[i] = false;
|
||||||
|
validation_state.live_immediates[i] = ~0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
switch (sig) {
|
switch (sig) {
|
||||||
case QPU_SIG_NONE:
|
case QPU_SIG_NONE:
|
||||||
case QPU_SIG_WAIT_FOR_SCOREBOARD:
|
case QPU_SIG_WAIT_FOR_SCOREBOARD:
|
||||||
|
@ -797,6 +828,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
|
||||||
case QPU_SIG_LOAD_TMU1:
|
case QPU_SIG_LOAD_TMU1:
|
||||||
case QPU_SIG_PROG_END:
|
case QPU_SIG_PROG_END:
|
||||||
case QPU_SIG_SMALL_IMM:
|
case QPU_SIG_SMALL_IMM:
|
||||||
|
case QPU_SIG_THREAD_SWITCH:
|
||||||
|
case QPU_SIG_LAST_THREAD_SWITCH:
|
||||||
if (!check_instruction_writes(validated_shader,
|
if (!check_instruction_writes(validated_shader,
|
||||||
&validation_state)) {
|
&validation_state)) {
|
||||||
DRM_ERROR("Bad write at ip %d\n", ip);
|
DRM_ERROR("Bad write at ip %d\n", ip);
|
||||||
|
@ -812,6 +845,18 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
|
||||||
shader_end_ip = ip;
|
shader_end_ip = ip;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (sig == QPU_SIG_THREAD_SWITCH ||
|
||||||
|
sig == QPU_SIG_LAST_THREAD_SWITCH) {
|
||||||
|
validated_shader->is_threaded = true;
|
||||||
|
|
||||||
|
if (ip < last_thread_switch_ip + 3) {
|
||||||
|
DRM_ERROR("Thread switch too soon after "
|
||||||
|
"last switch at ip %d\n", ip);
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
last_thread_switch_ip = ip;
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case QPU_SIG_LOAD_IMM:
|
case QPU_SIG_LOAD_IMM:
|
||||||
|
@ -826,6 +871,13 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
|
||||||
if (!check_branch(inst, validated_shader,
|
if (!check_branch(inst, validated_shader,
|
||||||
&validation_state, ip))
|
&validation_state, ip))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
|
if (ip < last_thread_switch_ip + 3) {
|
||||||
|
DRM_ERROR("Branch in thread switch at ip %d",
|
||||||
|
ip);
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
DRM_ERROR("Unsupported QPU signal %d at "
|
DRM_ERROR("Unsupported QPU signal %d at "
|
||||||
|
@ -847,6 +899,14 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Might corrupt other thread */
|
||||||
|
if (validated_shader->is_threaded &&
|
||||||
|
validation_state.all_registers_used) {
|
||||||
|
DRM_ERROR("Shader uses threading, but uses the upper "
|
||||||
|
"half of the registers, too\n");
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
|
||||||
/* If we did a backwards branch and we haven't emitted a uniforms
|
/* If we did a backwards branch and we haven't emitted a uniforms
|
||||||
* reset since then, we still need the uniforms stream to have the
|
* reset since then, we still need the uniforms stream to have the
|
||||||
* uniforms address available so that the backwards branch can do its
|
* uniforms address available so that the backwards branch can do its
|
||||||
|
|
|
@ -286,6 +286,8 @@ struct drm_vc4_get_hang_state {
|
||||||
#define DRM_VC4_PARAM_V3D_IDENT1 1
|
#define DRM_VC4_PARAM_V3D_IDENT1 1
|
||||||
#define DRM_VC4_PARAM_V3D_IDENT2 2
|
#define DRM_VC4_PARAM_V3D_IDENT2 2
|
||||||
#define DRM_VC4_PARAM_SUPPORTS_BRANCHES 3
|
#define DRM_VC4_PARAM_SUPPORTS_BRANCHES 3
|
||||||
|
#define DRM_VC4_PARAM_SUPPORTS_ETC1 4
|
||||||
|
#define DRM_VC4_PARAM_SUPPORTS_THREADED_FS 5
|
||||||
|
|
||||||
struct drm_vc4_get_param {
|
struct drm_vc4_get_param {
|
||||||
__u32 param;
|
__u32 param;
|
||||||
|
|
Загрузка…
Ссылка в новой задаче