[spirv] Add mesh/amplification shader tests for use of buffers and fn calls (#2390)

- Also cleans up SPIR-V.rst documentation issues.
This commit is contained in:
Sahil Parmar 2019-08-06 08:12:43 -07:00 коммит произвёл Ehsan
Родитель c8f7a6c970
Коммит 83c1bf8e92
4 изменённых файлов: 383 добавлений и 31 удалений

Просмотреть файл

@ -3104,14 +3104,14 @@ Callable Stage
Mesh and Amplification Shaders
------------------------------
DirectX adds 2 new shader stages for using MeshShading pipeline namely Mesh and Amplification.
Amplification shaders corresponds to Task Shaders in Vulkan.
| DirectX adds two new shader stages for the MeshShading pipeline, namely Mesh and Amplification.
| Amplification shaders correspond to Task Shaders in Vulkan.
|
| Refer to the following HLSL and SPIR-V specs for details:
| https://docs.microsoft.com/<TBD>
| https://github.com/KhronosGroup/SPIRV-Registry/blob/master/extensions/NV/SPV_NV_mesh_shader.asciidoc
This section describes how Mesh and Amplification shaders are translated to SPIR-V for Vulkan.
|
| This section describes how Mesh and Amplification shaders are translated to SPIR-V for Vulkan.
Entry Point Attributes
~~~~~~~~~~~~~~~~~~~~~~
@ -3120,18 +3120,19 @@ shaders and are translated to SPIR-V execution modes according to the table belo
.. table:: Mapping from HLSL attribute to SPIR-V execution mode
+--------------------+----------------+-------------------------+
+-------------------+--------------------+-------------------------+
| HLSL Attribute | Value | SPIR-V Execution Mode |
+====================+================+=========================+
| | ``point`` | ``OutputPoints`` |
| +----------------+-------------------------+
| ``outputtopology`` | ``line`` | ``OutputLinesNV`` |
| (Mesh shader) +----------------+-------------------------+
+===================+====================+=========================+
|``outputtopology`` | ``point`` | ``OutputPoints`` |
| +--------------------+-------------------------+
|``(Mesh shader)`` | ``line`` | ``OutputLinesNV`` |
| +--------------------+-------------------------+
| | ``triangle`` | ``OutputTrianglesNV`` |
+--------------------+----------------+-------------------------+
+-------------------+--------------------+-------------------------+
| ``numthreads`` | ``X, Y, Z`` | ``LocalSize X, Y, Z`` |
| | (X*Y*Z <= 128) | |
+--------------------+----------------+-------------------------+
| | | |
| | ``(X*Y*Z <= 128)`` | |
+-------------------+--------------------+-------------------------+
Intrinsics
~~~~~~~~~~
@ -3140,24 +3141,29 @@ and are translated to SPIR-V intrinsics according to the table below:
.. table:: Mapping from HLSL intrinsics to SPIR-V intrinsics
+-------------------------+--------------------+-----------------------------------------+
+---------------------------+--------------------+-----------------------------------------+
| HLSL Intrinsic | Parameters | SPIR-V Intrinsic |
+=========================+====================+=========================================+
+===========================+====================+=========================================+
| ``SetMeshOutputCounts`` | ``numVertices`` | ``PrimitiveCountNV numPrimitives`` |
| (Mesh shader) | ``numPrimitives`` | |
+-------------------------+--------------------+-----------------------------------------+
| | ``ThreadX`` | |
| ``DispatchMesh`` | ``ThreadY`` | ``OpControlBarrier`` |
| (Amplification shader) | ``ThreadZ`` | ``TaskCountNV ThreadX*ThreadY*ThreadZ`` |
| | | |
| ``(Mesh shader)`` | ``numPrimitives`` | |
+---------------------------+--------------------+-----------------------------------------+
| ``DispatchMesh`` | ``ThreadX`` | ``OpControlBarrier`` |
| | | |
| ``(Amplification shader)``| ``ThreadY`` | ``TaskCountNV ThreadX*ThreadY*ThreadZ`` |
| | | |
| | ``ThreadZ`` | |
| | | |
| | ``MeshPayload`` | |
+-------------------------+--------------------+-----------------------------------------+
+---------------------------+--------------------+-----------------------------------------+
| *For DispatchMesh intrinsic, we also emit MeshPayload as output block with PerTaskNV decoration
| Note: For the ``DispatchMesh`` intrinsic, we also emit ``MeshPayload`` as an output block with the ``PerTaskNV`` decoration.
Mesh Interface Variables
~~~~~~~~~~~~~~~~~~~~~~~~
Interface variables are defined for Mesh shaders using HLSL modifiers.
Following table gives high level overview of the mapping:
| Interface variables are defined for Mesh shaders using HLSL modifiers.
| The following table gives a high-level overview of the mapping:
|
.. table:: Mapping from HLSL modifiers to SPIR-V definitions
@ -3165,9 +3171,11 @@ Following table gives high level overview of the mapping:
| HLSL modifier | SPIR-V definition |
+=================+=========================================================================+
| ``indices`` | Maps to SPIR-V intrinsic ``PrimitiveIndicesNV`` |
| | |
| | Defines SPIR-V Execution Mode ``OutputPrimitivesNV <array-size>`` |
+-----------------+-------------------------------------------------------------------------+
| ``vertices`` | Maps to per-vertex out attributes |
| | |
| | Defines existing SPIR-V Execution Mode ``OutputVertices <array-size>`` |
+-----------------+-------------------------------------------------------------------------+
| ``primitives`` | Maps to per-primitive out attributes with ``PerPrimitiveNV`` decoration |

Просмотреть файл

@ -0,0 +1,202 @@
// Run: %dxc -T ms_6_5 -E main
// CHECK: OpCapability MeshShadingNV
// CHECK: OpExtension "SPV_NV_mesh_shader"
// CHECK: OpEntryPoint MeshNV %main "main"
// CHECK: OpName %UserVertex "UserVertex"
// Element type of the `userVertices` structured buffer: one source vertex
// with a position, a texture coordinate and a color.
// The layout directives in this file expect member offsets 0 / 16 / 32 and
// an array stride of 48 for this struct.
struct UserVertex {
// CHECK: OpMemberName %UserVertex 0 "position"
// CHECK: OpMemberName %UserVertex 1 "texcoord"
// CHECK: OpMemberName %UserVertex 2 "color"
float3 position;
float2 texcoord;
float3 color;
};
// CHECK: OpName %Mesh "Mesh"
// Element type of the `meshes` structured buffer. Holds the index of the
// first submesh and the number of submeshes.
// `dummy` is never read in this file -- presumably padding; confirm.
struct Mesh {
// CHECK: OpMemberName %Mesh 0 "firstSubmesh"
// CHECK: OpMemberName %Mesh 1 "submeshCount"
// CHECK: OpMemberName %Mesh 2 "dummy"
uint firstSubmesh;
uint submeshCount;
uint dummy[2];
};
// CHECK: OpName %SubMesh "SubMesh"
// Element type of the `submeshes` structured buffer. main() copies one
// element into a function-local variable and reads its count/offset members
// to drive the vertex and primitive loops.
// `boundingBox` is not read by this shader; eight float4 entries,
// presumably the corners of a bounding volume -- confirm.
struct SubMesh {
// CHECK: OpMemberName %SubMesh 0 "vertexCount"
// CHECK: OpMemberName %SubMesh 1 "vertexOffset"
// CHECK: OpMemberName %SubMesh 2 "primitiveCount"
// CHECK: OpMemberName %SubMesh 3 "indexOffset"
// CHECK: OpMemberName %SubMesh 4 "boundingBox"
uint vertexCount;
uint vertexOffset;
uint primitiveCount;
uint indexOffset;
float4 boundingBox[8];
};
// CHECK: OpDecorate %userVertices DescriptorSet 0
// CHECK: OpDecorate %userVertices Binding 0
// CHECK: OpDecorate %userIndices DescriptorSet 0
// CHECK: OpDecorate %userIndices Binding 1
// CHECK: OpDecorate %meshes DescriptorSet 0
// CHECK: OpDecorate %meshes Binding 2
// CHECK: OpDecorate %submeshes DescriptorSet 0
// CHECK: OpDecorate %submeshes Binding 3
// CHECK: OpDecorate %UBO DescriptorSet 0
// CHECK: OpDecorate %UBO Binding 4
// CHECK: OpMemberDecorate %UserVertex 0 Offset 0
// CHECK: OpMemberDecorate %UserVertex 1 Offset 16
// CHECK: OpMemberDecorate %UserVertex 2 Offset 32
// CHECK: OpDecorate %_runtimearr_UserVertex ArrayStride 48
// CHECK: OpMemberDecorate %type_RWStructuredBuffer_UserVertex 0 Offset 0
// CHECK: OpDecorate %type_RWStructuredBuffer_UserVertex BufferBlock
// CHECK: OpDecorate %_runtimearr_uint ArrayStride 4
// CHECK: OpMemberDecorate %type_RWStructuredBuffer_uint 0 Offset 0
// CHECK: OpDecorate %type_RWStructuredBuffer_uint BufferBlock
// CHECK: OpMemberDecorate %Mesh 0 Offset 0
// CHECK: OpMemberDecorate %Mesh 1 Offset 4
// CHECK: OpMemberDecorate %Mesh 2 Offset 8
// CHECK: OpDecorate %_runtimearr_Mesh ArrayStride 16
// CHECK: OpMemberDecorate %type_RWStructuredBuffer_Mesh 0 Offset 0
// CHECK: OpDecorate %type_RWStructuredBuffer_Mesh BufferBlock
// CHECK: OpMemberDecorate %SubMesh 0 Offset 0
// CHECK: OpMemberDecorate %SubMesh 1 Offset 4
// CHECK: OpMemberDecorate %SubMesh 2 Offset 8
// CHECK: OpMemberDecorate %SubMesh 3 Offset 12
// CHECK: OpMemberDecorate %SubMesh 4 Offset 16
// CHECK: OpDecorate %_runtimearr_SubMesh ArrayStride 144
// CHECK: OpMemberDecorate %type_RWStructuredBuffer_SubMesh 0 Offset 0
// CHECK: OpDecorate %type_RWStructuredBuffer_SubMesh BufferBlock
// CHECK: OpMemberDecorate %type_UBO 0 Offset 0
// CHECK: OpMemberDecorate %type_UBO 0 MatrixStride 16
// CHECK: OpMemberDecorate %type_UBO 0 ColMajor
// CHECK: OpDecorate %type_UBO Block
// CHECK: %UserVertex = OpTypeStruct %v3float %v2float %v3float
// CHECK: %_runtimearr_UserVertex = OpTypeRuntimeArray %UserVertex
// CHECK: %type_RWStructuredBuffer_UserVertex = OpTypeStruct %_runtimearr_UserVertex
// CHECK: %_ptr_Uniform_type_RWStructuredBuffer_UserVertex = OpTypePointer Uniform %type_RWStructuredBuffer_UserVertex
// Source vertex data (binding 0, descriptor set 0). main() reads the
// texcoord, color and position members from it.
[[vk::binding(0, 0)]]
RWStructuredBuffer<UserVertex> userVertices;
// CHECK: %_runtimearr_uint = OpTypeRuntimeArray %uint
// CHECK: %type_RWStructuredBuffer_uint = OpTypeStruct %_runtimearr_uint
// CHECK: %_ptr_Uniform_type_RWStructuredBuffer_uint = OpTypePointer Uniform %type_RWStructuredBuffer_uint
// Source index data (binding 1, descriptor set 0). main() reads it in the
// primitive loop to fill `primIndices` and to pick the vertex whose color
// is copied to the primitive.
[[vk::binding(1, 0)]]
RWStructuredBuffer<uint> userIndices;
// CHECK: %_arr_uint_uint_2 = OpTypeArray %uint %uint_2
// CHECK: %Mesh = OpTypeStruct %uint %uint %_arr_uint_uint_2
// CHECK: %_runtimearr_Mesh = OpTypeRuntimeArray %Mesh
// CHECK: %type_RWStructuredBuffer_Mesh = OpTypeStruct %_runtimearr_Mesh
// CHECK: %_ptr_Uniform_type_RWStructuredBuffer_Mesh = OpTypePointer Uniform %type_RWStructuredBuffer_Mesh
// Mesh table (binding 2, descriptor set 0). Not read by main() in this
// shader; it is declared so that its type and decorations are emitted and
// can be matched by the directives above.
[[vk::binding(2, 0)]]
RWStructuredBuffer<Mesh> meshes;
// CHECK: %uint_8 = OpConstant %uint 8
// CHECK: %v4float = OpTypeVector %float 4
// CHECK: %_arr_v4float_uint_8 = OpTypeArray %v4float %uint_8
// CHECK: %SubMesh = OpTypeStruct %uint %uint %uint %uint %_arr_v4float_uint_8
// CHECK: %_runtimearr_SubMesh = OpTypeRuntimeArray %SubMesh
// CHECK: %type_RWStructuredBuffer_SubMesh = OpTypeStruct %_runtimearr_SubMesh
// CHECK: %_ptr_Uniform_type_RWStructuredBuffer_SubMesh = OpTypePointer Uniform %type_RWStructuredBuffer_SubMesh
// Submesh table (binding 3, descriptor set 0). main() indexes it with the
// submesh id taken from the task payload.
[[vk::binding(3, 0)]]
RWStructuredBuffer<SubMesh> submeshes;
// CHECK: %mat4v4float = OpTypeMatrix %v4float 4
// CHECK: %type_UBO = OpTypeStruct %mat4v4float
// CHECK: %_ptr_Uniform_type_UBO = OpTypePointer Uniform %type_UBO
// Constant buffer (binding 4, descriptor set 0) holding the matrix that
// main() multiplies each vertex position by.
// The matrix is declared row_major in HLSL; the directives in this file
// expect it to be decorated ColMajor with MatrixStride 16 in SPIR-V.
[[vk::binding(4, 0)]]
cbuffer UBO {
row_major float4x4 mvp;
}
// Element type of main()'s `vertices` output array: the per-vertex
// attributes written by the mesh shader.
struct PerVertex {
float4 position : SV_Position;
float2 texcoord : TEXCOORD;
float3 color : COLOR;
};
// Element type of main()'s `primitives` output array: one color per
// emitted primitive.
struct PerPrimitive {
float4 primcolor : PCOLOR;
};
// Type of main()'s `payload` input: a table of up to 256 submesh ids,
// indexed by group id inside main().
struct SubMeshes {
uint submeshID[256] : SUBMESH;
};
// Number of indices consumed per output primitive; main() declares the
// "triangle" output topology and writes a uint3 per primitive.
static const uint vertsPerPrim = 3U;
// CHECK: %userVertices = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_UserVertex Uniform
// CHECK: %userIndices = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_uint Uniform
// CHECK: %meshes = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_Mesh Uniform
// CHECK: %submeshes = OpVariable %_ptr_Uniform_type_RWStructuredBuffer_SubMesh Uniform
// CHECK: %UBO = OpVariable %_ptr_Uniform_type_UBO Uniform
// Mesh shader entry point.
//
// Each thread group handles one submesh: the submesh id is looked up in the
// task payload by group id, the SubMesh record is copied into a local
// variable, and the 32 threads of the group then stride over its vertices
// and primitives.
//
// Outputs:
//   primIndices - one uint3 of vertex indices per primitive
//   verts       - per-vertex attributes (position transformed by `mvp`)
//   prims       - per-primitive color
// Inputs:
//   taskmem - payload carrying the submesh id table
//   gid     - group id, selects the payload entry
//   tid     - thread id within the group, selects the element per iteration
//
// The statements and local names below are matched by the embedded
// directives, so they must not be reordered or renamed.
[outputtopology("triangle")]
[numthreads(32, 1, 1)]
void main(
out indices uint3 primIndices[128],
out vertices PerVertex verts[128],
out primitives PerPrimitive prims[128],
in payload SubMeshes taskmem,
in uint gid : SV_GroupID,
in uint tid : SV_GroupThreadID
)
{
// Submesh id for this group, read from the payload.
uint task = taskmem.submeshID[gid];
// CHECK: %submesh = OpVariable %_ptr_Function_SubMesh_0 Function
// CHECK: OpAccessChain %_ptr_Uniform_SubMesh %submeshes %int_0 [[task:%\d+]]
// CHECK: OpStore %submesh [[submeshVal:%\d+]]
// Whole-struct copy from the buffer into a function-local variable; later
// member reads go through the local copy (Function storage class).
SubMesh submesh = submeshes[task];
// CHECK: OpAccessChain %_ptr_Function_uint %submesh %int_0
uint numPackedVertices = submesh.vertexCount;
// CHECK: OpAccessChain %_ptr_Function_uint %submesh %int_2
uint numPackedPrimitives = submesh.primitiveCount;
SetMeshOutputCounts(numPackedVertices, numPackedPrimitives);
// Vertex pass: step of 32 matches numthreads(32, 1, 1), so each iteration
// assigns one vertex to each thread of the group.
for (uint i = 0U; i < numPackedVertices; i += 32U) {
uint vid = i + tid;
// CHECK: OpAccessChain %_ptr_Function_uint %submesh %int_1
uint svid = vid + submesh.vertexOffset;
// Threads past the end of the last partial batch do nothing.
if (vid >= numPackedVertices) continue;
// CHECK: OpAccessChain %_ptr_Uniform_v2float %userVertices %int_0 [[svid_1:%\d+]] %int_1
verts[vid].texcoord = userVertices[svid].texcoord;
// CHECK: OpAccessChain %_ptr_Uniform_v3float %userVertices %int_0 [[svid_2:%\d+]] %int_2
verts[vid].color = userVertices[svid].color;
// CHECK: OpAccessChain %_ptr_Uniform_v3float %userVertices %int_0 [[svid_0:%\d+]] %int_0
float3 position = userVertices[svid].position;
// CHECK: OpAccessChain %_ptr_Uniform_mat4v4float %UBO %int_0
verts[vid].position = mul(mvp, float4(position, 1.0));
}
GroupMemoryBarrier();
// Primitive pass: same 32-wide stride, one primitive per thread.
for (uint j = 0U; j < numPackedPrimitives; j += 32U) {
uint pid = j + tid;
// Offset of this primitive's first index relative to the submesh.
uint didxoff = vertsPerPrim * pid;
// CHECK: OpAccessChain %_ptr_Function_uint %submesh %int_3
uint sidxoff = submesh.indexOffset + didxoff;
if (pid >= numPackedPrimitives) continue;
// CHECK: OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[sidxoff_0:%\d+]]
// CHECK: OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[sidxoff_1:%\d+]]
// CHECK: OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[sidxoff_2:%\d+]]
primIndices[pid] = uint3(userIndices[sidxoff], userIndices[sidxoff+1], userIndices[sidxoff+2]);
// CHECK: OpAccessChain %_ptr_Function_uint %submesh %int_1
// CHECK: OpAccessChain %_ptr_Uniform_uint %userIndices %int_0 [[ind:%\d+]]
// Index of the primitive's last vertex (sidxoff + vertsPerPrim - 1),
// rebased by the submesh's vertex offset.
uint providx = submesh.vertexOffset + userIndices[sidxoff + vertsPerPrim - 1U];
// CHECK: OpAccessChain %_ptr_Uniform_v3float %userVertices %int_0 [[providx:%\d+]] %int_2
// The primitive color is that vertex's color with alpha set to 1.
prims[pid].primcolor = float4(userVertices[providx].color, 1.0);
}
}

Просмотреть файл

@ -0,0 +1,135 @@
// Run: %dxc -T as_6_5 -E main -fspv-target-env=vulkan1.1
// CHECK: OpCapability MeshShadingNV
// CHECK: OpExtension "SPV_NV_mesh_shader"
// CHECK: OpEntryPoint TaskNV %main "main"
// Element type of the `submeshes` structured buffer. In this shader only
// `boundingBox` is read (by TestSubmesh); main() copies the whole struct
// into a local variable and passes it to that function.
// Eight float4 entries -- presumably the corners of a bounding volume;
// confirm.
struct SubMesh {
uint vertexCount;
uint vertexOffset;
uint primitiveCount;
uint indexOffset;
float4 boundingBox[8];
};
// Element type of the `meshes` structured buffer. main() reads
// `firstSubmesh` and `submeshCount` for the mesh selected by group id.
// `dummy` is never read -- presumably padding; confirm.
struct Mesh {
uint firstSubmesh;
uint submeshCount;
uint dummy[2];
};
// Element type of the `userVertices` structured buffer. No member of this
// struct is accessed by the code in this shader.
struct UserVertex {
float3 position;
float2 texcoord;
float3 color;
};
// Shader resources, all in descriptor set 0 (second vk::binding argument),
// with bindings 0 through 4 (first argument).

// Declared but not read by this shader.
[[vk::binding(0, 0)]]
RWStructuredBuffer<UserVertex> userVertices;
// Declared but not read by this shader.
[[vk::binding(1, 0)]]
RWStructuredBuffer<uint> userIndices;
// Indexed by group id in main() to find the range of submeshes to test.
[[vk::binding(2, 0)]]
RWStructuredBuffer<Mesh> meshes;
// Indexed by submesh id in main(); each element is passed to TestSubmesh.
[[vk::binding(3, 0)]]
RWStructuredBuffer<SubMesh> submeshes;
// Matrix applied to every bounding-box entry in TestSubmesh.
[[vk::binding(4, 0)]]
cbuffer UBO {
row_major float4x4 mvp;
}
// Group-shared running count of submeshes that passed TestSubmesh; reset to
// zero at the start of main() and used as the first DispatchMesh argument.
groupshared uint passedSubmeshes;
// Payload type handed to DispatchMesh: a table of up to 256 submesh ids.
struct SubMeshes {
uint submeshID[256] : SUBMESH;
};
// Group-shared payload instance that main() fills with the ids of the
// submeshes that passed.
groupshared SubMeshes sharedSubMeshes;
// CHECK: %_arr_v4float_uint_8_0 = OpTypeArray %v4float %uint_8
// CHECK: %SubMesh_0 = OpTypeStruct %uint %uint %uint %uint %_arr_v4float_uint_8_0
// CHECK: %_ptr_Function_SubMesh_0 = OpTypePointer Function %SubMesh_0
// CHECK: [[funcType:%\d+]] = OpTypeFunction %bool %_ptr_Function_SubMesh_0
// Tests a submesh's bounding box against six half-space conditions.
//
// Each of the eight `boundingBox` entries is transformed by `mvp`. For
// every comparison the transformed point satisfies, the matching bit is
// OR-ed into `clip`. The bits accumulate across all eight entries.
//
// Returns true only when all six bits are set (clip == 0x3F), i.e. each
// condition held for at least one of the eight entries.
// Presumably a conservative view-volume visibility test -- confirm.
//
// The local names `clip`, `bbv` and `pos` are matched by directives at the
// end of this file and must not be renamed.
bool TestSubmesh(SubMesh submesh) {
uint clip = 0x0U;
for (uint bbv = 0U ; bbv < 8U; bbv++) {
float4 pos= mul(mvp, submesh.boundingBox[bbv]);
if (pos.x <= pos.w) clip |= 0x1U;
// NOTE(review): this is the only comparison that scales pos.w (by 0.3333);
// the other five compare against +/- pos.w directly -- confirm intended.
if (pos.y <= 0.3333 * pos.w) clip |= 0x2U;
if (pos.z <= pos.w) clip |= 0x4U;
if (pos.x >= -pos.w) clip |= 0x8U;
if (pos.y >= -pos.w) clip |= 0x10U;
if (pos.z >= -pos.w) clip |= 0x20U;
}
return (clip == 0x3FU);
}
// Amplification shader entry point.
//
// Each thread group handles one mesh, selected by group id. The 32 threads
// stride over that mesh's submeshes and call TestSubmesh on each one. The
// ids of passing submeshes are appended to the group-shared payload, and
// DispatchMesh is called with the number that passed.
//
// Inputs:
//   tid - thread id within the group, selects the submesh per iteration
//   mid - group id, selects the entry of `meshes`
//
// The statements and local names below are matched by the embedded
// directives, so they must not be reordered or renamed.
[numthreads(32, 1, 1)]
void main(
in uint tid : SV_GroupThreadID,
in uint mid : SV_GroupID
)
{
uint firstSubmesh = meshes[mid].firstSubmesh;
uint submeshCount = meshes[mid].submeshCount;
// Reset the shared counter before any thread appends to the payload.
passedSubmeshes = 0U;
GroupMemoryBarrier();
// Step of 32 matches numthreads(32, 1, 1): one submesh per thread per
// iteration.
for (uint i = 0U; i < submeshCount; i += 32U) {
uint smid = firstSubmesh + i + tid;
// Threads past the end of the last partial batch skip the rest of the
// iteration, including the GroupMemoryBarrier() at the bottom of the loop.
if (smid >= firstSubmesh + submeshCount) continue;
// CHECK: %submesh = OpVariable %_ptr_Function_SubMesh_0 Function
// CHECK: %passed = OpVariable %_ptr_Function_bool Function
// CHECK: %param_var_submesh = OpVariable %_ptr_Function_SubMesh_0 Function
SubMesh submesh = submeshes[smid];
bool passed = true;
// CHECK: [[submeshValue:%\d+]] = OpLoad %SubMesh_0 %submesh
// CHECK: OpStore %param_var_submesh [[submeshValue]]
// CHECK: [[rv:%\d+]] = OpFunctionCall %bool %TestSubmesh %param_var_submesh
// CHECK: [[cond:%\d+]] = OpLogicalNot %bool [[rv]]
// CHECK: OpSelectionMerge %if_merge_0 None
// CHECK: OpBranchConditional [[cond]] %if_true_0 %if_merge_0
// CHECK: %if_true_0 = OpLabel
// CHECK: OpStore %passed %false
// CHECK: OpBranch %if_merge_0
// CHECK: %if_merge_0 = OpLabel
// The directives above pin how the struct argument is passed: the local is
// loaded, stored into a temporary parameter variable, and a pointer to that
// temporary is given to the call.
if (!TestSubmesh(submesh)) passed = false;
if (passed) {
// Only the first component of the ballot result is used.
uint ballot = WaveActiveBallot(passed).x;
// Mask with a bit set for every lane index below this lane's.
uint laneMaskLT = (1 << WaveGetLaneIndex()) - 1;
// Passing lanes with a lower index than this one.
uint lowerThreads = ballot & laneMaskLT;
// Payload slot: current shared count plus this lane's prefix count.
uint slot = passedSubmeshes + WavePrefixCountBits(passed);
sharedSubMeshes.submeshID[slot] = smid;
// Only the lane with no lower passing lane bumps the shared counter.
if (lowerThreads == 0U) {
passedSubmeshes += WaveActiveCountBits(passed);
}
}
GroupMemoryBarrier();
}
// Dispatch passedSubmeshes x 1 x 1 with the collected ids as the payload.
DispatchMesh(passedSubmeshes, 1, 1, sharedSubMeshes);
}
/* bool TestSubmesh(SubMesh submesh) { ... } */
// CHECK: %TestSubmesh = OpFunction %bool None [[funcType]]
// CHECK: %submesh_0 = OpFunctionParameter %_ptr_Function_SubMesh_0
// CHECK: %bb_entry_0 = OpLabel
// CHECK: %clip = OpVariable %_ptr_Function_uint Function
// CHECK: %bbv = OpVariable %_ptr_Function_uint Function
// CHECK: %pos = OpVariable %_ptr_Function_v4float Function
// CHECK: %for_check_0 = OpLabel
// CHECK: %for_body_0 = OpLabel
// CHECK: %for_merge_0 = OpLabel
// CHECK: [[clipValue:%\d+]] = OpLoad %uint %clip
// CHECK: [[retValue:%\d+]] = OpIEqual %bool [[clipValue]] %uint_63
// CHECK: OpReturnValue [[retValue]]
// CHECK: OpFunctionEnd

Просмотреть файл

@ -2044,6 +2044,9 @@ TEST_F(FileTest, MeshShadingNVMeshLine) {
// Runs the file test for meshshading.nv.point.mesh.hlsl with the default
// expectation (no Expect argument is passed).
TEST_F(FileTest, MeshShadingNVMeshPoint) {
runFileTest("meshshading.nv.point.mesh.hlsl");
}
// Runs the file test for meshshading.nv.buffer.mesh.hlsl with the default
// expectation (no Expect argument is passed).
TEST_F(FileTest, MeshShadingNVMeshBuffer) {
runFileTest("meshshading.nv.buffer.mesh.hlsl");
}
// Negative test: runs meshshading.nv.error1.mesh.hlsl with Expect::Failure.
TEST_F(FileTest, MeshShadingNVMeshError1) {
runFileTest("meshshading.nv.error1.mesh.hlsl", Expect::Failure);
}
@ -2089,6 +2092,10 @@ TEST_F(FileTest, MeshShadingNVMeshError14) {
// Runs the file test for meshshading.nv.amplification.hlsl with the default
// expectation (no Expect argument is passed).
TEST_F(FileTest, MeshShadingNVAmplification) {
runFileTest("meshshading.nv.amplification.hlsl");
}
// Runs the file test for meshshading.nv.fncall.amplification.hlsl.
// useVulkan1p1() is called before the run, unlike the neighbouring tests --
// presumably because the shader relies on Vulkan 1.1 features (e.g. wave
// operations); confirm against the HLSL input.
TEST_F(FileTest, MeshShadingNVAmplificationFunCall) {
useVulkan1p1();
runFileTest("meshshading.nv.fncall.amplification.hlsl");
}
// Negative test: runs meshshading.nv.error1.amplification.hlsl with
// Expect::Failure.
TEST_F(FileTest, MeshShadingNVAmplificationError1) {
runFileTest("meshshading.nv.error1.amplification.hlsl", Expect::Failure);
}