ExecutionTest Programmable Sample Offsets (#4285)

A texture whose texels each hold an encoded value representing their X,Y
location is read at various coordinates using the 8 methods that newly
accept programmable (non-immediate) offsets. Each result is compared to
the value expected for the given coords and offsets.
This commit is contained in:
Greg Roth 2022-02-25 20:00:49 -08:00 коммит произвёл GitHub
Родитель 0a07209e4c
Коммит c44a4a9ff9
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
2 изменённых файлов: 435 добавлений и 0 удалений

Просмотреть файл

@ -581,6 +581,282 @@
]]>
</Shader>
</ShaderOp>
<ShaderOp Name="ProgOffset" PS="PS" VS="VS" CS="CS" AS="AS" MS="MS" TopologyType="TRIANGLE">
<RootSignature>
RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT),
DescriptorTable(SRV(t0,numDescriptors=1), UAV(u0), UAV(u1), UAV(u2)),
StaticSampler(s0, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_MIN_MAG_MIP_POINT),
StaticSampler(s1, addressU = TEXTURE_ADDRESS_WRAP, addressV = TEXTURE_ADDRESS_WRAP, filter = FILTER_COMPARISON_MIN_MAG_MIP_POINT)
</RootSignature>
<Resource Name="VBuffer" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" Topology="TRIANGLELIST">
{ { -1.0f, 1.0f, 0.0f }, { 0.0f, 0.0f } },
{ { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
{ { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
{ { -1.0f, -1.0f, 0.0f }, { 0.0f, 1.0f } },
{ { 1.0f, 1.0f, 0.0f }, { 1.0f, 0.0f } },
{ { 1.0f, -1.0f, 0.0f }, { 1.0f, 1.0f } }
</Resource>
<!-- Source texture read by every shader stage. Its contents are supplied by
     name at run time (Init="ByName"); the 1000x1000 size matches the
     divide-by-1000 coordinate normalization in the shader text. -->
<Resource Name="T0" Dimension="Texture2D" Width="1000" Height="1000" InitialResourceState="COPY_DEST" Init="ByName" Format="R32_FLOAT" />
<!-- 18x18 render target; 18 is also the value of OFFSETS in the shader text. -->
<Resource Name="RTarget" Dimension="TEXTURE2D" Width="18" Height="18" Format="R32G32B32A32_FLOAT" Flags="ALLOW_RENDER_TARGET" InitialResourceState="COPY_DEST" />
<!-- Zero-initialized result buffers that are read back after the run.
     11552 bytes = 722 elements * 16-byte stride (see the UAV descriptors).
     The shaders write two uint4 entries for each of the 18x18 coord/offset
     pairs (648 entries), so the buffers are somewhat larger than required. -->
<Resource Name="U0" Dimension="BUFFER" Width="11552"
Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
Init="Zero" ReadBack="true" />
<Resource Name="U1" Dimension="BUFFER" Width="11552"
Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
Init="Zero" ReadBack="true" />
<Resource Name="U2" Dimension="BUFFER" Width="11552"
Flags="ALLOW_UNORDERED_ACCESS" InitialResourceState="COPY_DEST"
Init="Zero" ReadBack="true" />
<RootValues>
<RootValue HeapName="ResHeap" />
</RootValues>
<DescriptorHeap Name="ResHeap" Type="CBV_SRV_UAV">
<Descriptor Name='T0' Kind='SRV' ResName='T0' />
<Descriptor Name='U0' Kind='UAV' ResName='U0'
NumElements="722" StructureByteStride="16" />
<Descriptor Name='U1' Kind='UAV' ResName='U1'
NumElements="722" StructureByteStride="16" />
<Descriptor Name='U2' Kind='UAV' ResName='U2'
NumElements="722" StructureByteStride="16" />
</DescriptorHeap>
<DescriptorHeap Name="RtvHeap" NumDescriptors="1" Type="RTV">
<Descriptor Name="RTarget" Kind="RTV"/>
</DescriptorHeap>
<InputElements>
<InputElement SemanticName="POSITION" Format="R32G32B32_FLOAT" AlignedByteOffset="0" />
<InputElement SemanticName="TEXCOORD" Format="R32G32_FLOAT" AlignedByteOffset="12" />
</InputElements>
<RenderTargets>
<RenderTarget Name="RTarget"/>
</RenderTargets>
<Shader Name="CS" Target="cs_6_5" EntryPoint="CSMain" Text="@PS"/>
<Shader Name="CS66" Target="cs_6_6" EntryPoint="CSMain" Text="@PS"/>
<Shader Name="CS67" Target="cs_6_7" EntryPoint="CSMain" Text="@PS"/>
<Shader Name="VS" Target="vs_6_5" EntryPoint="VSMain" Text="@PS"/>
<Shader Name="MS" Target="ms_6_5" EntryPoint="MSMain" Text="@PS"/>
<Shader Name="MS66" Target="ms_6_6" EntryPoint="MSMain" Text="@PS"/>
<Shader Name="MS67" Target="ms_6_7" EntryPoint="MSMain" Text="@PS"/>
<Shader Name="MS66D" Target="ms_6_6" EntryPoint="MSMain" Arguments="/DDERIV_MESH_AMP=true" Text="@PS"/>
<Shader Name="MS67D" Target="ms_6_7" EntryPoint="MSMain" Arguments="/DDERIV_MESH_AMP=true" Text="@PS"/>
<Shader Name="AS" Target="as_6_5" EntryPoint="ASMain" Text="@PS"/>
<Shader Name="AS66" Target="as_6_6" EntryPoint="ASMain" Text="@PS"/>
<Shader Name="AS67" Target="as_6_7" EntryPoint="ASMain" Text="@PS"/>
<Shader Name="AS66D" Target="as_6_6" EntryPoint="ASMain" Arguments="/DDERIV_MESH_AMP=true" Text="@PS"/>
<Shader Name="AS67D" Target="as_6_7" EntryPoint="ASMain" Arguments="/DDERIV_MESH_AMP=true" Text="@PS"/>
<Shader Name="PS67" Target="ps_6_7" EntryPoint="PSMain" Text="@PS"/>
<Shader Name="PS" Target="ps_6_5" EntryPoint="PSMain">
<![CDATA[
// Number of coordinate/offset entries tested along each axis
#define OFFSETS 18
// Selects whether the mesh and amplification entry points run the
// derivative-dependent sample methods; false unless defined by the compiler arguments
#ifndef DERIV_MESH_AMP
#define DERIV_MESH_AMP false
#endif
// Vertex (or mesh) shader output consumed by the pixel shader
struct PSInput {
float4 position : SV_POSITION;
float2 uv : TEXCOORD;
};
// Source texture read by every test method
Texture2D<float> g_tex : register(t0);
// Result buffers: g_bufMain is written by the pixel and compute entry points,
// g_bufMesh by the mesh shader and g_bufAmp by the amplification shader
RWStructuredBuffer<uint4> g_bufMain : register(u0);
RWStructuredBuffer<uint4> g_bufMesh : register(u1);
RWStructuredBuffer<uint4> g_bufAmp : register(u2);
// Plain sampler for the Sample* methods, comparison sampler for the SampleCmp* methods
SamplerState g_samp : register(s0);
SamplerComparisonState g_sampCmp : register(s1);
// Reduces each component of the offset to its lowest 4 bits, read as a signed
// 4-bit integer. This is not a plain bit mask: moving the nibble to the top of
// the 32-bit int and shifting it back down sign-extends it.
int2 Mask4Offset(int2 offset) {
  const int shift = 28;
  return int2((offset.x << shift) >> shift,
              (offset.y << shift) >> shift);
}
// Tests Sample and SampleCmp variants with programmed offsets.
// Returns (Sample, SampleCmp, SampleCmpLevel, SampleCmpLevelZero). The first
// two slots need derivatives and are left at -1 when bDeriv is false.
uint4 DoTests( int2 coord, int2 offset, bool bDeriv ) {
  uint4 results = uint4(-1, -1, 0, 0);
#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 7)
  // The compare value should only acknowledge the lower 4 bits of the offset
  int2 wrapped = Mask4Offset(offset);
  // The texel index that should correspond to the offset location is the expected value
  float expected = (coord.y + wrapped.y)*1000 + coord.x + wrapped.x;
  // Samples take float coords in 0.0-1.0. Adding 0.5 keeps clear of texel edges
  float2 uv = (coord + 0.5)/1000.0;
  if (bDeriv) {
    results.x = uint(g_tex.Sample(g_samp, uv, offset));
    results.y = uint(g_tex.SampleCmp(g_sampCmp, uv, expected, offset));
  }
  results.z = uint(g_tex.SampleCmpLevel(g_sampCmp, uv, expected, 0, offset));
  results.w = uint(g_tex.SampleCmpLevelZero(g_sampCmp, uv, expected, offset));
#else
  // Without programmable offsets, fake the offset by folding it into the integer coords
  int2 shifted = coord + Mask4Offset(offset);
  // The texel index that should correspond to the shifted location
  float expected = (shifted.y)*1000 + shifted.x;
  // Samples take float coords in 0.0-1.0. Adding 0.5 keeps clear of texel edges
  float2 uv = (shifted + 0.5)/1000.0;
  if (bDeriv) {
    results.x = uint(g_tex.Sample(g_samp, uv));
    results.y = uint(g_tex.SampleCmp(g_sampCmp, uv, expected));
  }
  // SampleCmpLevel is not used on this path; compare a SampleLevel result instead
  results.z = uint(g_tex.SampleLevel(g_samp, uv, 0) == expected);
  results.w = uint(g_tex.SampleCmpLevelZero(g_sampCmp, uv, expected));
#endif
  return results;
}
// Tests Load and Sample* variants with programmed offsets.
// Returns (Load, SampleBias, SampleGrad, SampleLevel). SampleBias needs
// derivatives, so its slot is left at -1 when bDeriv is false.
uint4 DoMoarTests( int2 coord, int2 offset, bool bDeriv ) {
#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 7)
  // Load takes integer coords plus a mip level, unlike the rest
  uint3 texel = uint3(coord, 0);
  // Samples take float coords in 0.0-1.0. Adding 0.5 keeps clear of texel edges
  float2 uv = (coord + 0.5)/1000.0;
  uint4 results = uint4( g_tex.Load(texel, offset),
                         -1,
                         g_tex.SampleGrad(g_samp, uv, 1.0, 1.0, offset),
                         g_tex.SampleLevel(g_samp, uv, 0, offset));
  if (bDeriv) {
    results.y = uint(g_tex.SampleBias(g_samp, uv, -1.0, offset));
  }
#else
  // Without programmable offsets, fake the offset by folding it into the integer coords
  int2 shifted = coord + Mask4Offset(offset);
  // Load takes integer coords plus a mip level, unlike the rest
  uint3 texel = uint3(shifted, 0);
  // Samples take float coords in 0.0-1.0. Adding 0.5 keeps clear of texel edges
  float2 uv = (shifted + 0.5)/1000.0;
  uint4 results = uint4( g_tex.Load(texel),
                         -1,
                         g_tex.SampleGrad(g_samp, uv, 1.0, 1.0),
                         g_tex.SampleLevel(g_samp, uv, 0));
  if (bDeriv) {
    results.y = uint(g_tex.SampleBias(g_samp, uv, -1.0));
  }
#endif
  return results;
}
// Positions of the two triangles of the full-screen quad emitted by the mesh shader
static float4 g_Verts[6] = {
{ -1.0f, 1.0f, 0.0f, 1.0f },
{ 1.0f, 1.0f, 0.0f, 1.0f },
{ -1.0f, -1.0f, 0.0f, 1.0f },
{ -1.0f, -1.0f, 0.0f, 1.0f },
{ 1.0f, 1.0f, 0.0f, 1.0f },
{ 1.0f, -1.0f, 0.0f, 1.0f }};
// Texture coordinates matching g_Verts entry for entry
static float2 g_UV[6] = {
{ 0.0f, 0.0f },
{ 1.0f, 0.0f },
{ 0.0f, 1.0f },
{ 0.0f, 1.0f },
{ 1.0f, 0.0f },
{ 1.0f, 1.0f }};
// Payload handed from the amplification shader to the mesh shader; its single
// field is only ever set to zero
struct Payload {
uint nothing;
};
// Clearly these could be calculated in place, but it's illustrative to write them out
// These values were chosen to represent a selection of locations and the complete gamut of offsets
// Entry i of coords is paired with entry i of offsets along each axis; -9 and 8
// lie outside the signed 4-bit range handled by Mask4Offset
static const int coords[OFFSETS] = {100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950};
static const int offsets[OFFSETS] = {-9, -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8};
// Amplification shader entry point. Each of the 2x2 threads covers a 9x9 tile
// of the 18x18 coord/offset grid, writes two uint4 results per grid cell to
// g_bufAmp, then launches a single mesh shader group.
[NumThreads(2, 2, 1)]
void ASMain(uint3 id : SV_GroupThreadId) {
  Payload payload;
  // Bunching up work to compensate for small numthreads limits
  for (int y = 0; y < 9; y++) {
    for (int x = 0; x < 9; x++) {
      // Grid cell handled by this thread on this iteration
      uint2 cell = uint2(9*id.x + x, 9*id.y + y);
      // Integer types throughout (as in CSMain): the test helpers take int2 and
      // the offsets table holds negative values
      int2 coord = int2(coords[cell.x], coords[cell.y]);
      int2 offset = int2(offsets[cell.x], offsets[cell.y]);
      // Two consecutive uint4 slots per cell, laid out row-major
      uint slot = 2*(18*cell.y + cell.x);
      g_bufAmp[slot + 0] = DoTests(coord, offset, DERIV_MESH_AMP);
      g_bufAmp[slot + 1] = DoMoarTests(coord, offset, DERIV_MESH_AMP);
    }
  }
  payload.nothing = 0;
  DispatchMesh(1, 1, 1, payload);
}
// Mesh shader entry point. Emits the static full-screen quad and, like the
// amplification shader, has each of the 2x2 threads cover a 9x9 tile of the
// 18x18 coord/offset grid, writing two uint4 results per grid cell to g_bufMesh.
[NumThreads(2, 2, 1)]
[OutputTopology("triangle")]
void MSMain(
    uint3 id : SV_GroupThreadId,
    uint ix : SV_GroupIndex,
    in payload Payload payload,
    out vertices PSInput verts[6],
    out indices uint3 tris[2]) {
  SetMeshOutputCounts(6, 2);
  // Assign static fullscreen 2 tri quad
  if (ix == 0) {
    for (uint i = 0; i < 6; ++i) {
      verts[i].position = g_Verts[i];
      verts[i].uv = g_UV[i];
    }
  }
  // The first two threads each emit one triangle of three consecutive vertices
  if (ix < 2) {
    tris[ix&1] = uint3((ix&1)*3, (ix&1)*3 + 1, (ix&1)*3 + 2);
  }
  // Bunching up work to compensate for small numthreads limits
  for (int y = 0; y < 9; y++) {
    for (int x = 0; x < 9; x++) {
      // Grid cell handled by this thread on this iteration
      uint2 cell = uint2(9*id.x + x, 9*id.y + y);
      // Integer types throughout (as in CSMain): the test helpers take int2 and
      // the offsets table holds negative values
      int2 coord = int2(coords[cell.x], coords[cell.y]);
      int2 offset = int2(offsets[cell.x], offsets[cell.y]);
      // Two consecutive uint4 slots per cell, laid out row-major
      uint slot = 2*(18*cell.y + cell.x);
      g_bufMesh[slot + 0] = DoTests(coord, offset, DERIV_MESH_AMP);
      g_bufMesh[slot + 1] = DoMoarTests(coord, offset, DERIV_MESH_AMP);
    }
  }
}
// Pass-through vertex shader: promotes the position to a float4 with w = 1
// and forwards the texture coordinates unchanged.
PSInput VSMain(float3 position : POSITION, float2 uv : TEXCOORD) {
  PSInput output = { float4(position, 1.0), uv };
  return output;
}
// Pixel shader entry point. Maps the interpolated texcoords to a cell of the
// OFFSETS x OFFSETS grid and writes that cell's two uint4 results to g_bufMain.
float4 PSMain(PSInput input) : SV_TARGET {
  // Truncate once to an integer grid cell rather than indexing arrays with floats
  uint2 cell = uint2(input.uv * OFFSETS);
  uint ix = cell.y*OFFSETS + cell.x;
  // Retrieve coords and offsets based on texcoords. Integer types throughout
  // (as in CSMain): the test helpers take int2 and offsets holds negative values
  int2 coord = int2(coords[cell.x], coords[cell.y]);
  int2 offset = int2(offsets[cell.x], offsets[cell.y]);
  g_bufMain[2*ix] = DoTests(coord, offset, true);
  g_bufMain[2*ix+1] = DoMoarTests(coord, offset, true);
  return 1;
}
// Compute shader entry point. One thread per cell of the OFFSETS x OFFSETS
// grid; each writes its two uint4 results to g_bufMain at its group index.
[NumThreads(OFFSETS, OFFSETS, 1)]
void CSMain(precise uint3 id : SV_GroupThreadId, precise uint ix : SV_GroupIndex) {
  // The derivative-dependent methods are only exercised for shader model 6.6 and up
#if __SHADER_TARGET_MAJOR > 6 || (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 6)
  const bool bDeriv = true;
#else
  const bool bDeriv = false;
#endif
  int2 coord = int2(coords[id.x], coords[id.y]);
  int2 offset = int2(offsets[id.x], offsets[id.y]);
  g_bufMain[2*ix] = DoTests(coord, offset, bDeriv);
  g_bufMain[2*ix+1] = DoMoarTests(coord, offset, bDeriv);
}
]]>
</Shader>
</ShaderOp>
<ShaderOp Name="OOB" PS="PS" VS="VS">
<RootSignature>RootFlags(ALLOW_INPUT_ASSEMBLER_INPUT_LAYOUT), CBV(b0), DescriptorTable(SRV(t0,numDescriptors=2))</RootSignature>
<Resource Name="CB0" Dimension="BUFFER" InitialResourceState="COPY_DEST" Init="FromBytes" TransitionTo="VERTEX_AND_CONSTANT_BUFFER">

Просмотреть файл

@ -304,6 +304,7 @@ public:
TEST_METHOD(PartialDerivTest);
TEST_METHOD(DerivativesTest);
TEST_METHOD(ComputeSampleTest);
TEST_METHOD(ATOProgOffset);
TEST_METHOD(AtomicsTest);
TEST_METHOD(Atomics64Test);
TEST_METHOD(AtomicsRawHeap64Test);
@ -3640,6 +3641,164 @@ TEST_F(ExecutionTest, ComputeSampleTest) {
}
}
// Used to determine how an out of bounds offset should be converted: only the
// lowest 4 bits are kept and read as a signed 4-bit integer (so -9 becomes 7
// and 8 becomes -8). Done with unsigned arithmetic because left-shifting a
// negative or overflowing signed int is undefined before C++20; the argument
// is parenthesized so that expressions may be passed safely.
#define CLAMPOFFSET(offset) (static_cast<int>((static_cast<unsigned>(offset) & 0xFu) ^ 0x8u) - 8)
// Checks one read-back result buffer against the texel indices the shader
// should have fetched for every coordinate/offset pair.
// pPixels holds 8 unsigned values (two uint4 entries) per pair, in row-major
// order. bCheckDeriv enables the checks for the methods that need derivatives
// (Sample, SampleCmp and SampleBias).
void VerifyProgOffsetResults(unsigned *pPixels, bool bCheckDeriv) {
  // Same tables as in the shader, except that the two out-of-range offsets are
  // already converted to the values they are expected to behave as
  static const int coords[18] = {100, 150, 200, 250, 300, 350, 400, 450, 500, 550, 600, 650, 700, 750, 800, 850, 900, 950};
  static const int offsets[18] = {CLAMPOFFSET(-9), -8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, CLAMPOFFSET(8)};
  unsigned ix = 0;
  for (unsigned y = 0; y < _countof(coords); ++y) {
    // Index of the first texel of the expected row
    const int rowStart = (coords[y] + offsets[y])*1000;
    for (unsigned x = 0; x < _countof(coords); ++x, ++ix) {
      // Expected texel index for this pair
      unsigned cmp = rowStart + coords[x] + offsets[x];
      if (bCheckDeriv) {
        VERIFY_ARE_EQUAL(pPixels[2*4*ix+0], cmp); // Sample
        VERIFY_ARE_EQUAL(pPixels[2*4*ix+1], 1U); // SampleCmp
      }
      VERIFY_ARE_EQUAL(pPixels[2*4*ix+2], 1U); // SampleCmpLevel
      VERIFY_ARE_EQUAL(pPixels[2*4*ix+3], 1U); // SampleCmpLevelZero
      VERIFY_ARE_EQUAL(pPixels[2*4*ix+4], cmp); // Load
      if (bCheckDeriv) {
        VERIFY_ARE_EQUAL(pPixels[2*4*ix+5], cmp); // SampleBias
      }
      VERIFY_ARE_EQUAL(pPixels[2*4*ix+6], cmp); // SampleGrad
      VERIFY_ARE_EQUAL(pPixels[2*4*ix+7], cmp); // SampleLevel
    }
  }
}
// Fills a 1000x1000 float texture with index values increasing in row-major order
// The shader then uses non-immediate offsets extending from -9 to 8 to access these using
// Load, Sample, SampleCmp and variants thereof.
// The test verifies that the locations accessed correspond to where they should.
// Runs once per shader model from 6.5 to 6.7, stopping at the first one the
// device does not support; reports Skipped if none could be run.
TEST_F(ExecutionTest, ATOProgOffset) {
  WEX::TestExecution::SetVerifyOutput verifySettings(WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures);

  CComPtr<IStream> pStream;
  ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream);
  std::shared_ptr<st::ShaderOpSet> ShaderOpSet =
      std::make_shared<st::ShaderOpSet>();
  st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get());
  st::ShaderOp *pShaderOp = ShaderOpSet->GetShaderOp("ProgOffset");

  // Initializes the named resource so that every texel holds its own
  // row-major index as a float
  auto SampleInitFn = [](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *pOp) {
    D3D12_RESOURCE_DESC &texDesc = pOp->GetResourceByName(Name)->Desc;
    const UINT texWidth = (UINT)texDesc.Width;
    const UINT texHeight = (UINT)texDesc.Height;
    const size_t texelCount = (size_t)texWidth * texHeight;
    Data.resize(sizeof(float) * texelCount);
    float *pPrimitives = (float *)Data.data();
    for (size_t ix = 0; ix < texelCount; ++ix) {
      pPrimitives[ix] = float(ix);
    }
  };

  // Points the shader op at the named shader variants for one run
  auto SelectShaders = [pShaderOp](LPCSTR CS, LPCSTR PS, LPCSTR MS, LPCSTR AS) {
    pShaderOp->CS = pShaderOp->GetString(CS);
    pShaderOp->PS = pShaderOp->GetString(PS);
    pShaderOp->MS = pShaderOp->GetString(MS);
    pShaderOp->AS = pShaderOp->GetString(AS);
  };

  bool bTestsSkipped = true;

  D3D_SHADER_MODEL TestShaderModels[] = {D3D_SHADER_MODEL_6_5,
                                         D3D_SHADER_MODEL_6_6,
                                         D3D_SHADER_MODEL_6_7};
  for (unsigned i = 0; i < _countof(TestShaderModels); i++) {
    D3D_SHADER_MODEL sm = TestShaderModels[i];

    CComPtr<ID3D12Device> pDevice;
    if (!CreateDevice(&pDevice, sm, /*skipUnsupported*/false)) {
      LogCommentFmt(L"Device does not support shader model 6.%1u",
                    ((UINT)sm & 0x0f));
      // The models are listed in ascending order, so later ones are not tried
      break;
    }

    // Compute shaders run the derivative-dependent methods from 6.6 on; mesh and
    // amplification shaders additionally need device support for it
    bool bSupportMSASDeriv = DoesDeviceSupportMeshAmpDerivatives(pDevice);
    bool bCheckDerivCS = sm >= D3D_SHADER_MODEL_6_6;
    bool bCheckDerivMSAS = bCheckDerivCS && bSupportMSASDeriv;
    if (bCheckDerivCS && !bSupportMSASDeriv) {
      LogCommentFmt(L"Device does not support derivatives in Mesh and Amplification shaders");
    }

    // The "D" variants are compiled with derivatives enabled for mesh/amplification
    switch (sm) {
    case D3D_SHADER_MODEL_6_5:
      SelectShaders("CS", "PS", "MS", "AS");
      break;
    case D3D_SHADER_MODEL_6_6:
      SelectShaders("CS66", "PS",
                    bCheckDerivMSAS ? "MS66D" : "MS66",
                    bCheckDerivMSAS ? "AS66D" : "AS66");
      break;
    case D3D_SHADER_MODEL_6_7:
      SelectShaders("CS67", "PS67",
                    bCheckDerivMSAS ? "MS67D" : "MS67",
                    bCheckDerivMSAS ? "AS67D" : "AS67");
      break;
    }

    // Test compute shader
    std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", SampleInitFn, ShaderOpSet);
    MappedData data;
    test->Test->GetReadBackData("U0", &data);
    VerifyProgOffsetResults((UINT*)data.data(), bCheckDerivCS);

    // Disable CS so graphics shaders go forward
    pShaderOp->CS = nullptr;
    if (DoesDeviceSupportMeshShaders(pDevice)) {
      test = RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", SampleInitFn, ShaderOpSet);
      // PS
      test->Test->GetReadBackData("U0", &data);
      VerifyProgOffsetResults((UINT*)data.data(), true);
      // MS
      test->Test->GetReadBackData("U1", &data);
      VerifyProgOffsetResults((UINT*)data.data(), bCheckDerivMSAS);
      // AS
      test->Test->GetReadBackData("U2", &data);
      VerifyProgOffsetResults((UINT*)data.data(), bCheckDerivMSAS);
    }

    // Disable MS so PS goes forward
    pShaderOp->MS = nullptr;
    test = RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", SampleInitFn, ShaderOpSet);
    test->Test->GetReadBackData("U0", &data);
    VerifyProgOffsetResults((UINT*)data.data(), true);

    bTestsSkipped = false;
  }

  if (bTestsSkipped) {
    WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped);
  }
}
// Executing a simple binop to verify shader model 6.1 support; runs with
// ShaderModel61.CoreRequirement
TEST_F(ExecutionTest, BasicShaderModel61) {