Fixes WaveIntrinsicsInPSTest by using the right intrinsics to find active lanes (#102)

The prior version was using WaveGetLaneCount to determine active lanes.
This is incorrect for the following reasons.

First, WaveGetLaneCount includes inactive lanes on the wave, which was not the intent.

Second, the test compared this wave-wide number with the (presumed) number of active lanes in a quad.

The test is fixed to sum active lanes across a wave or quad instead.
This commit is contained in:
Marcelo Lopez Ruiz 2017-03-03 15:20:15 -08:00 коммит произвёл GitHub
Родитель d5dbc051f3
Коммит fa49ef58bb
1 изменённых файлов: 46 добавлений и 58 удалений

Просмотреть файл

@ -695,6 +695,35 @@ public:
const float ExecutionTest::ClearColor[4] = { 0.0f, 0.2f, 0.4f, 1.0f };
// WAVE_INTRINSIC_DXBC_GUARD expands to a sequence of adjacent string literals
// (concatenated by the compiler) containing HLSL source text. It is meant to
// be placed at the start of a shader source string, ahead of the shader's own
// declarations.
//
// The HLSL it contributes is wrapped in `#ifdef USING_DXBC` / `#endif`. When
// USING_DXBC is defined for the shader compile, it declares placeholder
// functions named after the wave intrinsics (WaveGetLaneIndex, WaveActive*,
// WavePrefix*, ...) and the quad intrinsics (QuadReadLaneAt, QuadReadAcross*).
// Each placeholder either returns a fixed value (1, true or false) or passes
// one of its arguments straight through; none of them performs a real
// cross-lane operation.
//
// NOTE(review): presumably these placeholders exist only so the same shader
// text still compiles on the DXBC path, where the real intrinsics are not
// available - confirm. Values produced through them are not wave data.
//
// Every literal ends in "\r\n" so that each declaration lands on its own line
// of the generated shader text.
#define WAVE_INTRINSIC_DXBC_GUARD \
"#ifdef USING_DXBC\r\n" \
"uint WaveGetLaneIndex() { return 1; }\r\n" \
"uint WaveReadLaneFirst(uint u) { return u; }\r\n" \
"bool WaveIsFirstLane() { return true; }\r\n" \
"uint WaveGetLaneCount() { return 1; }\r\n" \
"uint WaveReadLaneAt(uint n, uint u) { return u; }\r\n" \
"bool WaveActiveAnyTrue(bool b) { return b; }\r\n" \
"bool WaveActiveAllTrue(bool b) { return false; }\r\n" \
"uint WaveActiveAllEqual(uint u) { return u; }\r\n" \
"uint4 WaveActiveBallot(bool b) { return 1; }\r\n" \
"uint WaveActiveCountBits(uint u) { return 1; }\r\n" \
"uint WaveActiveSum(uint u) { return 1; }\r\n" \
"uint WaveActiveProduct(uint u) { return 1; }\r\n" \
"uint WaveActiveBitAnd(uint u) { return 1; }\r\n" \
"uint WaveActiveBitOr(uint u) { return 1; }\r\n" \
"uint WaveActiveBitXor(uint u) { return 1; }\r\n" \
"uint WaveActiveMin(uint u) { return 1; }\r\n" \
"uint WaveActiveMax(uint u) { return 1; }\r\n" \
"uint WavePrefixCountBits(uint u) { return 1; }\r\n" \
"uint WavePrefixSum(uint u) { return 1; }\r\n" \
"uint WavePrefixProduct(uint u) { return 1; }\r\n" \
"uint QuadReadLaneAt(uint a, uint u) { return 1; }\r\n" \
"uint QuadReadAcrossX(uint u) { return 1; }\r\n" \
"uint QuadReadAcrossY(uint u) { return 1; }\r\n" \
"uint QuadReadAcrossDiagonal(uint u) { return 1; }\r\n" \
"#endif\r\n"
static void SetupComputeValuePattern(std::vector<uint32_t> &values, size_t count) {
values.resize(count); // one element per dispatch group, in bytes
for (size_t i = 0; i < count; ++i) {
@ -1089,28 +1118,7 @@ TEST_F(ExecutionTest, WaveIntrinsicsTest) {
int32_t i_pfSum, i_pfProd;
};
static const char pShader[] =
"#ifdef USING_DXBC\r\n"
"uint WaveGetLaneIndex() { return 1; }\r\n"
"uint WaveReadLaneFirst(uint u) { return u; }\r\n"
"bool WaveIsFirstLane() { return true; }\r\n"
"uint WaveGetLaneCount() { return 1; }\r\n"
"uint WaveReadLaneAt(uint n, uint u) { return u; }\r\n"
"bool WaveActiveAnyTrue(bool b) { return b; }\r\n"
"bool WaveActiveAllTrue(bool b) { return false; }\r\n"
"uint WaveActiveAllEqual(uint u) { return u; }\r\n"
"uint4 WaveActiveBallot(bool b) { return 1; }\r\n"
"uint WaveActiveCountBits(uint u) { return 1; }\r\n"
"uint WaveActiveSum(uint u) { return 1; }\r\n"
"uint WaveActiveProduct(uint u) { return 1; }\r\n"
"uint WaveActiveBitAnd(uint u) { return 1; }\r\n"
"uint WaveActiveBitOr(uint u) { return 1; }\r\n"
"uint WaveActiveBitXor(uint u) { return 1; }\r\n"
"uint WaveActiveMin(uint u) { return 1; }\r\n"
"uint WaveActiveMax(uint u) { return 1; }\r\n"
"uint WavePrefixCountBits(uint u) { return 1; }\r\n"
"uint WavePrefixSum(uint u) { return 1; }\r\n"
"uint WavePrefixProduct(uint u) { return 1; }\r\n"
"#endif\r\n"
WAVE_INTRINSIC_DXBC_GUARD
"struct PerThreadData {\r\n"
" uint id, flags, laneIndex, laneCount, firstLaneId, preds, firstlaneX, lane1X;\r\n"
" uint allBC, allSum, allProd, allAND, allOR, allXOR, allMin, allMax;\r\n"
@ -1444,7 +1452,7 @@ TEST_F(ExecutionTest, WaveIntrinsicsInPSTest) {
XMFLOAT4 position;
uint32_t id, flags, laneIndex, laneCount, firstLaneId, sum1;
uint32_t id0, id1, id2, id3;
uint32_t acrossX, acrossY, acrossDiag;
uint32_t acrossX, acrossY, acrossDiag, quadActiveCount;
};
const UINT RTWidth = 128;
@ -1452,32 +1460,7 @@ TEST_F(ExecutionTest, WaveIntrinsicsInPSTest) {
// Shaders.
static const char pShaders[] =
"#ifdef USING_DXBC\r\n"
"uint WaveGetLaneIndex() { return 1; }\r\n"
"uint WaveReadLaneFirst(uint u) { return u; }\r\n"
"bool WaveIsFirstLane() { return true; }\r\n"
"uint WaveGetLaneCount() { return 1; }\r\n"
"uint WaveReadLaneAt(uint n, uint u) { return u; }\r\n"
"bool WaveActiveAnyTrue(bool b) { return b; }\r\n"
"bool WaveActiveAllTrue(bool b) { return false; }\r\n"
"uint WaveActiveAllEqual(uint u) { return u; }\r\n"
"uint4 WaveActiveBallot(bool b) { return 1; }\r\n"
"uint WaveActiveCountBits(uint u) { return 1; }\r\n"
"uint WaveActiveSum(uint u) { return 1; }\r\n"
"uint WaveActiveProduct(uint u) { return 1; }\r\n"
"uint WaveActiveBitAnd(uint u) { return 1; }\r\n"
"uint WaveActiveBitOr(uint u) { return 1; }\r\n"
"uint WaveActiveBitXor(uint u) { return 1; }\r\n"
"uint WaveActiveMin(uint u) { return 1; }\r\n"
"uint WaveActiveMax(uint u) { return 1; }\r\n"
"uint WavePrefixCountBits(uint u) { return 1; }\r\n"
"uint WavePrefixSum(uint u) { return 1; }\r\n"
"uint WavePrefixProduct(uint u) { return 1; }\r\n"
"uint QuadReadLaneAt(uint a, uint u) { return 1; }\r\n"
"uint QuadReadAcrossX(uint u) { return 1; }\r\n"
"uint QuadReadAcrossY(uint u) { return 1; }\r\n"
"uint QuadReadAcrossDiagonal(uint u) { return 1; }\r\n"
"#endif\r\n"
WAVE_INTRINSIC_DXBC_GUARD
"struct PSInput {\r\n"
" float4 position : SV_POSITION;\r\n"
"};\r\n\r\n"
@ -1490,11 +1473,11 @@ TEST_F(ExecutionTest, WaveIntrinsicsInPSTest) {
"typedef uint uint32_t;\r\n"
"uint pos_to_id(float4 pos) { return pos.x * 128 + pos.y; }\r\n"
"struct PerPixelData {\r\n"
"float4 position;\r\n"
" uint32_t id, flags, laneIndex, laneCount, firstLaneId, sum1; \r\n"
" float4 position;\r\n"
" uint32_t id, flags, laneIndex, laneCount, firstLaneId, sum1;\r\n"
" uint32_t id0, id1, id2, id3;\r\n"
" uint32_t acrossX, acrossY, acrossDiag; \r\n"
" };\r\n"
" uint32_t acrossX, acrossY, acrossDiag, quadActiveCount;\r\n"
"};\r\n"
"AppendStructuredBuffer<PerPixelData> g_sb : register(u1);\r\n"
"float4 PSMain(PSInput input) : SV_TARGET {\r\n"
" uint one = 1;\r\n"
@ -1514,6 +1497,7 @@ TEST_F(ExecutionTest, WaveIntrinsicsInPSTest) {
" d.acrossX = QuadReadAcrossX(d.id);\r\n"
" d.acrossY = QuadReadAcrossY(d.id);\r\n"
" d.acrossDiag = QuadReadAcrossDiagonal(d.id);\r\n"
" d.quadActiveCount = one + QuadReadAcrossX(one) + QuadReadAcrossY(one) + QuadReadAcrossDiagonal(one);\r\n"
" g_sb.Append(d);\r\n"
" return 1;\r\n"
"};\r\n";
@ -1590,6 +1574,7 @@ TEST_F(ExecutionTest, WaveIntrinsicsInPSTest) {
std::vector<PerPixelData> values;
values.resize(RTWidth * RTHeight * 2);
UINT valueSizeInBytes = values.size() * sizeof(PerPixelData);
memset(values.data(), 0, valueSizeInBytes);
CComPtr<ID3D12Resource> pUavResource;
CComPtr<ID3D12Resource> pUavReadBuffer;
CComPtr<ID3D12Resource> pUploadResource;
@ -1659,21 +1644,24 @@ TEST_F(ExecutionTest, WaveIntrinsicsInPSTest) {
if (dxbc)
return;
uint32_t maxActiveLaneCount = 0;
uint32_t maxLaneCount = 0;
for (uint32_t i = 0; i < appendCount; ++i) {
maxActiveLaneCount = std::max(maxActiveLaneCount, values[i].sum1);
maxLaneCount = std::max(maxLaneCount, values[i].laneCount);
}
uint32_t peerOfHelperLanes = 0;
for (uint32_t i = 0; i < appendCount; ++i) {
if (values[i].laneCount != maxLaneCount) {
if (values[i].sum1 != maxActiveLaneCount) {
++peerOfHelperLanes;
}
}
LogCommentFmt(L"%u threads. Found waves of count %u. Found %u lanes that "
L"had helpers in their waves.",
appendCount, maxLaneCount, peerOfHelperLanes);
LogCommentFmt(
L"Found: %u threads. Waves reported up to %u total lanes, up "
L"to %u active lanes, and %u threads had helper/inactive lanes.",
appendCount, maxLaneCount, maxActiveLaneCount, peerOfHelperLanes);
// Group threads into quad invocations.
uint32_t singlePixelCount = 0;
@ -1781,7 +1769,7 @@ TEST_F(ExecutionTest, WaveIntrinsicsInPSTest) {
VERIFY_ARE_EQUAL(d->acrossX, fnToLayoutData(isTop[i], !isLeft[i])->id);
VERIFY_ARE_EQUAL(d->acrossY, fnToLayoutData(!isTop[i], isLeft[i])->id);
VERIFY_ARE_EQUAL(d->acrossDiag, fnToLayoutData(!isTop[i], !isLeft[i])->id);
VERIFY_ARE_EQUAL(d->sum1, count);
VERIFY_ARE_EQUAL(d->quadActiveCount, count);
}
}
}