- Rename `Placeholder*` functions
- Rename `expectedMatrices`/`expectedLeftCols`/`expectedRightRows`
variables to `matrices`/`leftCols`/`rightRows` when they are used for
both the initial and the expected values
- Mark a test as skipped when it is disabled by an external TAEF parameter
- Hide debugging-only `PrintMat` function under `#ifndef NDEBUG`

Fixes
[#5371](https://github.com/microsoft/DirectXShaderCompiler/issues/5371)
This commit is contained in:
Helena Kotas 2023-09-18 10:35:17 -07:00 коммит произвёл GitHub
Родитель bfa397ac9b
Коммит 3e9e771e15
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
1 изменённых файлов: 252 добавлений и 225 удалений

Просмотреть файл

@ -688,7 +688,7 @@ public:
template <class Ty> template <class Ty>
const wchar_t* BasicShaderModelTest_GetFormatString(); const wchar_t* BasicShaderModelTest_GetFormatString();
CComPtr<ID3D12Device> WaveMatrixTestCommon( CComPtr<ID3D12Device> WaveMatrixTestCommonSetup(
std::vector<int> &dimMs, std::vector<int> &dimNs, std::vector<int> &dimMs, std::vector<int> &dimNs,
std::shared_ptr<st::ShaderOpSet> &shaderOpSet) { std::shared_ptr<st::ShaderOpSet> &shaderOpSet) {
WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::SetVerifyOutput verifySettings(
@ -8004,7 +8004,7 @@ TEST_F(ExecutionTest, TertiaryUint16OpTest) {
} }
template <typename T1, typename T2, typename TYPE_ACC> template <typename T1, typename T2, typename TYPE_ACC>
void PlaceholderMultiplyAccumulate(int DIM_M, int DIM_N, int dim_k, void MatrixMultiplyAndAddMatrix(int DIM_M, int DIM_N, int dim_k,
T1 *leftMatrix, T2 *rightMatrix, T1 *leftMatrix, T2 *rightMatrix,
TYPE_ACC *resultMatrix) { TYPE_ACC *resultMatrix) {
using namespace DirectX::PackedVector; using namespace DirectX::PackedVector;
@ -8047,15 +8047,15 @@ void PlaceholderMultiplyAccumulate(int DIM_M, int DIM_N, int dim_k,
} }
template <typename T1, typename T2, typename TYPE_ACC> template <typename T1, typename T2, typename TYPE_ACC>
void PlaceholderMultiply(int DIM_M, int DIM_N, int k, T1 *leftMatrix, void MatrixMultiplyByMatrix(int DIM_M, int DIM_N, int k, T1 *leftMatrix,
T2 *rightMatrix, TYPE_ACC *resultMatrix) { T2 *rightMatrix, TYPE_ACC *resultMatrix) {
memset(resultMatrix, 0, DIM_M * DIM_N * sizeof(TYPE_ACC)); memset(resultMatrix, 0, DIM_M * DIM_N * sizeof(TYPE_ACC));
PlaceholderMultiplyAccumulate<T1, T2, TYPE_ACC>(DIM_M, DIM_N, k, leftMatrix, MatrixMultiplyAndAddMatrix<T1, T2, TYPE_ACC>(DIM_M, DIM_N, k, leftMatrix,
rightMatrix, resultMatrix); rightMatrix, resultMatrix);
} }
template <typename T> template <typename T>
void PlaceholderAdd(int DIM_M, int DIM_N, T *matrixToAdd, T *resultMatrix) { void MatrixAddMatrix(int DIM_M, int DIM_N, T *matrixToAdd, T *resultMatrix) {
using namespace DirectX::PackedVector; using namespace DirectX::PackedVector;
for (size_t i = 0; i < (size_t)(DIM_M * DIM_N); ++i) { for (size_t i = 0; i < (size_t)(DIM_M * DIM_N); ++i) {
@ -8070,7 +8070,7 @@ void PlaceholderAdd(int DIM_M, int DIM_N, T *matrixToAdd, T *resultMatrix) {
} }
template <typename T> template <typename T>
void PlaceholderBroadcastAddLeftColAcc(int DIM_M, int DIM_N, T *leftCol, void MatrixAddColumn(int DIM_M, int DIM_N, T *leftCol,
T *resultMatrix) { T *resultMatrix) {
using namespace DirectX::PackedVector; using namespace DirectX::PackedVector;
@ -8090,7 +8090,7 @@ void PlaceholderBroadcastAddLeftColAcc(int DIM_M, int DIM_N, T *leftCol,
} }
template <typename T> template <typename T>
void PlaceholderBroadcastAddRightRowAcc(int DIM_M, int DIM_N, T *rightRow, void MatrixAddRow(int DIM_M, int DIM_N, T *rightRow,
T *resultMatrix) { T *resultMatrix) {
using namespace DirectX::PackedVector; using namespace DirectX::PackedVector;
@ -8109,7 +8109,7 @@ void PlaceholderBroadcastAddRightRowAcc(int DIM_M, int DIM_N, T *rightRow,
} }
template <typename TYPE_ACC, typename T> template <typename TYPE_ACC, typename T>
void PlaceholderSumAccumulateLeftColAcc(int DIM_M, int k, TYPE_ACC *leftCol, void MatrixSumColumns(int DIM_M, int k, TYPE_ACC *leftCol,
T *inMatrix) { T *inMatrix) {
using namespace DirectX::PackedVector; using namespace DirectX::PackedVector;
using T_PROMOTED = typename std::conditional using T_PROMOTED = typename std::conditional
@ -8137,7 +8137,7 @@ void PlaceholderSumAccumulateLeftColAcc(int DIM_M, int k, TYPE_ACC *leftCol,
} }
template <typename TYPE_ACC, typename T> template <typename TYPE_ACC, typename T>
void PlaceholderSumAccumulateRightRowAcc(int DIM_N, int k, TYPE_ACC *rightRow, void MatrixSumRows(int DIM_N, int k, TYPE_ACC *rightRow,
T *inMatrix) { T *inMatrix) {
using namespace DirectX::PackedVector; using namespace DirectX::PackedVector;
using T_PROMOTED = typename std::conditional using T_PROMOTED = typename std::conditional
@ -8165,7 +8165,7 @@ void PlaceholderSumAccumulateRightRowAcc(int DIM_N, int k, TYPE_ACC *rightRow,
} }
template <typename T> template <typename T>
void PlaceholderScalarMultiplyAccumulator(int DIM_M, int DIM_N, T scalar, void MatrixMultiplyByScalar(int DIM_M, int DIM_N, T scalar,
T *resultMatrix) { T *resultMatrix) {
for (int i = 0; i < DIM_M; ++i) { for (int i = 0; i < DIM_M; ++i) {
for (int j = 0; j < DIM_N; ++j) { for (int j = 0; j < DIM_N; ++j) {
@ -8176,7 +8176,7 @@ void PlaceholderScalarMultiplyAccumulator(int DIM_M, int DIM_N, T scalar,
} }
template<> template<>
void PlaceholderScalarMultiplyAccumulator<DirectX::PackedVector::HALF>( void MatrixMultiplyByScalar<DirectX::PackedVector::HALF>(
int DIM_M, int DIM_N, DirectX::PackedVector::HALF scalar, int DIM_M, int DIM_N, DirectX::PackedVector::HALF scalar,
DirectX::PackedVector::HALF *resultMatrix) { DirectX::PackedVector::HALF *resultMatrix) {
for (int i = 0; i < DIM_M; ++i) { for (int i = 0; i < DIM_M; ++i) {
@ -8190,7 +8190,7 @@ void PlaceholderScalarMultiplyAccumulator<DirectX::PackedVector::HALF>(
} }
template <typename T> template <typename T>
void PlaceholderScalarDivideAccumulator(int DIM_M, int DIM_N, T scalar, void MatrixDivideByScalar(int DIM_M, int DIM_N, T scalar,
T *resultMatrix) { T *resultMatrix) {
for (int i = 0; i < DIM_M; ++i) { for (int i = 0; i < DIM_M; ++i) {
for (int j = 0; j < DIM_N; ++j) { for (int j = 0; j < DIM_N; ++j) {
@ -8201,7 +8201,7 @@ void PlaceholderScalarDivideAccumulator(int DIM_M, int DIM_N, T scalar,
} }
template<> template<>
void PlaceholderScalarDivideAccumulator<DirectX::PackedVector::HALF>( void MatrixDivideByScalar<DirectX::PackedVector::HALF>(
int DIM_M, int DIM_N, DirectX::PackedVector::HALF scalar, int DIM_M, int DIM_N, DirectX::PackedVector::HALF scalar,
DirectX::PackedVector::HALF *resultMatrix) { DirectX::PackedVector::HALF *resultMatrix) {
for (int i = 0; i < DIM_M; ++i) { for (int i = 0; i < DIM_M; ++i) {
@ -8215,7 +8215,7 @@ void PlaceholderScalarDivideAccumulator<DirectX::PackedVector::HALF>(
} }
template <typename T> template <typename T>
void PlaceholderScalarAddAccumulator(int DIM_M, int DIM_N, T scalar, void MatrixAddScalar(int DIM_M, int DIM_N, T scalar,
T *resultMatrix) { T *resultMatrix) {
for (int i = 0; i < DIM_M; ++i) { for (int i = 0; i < DIM_M; ++i) {
for (int j = 0; j < DIM_N; ++j) { for (int j = 0; j < DIM_N; ++j) {
@ -8226,7 +8226,7 @@ void PlaceholderScalarAddAccumulator(int DIM_M, int DIM_N, T scalar,
} }
template<> template<>
void PlaceholderScalarAddAccumulator<DirectX::PackedVector::HALF>( void MatrixAddScalar<DirectX::PackedVector::HALF>(
int DIM_M, int DIM_N, DirectX::PackedVector::HALF scalar, int DIM_M, int DIM_N, DirectX::PackedVector::HALF scalar,
DirectX::PackedVector::HALF *resultMatrix) { DirectX::PackedVector::HALF *resultMatrix) {
for (int i = 0; i < DIM_M; ++i) { for (int i = 0; i < DIM_M; ++i) {
@ -8240,7 +8240,7 @@ void PlaceholderScalarAddAccumulator<DirectX::PackedVector::HALF>(
} }
template <typename T> template <typename T>
void PlaceholderScalarSubtractAccumulator(int DIM_M, int DIM_N, T scalar, void MatrixSubtractScalar(int DIM_M, int DIM_N, T scalar,
T *resultMatrix) { T *resultMatrix) {
for (int i = 0; i < DIM_M; ++i) { for (int i = 0; i < DIM_M; ++i) {
for (int j = 0; j < DIM_N; ++j) { for (int j = 0; j < DIM_N; ++j) {
@ -8251,7 +8251,7 @@ void PlaceholderScalarSubtractAccumulator(int DIM_M, int DIM_N, T scalar,
} }
template<> template<>
void PlaceholderScalarSubtractAccumulator<DirectX::PackedVector::HALF>( void MatrixSubtractScalar<DirectX::PackedVector::HALF>(
int DIM_M, int DIM_N, DirectX::PackedVector::HALF scalar, int DIM_M, int DIM_N, DirectX::PackedVector::HALF scalar,
DirectX::PackedVector::HALF *resultMatrix) { DirectX::PackedVector::HALF *resultMatrix) {
for (int i = 0; i < DIM_M; ++i) { for (int i = 0; i < DIM_M; ++i) {
@ -8265,14 +8265,14 @@ void PlaceholderScalarSubtractAccumulator<DirectX::PackedVector::HALF>(
} }
template <typename T> template <typename T>
void PlaceholderScalarMultiplyRowCol(int DIM, T scalar, T *rowCol) { void VectorMultiplyByScalar(int DIM, T scalar, T *rowCol) {
for (int i = 0; i < DIM; ++i) { for (int i = 0; i < DIM; ++i) {
rowCol[i] *= scalar; rowCol[i] *= scalar;
} }
} }
template <> template <>
void PlaceholderScalarMultiplyRowCol<DirectX::PackedVector::HALF>( void VectorMultiplyByScalar<DirectX::PackedVector::HALF>(
int DIM, DirectX::PackedVector::HALF scalar, int DIM, DirectX::PackedVector::HALF scalar,
DirectX::PackedVector::HALF *rowCol) { DirectX::PackedVector::HALF *rowCol) {
for (int i = 0; i < DIM; ++i) { for (int i = 0; i < DIM; ++i) {
@ -8282,14 +8282,14 @@ void PlaceholderScalarMultiplyRowCol<DirectX::PackedVector::HALF>(
} }
template <typename T> template <typename T>
void PlaceholderScalarDivideRowCol(int DIM, T scalar, T *rowCol) { void VectorDivideByScalar(int DIM, T scalar, T *rowCol) {
for (int i = 0; i < DIM; ++i) { for (int i = 0; i < DIM; ++i) {
rowCol[i] /= scalar; rowCol[i] /= scalar;
} }
} }
template <> template <>
void PlaceholderScalarDivideRowCol<DirectX::PackedVector::HALF>( void VectorDivideByScalar<DirectX::PackedVector::HALF>(
int DIM, DirectX::PackedVector::HALF scalar, int DIM, DirectX::PackedVector::HALF scalar,
DirectX::PackedVector::HALF *rowCol) { DirectX::PackedVector::HALF *rowCol) {
for (int i = 0; i < DIM; ++i) { for (int i = 0; i < DIM; ++i) {
@ -8299,14 +8299,14 @@ void PlaceholderScalarDivideRowCol<DirectX::PackedVector::HALF>(
} }
template <typename T> template <typename T>
void PlaceholderScalarAddRowCol(int DIM, T scalar, T *rowCol) { void VectorAddScalar(int DIM, T scalar, T *rowCol) {
for (int i = 0; i < DIM; ++i) { for (int i = 0; i < DIM; ++i) {
rowCol[i] += scalar; rowCol[i] += scalar;
} }
} }
template <> template <>
void PlaceholderScalarAddRowCol<DirectX::PackedVector::HALF>( void VectorAddScalar<DirectX::PackedVector::HALF>(
int DIM, DirectX::PackedVector::HALF scalar, int DIM, DirectX::PackedVector::HALF scalar,
DirectX::PackedVector::HALF *rowCol) { DirectX::PackedVector::HALF *rowCol) {
for (int i = 0; i < DIM; ++i) { for (int i = 0; i < DIM; ++i) {
@ -8316,14 +8316,14 @@ void PlaceholderScalarAddRowCol<DirectX::PackedVector::HALF>(
} }
template <typename T> template <typename T>
void PlaceholderScalarSubtractRowCol(int DIM, T scalar, T *rowCol) { void VectorSubtractScalar(int DIM, T scalar, T *rowCol) {
for (int i = 0; i < DIM; ++i) { for (int i = 0; i < DIM; ++i) {
rowCol[i] -= scalar; rowCol[i] -= scalar;
} }
} }
template <> template <>
void PlaceholderScalarSubtractRowCol<DirectX::PackedVector::HALF>( void VectorSubtractScalar<DirectX::PackedVector::HALF>(
int DIM, DirectX::PackedVector::HALF scalar, int DIM, DirectX::PackedVector::HALF scalar,
DirectX::PackedVector::HALF *rowCol) { DirectX::PackedVector::HALF *rowCol) {
for (int i = 0; i < DIM; ++i) { for (int i = 0; i < DIM; ++i) {
@ -8378,6 +8378,8 @@ void ConvertRangeHalfToFloat(float *dst, DirectX::PackedVector::HALF *src,
} }
} }
#ifndef NDEBUG
// Function to print out a matrix, used for debugging
template <typename T> void PrintMat(T *mat, int rows = 16, int cols = 16) { template <typename T> void PrintMat(T *mat, int rows = 16, int cols = 16) {
std::cout << "====================\n"; std::cout << "====================\n";
for (int i = 0; i < rows; ++i) { for (int i = 0; i < rows; ++i) {
@ -8387,7 +8389,10 @@ template <typename T> void PrintMat(T *mat, int rows = 16, int cols = 16) {
else if (typeid(T) == typeid(signed char)) else if (typeid(T) == typeid(signed char))
std::cout << (signed)mat[i * cols + j] << ", "; std::cout << (signed)mat[i * cols + j] << ", ";
else if (typeid(T) == typeid(DirectX::PackedVector::HALF)) else if (typeid(T) == typeid(DirectX::PackedVector::HALF))
std::cout << ConvertFloat16ToFloat32(mat[i * cols + j]) << ", "; std::cout << ConvertFloat16ToFloat32(
static_cast<DirectX::PackedVector::HALF>(
mat[i * cols + j]))
<< ", ";
else else
std::cout << mat[i * cols + j] << ", "; std::cout << mat[i * cols + j] << ", ";
} }
@ -8396,6 +8401,17 @@ template <typename T> void PrintMat(T *mat, int rows = 16, int cols = 16) {
std::cout << "====================\n"; std::cout << "====================\n";
} }
// Force instantiation to enable calling of PrintMat from debugger
template void PrintMat<float>(float *mat, int rows, int cols);
template void PrintMat<int32_t>(int32_t *mat, int rows, int cols);
template void PrintMat<unsigned char>(unsigned char *mat, int rows,
int cols);
template void PrintMat<signed char>(signed char *mat, int rows,
int cols);
template void
PrintMat<DirectX::PackedVector::HALF>(DirectX::PackedVector::HALF *mat,
int rows, int cols);
#endif
template <typename T> template <typename T>
void LoadStoreRowCol(int M, int N, bool LEFT, int MEM_TYPE, size_t start, void LoadStoreRowCol(int M, int N, bool LEFT, int MEM_TYPE, size_t start,
@ -9170,18 +9186,18 @@ void WaveMatrixMathTest(int DIM_M, int DIM_N, CComPtr<ID3D12Device> pDevice,
} }
// Generate expected outputs // Generate expected outputs
PlaceholderAdd<TYPE_ACC>(DIM_M, DIM_N, accumulatorMatrix.data(), MatrixAddMatrix<TYPE_ACC>(DIM_M, DIM_N, accumulatorMatrix.data(),
expectedMatrices[ADD_MATRIX].data()); expectedMatrices[ADD_MATRIX].data());
PlaceholderMultiply<T, T2, TYPE_ACC>(DIM_M, DIM_N, DIM_K, leftMatrix.data(), MatrixMultiplyByMatrix<T, T2, TYPE_ACC>(DIM_M, DIM_N, DIM_K, leftMatrix.data(),
rightMatrix.data(), rightMatrix.data(),
expectedMatrices[MULTIPLY].data()); expectedMatrices[MULTIPLY].data());
PlaceholderMultiplyAccumulate<T, T2, TYPE_ACC>( MatrixMultiplyAndAddMatrix<T, T2, TYPE_ACC>(
DIM_M, DIM_N, DIM_K, leftMatrix.data(), rightMatrix.data(), DIM_M, DIM_N, DIM_K, leftMatrix.data(), rightMatrix.data(),
expectedMatrices[MULTIPLY_ACCUMULATE].data()); expectedMatrices[MULTIPLY_ACCUMULATE].data());
PlaceholderBroadcastAddLeftColAcc<TYPE_ACC>( MatrixAddColumn<TYPE_ACC>(
DIM_M, DIM_N, leftCol.data(), DIM_M, DIM_N, leftCol.data(),
expectedMatrices[BROADCAST_ADD_LEFT_COL].data()); expectedMatrices[BROADCAST_ADD_LEFT_COL].data());
PlaceholderBroadcastAddRightRowAcc<TYPE_ACC>( MatrixAddRow<TYPE_ACC>(
DIM_M, DIM_N, rightRow.data(), DIM_M, DIM_N, rightRow.data(),
expectedMatrices[BROADCAST_ADD_RIGHT_ROW].data()); expectedMatrices[BROADCAST_ADD_RIGHT_ROW].data());
@ -9191,7 +9207,7 @@ void WaveMatrixMathTest(int DIM_M, int DIM_N, CComPtr<ID3D12Device> pDevice,
leftCol.size() * sizeof(leftCol[0])); leftCol.size() * sizeof(leftCol[0]));
// Sum accumulate the left input matrix onto the left col // Sum accumulate the left input matrix onto the left col
PlaceholderSumAccumulateLeftColAcc<TYPE_ACC, T>( MatrixSumColumns<TYPE_ACC, T>(
DIM_M, DIM_K, expectedRowCols[LEFT_COL_SUMACCUMULATE].data(), DIM_M, DIM_K, expectedRowCols[LEFT_COL_SUMACCUMULATE].data(),
leftMatrix.data()); leftMatrix.data());
@ -9200,7 +9216,7 @@ void WaveMatrixMathTest(int DIM_M, int DIM_N, CComPtr<ID3D12Device> pDevice,
rightRow.size() * sizeof(rightRow[0])); rightRow.size() * sizeof(rightRow[0]));
// Sum accumulate the right input matrix onto the right row // Sum accumulate the right input matrix onto the right row
PlaceholderSumAccumulateRightRowAcc<TYPE_ACC, T2>( MatrixSumRows<TYPE_ACC, T2>(
DIM_N, DIM_K, expectedRowCols[RIGHT_ROW_SUMACCUMULATE].data(), DIM_N, DIM_K, expectedRowCols[RIGHT_ROW_SUMACCUMULATE].data(),
rightMatrix.data()); rightMatrix.data());
@ -9356,7 +9372,7 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr<ID3D12Device> pDevice,
WEX::TestExecution::RuntimeParameters::TryGetValue<int>( WEX::TestExecution::RuntimeParameters::TryGetValue<int>(
L"Wmma_DisableFragmentTests", disableFragmentTests); L"Wmma_DisableFragmentTests", disableFragmentTests);
// Convert scalars to template typ (This is not used in half test). // Convert scalars to template type (This is not used in half test).
std::vector<T> scalars(floatScalars.size()); std::vector<T> scalars(floatScalars.size());
for (size_t i = 0; i < scalars.size(); ++i) { for (size_t i = 0; i < scalars.size(); ++i) {
@ -9384,15 +9400,14 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr<ID3D12Device> pDevice,
// We store left/right matrices in the same array so we just assume a // We store left/right matrices in the same array so we just assume a
// maximum size. This size applies to accumulators as well. // maximum size. This size applies to accumulators as well.
uint32_t numElements = DIM_M * DIM_N; uint32_t numElements = DIM_M * DIM_N;
std::vector<std::vector<T>> expectedMatrices( std::vector<std::vector<T>> matrices(SCALAR_NUM_OUTPUTS * scalars.size(),
SCALAR_NUM_OUTPUTS * scalars.size(),
std::vector<T>(numElements, (T)0)); std::vector<T>(numElements, (T)0));
std::vector<std::vector<T>> expectedLeftCols( std::vector<std::vector<T>> leftCols(SCALAR_NUM_OUTPUTS * scalars.size(),
SCALAR_NUM_OUTPUTS * scalars.size(), std::vector<T>(DIM_M, (T)0)); std::vector<T>(DIM_M, (T)0));
std::vector<std::vector<T>> expectedRightRows( std::vector<std::vector<T>> rightRows(SCALAR_NUM_OUTPUTS * scalars.size(),
SCALAR_NUM_OUTPUTS * scalars.size(), std::vector<T>(DIM_N, (T)0)); std::vector<T>(DIM_N, (T)0));
// Generate inputs and place into the expected outputs array for now // Generate inputs
for (size_t i = 0; i < scalars.size(); ++i) { for (size_t i = 0; i < scalars.size(); ++i) {
for (size_t j = 0; j < SCALAR_NUM_OUTPUTS; ++j) { for (size_t j = 0; j < SCALAR_NUM_OUTPUTS; ++j) {
size_t curr = i * SCALAR_NUM_OUTPUTS + j; size_t curr = i * SCALAR_NUM_OUTPUTS + j;
@ -9401,13 +9416,16 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr<ID3D12Device> pDevice,
size_t end = numElements - start; size_t end = numElements - start;
if (typeid(T) == typeid(DirectX::PackedVector::HALF)) { if (typeid(T) == typeid(DirectX::PackedVector::HALF)) {
GenerateMatrix<T>(expectedMatrices[curr].data(), numElements, (float)start, (float)end); GenerateMatrix<T>(matrices[curr].data(), numElements, (float)start,
GenerateMatrix<T>(expectedLeftCols[curr].data(), DIM_M, (float)start, (float)end); (float)end);
GenerateMatrix<T>(expectedRightRows[curr].data(), DIM_N, (float)start, (float)end); GenerateMatrix<T>(leftCols[curr].data(), DIM_M, (float)start,
(float)end);
GenerateMatrix<T>(rightRows[curr].data(), DIM_N, (float)start,
(float)end);
} else { } else {
GenerateMatrix<T>(expectedMatrices[curr].data(), numElements, (T)start, (T)end); GenerateMatrix<T>(matrices[curr].data(), numElements, (T)start, (T)end);
GenerateMatrix<T>(expectedLeftCols[curr].data(), DIM_M, (T)start, (T)end); GenerateMatrix<T>(leftCols[curr].data(), DIM_M, (T)start, (T)end);
GenerateMatrix<T>(expectedRightRows[curr].data(), DIM_N, (T)start, (T)end); GenerateMatrix<T>(rightRows[curr].data(), DIM_N, (T)start, (T)end);
} }
} }
} }
@ -9415,18 +9433,26 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr<ID3D12Device> pDevice,
if (typeid(T) == typeid(HALF)) { if (typeid(T) == typeid(HALF)) {
tolerance = 3; tolerance = 3;
Validation_type = L"ulp"; Validation_type = L"ulp";
expectedMatrices[0][0] = expectedLeftCols[0][0] = expectedRightRows[0][0] = ConvertFloat32ToFloat16(std::numeric_limits<float>::infinity()); matrices[0][0] = leftCols[0][0] = rightRows[0][0] =
expectedMatrices[1][0] = expectedLeftCols[1][0] = expectedRightRows[1][0] = ConvertFloat32ToFloat16(-std::numeric_limits<float>::infinity()); ConvertFloat32ToFloat16(std::numeric_limits<float>::infinity());
expectedMatrices[2][0] = expectedLeftCols[2][0] = expectedRightRows[2][0] = ConvertFloat32ToFloat16(std::numeric_limits<float>::quiet_NaN()); matrices[1][0] = leftCols[1][0] = rightRows[1][0] =
expectedMatrices[3][0] = expectedLeftCols[3][0] = expectedRightRows[3][0] = ConvertFloat32ToFloat16(-0.0f); ConvertFloat32ToFloat16(-std::numeric_limits<float>::infinity());
expectedMatrices[4][0] = expectedLeftCols[4][0] = expectedRightRows[4][0] = ConvertFloat32ToFloat16(std::numeric_limits<float>::denorm_min()); matrices[2][0] = leftCols[2][0] = rightRows[2][0] =
} ConvertFloat32ToFloat16(std::numeric_limits<float>::quiet_NaN());
else if (typeid(T) == typeid(float)) { matrices[3][0] = leftCols[3][0] = rightRows[3][0] =
expectedMatrices[0][0] = expectedLeftCols[0][0] = expectedRightRows[0][0] = (T)std::numeric_limits<float>::infinity(); ConvertFloat32ToFloat16(-0.0f);
expectedMatrices[1][0] = expectedLeftCols[1][0] = expectedRightRows[1][0] = (T)-std::numeric_limits<float>::infinity(); matrices[4][0] = leftCols[4][0] = rightRows[4][0] =
expectedMatrices[2][0] = expectedLeftCols[2][0] = expectedRightRows[2][0] = (T)std::numeric_limits<float>::quiet_NaN(); ConvertFloat32ToFloat16(std::numeric_limits<float>::denorm_min());
expectedMatrices[3][0] = expectedLeftCols[3][0] = expectedRightRows[3][0] = (T)-0.0f; } else if (typeid(T) == typeid(float)) {
expectedMatrices[4][0] = expectedLeftCols[4][0] = expectedRightRows[4][0] = std::numeric_limits<T>::denorm_min(); matrices[0][0] = leftCols[0][0] = rightRows[0][0] =
(T)std::numeric_limits<float>::infinity();
matrices[1][0] = leftCols[1][0] = rightRows[1][0] =
(T)-std::numeric_limits<float>::infinity();
matrices[2][0] = leftCols[2][0] = rightRows[2][0] =
(T)std::numeric_limits<float>::quiet_NaN();
matrices[3][0] = leftCols[3][0] = rightRows[3][0] = (T)-0.0f;
matrices[4][0] = leftCols[4][0] = rightRows[4][0] =
std::numeric_limits<T>::denorm_min();
} }
std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse( std::shared_ptr<ShaderOpTestResult> test = RunShaderOpTestAfterParse(
@ -9440,82 +9466,76 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr<ID3D12Device> pDevice,
} }
} else if (0 == _stricmp(Name, "g_bufInAccumulator")) { } else if (0 == _stricmp(Name, "g_bufInAccumulator")) {
// Copy input values to buffer // Copy input values to buffer
size_t mtxSize = size_t mtxSize = matrices[0].size() * sizeof(*matrices[0].data());
expectedMatrices[0].size() * sizeof(*expectedMatrices[0].data()); for (size_t i = 0; i < matrices.size(); ++i) {
for (size_t i = 0; i < expectedMatrices.size(); ++i) { memcpy(Data.data() + mtxSize * i, matrices[i].data(), mtxSize);
memcpy(Data.data() + mtxSize * i, expectedMatrices[i].data(),
mtxSize);
} }
// Process CPU side input values into expected values // Process CPU side input values in place into expected values
for (size_t i = 0; i < scalars.size(); ++i) { for (size_t i = 0; i < scalars.size(); ++i) {
PlaceholderScalarMultiplyAccumulator<T>( MatrixMultiplyByScalar<T>(
DIM_M, DIM_N, scalars[i], DIM_M, DIM_N, scalars[i],
expectedMatrices[i * SCALAR_NUM_OUTPUTS + SCALAR_MUL].data()); matrices[i * SCALAR_NUM_OUTPUTS + SCALAR_MUL].data());
PlaceholderScalarDivideAccumulator<T>( MatrixDivideByScalar<T>(
DIM_M, DIM_N, scalars[i], DIM_M, DIM_N, scalars[i],
expectedMatrices[i * SCALAR_NUM_OUTPUTS + SCALAR_DIV].data()); matrices[i * SCALAR_NUM_OUTPUTS + SCALAR_DIV].data());
PlaceholderScalarAddAccumulator<T>( MatrixAddScalar<T>(
DIM_M, DIM_N, scalars[i], DIM_M, DIM_N, scalars[i],
expectedMatrices[i * SCALAR_NUM_OUTPUTS + SCALAR_ADD].data()); matrices[i * SCALAR_NUM_OUTPUTS + SCALAR_ADD].data());
PlaceholderScalarSubtractAccumulator<T>( MatrixSubtractScalar<T>(
DIM_M, DIM_N, scalars[i], DIM_M, DIM_N, scalars[i],
expectedMatrices[i * SCALAR_NUM_OUTPUTS + SCALAR_SUB].data()); matrices[i * SCALAR_NUM_OUTPUTS + SCALAR_SUB].data());
FillMatrix<T>( FillMatrix<T>(matrices[i * SCALAR_NUM_OUTPUTS + SCALAR_FILL].data(),
expectedMatrices[i * SCALAR_NUM_OUTPUTS + SCALAR_FILL].data(),
DIM_M * DIM_N, scalars[i]); DIM_M * DIM_N, scalars[i]);
} }
} else if (0 == _stricmp(Name, "g_bufInLeftColAcc")) { } else if (0 == _stricmp(Name, "g_bufInLeftColAcc")) {
// Copy input values to buffer // Copy input values to buffer
size_t lcSize = size_t lcSize = leftCols[0].size() * sizeof(*leftCols[0].data());
expectedLeftCols[0].size() * sizeof(*expectedLeftCols[0].data()); for (size_t i = 0; i < leftCols.size(); ++i) {
for (size_t i = 0; i < expectedLeftCols.size(); ++i) { memcpy(Data.data() + lcSize * i, leftCols[i].data(), lcSize);
memcpy(Data.data() + lcSize * i, expectedLeftCols[i].data(),
lcSize);
} }
// Process CPU side input values into expected values // Process CPU side input values in place into expected values
for (size_t i = 0; i < scalars.size(); ++i) { for (size_t i = 0; i < scalars.size(); ++i) {
PlaceholderScalarMultiplyRowCol<T>( VectorMultiplyByScalar<T>(
DIM_M, scalars[i], DIM_M, scalars[i],
expectedLeftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_MUL].data()); leftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_MUL].data());
PlaceholderScalarDivideRowCol<T>( VectorDivideByScalar<T>(
DIM_M, scalars[i], DIM_M, scalars[i],
expectedLeftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_DIV].data()); leftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_DIV].data());
PlaceholderScalarAddRowCol<T>( VectorAddScalar<T>(
DIM_M, scalars[i], DIM_M, scalars[i],
expectedLeftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_ADD].data()); leftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_ADD].data());
PlaceholderScalarSubtractRowCol<T>( VectorSubtractScalar<T>(
DIM_M, scalars[i], DIM_M, scalars[i],
expectedLeftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_SUB].data()); leftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_SUB].data());
FillMatrix<T>( FillMatrix<T>(leftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_FILL].data(),
expectedLeftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_FILL].data(),
DIM_M, scalars[i]); DIM_M, scalars[i]);
} }
} else if (0 == _stricmp(Name, "g_bufInRightRowAcc")) { } else if (0 == _stricmp(Name, "g_bufInRightRowAcc")) {
// Copy input values to buffer // Copy input values to buffer
size_t rrSize = expectedRightRows[0].size() * sizeof(*expectedRightRows[0].data()); size_t rrSize = rightRows[0].size() * sizeof(*rightRows[0].data());
for (size_t i = 0; i < expectedRightRows.size(); ++i) { for (size_t i = 0; i < rightRows.size(); ++i) {
memcpy(Data.data() + rrSize * i, expectedRightRows[i].data(), rrSize); memcpy(Data.data() + rrSize * i, rightRows[i].data(), rrSize);
} }
// Process CPU side input values into expected values // Process CPU side input values in place into expected values
for (size_t i = 0; i < scalars.size(); ++i) { for (size_t i = 0; i < scalars.size(); ++i) {
PlaceholderScalarMultiplyRowCol<T>( VectorMultiplyByScalar<T>(
DIM_N, scalars[i], DIM_N, scalars[i],
expectedRightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_MUL].data()); rightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_MUL].data());
PlaceholderScalarDivideRowCol<T>( VectorDivideByScalar<T>(
DIM_N, scalars[i], DIM_N, scalars[i],
expectedRightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_DIV].data()); rightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_DIV].data());
PlaceholderScalarAddRowCol<T>( VectorAddScalar<T>(
DIM_N, scalars[i], DIM_N, scalars[i],
expectedRightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_ADD].data()); rightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_ADD].data());
PlaceholderScalarSubtractRowCol<T>( VectorSubtractScalar<T>(
DIM_N, scalars[i], DIM_N, scalars[i],
expectedRightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_SUB].data()); rightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_SUB].data());
FillMatrix<T>( FillMatrix<T>(
expectedRightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_FILL].data(), rightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_FILL].data(), DIM_N,
DIM_N, scalars[i]); scalars[i]);
} }
} else { } else {
std::fill(Data.begin(), Data.end(), (BYTE)0); std::fill(Data.begin(), Data.end(), (BYTE)0);
@ -9540,18 +9560,17 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr<ID3D12Device> pDevice,
// For verifying that both waves produce the same output // For verifying that both waves produce the same output
T *readBackRightRowData2 = T *readBackRightRowData2 =
readBackRightRowData + readBackRightRowData + rightRows.size() * rightRows[0].size();
expectedRightRows.size() * expectedRightRows[0].size();
T *readBackLeftColData2 = T *readBackLeftColData2 =
readBackLeftColData + readBackLeftColData + leftCols.size() * leftCols[0].size();
expectedLeftCols.size() * expectedLeftCols[0].size();
T *readBackMatrixData2 = T *readBackMatrixData2 =
readBackMatrixData + expectedMatrices.size() * expectedMatrices[0].size(); readBackMatrixData + matrices.size() * matrices[0].size();
WEX::TestExecution::DisableVerifyExceptions dve; WEX::TestExecution::DisableVerifyExceptions dve;
for (size_t i = 0; i < expectedMatrices.size(); ++i) { for (size_t i = 0; i < matrices.size(); ++i) {
auto &expectedMatrix = expectedMatrices[i]; auto &expectedMatrix = matrices[i];
std::string comment = std::string("Matrix/") + scalarEnumStrs[i % SCALAR_NUM_OUTPUTS] + ":"; std::string comment =
std::string("Matrix/") + scalarEnumStrs[i % SCALAR_NUM_OUTPUTS] + ":";
WEX::Logging::Log::Comment(CA2W(comment.c_str())); WEX::Logging::Log::Comment(CA2W(comment.c_str()));
VerifyArrayWithExpectedValue(readBackMatrixData, expectedMatrix.data(), VerifyArrayWithExpectedValue(readBackMatrixData, expectedMatrix.data(),
@ -9568,9 +9587,10 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr<ID3D12Device> pDevice,
} }
if (disableFragmentTests == 0) { if (disableFragmentTests == 0) {
for (size_t i = 0; i < expectedLeftCols.size(); ++i) { for (size_t i = 0; i < leftCols.size(); ++i) {
auto &expectedLeftColAcc = expectedLeftCols[i]; auto &expectedLeftColAcc = leftCols[i];
std::string comment = std::string("LeftCol/") + scalarEnumStrs[i % SCALAR_NUM_OUTPUTS] + ":"; std::string comment = std::string("LeftCol/") +
scalarEnumStrs[i % SCALAR_NUM_OUTPUTS] + ":";
WEX::Logging::Log::Comment(CA2W(comment.c_str())); WEX::Logging::Log::Comment(CA2W(comment.c_str()));
VerifyArrayWithExpectedValue( VerifyArrayWithExpectedValue(
@ -9586,9 +9606,10 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr<ID3D12Device> pDevice,
readBackLeftColData2 += expectedLeftColAcc.size(); readBackLeftColData2 += expectedLeftColAcc.size();
} }
for (size_t i = 0; i < expectedRightRows.size(); ++i) { for (size_t i = 0; i < rightRows.size(); ++i) {
auto &expectedRightRowAcc = expectedRightRows[i]; auto &expectedRightRowAcc = rightRows[i];
std::string comment = std::string("RightRow/") + scalarEnumStrs[i % SCALAR_NUM_OUTPUTS] + ":"; std::string comment = std::string("RightRow/") +
scalarEnumStrs[i % SCALAR_NUM_OUTPUTS] + ":";
WEX::Logging::Log::Comment(CA2W(comment.c_str())); WEX::Logging::Log::Comment(CA2W(comment.c_str()));
VerifyArrayWithExpectedValue( VerifyArrayWithExpectedValue(
@ -9628,21 +9649,29 @@ TEST_F(ExecutionTest, WaveMatrixLoadStoreTests) {
std::vector<int> dimNs; std::vector<int> dimNs;
std::shared_ptr<st::ShaderOpSet> ShaderOpSet; std::shared_ptr<st::ShaderOpSet> ShaderOpSet;
CComPtr<ID3D12Device> pDevice = WaveMatrixTestCommon(dimMs, dimNs, ShaderOpSet); CComPtr<ID3D12Device> pDevice = WaveMatrixTestCommonSetup(dimMs, dimNs, ShaderOpSet);
if (pDevice == nullptr) { if (pDevice == nullptr) {
return; return;
} }
PCWSTR validationType = L"epsilon"; // Check if the tests are enabled
double tolerance = 0; // 0 tolerance for load store int disableLoadStoreTests = 0;
WEX::TestExecution::RuntimeParameters::TryGetValue<int>(
L"Wmma_DisableLoadStoreTests", disableLoadStoreTests);
std::vector<int> memTypes = {BUFFER, GROUPSHARED}; if (disableLoadStoreTests == 1) {
LogCommentFmt(L"Wave matrix load store tests are disabled, skipping.");
WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped);
return;
}
// Parse mem types // Parse mem types
std::vector<int> memTypes = {BUFFER, GROUPSHARED};
std::wstring split; std::wstring split;
WEX::Common::String memTypeList; WEX::Common::String memTypeList;
WEX::TestExecution::RuntimeParameters::TryGetValue(L"Wmma_MemType", memTypeList); WEX::TestExecution::RuntimeParameters::TryGetValue(L"Wmma_MemType",
memTypeList);
if (!memTypeList.IsEmpty()) { if (!memTypeList.IsEmpty()) {
memTypeList.ToLower(); memTypeList.ToLower();
memTypes.clear(); memTypes.clear();
@ -9659,15 +9688,10 @@ TEST_F(ExecutionTest, WaveMatrixLoadStoreTests) {
} }
} }
///////////// // Run matrix load store tests for supported types
// LOAD STORE PCWSTR validationType = L"epsilon";
///////////// double tolerance = 0; // 0 tolerance for load store
int disableLoadStoreTests = 0;
WEX::TestExecution::RuntimeParameters::TryGetValue<int>(
L"Wmma_DisableLoadStoreTests", disableLoadStoreTests);
if (disableLoadStoreTests == 0) {
for (int dimM : dimMs) { for (int dimM : dimMs) {
for (int dimN : dimNs) { for (int dimN : dimNs) {
for (int memType : memTypes) { for (int memType : memTypes) {
@ -9690,7 +9714,6 @@ TEST_F(ExecutionTest, WaveMatrixLoadStoreTests) {
} }
} }
} }
}
TEST_F(ExecutionTest, WaveMatrixScalarTests) { TEST_F(ExecutionTest, WaveMatrixScalarTests) {
using namespace WMMA; using namespace WMMA;
@ -9699,24 +9722,27 @@ TEST_F(ExecutionTest, WaveMatrixScalarTests) {
std::vector<int> dimMs; std::vector<int> dimMs;
std::vector<int> dimNs; std::vector<int> dimNs;
std::shared_ptr<st::ShaderOpSet> ShaderOpSet; std::shared_ptr<st::ShaderOpSet> ShaderOpSet;
CComPtr<ID3D12Device> pDevice = WaveMatrixTestCommon(dimMs, dimNs, ShaderOpSet); CComPtr<ID3D12Device> pDevice =
WaveMatrixTestCommonSetup(dimMs, dimNs, ShaderOpSet);
if (pDevice == nullptr) { if (pDevice == nullptr) {
return; return;
} }
PCWSTR validationType = L"epsilon"; // Check if the tests are enabled
double tolerance = 0.008;
//////////
// SCALAR
//////////
int disableScalarTests = 0; int disableScalarTests = 0;
WEX::TestExecution::RuntimeParameters::TryGetValue<int>( WEX::TestExecution::RuntimeParameters::TryGetValue<int>(
L"Wmma_DisableScalarTests", disableScalarTests); L"Wmma_DisableScalarTests", disableScalarTests);
if (disableScalarTests == 0) { if (disableScalarTests == 1) {
LogCommentFmt(L"Wave matrix scalar tests are disabled, skipping.");
WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped);
return;
}
// Run the matrix scalar tests for supported types
PCWSTR validationType = L"epsilon";
double tolerance = 0.008;
std::vector<float> scalars = { -100.0f, 20.0f, -50.0f, -0.0f, 0.0f, 42.0f }; std::vector<float> scalars = { -100.0f, 20.0f, -50.0f, -0.0f, 0.0f, 42.0f };
for (uint32_t dimM : dimMs) { for (uint32_t dimM : dimMs) {
@ -9749,7 +9775,6 @@ TEST_F(ExecutionTest, WaveMatrixScalarTests) {
} }
} }
} }
}
TEST_F(ExecutionTest, WaveMatrixMathTests) { TEST_F(ExecutionTest, WaveMatrixMathTests) {
using namespace WMMA; using namespace WMMA;
@ -9758,24 +9783,27 @@ TEST_F(ExecutionTest, WaveMatrixMathTests) {
std::vector<int> dimMs; std::vector<int> dimMs;
std::vector<int> dimNs; std::vector<int> dimNs;
std::shared_ptr<st::ShaderOpSet> ShaderOpSet; std::shared_ptr<st::ShaderOpSet> ShaderOpSet;
CComPtr<ID3D12Device> pDevice = WaveMatrixTestCommon(dimMs, dimNs, ShaderOpSet); CComPtr<ID3D12Device> pDevice = WaveMatrixTestCommonSetup(dimMs, dimNs, ShaderOpSet);
if (pDevice == nullptr) { if (pDevice == nullptr) {
return; return;
} }
PCWSTR validationType = L"epsilon"; // Check if the tests are enabled
double tolerance = 0.008;
//////////
// MATH TEST
//////////
int disableMathTests = 0; int disableMathTests = 0;
WEX::TestExecution::RuntimeParameters::TryGetValue<int>( WEX::TestExecution::RuntimeParameters::TryGetValue<int>(
L"Wmma_DisableMathTests", disableMathTests); L"Wmma_DisableMathTests", disableMathTests);
if (disableMathTests == 0) { if (disableMathTests == 1) {
LogCommentFmt(L"Wave matrix math tests are disabled, skipping.");
WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped);
return;
}
// Run the matrix math tests for supported types
PCWSTR validationType = L"epsilon";
double tolerance = 0.008;
for (uint32_t dimM : dimMs) { for (uint32_t dimM : dimMs) {
for (uint32_t dimN : dimNs) { for (uint32_t dimN : dimNs) {
WaveMatrixMathTest<float, float, float>( WaveMatrixMathTest<float, float, float>(
@ -9802,7 +9830,6 @@ TEST_F(ExecutionTest, WaveMatrixMathTests) {
} }
} }
} }
}
TEST_F(ExecutionTest, DotTest) { TEST_F(ExecutionTest, DotTest) {
WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::SetVerifyOutput verifySettings(