diff --git a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp index 66c7a215f..3e87dde52 100644 --- a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp +++ b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp @@ -688,7 +688,7 @@ public: template const wchar_t* BasicShaderModelTest_GetFormatString(); - CComPtr WaveMatrixTestCommon( + CComPtr WaveMatrixTestCommonSetup( std::vector &dimMs, std::vector &dimNs, std::shared_ptr &shaderOpSet) { WEX::TestExecution::SetVerifyOutput verifySettings( @@ -8004,7 +8004,7 @@ TEST_F(ExecutionTest, TertiaryUint16OpTest) { } template -void PlaceholderMultiplyAccumulate(int DIM_M, int DIM_N, int dim_k, +void MatrixMultiplyAndAddMatrix(int DIM_M, int DIM_N, int dim_k, T1 *leftMatrix, T2 *rightMatrix, TYPE_ACC *resultMatrix) { using namespace DirectX::PackedVector; @@ -8047,15 +8047,15 @@ void PlaceholderMultiplyAccumulate(int DIM_M, int DIM_N, int dim_k, } template -void PlaceholderMultiply(int DIM_M, int DIM_N, int k, T1 *leftMatrix, +void MatrixMultiplyByMatrix(int DIM_M, int DIM_N, int k, T1 *leftMatrix, T2 *rightMatrix, TYPE_ACC *resultMatrix) { memset(resultMatrix, 0, DIM_M * DIM_N * sizeof(TYPE_ACC)); - PlaceholderMultiplyAccumulate(DIM_M, DIM_N, k, leftMatrix, + MatrixMultiplyAndAddMatrix(DIM_M, DIM_N, k, leftMatrix, rightMatrix, resultMatrix); } template -void PlaceholderAdd(int DIM_M, int DIM_N, T *matrixToAdd, T *resultMatrix) { +void MatrixAddMatrix(int DIM_M, int DIM_N, T *matrixToAdd, T *resultMatrix) { using namespace DirectX::PackedVector; for (size_t i = 0; i < (size_t)(DIM_M * DIM_N); ++i) { @@ -8070,7 +8070,7 @@ void PlaceholderAdd(int DIM_M, int DIM_N, T *matrixToAdd, T *resultMatrix) { } template -void PlaceholderBroadcastAddLeftColAcc(int DIM_M, int DIM_N, T *leftCol, +void MatrixAddColumn(int DIM_M, int DIM_N, T *leftCol, T *resultMatrix) { using namespace DirectX::PackedVector; @@ -8090,7 +8090,7 @@ void PlaceholderBroadcastAddLeftColAcc(int DIM_M, int DIM_N, T 
*leftCol, } template -void PlaceholderBroadcastAddRightRowAcc(int DIM_M, int DIM_N, T *rightRow, +void MatrixAddRow(int DIM_M, int DIM_N, T *rightRow, T *resultMatrix) { using namespace DirectX::PackedVector; @@ -8109,7 +8109,7 @@ void PlaceholderBroadcastAddRightRowAcc(int DIM_M, int DIM_N, T *rightRow, } template -void PlaceholderSumAccumulateLeftColAcc(int DIM_M, int k, TYPE_ACC *leftCol, +void MatrixSumColumns(int DIM_M, int k, TYPE_ACC *leftCol, T *inMatrix) { using namespace DirectX::PackedVector; using T_PROMOTED = typename std::conditional @@ -8137,7 +8137,7 @@ void PlaceholderSumAccumulateLeftColAcc(int DIM_M, int k, TYPE_ACC *leftCol, } template -void PlaceholderSumAccumulateRightRowAcc(int DIM_N, int k, TYPE_ACC *rightRow, +void MatrixSumRows(int DIM_N, int k, TYPE_ACC *rightRow, T *inMatrix) { using namespace DirectX::PackedVector; using T_PROMOTED = typename std::conditional @@ -8165,7 +8165,7 @@ void PlaceholderSumAccumulateRightRowAcc(int DIM_N, int k, TYPE_ACC *rightRow, } template -void PlaceholderScalarMultiplyAccumulator(int DIM_M, int DIM_N, T scalar, +void MatrixMultiplyByScalar(int DIM_M, int DIM_N, T scalar, T *resultMatrix) { for (int i = 0; i < DIM_M; ++i) { for (int j = 0; j < DIM_N; ++j) { @@ -8176,7 +8176,7 @@ void PlaceholderScalarMultiplyAccumulator(int DIM_M, int DIM_N, T scalar, } template<> -void PlaceholderScalarMultiplyAccumulator( +void MatrixMultiplyByScalar( int DIM_M, int DIM_N, DirectX::PackedVector::HALF scalar, DirectX::PackedVector::HALF *resultMatrix) { for (int i = 0; i < DIM_M; ++i) { @@ -8190,7 +8190,7 @@ void PlaceholderScalarMultiplyAccumulator( } template -void PlaceholderScalarDivideAccumulator(int DIM_M, int DIM_N, T scalar, +void MatrixDivideByScalar(int DIM_M, int DIM_N, T scalar, T *resultMatrix) { for (int i = 0; i < DIM_M; ++i) { for (int j = 0; j < DIM_N; ++j) { @@ -8201,7 +8201,7 @@ void PlaceholderScalarDivideAccumulator(int DIM_M, int DIM_N, T scalar, } template<> -void PlaceholderScalarDivideAccumulator( 
+void MatrixDivideByScalar( int DIM_M, int DIM_N, DirectX::PackedVector::HALF scalar, DirectX::PackedVector::HALF *resultMatrix) { for (int i = 0; i < DIM_M; ++i) { @@ -8215,7 +8215,7 @@ void PlaceholderScalarDivideAccumulator( } template -void PlaceholderScalarAddAccumulator(int DIM_M, int DIM_N, T scalar, +void MatrixAddScalar(int DIM_M, int DIM_N, T scalar, T *resultMatrix) { for (int i = 0; i < DIM_M; ++i) { for (int j = 0; j < DIM_N; ++j) { @@ -8226,7 +8226,7 @@ void PlaceholderScalarAddAccumulator(int DIM_M, int DIM_N, T scalar, } template<> -void PlaceholderScalarAddAccumulator( +void MatrixAddScalar( int DIM_M, int DIM_N, DirectX::PackedVector::HALF scalar, DirectX::PackedVector::HALF *resultMatrix) { for (int i = 0; i < DIM_M; ++i) { @@ -8240,7 +8240,7 @@ void PlaceholderScalarAddAccumulator( } template -void PlaceholderScalarSubtractAccumulator(int DIM_M, int DIM_N, T scalar, +void MatrixSubtractScalar(int DIM_M, int DIM_N, T scalar, T *resultMatrix) { for (int i = 0; i < DIM_M; ++i) { for (int j = 0; j < DIM_N; ++j) { @@ -8251,7 +8251,7 @@ void PlaceholderScalarSubtractAccumulator(int DIM_M, int DIM_N, T scalar, } template<> -void PlaceholderScalarSubtractAccumulator( +void MatrixSubtractScalar( int DIM_M, int DIM_N, DirectX::PackedVector::HALF scalar, DirectX::PackedVector::HALF *resultMatrix) { for (int i = 0; i < DIM_M; ++i) { @@ -8265,14 +8265,14 @@ void PlaceholderScalarSubtractAccumulator( } template -void PlaceholderScalarMultiplyRowCol(int DIM, T scalar, T *rowCol) { +void VectorMultiplyByScalar(int DIM, T scalar, T *rowCol) { for (int i = 0; i < DIM; ++i) { rowCol[i] *= scalar; } } template <> -void PlaceholderScalarMultiplyRowCol( +void VectorMultiplyByScalar( int DIM, DirectX::PackedVector::HALF scalar, DirectX::PackedVector::HALF *rowCol) { for (int i = 0; i < DIM; ++i) { @@ -8282,14 +8282,14 @@ void PlaceholderScalarMultiplyRowCol( } template -void PlaceholderScalarDivideRowCol(int DIM, T scalar, T *rowCol) { +void VectorDivideByScalar(int 
DIM, T scalar, T *rowCol) { for (int i = 0; i < DIM; ++i) { rowCol[i] /= scalar; } } template <> -void PlaceholderScalarDivideRowCol( +void VectorDivideByScalar( int DIM, DirectX::PackedVector::HALF scalar, DirectX::PackedVector::HALF *rowCol) { for (int i = 0; i < DIM; ++i) { @@ -8299,14 +8299,14 @@ void PlaceholderScalarDivideRowCol( } template -void PlaceholderScalarAddRowCol(int DIM, T scalar, T *rowCol) { +void VectorAddScalar(int DIM, T scalar, T *rowCol) { for (int i = 0; i < DIM; ++i) { rowCol[i] += scalar; } } template <> -void PlaceholderScalarAddRowCol( +void VectorAddScalar( int DIM, DirectX::PackedVector::HALF scalar, DirectX::PackedVector::HALF *rowCol) { for (int i = 0; i < DIM; ++i) { @@ -8316,14 +8316,14 @@ void PlaceholderScalarAddRowCol( } template -void PlaceholderScalarSubtractRowCol(int DIM, T scalar, T *rowCol) { +void VectorSubtractScalar(int DIM, T scalar, T *rowCol) { for (int i = 0; i < DIM; ++i) { rowCol[i] -= scalar; } } template <> -void PlaceholderScalarSubtractRowCol( +void VectorSubtractScalar( int DIM, DirectX::PackedVector::HALF scalar, DirectX::PackedVector::HALF *rowCol) { for (int i = 0; i < DIM; ++i) { @@ -8378,6 +8378,8 @@ void ConvertRangeHalfToFloat(float *dst, DirectX::PackedVector::HALF *src, } } +#ifndef NDEBUG +// Function to print out a matrix, used for debugging template void PrintMat(T *mat, int rows = 16, int cols = 16) { std::cout << "====================\n"; for (int i = 0; i < rows; ++i) { @@ -8387,7 +8389,10 @@ template void PrintMat(T *mat, int rows = 16, int cols = 16) { else if (typeid(T) == typeid(signed char)) std::cout << (signed)mat[i * cols + j] << ", "; else if (typeid(T) == typeid(DirectX::PackedVector::HALF)) - std::cout << ConvertFloat16ToFloat32(mat[i * cols + j]) << ", "; + std::cout << ConvertFloat16ToFloat32( + static_cast( + mat[i * cols + j])) + << ", "; else std::cout << mat[i * cols + j] << ", "; } @@ -8396,6 +8401,17 @@ template void PrintMat(T *mat, int rows = 16, int cols = 16) { std::cout 
<< "====================\n"; } +// Force instantiation to enable calling of PrintMat from debugger +template void PrintMat(float *mat, int rows, int cols); +template void PrintMat(int32_t *mat, int rows, int cols); +template void PrintMat(unsigned char *mat, int rows, + int cols); +template void PrintMat(signed char *mat, int rows, + int cols); +template void +PrintMat(DirectX::PackedVector::HALF *mat, + int rows, int cols); +#endif template void LoadStoreRowCol(int M, int N, bool LEFT, int MEM_TYPE, size_t start, @@ -9170,18 +9186,18 @@ void WaveMatrixMathTest(int DIM_M, int DIM_N, CComPtr pDevice, } // Generate expected outputs - PlaceholderAdd(DIM_M, DIM_N, accumulatorMatrix.data(), + MatrixAddMatrix(DIM_M, DIM_N, accumulatorMatrix.data(), expectedMatrices[ADD_MATRIX].data()); - PlaceholderMultiply(DIM_M, DIM_N, DIM_K, leftMatrix.data(), + MatrixMultiplyByMatrix(DIM_M, DIM_N, DIM_K, leftMatrix.data(), rightMatrix.data(), expectedMatrices[MULTIPLY].data()); - PlaceholderMultiplyAccumulate( + MatrixMultiplyAndAddMatrix( DIM_M, DIM_N, DIM_K, leftMatrix.data(), rightMatrix.data(), expectedMatrices[MULTIPLY_ACCUMULATE].data()); - PlaceholderBroadcastAddLeftColAcc( + MatrixAddColumn( DIM_M, DIM_N, leftCol.data(), expectedMatrices[BROADCAST_ADD_LEFT_COL].data()); - PlaceholderBroadcastAddRightRowAcc( + MatrixAddRow( DIM_M, DIM_N, rightRow.data(), expectedMatrices[BROADCAST_ADD_RIGHT_ROW].data()); @@ -9191,7 +9207,7 @@ void WaveMatrixMathTest(int DIM_M, int DIM_N, CComPtr pDevice, leftCol.size() * sizeof(leftCol[0])); // Sum accumulate the left input matrix onto the left col - PlaceholderSumAccumulateLeftColAcc( + MatrixSumColumns( DIM_M, DIM_K, expectedRowCols[LEFT_COL_SUMACCUMULATE].data(), leftMatrix.data()); @@ -9200,7 +9216,7 @@ void WaveMatrixMathTest(int DIM_M, int DIM_N, CComPtr pDevice, rightRow.size() * sizeof(rightRow[0])); // Sum accumulate the right input matrix onto the right row - PlaceholderSumAccumulateRightRowAcc( + MatrixSumRows( DIM_N, DIM_K, 
expectedRowCols[RIGHT_ROW_SUMACCUMULATE].data(), rightMatrix.data()); @@ -9356,7 +9372,7 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr pDevice, WEX::TestExecution::RuntimeParameters::TryGetValue( L"Wmma_DisableFragmentTests", disableFragmentTests); - // Convert scalars to template typ (This is not used in half test). + // Convert scalars to template type (This is not used in half test). std::vector scalars(floatScalars.size()); for (size_t i = 0; i < scalars.size(); ++i) { @@ -9384,15 +9400,14 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr pDevice, // We store left/right matrices in the same array so we just assume a // maximum size. This size applies to accumulators as well. uint32_t numElements = DIM_M * DIM_N; - std::vector> expectedMatrices( - SCALAR_NUM_OUTPUTS * scalars.size(), - std::vector(numElements, (T)0)); - std::vector> expectedLeftCols( - SCALAR_NUM_OUTPUTS * scalars.size(), std::vector(DIM_M, (T)0)); - std::vector> expectedRightRows( - SCALAR_NUM_OUTPUTS * scalars.size(), std::vector(DIM_N, (T)0)); + std::vector> matrices(SCALAR_NUM_OUTPUTS * scalars.size(), + std::vector(numElements, (T)0)); + std::vector> leftCols(SCALAR_NUM_OUTPUTS * scalars.size(), + std::vector(DIM_M, (T)0)); + std::vector> rightRows(SCALAR_NUM_OUTPUTS * scalars.size(), + std::vector(DIM_N, (T)0)); - // Generate inputs and place into the expected outputs array for now + // Generate inputs for (size_t i = 0; i < scalars.size(); ++i) { for (size_t j = 0; j < SCALAR_NUM_OUTPUTS; ++j) { size_t curr = i * SCALAR_NUM_OUTPUTS + j; @@ -9401,13 +9416,16 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr pDevice, size_t end = numElements - start; if (typeid(T) == typeid(DirectX::PackedVector::HALF)) { - GenerateMatrix(expectedMatrices[curr].data(), numElements, (float)start, (float)end); - GenerateMatrix(expectedLeftCols[curr].data(), DIM_M, (float)start, (float)end); - GenerateMatrix(expectedRightRows[curr].data(), DIM_N, (float)start, (float)end); + 
GenerateMatrix(matrices[curr].data(), numElements, (float)start, + (float)end); + GenerateMatrix(leftCols[curr].data(), DIM_M, (float)start, + (float)end); + GenerateMatrix(rightRows[curr].data(), DIM_N, (float)start, + (float)end); } else { - GenerateMatrix(expectedMatrices[curr].data(), numElements, (T)start, (T)end); - GenerateMatrix(expectedLeftCols[curr].data(), DIM_M, (T)start, (T)end); - GenerateMatrix(expectedRightRows[curr].data(), DIM_N, (T)start, (T)end); + GenerateMatrix(matrices[curr].data(), numElements, (T)start, (T)end); + GenerateMatrix(leftCols[curr].data(), DIM_M, (T)start, (T)end); + GenerateMatrix(rightRows[curr].data(), DIM_N, (T)start, (T)end); } } } @@ -9415,18 +9433,26 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr pDevice, if (typeid(T) == typeid(HALF)) { tolerance = 3; Validation_type = L"ulp"; - expectedMatrices[0][0] = expectedLeftCols[0][0] = expectedRightRows[0][0] = ConvertFloat32ToFloat16(std::numeric_limits::infinity()); - expectedMatrices[1][0] = expectedLeftCols[1][0] = expectedRightRows[1][0] = ConvertFloat32ToFloat16(-std::numeric_limits::infinity()); - expectedMatrices[2][0] = expectedLeftCols[2][0] = expectedRightRows[2][0] = ConvertFloat32ToFloat16(std::numeric_limits::quiet_NaN()); - expectedMatrices[3][0] = expectedLeftCols[3][0] = expectedRightRows[3][0] = ConvertFloat32ToFloat16(-0.0f); - expectedMatrices[4][0] = expectedLeftCols[4][0] = expectedRightRows[4][0] = ConvertFloat32ToFloat16(std::numeric_limits::denorm_min()); - } - else if (typeid(T) == typeid(float)) { - expectedMatrices[0][0] = expectedLeftCols[0][0] = expectedRightRows[0][0] = (T)std::numeric_limits::infinity(); - expectedMatrices[1][0] = expectedLeftCols[1][0] = expectedRightRows[1][0] = (T)-std::numeric_limits::infinity(); - expectedMatrices[2][0] = expectedLeftCols[2][0] = expectedRightRows[2][0] = (T)std::numeric_limits::quiet_NaN(); - expectedMatrices[3][0] = expectedLeftCols[3][0] = expectedRightRows[3][0] = (T)-0.0f; - 
expectedMatrices[4][0] = expectedLeftCols[4][0] = expectedRightRows[4][0] = std::numeric_limits::denorm_min(); + matrices[0][0] = leftCols[0][0] = rightRows[0][0] = + ConvertFloat32ToFloat16(std::numeric_limits::infinity()); + matrices[1][0] = leftCols[1][0] = rightRows[1][0] = + ConvertFloat32ToFloat16(-std::numeric_limits::infinity()); + matrices[2][0] = leftCols[2][0] = rightRows[2][0] = + ConvertFloat32ToFloat16(std::numeric_limits::quiet_NaN()); + matrices[3][0] = leftCols[3][0] = rightRows[3][0] = + ConvertFloat32ToFloat16(-0.0f); + matrices[4][0] = leftCols[4][0] = rightRows[4][0] = + ConvertFloat32ToFloat16(std::numeric_limits::denorm_min()); + } else if (typeid(T) == typeid(float)) { + matrices[0][0] = leftCols[0][0] = rightRows[0][0] = + (T)std::numeric_limits::infinity(); + matrices[1][0] = leftCols[1][0] = rightRows[1][0] = + (T)-std::numeric_limits::infinity(); + matrices[2][0] = leftCols[2][0] = rightRows[2][0] = + (T)std::numeric_limits::quiet_NaN(); + matrices[3][0] = leftCols[3][0] = rightRows[3][0] = (T)-0.0f; + matrices[4][0] = leftCols[4][0] = rightRows[4][0] = + std::numeric_limits::denorm_min(); } std::shared_ptr test = RunShaderOpTestAfterParse( @@ -9440,82 +9466,76 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr pDevice, } } else if (0 == _stricmp(Name, "g_bufInAccumulator")) { // Copy input values to buffer - size_t mtxSize = - expectedMatrices[0].size() * sizeof(*expectedMatrices[0].data()); - for (size_t i = 0; i < expectedMatrices.size(); ++i) { - memcpy(Data.data() + mtxSize * i, expectedMatrices[i].data(), - mtxSize); + size_t mtxSize = matrices[0].size() * sizeof(*matrices[0].data()); + for (size_t i = 0; i < matrices.size(); ++i) { + memcpy(Data.data() + mtxSize * i, matrices[i].data(), mtxSize); } - // Process CPU side input values into expected values + // Process CPU side input values in place into expected values for (size_t i = 0; i < scalars.size(); ++i) { - PlaceholderScalarMultiplyAccumulator( + 
MatrixMultiplyByScalar( DIM_M, DIM_N, scalars[i], - expectedMatrices[i * SCALAR_NUM_OUTPUTS + SCALAR_MUL].data()); - PlaceholderScalarDivideAccumulator( + matrices[i * SCALAR_NUM_OUTPUTS + SCALAR_MUL].data()); + MatrixDivideByScalar( DIM_M, DIM_N, scalars[i], - expectedMatrices[i * SCALAR_NUM_OUTPUTS + SCALAR_DIV].data()); - PlaceholderScalarAddAccumulator( + matrices[i * SCALAR_NUM_OUTPUTS + SCALAR_DIV].data()); + MatrixAddScalar( DIM_M, DIM_N, scalars[i], - expectedMatrices[i * SCALAR_NUM_OUTPUTS + SCALAR_ADD].data()); - PlaceholderScalarSubtractAccumulator( + matrices[i * SCALAR_NUM_OUTPUTS + SCALAR_ADD].data()); + MatrixSubtractScalar( DIM_M, DIM_N, scalars[i], - expectedMatrices[i * SCALAR_NUM_OUTPUTS + SCALAR_SUB].data()); - FillMatrix( - expectedMatrices[i * SCALAR_NUM_OUTPUTS + SCALAR_FILL].data(), - DIM_M * DIM_N, scalars[i]); + matrices[i * SCALAR_NUM_OUTPUTS + SCALAR_SUB].data()); + FillMatrix(matrices[i * SCALAR_NUM_OUTPUTS + SCALAR_FILL].data(), + DIM_M * DIM_N, scalars[i]); } } else if (0 == _stricmp(Name, "g_bufInLeftColAcc")) { // Copy input values to buffer - size_t lcSize = - expectedLeftCols[0].size() * sizeof(*expectedLeftCols[0].data()); - for (size_t i = 0; i < expectedLeftCols.size(); ++i) { - memcpy(Data.data() + lcSize * i, expectedLeftCols[i].data(), - lcSize); + size_t lcSize = leftCols[0].size() * sizeof(*leftCols[0].data()); + for (size_t i = 0; i < leftCols.size(); ++i) { + memcpy(Data.data() + lcSize * i, leftCols[i].data(), lcSize); } - // Process CPU side input values into expected values + // Process CPU side input values in place into expected values for (size_t i = 0; i < scalars.size(); ++i) { - PlaceholderScalarMultiplyRowCol( + VectorMultiplyByScalar( DIM_M, scalars[i], - expectedLeftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_MUL].data()); - PlaceholderScalarDivideRowCol( + leftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_MUL].data()); + VectorDivideByScalar( DIM_M, scalars[i], - expectedLeftCols[i * SCALAR_NUM_OUTPUTS + 
SCALAR_DIV].data()); - PlaceholderScalarAddRowCol( + leftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_DIV].data()); + VectorAddScalar( DIM_M, scalars[i], - expectedLeftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_ADD].data()); - PlaceholderScalarSubtractRowCol( + leftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_ADD].data()); + VectorSubtractScalar( DIM_M, scalars[i], - expectedLeftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_SUB].data()); - FillMatrix( - expectedLeftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_FILL].data(), - DIM_M, scalars[i]); + leftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_SUB].data()); + FillMatrix(leftCols[i * SCALAR_NUM_OUTPUTS + SCALAR_FILL].data(), + DIM_M, scalars[i]); } } else if (0 == _stricmp(Name, "g_bufInRightRowAcc")) { // Copy input values to buffer - size_t rrSize = expectedRightRows[0].size() * sizeof(*expectedRightRows[0].data()); - for (size_t i = 0; i < expectedRightRows.size(); ++i) { - memcpy(Data.data() + rrSize * i, expectedRightRows[i].data(), rrSize); + size_t rrSize = rightRows[0].size() * sizeof(*rightRows[0].data()); + for (size_t i = 0; i < rightRows.size(); ++i) { + memcpy(Data.data() + rrSize * i, rightRows[i].data(), rrSize); } - // Process CPU side input values into expected values + // Process CPU side input values in place into expected values for (size_t i = 0; i < scalars.size(); ++i) { - PlaceholderScalarMultiplyRowCol( + VectorMultiplyByScalar( DIM_N, scalars[i], - expectedRightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_MUL].data()); - PlaceholderScalarDivideRowCol( + rightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_MUL].data()); + VectorDivideByScalar( DIM_N, scalars[i], - expectedRightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_DIV].data()); - PlaceholderScalarAddRowCol( + rightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_DIV].data()); + VectorAddScalar( DIM_N, scalars[i], - expectedRightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_ADD].data()); - PlaceholderScalarSubtractRowCol( + rightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_ADD].data()); + VectorSubtractScalar( DIM_N, scalars[i], - 
expectedRightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_SUB].data()); + rightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_SUB].data()); FillMatrix( - expectedRightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_FILL].data(), - DIM_N, scalars[i]); + rightRows[i * SCALAR_NUM_OUTPUTS + SCALAR_FILL].data(), DIM_N, + scalars[i]); } } else { std::fill(Data.begin(), Data.end(), (BYTE)0); @@ -9540,18 +9560,17 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr pDevice, // For verifying that both waves produce the same output T *readBackRightRowData2 = - readBackRightRowData + - expectedRightRows.size() * expectedRightRows[0].size(); + readBackRightRowData + rightRows.size() * rightRows[0].size(); T *readBackLeftColData2 = - readBackLeftColData + - expectedLeftCols.size() * expectedLeftCols[0].size(); + readBackLeftColData + leftCols.size() * leftCols[0].size(); T *readBackMatrixData2 = - readBackMatrixData + expectedMatrices.size() * expectedMatrices[0].size(); + readBackMatrixData + matrices.size() * matrices[0].size(); WEX::TestExecution::DisableVerifyExceptions dve; - for (size_t i = 0; i < expectedMatrices.size(); ++i) { - auto &expectedMatrix = expectedMatrices[i]; - std::string comment = std::string("Matrix/") + scalarEnumStrs[i % SCALAR_NUM_OUTPUTS] + ":"; + for (size_t i = 0; i < matrices.size(); ++i) { + auto &expectedMatrix = matrices[i]; + std::string comment = + std::string("Matrix/") + scalarEnumStrs[i % SCALAR_NUM_OUTPUTS] + ":"; WEX::Logging::Log::Comment(CA2W(comment.c_str())); VerifyArrayWithExpectedValue(readBackMatrixData, expectedMatrix.data(), @@ -9568,9 +9587,10 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr pDevice, } if (disableFragmentTests == 0) { - for (size_t i = 0; i < expectedLeftCols.size(); ++i) { - auto &expectedLeftColAcc = expectedLeftCols[i]; - std::string comment = std::string("LeftCol/") + scalarEnumStrs[i % SCALAR_NUM_OUTPUTS] + ":"; + for (size_t i = 0; i < leftCols.size(); ++i) { + auto &expectedLeftColAcc = leftCols[i]; + std::string 
comment = std::string("LeftCol/") + + scalarEnumStrs[i % SCALAR_NUM_OUTPUTS] + ":"; WEX::Logging::Log::Comment(CA2W(comment.c_str())); VerifyArrayWithExpectedValue( @@ -9586,9 +9606,10 @@ void WaveMatrixScalarTest(int DIM_M, int DIM_N, CComPtr pDevice, readBackLeftColData2 += expectedLeftColAcc.size(); } - for (size_t i = 0; i < expectedRightRows.size(); ++i) { - auto &expectedRightRowAcc = expectedRightRows[i]; - std::string comment = std::string("RightRow/") + scalarEnumStrs[i % SCALAR_NUM_OUTPUTS] + ":"; + for (size_t i = 0; i < rightRows.size(); ++i) { + auto &expectedRightRowAcc = rightRows[i]; + std::string comment = std::string("RightRow/") + + scalarEnumStrs[i % SCALAR_NUM_OUTPUTS] + ":"; WEX::Logging::Log::Comment(CA2W(comment.c_str())); VerifyArrayWithExpectedValue( @@ -9628,21 +9649,29 @@ TEST_F(ExecutionTest, WaveMatrixLoadStoreTests) { std::vector dimNs; std::shared_ptr ShaderOpSet; - CComPtr pDevice = WaveMatrixTestCommon(dimMs, dimNs, ShaderOpSet); + CComPtr pDevice = WaveMatrixTestCommonSetup(dimMs, dimNs, ShaderOpSet); if (pDevice == nullptr) { return; } - - PCWSTR validationType = L"epsilon"; - double tolerance = 0; // 0 tolerance for load store - std::vector memTypes = {BUFFER, GROUPSHARED}; + // Check if the tests are enabled + int disableLoadStoreTests = 0; + WEX::TestExecution::RuntimeParameters::TryGetValue( + L"Wmma_DisableLoadStoreTests", disableLoadStoreTests); + + if (disableLoadStoreTests == 1) { + LogCommentFmt(L"Wave matrix load store tests are disabled, skipping."); + WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); + return; + } // Parse mem types + std::vector memTypes = {BUFFER, GROUPSHARED}; std::wstring split; WEX::Common::String memTypeList; - WEX::TestExecution::RuntimeParameters::TryGetValue(L"Wmma_MemType", memTypeList); + WEX::TestExecution::RuntimeParameters::TryGetValue(L"Wmma_MemType", + memTypeList); if (!memTypeList.IsEmpty()) { memTypeList.ToLower(); memTypes.clear(); @@ -9659,34 +9688,28 @@ 
TEST_F(ExecutionTest, WaveMatrixLoadStoreTests) { } } - ///////////// - // LOAD STORE - ///////////// + // Run matrix load store tests for supported types + PCWSTR validationType = L"epsilon"; + double tolerance = 0; // 0 tolerance for load store - int disableLoadStoreTests = 0; - WEX::TestExecution::RuntimeParameters::TryGetValue( - L"Wmma_DisableLoadStoreTests", disableLoadStoreTests); - - if (disableLoadStoreTests == 0) { - for (int dimM : dimMs) { - for (int dimN : dimNs) { - for (int memType : memTypes) { - WaveMatrixLoadStoreTest( - dimM, dimN, memType, pDevice, ShaderOpSet, m_support, - validationType, tolerance); - WaveMatrixLoadStoreTest( - dimM, dimN, memType, pDevice, ShaderOpSet, m_support, - validationType, tolerance); - WaveMatrixLoadStoreTest( - dimM, dimN, memType, pDevice, ShaderOpSet, m_support, - validationType, tolerance); - WaveMatrixLoadStoreTest( - dimM, dimN, memType, pDevice, ShaderOpSet, m_support, - validationType, tolerance); - WaveMatrixLoadStoreTest( - dimM, dimN, memType, pDevice, ShaderOpSet, m_support, - validationType, tolerance); - } + for (int dimM : dimMs) { + for (int dimN : dimNs) { + for (int memType : memTypes) { + WaveMatrixLoadStoreTest( + dimM, dimN, memType, pDevice, ShaderOpSet, m_support, + validationType, tolerance); + WaveMatrixLoadStoreTest( + dimM, dimN, memType, pDevice, ShaderOpSet, m_support, + validationType, tolerance); + WaveMatrixLoadStoreTest( + dimM, dimN, memType, pDevice, ShaderOpSet, m_support, + validationType, tolerance); + WaveMatrixLoadStoreTest( + dimM, dimN, memType, pDevice, ShaderOpSet, m_support, + validationType, tolerance); + WaveMatrixLoadStoreTest( + dimM, dimN, memType, pDevice, ShaderOpSet, m_support, + validationType, tolerance); } } } @@ -9697,56 +9720,58 @@ TEST_F(ExecutionTest, WaveMatrixScalarTests) { using namespace DirectX::PackedVector; std::vector dimMs; - std::vector dimNs; + std::vector dimNs; std::shared_ptr ShaderOpSet; - CComPtr pDevice = WaveMatrixTestCommon(dimMs, dimNs, 
ShaderOpSet); - + CComPtr pDevice = + WaveMatrixTestCommonSetup(dimMs, dimNs, ShaderOpSet); + if (pDevice == nullptr) { return; } - PCWSTR validationType = L"epsilon"; - double tolerance = 0.008; - - ////////// - // SCALAR - ////////// - + // Check if the tests are enabled int disableScalarTests = 0; WEX::TestExecution::RuntimeParameters::TryGetValue( L"Wmma_DisableScalarTests", disableScalarTests); - if (disableScalarTests == 0) { - std::vector scalars = { -100.0f, 20.0f, -50.0f, -0.0f, 0.0f, 42.0f }; + if (disableScalarTests == 1) { + LogCommentFmt(L"Wave matrix scalar tests are disabled, skipping."); + WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); + return; + } - for (uint32_t dimM : dimMs) { - for (uint32_t dimN : dimNs) { - std::string hlslType = "float32_t"; - WaveMatrixScalarTest(dimM, dimN, pDevice, ShaderOpSet, m_support, - hlslType, + // Run the matrix scalar tests for supported types + PCWSTR validationType = L"epsilon"; + double tolerance = 0.008; + std::vector scalars = { -100.0f, 20.0f, -50.0f, -0.0f, 0.0f, 42.0f }; + + for (uint32_t dimM : dimMs) { + for (uint32_t dimN : dimNs) { + std::string hlslType = "float32_t"; + WaveMatrixScalarTest(dimM, dimN, pDevice, ShaderOpSet, m_support, + hlslType, + validationType, tolerance, scalars); + + // hlslType is used for the CheckFeatureSupport query. + // Only one of the two below scalar tests will run, depending on the + // accumulator precision returned by CheckFeatureSupport. + hlslType = "float16_t"; + WaveMatrixScalarTest(dimM, dimN, pDevice, ShaderOpSet, m_support, + hlslType, + validationType, tolerance, scalars); + WaveMatrixScalarTest(dimM, dimN, pDevice, ShaderOpSet, m_support, + hlslType, + validationType, tolerance, scalars); + + hlslType = "uint8_t4_packed"; + WaveMatrixScalarTest(dimM, dimN, pDevice, ShaderOpSet, + m_support, hlslType, validationType, tolerance, scalars); - // hlslType is used for the CheckFeatureSupport query. 
- // Only one of the two below scalar tests will run, depending on the - // accumulator precision returned by CheckFeatureSupport. - hlslType = "float16_t"; - WaveMatrixScalarTest(dimM, dimN, pDevice, ShaderOpSet, m_support, - hlslType, + hlslType = "int8_t4_packed"; + WaveMatrixScalarTest(dimM, dimN, pDevice, ShaderOpSet, + m_support, hlslType, validationType, tolerance, scalars); - WaveMatrixScalarTest(dimM, dimN, pDevice, ShaderOpSet, m_support, - hlslType, - validationType, tolerance, scalars); - - hlslType = "uint8_t4_packed"; - WaveMatrixScalarTest(dimM, dimN, pDevice, ShaderOpSet, - m_support, hlslType, - validationType, tolerance, scalars); - - hlslType = "int8_t4_packed"; - WaveMatrixScalarTest(dimM, dimN, pDevice, ShaderOpSet, - m_support, hlslType, - validationType, tolerance, scalars); - } } } } @@ -9758,48 +9783,50 @@ TEST_F(ExecutionTest, WaveMatrixMathTests) { std::vector dimMs; std::vector dimNs; std::shared_ptr ShaderOpSet; - CComPtr pDevice = WaveMatrixTestCommon(dimMs, dimNs, ShaderOpSet); + CComPtr pDevice = WaveMatrixTestCommonSetup(dimMs, dimNs, ShaderOpSet); if (pDevice == nullptr) { return; } - PCWSTR validationType = L"epsilon"; - double tolerance = 0.008; - - ////////// - // MATH TEST - ////////// - + // Check if the tests are enabled int disableMathTests = 0; WEX::TestExecution::RuntimeParameters::TryGetValue( L"Wmma_DisableMathTests", disableMathTests); - if (disableMathTests == 0) { - for (uint32_t dimM : dimMs) { - for (uint32_t dimN : dimNs) { - WaveMatrixMathTest( - dimM, dimN, pDevice, ShaderOpSet, m_support, - validationType, tolerance); - WaveMatrixMathTest( - dimM, dimN, pDevice, ShaderOpSet, m_support, - validationType, tolerance); - WaveMatrixMathTest( - dimM, dimN, pDevice, ShaderOpSet, m_support, - validationType, tolerance); - WaveMatrixMathTest( - dimM, dimN, pDevice, ShaderOpSet, m_support, - validationType, tolerance); - WaveMatrixMathTest( - dimM, dimN, pDevice, ShaderOpSet, m_support, - validationType, tolerance); - 
WaveMatrixMathTest( - dimM, dimN, pDevice, ShaderOpSet, m_support, - validationType, tolerance); - WaveMatrixMathTest( - dimM, dimN, pDevice, ShaderOpSet, m_support, - validationType, tolerance); - } + if (disableMathTests == 1) { + LogCommentFmt(L"Wave matrix math tests are disabled, skipping."); + WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); + return; + } + + // Run the matrix math tests for supported types + PCWSTR validationType = L"epsilon"; + double tolerance = 0.008; + + for (uint32_t dimM : dimMs) { + for (uint32_t dimN : dimNs) { + WaveMatrixMathTest( + dimM, dimN, pDevice, ShaderOpSet, m_support, + validationType, tolerance); + WaveMatrixMathTest( + dimM, dimN, pDevice, ShaderOpSet, m_support, + validationType, tolerance); + WaveMatrixMathTest( + dimM, dimN, pDevice, ShaderOpSet, m_support, + validationType, tolerance); + WaveMatrixMathTest( + dimM, dimN, pDevice, ShaderOpSet, m_support, + validationType, tolerance); + WaveMatrixMathTest( + dimM, dimN, pDevice, ShaderOpSet, m_support, + validationType, tolerance); + WaveMatrixMathTest( + dimM, dimN, pDevice, ShaderOpSet, m_support, + validationType, tolerance); + WaveMatrixMathTest( + dimM, dimN, pDevice, ShaderOpSet, m_support, + validationType, tolerance); } } }