added tensor test(s) to MathPerformanceTests
This commit is contained in:
Родитель
a67273394e
Коммит
a172c89111
|
@ -108,13 +108,12 @@ __device__ __forceinline__ void StoreValues<4, float>(const float src[4], float*
|
||||||
template <typename T>
|
template <typename T>
|
||||||
__device__ __forceinline__ T Shuffle(T input, int srcLane)
|
__device__ __forceinline__ T Shuffle(T input, int srcLane)
|
||||||
{
|
{
|
||||||
#ifdef __CUDA_ARCH__
|
#if __CUDA_ARCH__ >= 300
|
||||||
// shfl is supported only on Kepler+
|
// shfl is supported only on Kepler+
|
||||||
static_assert(__CUDA_ARCH__ >= 300, "CNTK only supports only Kepler GPU architecture or newer");
|
|
||||||
return cub::ShuffleIndex(input, srcLane);
|
return cub::ShuffleIndex(input, srcLane);
|
||||||
#else
|
#else
|
||||||
assert(false);
|
assert(false);
|
||||||
return input;
|
return input; // keep compiler happy
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -52,7 +52,7 @@
|
||||||
<PropertyGroup Label="UserMacros" />
|
<PropertyGroup Label="UserMacros" />
|
||||||
<PropertyGroup Condition="$(DebugBuild)">
|
<PropertyGroup Condition="$(DebugBuild)">
|
||||||
<CudaCodeGen>$(CNTK_CUDA_CODEGEN_DEBUG)</CudaCodeGen>
|
<CudaCodeGen>$(CNTK_CUDA_CODEGEN_DEBUG)</CudaCodeGen>
|
||||||
<CudaCodeGen Condition="'$(CudaCodeGen)'==''">compute_30,sm_30</CudaCodeGen>
|
<CudaCodeGen Condition="'$(CudaCodeGen)'==''">compute_20,sm_20;compute_30,sm_30</CudaCodeGen>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<PropertyGroup Condition="$(ReleaseBuild)">
|
<PropertyGroup Condition="$(ReleaseBuild)">
|
||||||
<CudaCodeGen>$(CNTK_CUDA_CODEGEN_RELEASE)</CudaCodeGen>
|
<CudaCodeGen>$(CNTK_CUDA_CODEGEN_RELEASE)</CudaCodeGen>
|
||||||
|
|
|
@ -14,6 +14,8 @@
|
||||||
#pragma warning(push)
|
#pragma warning(push)
|
||||||
#pragma warning(disable : 4251) // needs to have dll-interface to be used by clients of... caused by TensorView::m_shape which is only private. We use the same compiler everywhere.
|
#pragma warning(disable : 4251) // needs to have dll-interface to be used by clients of... caused by TensorView::m_shape which is only private. We use the same compiler everywhere.
|
||||||
|
|
||||||
|
template<class ElemType> struct TensorTest;
|
||||||
|
|
||||||
// This class is exported from the Math.dll.
|
// This class is exported from the Math.dll.
|
||||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||||
|
|
||||||
|
@ -149,6 +151,7 @@ private:
|
||||||
|
|
||||||
const Matrix<ElemType>& GetSOB() const { return *m_sob; }
|
const Matrix<ElemType>& GetSOB() const { return *m_sob; }
|
||||||
Matrix<ElemType>& GetSOB() { return *m_sob; }
|
Matrix<ElemType>& GetSOB() { return *m_sob; }
|
||||||
|
friend struct ::TensorTest<ElemType>;
|
||||||
|
|
||||||
// -------------------------------------------------------------------
|
// -------------------------------------------------------------------
|
||||||
// sob members
|
// sob members
|
||||||
|
|
|
@ -5,14 +5,17 @@
|
||||||
// MathPerformanceTests.cpp : Defines the entry point for the console application.
|
// MathPerformanceTests.cpp : Defines the entry point for the console application.
|
||||||
//
|
//
|
||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
#define NOMINMAX
|
//#define NOMINMAX
|
||||||
#include "Windows.h"
|
//#include "Windows.h"
|
||||||
|
#include "Matrix.h"
|
||||||
|
#include "CPUMatrix.h"
|
||||||
|
#include "TensorView.h"
|
||||||
|
#include "Sequences.h"
|
||||||
#include <chrono>
|
#include <chrono>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include "Matrix.h"
|
#include <algorithm>
|
||||||
#include "CPUMatrix.h"
|
|
||||||
#include "Sequences.h"
|
|
||||||
using namespace Microsoft::MSR::CNTK;
|
using namespace Microsoft::MSR::CNTK;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
@ -378,6 +381,63 @@ void SquareMultiplyAndAdd10TimesAvgTest(int n, int count)
|
||||||
cout << "CPUMatrix/Matrix ratio is: " << cpu_avg / m_avg << " seconds" << endl;
|
cout << "CPUMatrix/Matrix ratio is: " << cpu_avg / m_avg << " seconds" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// simple test suite for TensorView
|
||||||
|
// - this is meant for performance optimization
|
||||||
|
// - correctness is defined as same result between GPU and CPU
|
||||||
|
template <class ElemType>
|
||||||
|
struct TensorTest
|
||||||
|
{
|
||||||
|
// helper to create a randomly initialized tensor object
|
||||||
|
static TensorView<ElemType> CreateTensor(TensorShape shape, int randomSeed, DEVICEID_TYPE deviceId)
|
||||||
|
{
|
||||||
|
let numElements = shape.GetNumElements();
|
||||||
|
|
||||||
|
// random init
|
||||||
|
mt19937 rng(randomSeed);
|
||||||
|
uniform_real_distribution<float> nd(-1, 1);
|
||||||
|
vector<ElemType> init(numElements);
|
||||||
|
generate(begin(init), end(init), [&] { return nd(rng); });
|
||||||
|
|
||||||
|
// create storage object (one-column matrix)
|
||||||
|
let sob = make_shared<Matrix<ElemType>>(numElements/*rows*/, 1/*cols*/, init.data(), deviceId);
|
||||||
|
|
||||||
|
// create TensorView
|
||||||
|
return TensorView<ElemType>(sob, shape);
|
||||||
|
}
|
||||||
|
|
||||||
|
template<typename FN>
|
||||||
|
static void OneTensorTest(const char* what, const FN& fn)
|
||||||
|
{
|
||||||
|
cout << "Tensor test '" << what << "': ";
|
||||||
|
|
||||||
|
// run on GPU and CPU
|
||||||
|
let resultGPU = fn(0);
|
||||||
|
let resultCPU = fn(-1);
|
||||||
|
|
||||||
|
// compare
|
||||||
|
let isSame = resultGPU.GetSOB().IsEqualTo(resultCPU.GetSOB(), 1e-3f);
|
||||||
|
cout << (isSame ? "succeeded." : "FAILED (GPU and CPU results differ).") << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// main entry point (misusing the constructor)
|
||||||
|
/*void*/ TensorTest()
|
||||||
|
{
|
||||||
|
OneTensorTest("bias gradient", [](DEVICEID_TYPE deviceId) -> TensorView<ElemType>
|
||||||
|
{
|
||||||
|
let N = 2048u;
|
||||||
|
let T = 1024u;
|
||||||
|
int randomSeed = 1;
|
||||||
|
let gradient = CreateTensor(TensorShape{ N, T }, randomSeed++, deviceId);
|
||||||
|
auto bias = CreateTensor(TensorShape(N), randomSeed++, deviceId);
|
||||||
|
//gradient.GetSOB().Print("incoming gradient", 0, 9, 0, 9);
|
||||||
|
//bias.GetSOB().Print("bias gradient", 0, 9, 0, 9);
|
||||||
|
bias.DoCopyOf(1, gradient, 1);
|
||||||
|
//bias.GetSOB().Print("updated bias gradient", 0, 9, 0, 9);
|
||||||
|
return bias;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
template <class ElemType>
|
template <class ElemType>
|
||||||
void MandSTest(int count, int devId)
|
void MandSTest(int count, int devId)
|
||||||
{
|
{
|
||||||
|
@ -437,6 +497,8 @@ void MandSTest(int count, int devId)
|
||||||
|
|
||||||
int wmain()
|
int wmain()
|
||||||
{
|
{
|
||||||
|
TensorTest<float>();
|
||||||
|
|
||||||
ColumnSliceMultAndAddTest<float>(2048, 2048, 256, 0);
|
ColumnSliceMultAndAddTest<float>(2048, 2048, 256, 0);
|
||||||
|
|
||||||
TestRnnForwardPropSRP<float>();
|
TestRnnForwardPropSRP<float>();
|
||||||
|
|
|
@ -114,10 +114,11 @@
|
||||||
<ClInclude Include="targetver.h" />
|
<ClInclude Include="targetver.h" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<ClCompile Include="..\..\..\Source\Common\ExceptionWithCallStack.cpp" />
|
||||||
<ClCompile Include="MathPerformanceTests.cpp" />
|
<ClCompile Include="MathPerformanceTests.cpp" />
|
||||||
<ClCompile Include="stdafx.cpp">
|
<ClCompile Include="stdafx.cpp">
|
||||||
<PrecompiledHeader>Create</PrecompiledHeader>
|
<PrecompiledHeader>Create</PrecompiledHeader>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
</Project>
|
</Project>
|
Загрузка…
Ссылка в новой задаче