added tensor test(s) to MathPerformanceTests
This commit is contained in:
Родитель
a67273394e
Коммит
a172c89111
|
@ -108,13 +108,12 @@ __device__ __forceinline__ void StoreValues<4, float>(const float src[4], float*
|
|||
// Returns cub::ShuffleIndex(input, srcLane), i.e. `input` as seen from warp lane
// `srcLane` (see the CUB documentation for the exact lane semantics).
//
// The shuffle is only compiled for device passes that target compute capability
// 3.0 (Kepler) or newer. Every other compilation pass -- an older device target,
// or the host pass where __CUDA_ARCH__ is not defined -- gets a stub that asserts
// and hands `input` back unchanged, so the translation unit still builds.
//
// A single preprocessor conditional is used on purpose: nesting `#ifdef
// __CUDA_ARCH__` around `#if __CUDA_ARCH__ >= 300` requires two `#endif`s and
// makes a static_assert on the same condition inside the inner branch redundant.
template <typename T>
__device__ __forceinline__ T Shuffle(T input, int srcLane)
{
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
    // shfl is supported only on Kepler+
    return cub::ShuffleIndex(input, srcLane);
#else
    // No shuffle instruction available here: fail loudly in debug builds.
    assert(false);
    return input; // keep compiler happy
#endif
}
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@
|
|||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="$(DebugBuild)">
|
||||
<CudaCodeGen>$(CNTK_CUDA_CODEGEN_DEBUG)</CudaCodeGen>
|
||||
<CudaCodeGen Condition="'$(CudaCodeGen)'==''">compute_30,sm_30</CudaCodeGen>
|
||||
<CudaCodeGen Condition="'$(CudaCodeGen)'==''">compute_20,sm_20;compute_30,sm_30</CudaCodeGen>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="$(ReleaseBuild)">
|
||||
<CudaCodeGen>$(CNTK_CUDA_CODEGEN_RELEASE)</CudaCodeGen>
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
#pragma warning(push)
|
||||
#pragma warning(disable : 4251) // needs to have dll-interface to be used by clients of... caused by TensorView::m_shape which is only private. We use the same compiler everywhere.
|
||||
|
||||
template<class ElemType> struct TensorTest;
|
||||
|
||||
// This class is exported from the Math.dll.
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
|
@ -149,6 +151,7 @@ private:
|
|||
|
||||
const Matrix<ElemType>& GetSOB() const { return *m_sob; }
|
||||
Matrix<ElemType>& GetSOB() { return *m_sob; }
|
||||
friend struct ::TensorTest<ElemType>;
|
||||
|
||||
// -------------------------------------------------------------------
|
||||
// sob members
|
||||
|
|
|
@ -5,14 +5,17 @@
|
|||
// MathPerformanceTests.cpp : Defines the entry point for the console application.
|
||||
//
|
||||
#include "stdafx.h"
|
||||
#define NOMINMAX
|
||||
#include "Windows.h"
|
||||
//#define NOMINMAX
|
||||
//#include "Windows.h"
|
||||
#include "Matrix.h"
|
||||
#include "CPUMatrix.h"
|
||||
#include "TensorView.h"
|
||||
#include "Sequences.h"
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include "Matrix.h"
|
||||
#include "CPUMatrix.h"
|
||||
#include "Sequences.h"
|
||||
#include <algorithm>
|
||||
|
||||
using namespace Microsoft::MSR::CNTK;
|
||||
using namespace std;
|
||||
|
||||
|
@ -378,6 +381,63 @@ void SquareMultiplyAndAdd10TimesAvgTest(int n, int count)
|
|||
cout << "CPUMatrix/Matrix ratio is: " << cpu_avg / m_avg << " seconds" << endl;
|
||||
}
|
||||
|
||||
// simple test suite for TensorView
|
||||
// - this is meant for performance optimization
|
||||
// - correctness is defined as same result between GPU and CPU
|
||||
template <class ElemType>
|
||||
struct TensorTest
|
||||
{
|
||||
// helper to create a randomly initialized tensor object
|
||||
static TensorView<ElemType> CreateTensor(TensorShape shape, int randomSeed, DEVICEID_TYPE deviceId)
|
||||
{
|
||||
let numElements = shape.GetNumElements();
|
||||
|
||||
// random init
|
||||
mt19937 rng(randomSeed);
|
||||
uniform_real_distribution<float> nd(-1, 1);
|
||||
vector<ElemType> init(numElements);
|
||||
generate(begin(init), end(init), [&] { return nd(rng); });
|
||||
|
||||
// create storage object (one-column matrix)
|
||||
let sob = make_shared<Matrix<ElemType>>(numElements/*rows*/, 1/*cols*/, init.data(), deviceId);
|
||||
|
||||
// create TensorView
|
||||
return TensorView<ElemType>(sob, shape);
|
||||
}
|
||||
|
||||
template<typename FN>
|
||||
static void OneTensorTest(const char* what, const FN& fn)
|
||||
{
|
||||
cout << "Tensor test '" << what << "': ";
|
||||
|
||||
// run on GPU and CPU
|
||||
let resultGPU = fn(0);
|
||||
let resultCPU = fn(-1);
|
||||
|
||||
// compare
|
||||
let isSame = resultGPU.GetSOB().IsEqualTo(resultCPU.GetSOB(), 1e-3f);
|
||||
cout << (isSame ? "succeeded." : "FAILED (GPU and CPU results differ).") << endl;
|
||||
}
|
||||
|
||||
// main entry point (misusing the constructor)
|
||||
/*void*/ TensorTest()
|
||||
{
|
||||
OneTensorTest("bias gradient", [](DEVICEID_TYPE deviceId) -> TensorView<ElemType>
|
||||
{
|
||||
let N = 2048u;
|
||||
let T = 1024u;
|
||||
int randomSeed = 1;
|
||||
let gradient = CreateTensor(TensorShape{ N, T }, randomSeed++, deviceId);
|
||||
auto bias = CreateTensor(TensorShape(N), randomSeed++, deviceId);
|
||||
//gradient.GetSOB().Print("incoming gradient", 0, 9, 0, 9);
|
||||
//bias.GetSOB().Print("bias gradient", 0, 9, 0, 9);
|
||||
bias.DoCopyOf(1, gradient, 1);
|
||||
//bias.GetSOB().Print("updated bias gradient", 0, 9, 0, 9);
|
||||
return bias;
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
template <class ElemType>
|
||||
void MandSTest(int count, int devId)
|
||||
{
|
||||
|
@ -437,6 +497,8 @@ void MandSTest(int count, int devId)
|
|||
|
||||
int wmain()
|
||||
{
|
||||
TensorTest<float>();
|
||||
|
||||
ColumnSliceMultAndAddTest<float>(2048, 2048, 256, 0);
|
||||
|
||||
TestRnnForwardPropSRP<float>();
|
||||
|
|
|
@ -114,6 +114,7 @@
|
|||
<ClInclude Include="targetver.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\..\Source\Common\ExceptionWithCallStack.cpp" />
|
||||
<ClCompile Include="MathPerformanceTests.cpp" />
|
||||
<ClCompile Include="stdafx.cpp">
|
||||
<PrecompiledHeader>Create</PrecompiledHeader>
|
||||
|
|
Загрузка…
Ссылка в новой задаче