added tensor test(s) to MathPerformanceTests

This commit is contained in:
Frank Seide 2016-06-17 12:10:37 -07:00 коммит произвёл Ivan Rodriguez
Родитель a67273394e
Коммит a172c89111
5 изменённых файлов: 75 добавлений и 10 удалений

Просмотреть файл

@ -108,13 +108,12 @@ __device__ __forceinline__ void StoreValues<4, float>(const float src[4], float*
// Warp-shuffle helper: forwards to cub::ShuffleIndex() so that the calling lane
// obtains the `input` value held by lane `srcLane`.
//   input   - value contributed by the calling lane
//   srcLane - source lane index, passed through unchanged to cub::ShuffleIndex()
// Returns the shuffled value when compiling device code for sm_30 or newer.
// For older architectures, and for the host compilation pass where
// __CUDA_ARCH__ is not defined, no shuffle is available: the function asserts
// and hands `input` back unchanged.
template <typename T>
__device__ __forceinline__ T Shuffle(T input, int srcLane)
{
    // A single conditional replaces the previously unbalanced #ifdef/#if pair;
    // the defined() test keeps the comparison well-formed in the host pass.
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300
    // shfl is supported only on Kepler+
    return cub::ShuffleIndex(input, srcLane);
#else
    assert(false);
    return input; // keep compiler happy
#endif
}

Просмотреть файл

@ -52,7 +52,7 @@
<PropertyGroup Label="UserMacros" />
<PropertyGroup Condition="$(DebugBuild)">
<!-- Debug builds: CudaCodeGen is first taken from CNTK_CUDA_CODEGEN_DEBUG; the conditional entries below apply only while CudaCodeGen is still empty. -->
<CudaCodeGen>$(CNTK_CUDA_CODEGEN_DEBUG)</CudaCodeGen>
<!-- NOTE(review): the two fallbacks below carry the same condition, so once the first one assigns a value the second can presumably never match. They look like the old and new side of a diff; confirm which default is intended and drop the other. -->
<CudaCodeGen Condition="'$(CudaCodeGen)'==''">compute_30,sm_30</CudaCodeGen>
<CudaCodeGen Condition="'$(CudaCodeGen)'==''">compute_20,sm_20;compute_30,sm_30</CudaCodeGen>
</PropertyGroup>
<PropertyGroup Condition="$(ReleaseBuild)">
<CudaCodeGen>$(CNTK_CUDA_CODEGEN_RELEASE)</CudaCodeGen>

Просмотреть файл

@ -14,6 +14,8 @@
#pragma warning(push)
#pragma warning(disable : 4251) // needs to have dll-interface to be used by clients of... caused by TensorView::m_shape which is only private. We use the same compiler everywhere.
template<class ElemType> struct TensorTest;
// This class is exported from the Math.dll.
namespace Microsoft { namespace MSR { namespace CNTK {
@ -149,6 +151,7 @@ private:
// Accessors for the storage object ("sob") behind this view: both overloads
// dereference m_sob and return the Matrix by reference (read-only and mutable).
const Matrix<ElemType>& GetSOB() const { return *m_sob; }
Matrix<ElemType>& GetSOB() { return *m_sob; }
// Lets the global-namespace TensorTest<ElemType> harness reach the non-public
// members of this class, including the GetSOB() accessors above.
friend struct ::TensorTest<ElemType>;
// -------------------------------------------------------------------
// sob members

Просмотреть файл

@ -5,14 +5,17 @@
// MathPerformanceTests.cpp : Defines the entry point for the console application.
//
#include "stdafx.h"
#define NOMINMAX
#include "Windows.h"
//#define NOMINMAX
//#include "Windows.h"
#include "Matrix.h"
#include "CPUMatrix.h"
#include "TensorView.h"
#include "Sequences.h"
#include <chrono>
#include <iostream>
#include <vector>
#include "Matrix.h"
#include "CPUMatrix.h"
#include "Sequences.h"
#include <algorithm>
using namespace Microsoft::MSR::CNTK;
using namespace std;
@ -378,6 +381,63 @@ void SquareMultiplyAndAdd10TimesAvgTest(int n, int count)
cout << "CPUMatrix/Matrix ratio is: " << cpu_avg / m_avg << " seconds" << endl;
}
// simple test suite for TensorView
// - this is meant for performance optimization
// - correctness is defined as same result between GPU and CPU
template <class ElemType>
struct TensorTest
{
// helper to create a randomly initialized tensor object
static TensorView<ElemType> CreateTensor(TensorShape shape, int randomSeed, DEVICEID_TYPE deviceId)
{
let numElements = shape.GetNumElements();
// random init
mt19937 rng(randomSeed);
uniform_real_distribution<float> nd(-1, 1);
vector<ElemType> init(numElements);
generate(begin(init), end(init), [&] { return nd(rng); });
// create storage object (one-column matrix)
let sob = make_shared<Matrix<ElemType>>(numElements/*rows*/, 1/*cols*/, init.data(), deviceId);
// create TensorView
return TensorView<ElemType>(sob, shape);
}
template<typename FN>
static void OneTensorTest(const char* what, const FN& fn)
{
cout << "Tensor test '" << what << "': ";
// run on GPU and CPU
let resultGPU = fn(0);
let resultCPU = fn(-1);
// compare
let isSame = resultGPU.GetSOB().IsEqualTo(resultCPU.GetSOB(), 1e-3f);
cout << (isSame ? "succeeded." : "FAILED (GPU and CPU results differ).") << endl;
}
// main entry point (misusing the constructor)
/*void*/ TensorTest()
{
OneTensorTest("bias gradient", [](DEVICEID_TYPE deviceId) -> TensorView<ElemType>
{
let N = 2048u;
let T = 1024u;
int randomSeed = 1;
let gradient = CreateTensor(TensorShape{ N, T }, randomSeed++, deviceId);
auto bias = CreateTensor(TensorShape(N), randomSeed++, deviceId);
//gradient.GetSOB().Print("incoming gradient", 0, 9, 0, 9);
//bias.GetSOB().Print("bias gradient", 0, 9, 0, 9);
bias.DoCopyOf(1, gradient, 1);
//bias.GetSOB().Print("updated bias gradient", 0, 9, 0, 9);
return bias;
});
}
};
template <class ElemType>
void MandSTest(int count, int devId)
{
@ -437,6 +497,8 @@ void MandSTest(int count, int devId)
int wmain()
{
TensorTest<float>();
ColumnSliceMultAndAddTest<float>(2048, 2048, 256, 0);
TestRnnForwardPropSRP<float>();

Просмотреть файл

@ -114,10 +114,11 @@
<ClInclude Include="targetver.h" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\..\Source\Common\ExceptionWithCallStack.cpp" />
<ClCompile Include="MathPerformanceTests.cpp" />
<ClCompile Include="stdafx.cpp">
<PrecompiledHeader>Create</PrecompiledHeader>
</ClCompile>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
</Project>
</Project>