added tensor test(s) to MathPerformanceTests

2016-06-17 12:10:37 -07:00 · 2016-06-17 12:10:37 -07:00 · a172c89111
--- a/Source/Math/CntkBatchNormalization.cuh
+++ b/Source/Math/CntkBatchNormalization.cuh
@ -108,13 +108,12 @@ __device__ __forceinline__ void StoreValues<4, float>(const float src[4], float*
 // Warp-shuffle wrapper.
 // When compiling device code for compute capability 3.0 or newer, forwards to
 // cub::ShuffleIndex(input, srcLane) and returns its result.
 // In every other compilation pass the body is only a stub that asserts and returns
 // 'input' unchanged. This includes the host pass: there __CUDA_ARCH__ is undefined,
 // and an undefined macro evaluates to 0 inside #if, so the #else branch is selected.
 template <typename T>
 __device__ __forceinline__ T Shuffle(T input, int srcLane)
 {
-#ifdef __CUDA_ARCH__
+#if __CUDA_ARCH__ >= 300
    // shfl is supported only on Kepler+
    // NOTE(review): under the '#if __CUDA_ARCH__ >= 300' guard this static_assert can no
    // longer fire; targets below 3.0 now silently compile the stub branch instead.
    static_assert(__CUDA_ARCH__ >= 300, "CNTK only supports only Kepler GPU architecture or newer");
    return cub::ShuffleIndex(input, srcLane);
 #else
    // Stub for passes without shuffle support: not meant to be executed (asserts).
    assert(false);
-    return input;
+    return input; // keep compiler happy
 #endif
 }
--- a/Source/Math/MathCUDA.vcxproj
+++ b/Source/Math/MathCUDA.vcxproj
@ -52,7 +52,7 @@
  <PropertyGroup Label="UserMacros" />
  <PropertyGroup Condition="$(DebugBuild)">
    <CudaCodeGen>$(CNTK_CUDA_CODEGEN_DEBUG)</CudaCodeGen>
-    <CudaCodeGen Condition="'$(CudaCodeGen)'==''">compute_30,sm_30</CudaCodeGen>
+    <CudaCodeGen Condition="'$(CudaCodeGen)'==''">compute_20,sm_20;compute_30,sm_30</CudaCodeGen>
  </PropertyGroup>
  <PropertyGroup Condition="$(ReleaseBuild)">
    <CudaCodeGen>$(CNTK_CUDA_CODEGEN_RELEASE)</CudaCodeGen>
--- a/Source/Math/TensorView.h
+++ b/Source/Math/TensorView.h
@ -14,6 +14,8 @@
 #pragma warning(push)
 #pragma warning(disable : 4251) // needs to have dll-interface to be used by clients of... caused by TensorView::m_shape which is only private. We use the same compiler everywhere.
 template<class ElemType> struct TensorTest;
 // This class is exported from the Math.dll.
 namespace Microsoft { namespace MSR { namespace CNTK {
@ -149,6 +151,7 @@ private:
    const Matrix<ElemType>& GetSOB() const { return *m_sob; }
    Matrix<ElemType>&       GetSOB()       { return *m_sob; }
    friend struct ::TensorTest<ElemType>;
    // -------------------------------------------------------------------
    // sob members
--- a/Tests/UnitTests/MathPerformanceTests/MathPerformanceTests.cpp
+++ b/Tests/UnitTests/MathPerformanceTests/MathPerformanceTests.cpp
@ -5,14 +5,17 @@
 // MathPerformanceTests.cpp : Defines the entry point for the console application.
 //
 #include "stdafx.h"
-#define NOMINMAX
+//#define NOMINMAX
-#include "Windows.h"
+//#include "Windows.h"
 #include "Matrix.h"
 #include "CPUMatrix.h"
 #include "TensorView.h"
 #include "Sequences.h"
 #include <chrono>
 #include <iostream>
 #include <vector>
-#include "Matrix.h"
+#include <algorithm>
-#include "CPUMatrix.h"
+
 #include "Sequences.h"
 using namespace Microsoft::MSR::CNTK;
 using namespace std;
@ -378,6 +381,63 @@ void SquareMultiplyAndAdd10TimesAvgTest(int n, int count)
    cout << "CPUMatrix/Matrix ratio is: " << cpu_avg / m_avg << " seconds" << endl;
 }
 // TensorTest -- small collection of TensorView checks
 //  - intended as a harness for performance optimization work
 //  - a check counts as correct when the GPU run and the CPU run give the same result
 template <class ElemType>
 struct TensorTest
 {
    // Builds a TensorView of the given shape on 'deviceId'. Its elements are
    // pseudo-random values drawn from uniform_real_distribution<float>(-1, 1),
    // produced by an mt19937 engine seeded with 'randomSeed'.
    static TensorView<ElemType> CreateTensor(TensorShape shape, int randomSeed, DEVICEID_TYPE deviceId)
    {
        const auto elementCount = shape.GetNumElements();

        // fill a host-side buffer with the random values, one draw per element
        mt19937 engine(randomSeed);
        uniform_real_distribution<float> uniform(-1, 1);
        vector<ElemType> hostValues(elementCount);
        for (auto& value : hostValues)
            value = uniform(engine);

        // the storage object is a matrix with a single column holding all elements
        const auto storage = make_shared<Matrix<ElemType>>(elementCount/*rows*/, 1/*cols*/, hostValues.data(), deviceId);

        // wrap the storage in a tensor of the requested shape
        return TensorView<ElemType>(storage, shape);
    }

    // Runs 'fn' once with device id 0 (GPU) and once with device id -1 (CPU), then
    // prints to cout whether the storage objects of the two results compare equal
    // (IsEqualTo is called with 1e-3f as its second argument).
    // 'what' is the label printed for this test; nothing is returned to the caller.
    template<typename FN>
    static void OneTensorTest(const char* what, const FN& fn)
    {
        cout << "Tensor test '" << what << "': ";

        // GPU first, then CPU
        const auto gpuResult = fn(0);
        const auto cpuResult = fn(-1);

        // report the outcome of the comparison
        if (gpuResult.GetSOB().IsEqualTo(cpuResult.GetSOB(), 1e-3f))
            cout << "succeeded." << endl;
        else
            cout << "FAILED (GPU and CPU results differ)." << endl;
    }

    // Entry point: constructing a TensorTest object runs the tests
    // (the constructor stands in for a void function).
    /*void*/ TensorTest()
    {
        // builds a [2048 x 1024] gradient tensor and a [2048] bias tensor on the given
        // device, applies DoCopyOf to the bias, and returns the bias for comparison
        const auto biasGradientTest = [](DEVICEID_TYPE deviceId) -> TensorView<ElemType>
        {
            const auto numRows = 2048u;
            const auto numCols = 1024u;

            // one seed per tensor: 1 for the gradient, 2 for the bias
            const int gradientSeed = 1;
            const int biasSeed     = 2;
            const auto gradient = CreateTensor(TensorShape{ numRows, numCols }, gradientSeed, deviceId);
            auto       bias     = CreateTensor(TensorShape(numRows),            biasSeed,     deviceId);

            // debugging aids (disabled):
            //gradient.GetSOB().Print("incoming gradient", 0, 9, 0, 9);
            //bias.GetSOB().Print("bias gradient", 0, 9, 0, 9);

            // NOTE(review): presumably accumulates 'gradient' into 'bias' with both scale
            // factors equal to 1 -- confirm against TensorView::DoCopyOf
            bias.DoCopyOf(1, gradient, 1);

            //bias.GetSOB().Print("updated bias gradient", 0, 9, 0, 9);
            return bias;
        };

        OneTensorTest("bias gradient", biasGradientTest);
    }
 };
 template <class ElemType>
 void MandSTest(int count, int devId)
 {
@ -437,6 +497,8 @@ void MandSTest(int count, int devId)
 int wmain()
 {
    TensorTest<float>();
    ColumnSliceMultAndAddTest<float>(2048, 2048, 256, 0);
    TestRnnForwardPropSRP<float>();
--- a/Tests/UnitTests/MathPerformanceTests/MathPerformanceTests.vcxproj
+++ b/Tests/UnitTests/MathPerformanceTests/MathPerformanceTests.vcxproj
@ -114,10 +114,11 @@
    <ClInclude Include="targetver.h" />
  </ItemGroup>
  <ItemGroup>
    <ClCompile Include="..\..\..\Source\Common\ExceptionWithCallStack.cpp" />
    <ClCompile Include="MathPerformanceTests.cpp" />
    <ClCompile Include="stdafx.cpp">
      <PrecompiledHeader>Create</PrecompiledHeader>
    </ClCompile>
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
-</Project>
+</Project>