merged from master. Undid the ClassificationError baseline updates due to merge conflicts

This commit is contained in:
Frank Seide 2016-08-22 14:36:28 -07:00
Parent 1e68b3c289 8493f118da
Commit 5b969bac70
380 changed files: 292772 additions and 1787443 deletions

View file

@@ -34,48 +34,29 @@
   <UseZip Condition="Exists('$(ZLIB_PATH)')">true</UseZip>
 </PropertyGroup>
-<Choose>
-  <When Condition="Exists('$(ACML_PATH)')">
-    <PropertyGroup>
-      <MathLibrary>ACML</MathLibrary>
-      <MathLibraryName>ACML</MathLibraryName>
-      <MathIncludePath>$(ACML_PATH)\include</MathIncludePath>
-      <MathLibraryPath>$(ACML_PATH)\lib</MathLibraryPath>
-      <MathLinkLibrary>libacml_mp_dll.lib</MathLinkLibrary>
-      <MathDelayLoad>libacml_mp_dll.dll</MathDelayLoad>
-      <MathPostBuildCopyPattern>$(ACML_PATH)\lib\*.dll</MathPostBuildCopyPattern>
-      <UnitTestDlls>$(OutDir)libacml_mp_dll.dll;$(OutDir)libifcoremd.dll;$(OutDir)libifportmd.dll;$(OutDir)libiomp*.dll;$(OutDir)libmmd.dll;$(OutDir)svml_dispmd.dll;</UnitTestDlls>
-      <MathDefine>USE_ACML</MathDefine>
-    </PropertyGroup>
-  </When>
-  <!-- See https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#optional-mkl on how to configure to build CNTK with MKL -->
-  <When Condition="'$(CNTK_MKL)' == '1'">
-    <PropertyGroup>
-      <MathLibrary>MKL</MathLibrary>
-      <CNTKCustomMKLVersion>1</CNTKCustomMKLVersion>
-      <CNTKCustomMKLPath>$(CNTK_MKL_PATH)\$(CNTKCustomMKLVersion)</CNTKCustomMKLPath>
-      <MathIncludePath>$(CNTKCustomMKLPath)\include</MathIncludePath>
-      <MathDefine>USE_MKL</MathDefine>
-    </PropertyGroup>
-    <PropertyGroup Condition="'$(CNTK_MKL_SEQUENTIAL)' != '1'">
-      <MathLibraryName>CNTK custom MKL Parallel (Version: $(CNTKCustomMKLVersion))</MathLibraryName>
-      <MathLibraryPath>$(CNTKCustomMKLPath)\x64\parallel</MathLibraryPath>
-      <MathLinkLibrary>mkl_cntk_p.lib</MathLinkLibrary>
-      <MathDelayLoad>mkl_cntk_p.dll</MathDelayLoad>
-      <MathPostBuildCopyPattern>$(MathLibraryPath)\*.dll</MathPostBuildCopyPattern>
-      <UnitTestDlls>$(OutDir)mkl_cntk_p.dll;$(OutDir)libiomp5md.dll;</UnitTestDlls>
-    </PropertyGroup>
-    <PropertyGroup Condition="'$(CNTK_MKL_SEQUENTIAL)' == '1'">
-      <MathLibraryName>CNTK custom MKL Sequential (Version: $(CNTKCustomMKLVersion))</MathLibraryName>
-      <MathLibraryPath>$(CNTKCustomMKLPath)\x64\sequential</MathLibraryPath>
-      <MathLinkLibrary>mkl_cntk_s.lib</MathLinkLibrary>
-      <MathDelayLoad>mkl_cntk_s.dll</MathDelayLoad>
-      <MathPostBuildCopyPattern>$(MathLibraryPath)\*.dll</MathPostBuildCopyPattern>
-      <UnitTestDlls>$(OutDir)mkl_cntk_s.dll;</UnitTestDlls>
-    </PropertyGroup>
-  </When>
-</Choose>
+<PropertyGroup>
+  <MathLibrary>MKL</MathLibrary>
+  <CNTKCustomMKLVersion>1</CNTKCustomMKLVersion>
+  <CNTKCustomMKLPath>$(CNTK_MKL_PATH)\$(CNTKCustomMKLVersion)</CNTKCustomMKLPath>
+  <MathIncludePath>$(CNTKCustomMKLPath)\include</MathIncludePath>
+  <MathDefine>USE_MKL</MathDefine>
+</PropertyGroup>
+<PropertyGroup Condition="'$(CNTK_MKL_SEQUENTIAL)' != '1'">
+  <MathLibraryName>CNTK custom MKL Parallel (Version: $(CNTKCustomMKLVersion))</MathLibraryName>
+  <MathLibraryPath>$(CNTKCustomMKLPath)\x64\parallel</MathLibraryPath>
+  <MathLinkLibrary>mkl_cntk_p.lib</MathLinkLibrary>
+  <MathDelayLoad>mkl_cntk_p.dll</MathDelayLoad>
+  <MathPostBuildCopyPattern>$(MathLibraryPath)\*.dll</MathPostBuildCopyPattern>
+  <UnitTestDlls>$(OutDir)mkl_cntk_p.dll;$(OutDir)libiomp5md.dll;</UnitTestDlls>
+</PropertyGroup>
+<PropertyGroup Condition="'$(CNTK_MKL_SEQUENTIAL)' == '1'">
+  <MathLibraryName>CNTK custom MKL Sequential (Version: $(CNTKCustomMKLVersion))</MathLibraryName>
+  <MathLibraryPath>$(CNTKCustomMKLPath)\x64\sequential</MathLibraryPath>
+  <MathLinkLibrary>mkl_cntk_s.lib</MathLinkLibrary>
+  <MathDelayLoad>mkl_cntk_s.dll</MathDelayLoad>
+  <MathPostBuildCopyPattern>$(MathLibraryPath)\*.dll</MathPostBuildCopyPattern>
+  <UnitTestDlls>$(OutDir)mkl_cntk_s.dll;</UnitTestDlls>
+</PropertyGroup>
 <PropertyGroup Condition="$(UseZip)">
   <ZipInclude>$(ZLIB_PATH)\include;$(ZLIB_PATH)\lib\libzip\include;</ZipInclude>

View file

@@ -1150,6 +1150,9 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CPPEvalClient", "Examples\E
 EndProject
 Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BrainScriptTests", "Tests\UnitTests\BrainScriptTests\BrainScriptTests.vcxproj", "{9F999212-AFC5-4EAC-AA78-F7247D46C456}"
 	ProjectSection(ProjectDependencies) = postProject
+		{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
+		{928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {928ABD1B-4D3B-4017-AEF1-0FA1B4467513}
+		{EAD17188-072C-4726-B840-A769C36DAD1B} = {EAD17188-072C-4726-B840-A769C36DAD1B}
 		{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
 	EndProjectSection
 EndProject

View file

@@ -9,8 +9,6 @@
 # that provides
 #   BUILDTYPE= One of release or debug
 #     defaults to release
-#   ACML_PATH= path to ACML library installation
-#     only needed if MATHLIB=acml
 #   MKL_PATH= path to CNTK custom MKL installation
 #     only needed if MATHLIB=mkl
 #   CNTK_CUSTOM_MKL_VERSION=2
@@ -21,8 +19,8 @@
 #     defaults to /usr/include/nvidia/gdk
 #   GDK_NVML_LIB_PATH= path to CUDA GDK (stub) library path, so $(GDK_NVML_LIB_PATH)/libnvidia-ml.so exists
 #     defaults to /usr/src/gdk/nvml/lib
-#   MATHLIB= One of acml or mkl
-#     defaults to acml
+#   MATHLIB= mkl
+#     defaults to mkl
 #   CUDA_PATH= Path to CUDA
 #     If not specified, GPU will not be enabled
 #   CUB_PATH= path to NVIDIA CUB installation, so $(CUB_PATH)/cub/cub.cuh exists
@@ -60,8 +58,8 @@ BUILDTYPE=release
 endif

 ifndef MATHLIB
-  $(info DEFAULTING MATHLIB=acml)
-  MATHLIB = acml
+  $(info DEFAULTING MATHLIB=mkl)
+  MATHLIB = mkl
 endif

 #### Configure based on options above
@@ -137,13 +135,6 @@ else
   COMMON_FLAGS +=-DCPUONLY
 endif

-ifeq ("$(MATHLIB)","acml")
-  INCLUDEPATH += $(ACML_PATH)/include
-  LIBPATH += $(ACML_PATH)/lib
-  LIBS += -lacml_mp -liomp5 -lm -lpthread
-  COMMON_FLAGS += -DUSE_ACML
-endif
-
 ifeq ("$(MATHLIB)","mkl")
   INCLUDEPATH += $(MKL_PATH)/$(CNTK_CUSTOM_MKL_VERSION)/include
   LIBS += -lm
@@ -418,6 +409,10 @@ CNTKLIBRARY_TESTS_SRC =\
 	Tests/UnitTests/V2LibraryTests/TensorTests.cpp \
 	Tests/UnitTests/V2LibraryTests/TrainerTests.cpp \
 	Tests/UnitTests/V2LibraryTests/CifarResNet.cpp \
+	Tests/UnitTests/V2LibraryTests/SerializationTests.cpp \
+	Tests/UnitTests/V2LibraryTests/LearnerTests.cpp \
+	Tests/UnitTests/V2LibraryTests/FunctionTests.cpp \
+	Tests/UnitTests/V2LibraryTests/SequenceClassification.cpp \

 CNTKLIBRARY_TESTS:=$(BINDIR)/v2librarytests
 CNTKLIBRARY_TESTS_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(CNTKLIBRARY_TESTS_SRC)))
@@ -933,22 +928,24 @@ UNITTEST_BRAINSCRIPT_SRC = \
 	$(SOURCEDIR)/CNTK/BrainScript/BrainScriptEvaluator.cpp \
 	$(SOURCEDIR)/CNTK/BrainScript/BrainScriptParser.cpp \
 	$(SOURCEDIR)/../Tests/UnitTests/BrainScriptTests/ParserTests.cpp \
+	$(SOURCEDIR)/../Tests/UnitTests/BrainScriptTests/ComputationNetworkTests.cpp \
 	$(SOURCEDIR)/../Tests/UnitTests/BrainScriptTests/stdafx.cpp

-UNITTEST_BRAINSCRIPT_SRC+=$(COMMON_SRC)
+UNITTEST_BRAINSCRIPT_SRC += $(COMPUTATION_NETWORK_LIB_SRC)
+UNITTEST_BRAINSCRIPT_SRC += $(SEQUENCE_TRAINING_LIB_SRC)

-UNITTEST_BRAINSCRIPT_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_BRAINSCRIPT_SRC))
+UNITTEST_BRAINSCRIPT_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_BRAINSCRIPT_SRC)))

 UNITTEST_BRAINSCRIPT := $(BINDIR)/brainscripttests

 ALL += $(UNITTEST_BRAINSCRIPT)
 SRC += $(UNITTEST_BRAINSCRIPT_SRC)

-$(UNITTEST_BRAINSCRIPT): $(UNITTEST_BRAINSCRIPT_OBJ)
+$(UNITTEST_BRAINSCRIPT): $(UNITTEST_BRAINSCRIPT_OBJ) | $(CNTKMATH_LIB)
 	@echo $(SEPARATOR)
 	@mkdir -p $(dir $@)
 	@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
-	$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) -ldl
+	$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) -ldl -l$(CNTKMATH)

 unittests: $(UNITTEST_EVAL) $(UNITTEST_READER) $(UNITTEST_NETWORK) $(UNITTEST_MATH) $(UNITTEST_BRAINSCRIPT)

View file

@@ -72,18 +72,6 @@ void DoTrain(const ConfigRecordType& config)
     bool makeMode = config(L"makeMode", true);
     DEVICEID_TYPE deviceId = DeviceFromConfig(config);

-    // determine the network-creation function
-    // We have several ways to create that network.
-    function<ComputationNetworkPtr(DEVICEID_TYPE)> createNetworkFn;
-    createNetworkFn = GetNetworkFactory<ConfigRecordType, ElemType>(config);
-
-    auto dataReader = CreateObject<DataReader>(config, L"reader");
-
-    shared_ptr<DataReader> cvDataReader;
-    if (config.Exists(L"cvReader"))
-        cvDataReader = CreateObject<DataReader>(config, L"cvReader");
-
     shared_ptr<SGD<ElemType>> optimizer;
     if (config.Exists(L"optimizer"))
     {
@@ -95,8 +83,39 @@ void DoTrain(const ConfigRecordType& config)
         optimizer = make_shared<SGD<ElemType>>(configSGD);
     }

+    // determine which epoch to start with, including recovering a checkpoint if any and 'makeMode' enabled
+    int startEpoch = optimizer->DetermineStartEpoch(makeMode);
+    if (startEpoch == optimizer->GetMaxEpochs())
+    {
+        LOGPRINTF(stderr, "No further training is necessary.\n");
+        return;
+    }
+
+    wstring modelFileName = optimizer->GetModelNameForEpoch(int(startEpoch) - 1);
+    bool loadNetworkFromCheckpoint = startEpoch >= 0;
+    fprintf(stderr, "\n");
+    if (loadNetworkFromCheckpoint)
+        LOGPRINTF(stderr, "Starting from checkpoint. Loading network from '%ls'.\n", modelFileName.c_str());
+    else
+        LOGPRINTF(stderr, "Creating virgin network.\n");
+
+    // determine the network-creation function
+    // We have several ways to create that network.
+    function<ComputationNetworkPtr(DEVICEID_TYPE)> createNetworkFn;
+    createNetworkFn = GetNetworkFactory<ConfigRecordType, ElemType>(config);
+
+    // create or load from checkpoint
+    shared_ptr<ComputationNetwork> net = !loadNetworkFromCheckpoint ? createNetworkFn(deviceId) : ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName);
+
+    auto dataReader = CreateObject<DataReader>(config, L"reader");
+
+    shared_ptr<DataReader> cvDataReader;
+    if (config.Exists(L"cvReader"))
+        cvDataReader = CreateObject<DataReader>(config, L"cvReader");
+
     optimizer->InitMPI(MPIWrapper::GetInstance());
-    optimizer->Train(createNetworkFn, deviceId, dataReader.get(), cvDataReader.get(), makeMode);
+    optimizer->Train(net, deviceId, dataReader.get(), cvDataReader.get(), startEpoch, loadNetworkFromCheckpoint);
 }

 namespace Microsoft { namespace MSR { namespace ScriptableObjects {
@@ -189,9 +208,8 @@ void DoDumpNodes(const ConfigParameters& config)
     if (!printValues && !printMetadata)
         InvalidArgument("printValues and printMetadata: Since both are set to false, there will be nothing to dump");

-    ComputationNetwork net(CPUDEVICE); // always use CPU
-    net.Load<ElemType>(modelPath); // TODO: we have a function now to combine this and the previous line
-    net.DumpNodeInfoToFile(nodeName, printValues, printMetadata, outputFile, nodeNameRegexStr);
+    ComputationNetworkPtr net = ComputationNetwork::CreateFromFile<ElemType>(CPUDEVICE, modelPath);
+    net->DumpNodeInfoToFile(nodeName, printValues, printMetadata, outputFile, nodeNameRegexStr);
 }

 template void DoDumpNodes<float>(const ConfigParameters& config);

View file

@@ -10,7 +10,7 @@
 #include "stdafx.h"
 #ifdef _WIN32
 #include <crtdbg.h>
 #endif

 #include "Basics.h"
 #include "Actions.h"

View file

@@ -53,8 +53,6 @@ if "%p_CNTK_MKL%" == "1" (
     ) else (
         echo #define _MATHLIB_ "mkl">> buildinfo.h$$
     )
-) else (
-    echo #define _MATHLIB_ "acml">> buildinfo.h$$
 )
 echo #define _BUILDER_ "%USERNAME%" >> buildinfo.h$$

View file

@@ -7,6 +7,12 @@

 #pragma once

+#ifdef SWIG
+#define final
+#define explicit
+#define static_assert(condition, message)
+#endif
+
 #include "CNTKLibraryInternals.h"

 #include <memory>
@@ -14,10 +20,12 @@
 #include <array>
 #include <stdarg.h>
 #include <assert.h>
+#include <map>
 #include <unordered_map>
 #include <unordered_set>
 #include <string>
 #include <sstream>
+#include <iosfwd>
 #include<algorithm>

 namespace CNTK
@@ -236,7 +244,7 @@ namespace CNTK
         }

         ///
-        /// Creates and returns a new shape contructed by appending the dimensions of the specified 'shape' to 'this' shape's dimensions.
+        /// Creates and returns a new shape constructed by appending the dimensions of the specified 'shape' to 'this' shape's dimensions.
         ///
         NDShape AppendShape(const NDShape& shape) const
         {
@@ -665,35 +673,52 @@ namespace CNTK
     ///
     /// Denotes an Axis of a Variable and is used for specifying the axes parameters of certain Functions such as reductions.
-    /// Besides the static axes corresponding to each of the axes of the Variable's shape, Input and Output Variables
-    /// also have one or more dynamic axes (corresponding to the sequence dimensions) and one implicit batch axis denoting the axes
-    /// along which multiple sequences are batched in the Values corresponding to the variable when performing computations.
+    /// Besides the static axes corresponding to each of the axes of the Variable's shape, Variables of kind 'Input' and any
+    /// 'Output' Variables dependent on an 'Input' Variable also have 2 additional dynamic axes whose dimensions are known only
+    /// when the Variable is bound to actual data during compute (viz. sequence axis and batch axis denoting the axis along which
+    /// multiple sequences are batched)
     ///
     class Axis final
     {
+        CNTK_API static const std::wstring s_staticAxisNamePrefix;
+
     public:
         ///
         /// Construct an Axis object denoting a static axis with the specified index.
         ///
-        Axis(size_t staticAxisIdx)
+        explicit Axis(size_t staticAxisIdx)
             : m_staticAxisIdx(staticAxisIdx)
         {
-            const wchar_t* staticAxisNamePrefix = L"staticAxis_";
-            m_name = staticAxisNamePrefix + std::to_wstring(staticAxisIdx);
+            m_name = s_staticAxisNamePrefix + std::to_wstring(staticAxisIdx);
         }

         ///
         /// Construct a dynamic axis with the specified name.
         ///
-        Axis(const std::wstring& name)
+        explicit Axis(const std::wstring& name)
             : m_staticAxisIdx(SIZE_MAX), m_name(name)
         {
+            if (m_name.length() > s_staticAxisNamePrefix.length())
+            {
+                auto prefix = m_name.substr(0, s_staticAxisNamePrefix.length());
+                auto suffix = m_name.substr(s_staticAxisNamePrefix.length(), m_name.length() - s_staticAxisNamePrefix.length());
+                if (prefix == s_staticAxisNamePrefix)
+                {
+                    if (suffix == L"0")
+                        *this = Axis(0);
+                    else
+                    {
+                        auto suffixVal = std::stoul(suffix);
+                        if (suffixVal != 0)
+                            *this = Axis(suffixVal);
+                    }
+                }
+            }
         }

         ///
         /// Returns a boolean indicating if 'this' Axis corresponds to a static axis
         ///
-        bool IsStaticAxis() const { return m_staticAxisIdx == SIZE_MAX; }
+        bool IsStaticAxis() const { return m_staticAxisIdx != SIZE_MAX; }

         ///
         /// Returns the axis index if 'this' Axis is a static axis. Throws an exception otherwise.
@@ -714,12 +739,7 @@ namespace CNTK
         ///
         /// Static Axis object representing the batch axis.
         ///
-        CNTK_API static const Axis& BatchAxis();
-
-        ///
-        /// Special Axis object denoting all the axes of the Value object in whose context it is used.
-        ///
-        CNTK_API static const Axis& AllAxes();
+        CNTK_API static const Axis& DefaultBatchAxis();

         ///
         /// Name of 'this' axis
@@ -753,7 +773,20 @@ namespace CNTK
     {
         return !(first == second);
     }
+}
+
+namespace std {
+    template <> struct hash<CNTK::Axis>
+    {
+        size_t operator()(const CNTK::Axis& x) const
+        {
+            return std::hash<std::wstring>()(x.Name());
+        }
+    };
+}

+namespace CNTK
+{
     ///
     /// Enumeration type denoting the kind of a symbolic Variable object
     ///
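To illustrate the reworked Axis semantics above, a minimal sketch: a static axis is index-based, a dynamic axis is name-based, and a name carrying the reserved staticAxis_ prefix is canonicalized back into the corresponding static axis by the name-based constructor. Everything below is declared in this header except StaticAxisIndex(), whose name is assumed from the doc comment.

// Minimal sketch of the new Axis semantics (StaticAxisIndex() assumed from the doc comment).
#include "CNTKLibrary.h"
#include <cassert>

void AxisExample()
{
    CNTK::Axis staticAxis(1);              // static axis: index-based, named "staticAxis_1"
    CNTK::Axis seqAxis(L"mySequenceAxis"); // dynamic axis: name-based, index == SIZE_MAX

    assert(staticAxis.IsStaticAxis());     // note the fixed comparison: m_staticAxisIdx != SIZE_MAX
    assert(!seqAxis.IsStaticAxis());

    // The reserved "staticAxis_" prefix round-trips through the name-based constructor.
    CNTK::Axis roundTrip(L"staticAxis_1");
    assert(roundTrip.IsStaticAxis() && roundTrip.StaticAxisIndex() == 1);
}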
@@ -780,47 +813,76 @@ namespace CNTK
         template <typename T>
         friend struct std::hash;

+        CNTK_API static const std::vector<Axis> s_defaultInputVariableDynamicAxes;
+
     public:
         ///
         /// Create an 'Input' Variable.
         ///
-        Variable(const NDShape& shape, CNTK::DataType dataType)
-            : Variable(shape, dataType, L"")
+        Variable(const NDShape& shape, CNTK::DataType dataType, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
+            : Variable(shape, dataType, L"", dynamicAxes)
         {}

         ///
         /// Create an 'Input' Variable.
         ///
-        Variable(const NDShape& shape, CNTK::DataType dataType, const wchar_t* name)
-            : Variable(shape, dataType, std::wstring(name))
+        Variable(const NDShape& shape, CNTK::DataType dataType, const wchar_t* name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
+            : Variable(shape, dataType, std::wstring(name), dynamicAxes)
         {}

         ///
         /// Create an 'Input' Variable.
         ///
-        Variable(const NDShape& shape, CNTK::DataType dataType, const std::wstring& name)
-            : Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, false, { Axis::DefaultDynamicAxis() }, false, name)
+        Variable(const NDShape& shape, CNTK::DataType dataType, const std::wstring& name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
+            : Variable(shape, false, dataType, name, dynamicAxes)
         {}

         ///
         /// Create an 'Input' Variable denoting sparse data.
         ///
-        Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, const std::wstring& name = L"")
-            : Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, false, { Axis::DefaultDynamicAxis() }, isSparse, name)
+        Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
+            : Variable(shape, isSparse, dataType, false, L"", dynamicAxes)
+        {}
+
+        ///
+        /// Create an 'Input' Variable denoting sparse data.
+        ///
+        Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, const wchar_t* name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
+            : Variable(shape, isSparse, dataType, std::wstring(name), dynamicAxes)
+        {}
+
+        ///
+        /// Create an 'Input' Variable denoting sparse data.
+        ///
+        Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, const std::wstring& name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
+            : Variable(shape, isSparse, dataType, false, name, dynamicAxes)
         {}

         ///
         /// Create an 'Input' Variable and specify if gradients are to be computed for this input
         ///
-        Variable(const NDShape& shape, CNTK::DataType dataType, bool needsGradient, const std::wstring& name = L"")
-            : Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, needsGradient, { Axis::DefaultDynamicAxis() }, false, name)
+        Variable(const NDShape& shape, CNTK::DataType dataType, bool needsGradient, const wchar_t* name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
+            : Variable(shape, dataType, needsGradient, std::wstring(name), dynamicAxes)
+        {}
+
+        ///
+        /// Create an 'Input' Variable and specify if gradients are to be computed for this input
+        ///
+        Variable(const NDShape& shape, CNTK::DataType dataType, bool needsGradient, const std::wstring& name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
+            : Variable(shape, false, dataType, needsGradient, name, dynamicAxes)
         {}

         ///
         /// Create an 'Input' Variable denoting sparse data and specify if gradients are to be computed for this input
         ///
-        Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, bool needsGradient, const std::wstring& name = L"")
-            : Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, needsGradient, { Axis::DefaultDynamicAxis() }, isSparse, name)
+        Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, bool needsGradient, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
+            : Variable(shape, isSparse, dataType, needsGradient, L"", dynamicAxes)
+        {}
+
+        ///
+        /// Create an 'Input' Variable denoting sparse data and specify if gradients are to be computed for this input
+        ///
+        Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, bool needsGradient, const std::wstring& name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
+            : Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, needsGradient, dynamicAxes, isSparse, name)
         {}

         ///
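A hedged sketch of how the widened 'Input' Variable constructors above compose (NDShape construction from a brace list is assumed; everything else is declared in this header):

// Sketch: creating 'Input' Variables with explicit dynamic axes (assumes this header).
#include "CNTKLibrary.h"
#include <vector>

void InputVariableExample()
{
    using namespace CNTK;

    // Default dynamic axes (s_defaultInputVariableDynamicAxes).
    Variable features({ 28, 28 }, DataType::Float, L"features");

    // Explicit dynamic axes: a named sequence axis plus the default batch axis.
    std::vector<Axis> axes = { Axis(L"mySequenceAxis"), Axis::DefaultBatchAxis() };
    Variable labels({ 10 }, /*isSparse =*/ true, DataType::Float, L"labels", axes);

    // Passing the same dynamic axis twice would trip the new uniqueness check
    // in VariableFields and raise InvalidArgument.
}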
@@ -860,7 +922,7 @@ namespace CNTK
         ///
         /// Returns a boolean value indicating if 'this' variable denotes sparse data
         ///
-        bool IsSparse() const { return (m_dataFields->m_isSparse); }
+        bool IsSparse() const { return m_dataFields->m_isSparse; }

         ///
         /// Returns a boolean value indicating if 'this' variable is an Input
@@ -941,6 +1003,14 @@ namespace CNTK
             VariableFields(const NDShape& shape, VariableKind varType, CNTK::DataType type, Function* ownerFunction, const NDArrayViewPtr& value, bool needsGradient, const std::vector<Axis>& dynamicAxes, bool isSparse, const std::wstring& name)
                 : m_shape(shape), m_varKind(varType), m_dataType(type), m_ownerFunction(ownerFunction), m_value(value), m_needsGradient(needsGradient), m_dynamicAxes(dynamicAxes), m_isSparse(isSparse), m_name(name)
             {
+                // Validate that each of the dynamic axes are unique
+                std::unordered_set<Axis> uniqueDynamicAxis;
+                for (auto& currentDynamicAxis : dynamicAxes)
+                {
+                    auto retVal = uniqueDynamicAxis.insert(currentDynamicAxis);
+                    if (!retVal.second)
+                        InvalidArgument("Dynamic axis named %S is specified more than once for Variable object", currentDynamicAxis.Name().c_str());
+                }
             }

         private:
@@ -1079,7 +1149,7 @@ namespace CNTK
         /// Contruct a Placeholder with the specified NDShape
         ///
         explicit Placeholder(const NDShape& shape, const std::wstring& name = L"")
-            : Variable(shape, VariableKind::Placeholder, DataType::Unknown, nullptr, false, {Axis::DefaultDynamicAxis()}, name)
+            : Variable(shape, VariableKind::Placeholder, DataType::Unknown, nullptr, false, { Axis::DefaultDynamicAxis(), Axis::DefaultBatchAxis() }, name)
         {}

         ///
@@ -1097,13 +1167,15 @@ namespace CNTK
 }

 namespace std {
-    template <> struct hash<CNTK::Axis>
+    template <> struct hash<CNTK::NDShape>
     {
-        size_t operator()(const CNTK::Axis& x) const
+        size_t operator()(const CNTK::NDShape& x) const
         {
-            return std::hash<std::wstring>()(x.Name());
+            return std::hash<std::wstring>()(x.AsString());
         }
     };

     template <> struct hash<CNTK::Variable>
     {
@@ -1441,6 +1513,21 @@ namespace CNTK
     ///
     CNTK_API FunctionPtr Softmax(const Variable& operand, const std::wstring& name = L"");

+    ///
+    /// Create an instance of the CNTK built-in hardmax operation on specified tensor input operand
+    ///
+    CNTK_API FunctionPtr Hardmax(const Variable& operand, const std::wstring& name = L"");
+
+    ///
+    /// Create an instance of the CNTK built-in transpose dimensions operation on specified tensor input operand
+    ///
+    CNTK_API FunctionPtr TransposeAxes(const Variable& operand, const Axis& axis1, const Axis& axis2, const std::wstring& name = L"");
+
+    ///
+    /// Create an instance of the slice operation on specified tensor input operand
+    ///
+    CNTK_API FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name = L"");
+
     ///
     /// Create an instance of the CNTK built-in elementwise tensor addition operation with the specified input operands.
     ///
@@ -1497,6 +1584,13 @@ namespace CNTK
     ///
     CNTK_API FunctionPtr Times(const Variable& leftOperand, const Variable& rightOperand, size_t numOutputAxes = 1, const std::wstring& name = L"");

+    ///
+    /// Create an instance of the CNTK built-in matrix multiplication operation with the transpose of the left input operand
+    /// and the specified right operand. Only accepts left operands of ranks 1 or 2.
+    /// TODO: Specify the constraints on the shapes of the operands.
+    ///
+    CNTK_API FunctionPtr TransposeTimes(const Variable& leftOperand, const Variable& rightOperand, size_t numOutputAxes = 1, const std::wstring& name = L"");
+
     ///
     /// Create an instance of the CNTK built-in operation to compute squared-error for specified input operands.
     ///
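A small sketch of the newly exported TransposeTimes (per the doc comment above, it multiplies the transpose of the left operand with the right operand; the helper name and shapes are illustrative):

// Sketch: TransposeTimes as declared above; numOutputAxes = 1 yields a result
// with a single static axis, without materializing the transpose of W.
#include "CNTKLibrary.h"

CNTK::FunctionPtr LinearProjection(const CNTK::Variable& W, const CNTK::Variable& x)
{
    return CNTK::TransposeTimes(W, x, /*numOutputAxes =*/ 1, L"projection");
}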
@@ -1518,7 +1612,6 @@ namespace CNTK
     ///
     CNTK_API FunctionPtr PastValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name = L"");

-    //CNTK_API FunctionPtr PastValue(const Variable& initialState, const Variable& operand, Axis axis, const std::wstring& name = L"");
     ///
     /// Create an instance of the CNTK built-in operation for getting the future value along the lone dynamic axis of the specified operand.
@@ -1532,6 +1625,16 @@ namespace CNTK
     ///
     CNTK_API FunctionPtr ReduceSum(const Variable& operand, const std::wstring& name = L"");

+    ///
+    /// Create an instance of the CNTK built-in sum reduction operation on specified tensor input operand along the specified axis
+    ///
+    CNTK_API FunctionPtr ReduceSum(const Variable& operand, const Axis& axis, const std::wstring& name = L"");
+
+    ///
+    /// Create an instance of the CNTK built-in LogSum reduction operation on specified tensor input operand along the specified axis
+    ///
+    CNTK_API FunctionPtr ReduceLogSum(const Variable& operand, const Axis& axis, const std::wstring& name = L"");
+
     ///
     /// Per dimension mean-variance normalization of the specified input operand.
     ///
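The axis-aware ops added above compose naturally; a minimal sketch (all functions are declared in this header, the composition itself is illustrative):

// Sketch: composing the newly added axis-aware ops.
#include "CNTKLibrary.h"

CNTK::FunctionPtr ReductionExample(const CNTK::Variable& z)
{
    using namespace CNTK;
    auto firstTwo   = Slice(z, Axis(0), 0, 2);            // elements [0, 2) along static axis 0
    auto transposed = TransposeAxes(firstTwo, Axis(0), Axis(1));
    return ReduceLogSum(transposed, Axis(0));             // log-sum reduction along axis 0
}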
@@ -1630,6 +1733,7 @@ namespace CNTK
             NDShape,
             Vector,
             Dictionary,
+            NDArrayView,
         };

         static const char* TypeName(Type type)
@@ -1654,6 +1758,8 @@ namespace CNTK
                 return "Vector";
             case Type::Dictionary:
                 return "Dictionary";
+            case Type::NDArrayView:
+                return "NDArrayView";
             default:
                 LogicError("Unknown DictionaryValue::Type");
             }
@@ -1687,13 +1793,21 @@ namespace CNTK
         DictionaryValue(const wchar_t* value)
             : DictionaryValue(std::wstring(value))
         {}

+        // Due to SWIG we had to flatten this template for vector<DictionaryValue>
+        DictionaryValue(const std::vector<CNTK::DictionaryValue>& value) : m_valueType(GetValueType<std::vector<CNTK::DictionaryValue>>())
+        {
+            AllocateDataPtr(value);
+        }
+
         template <typename T>
         DictionaryValue(const T& value) : m_valueType(GetValueType<T>())
         {
-            static_assert(std::is_same<T, NDShape>::value ||
+            static_assert((std::is_same<T, NDShape>::value ||
                           std::is_same<T, std::wstring>::value ||
                           std::is_same<T, std::vector<DictionaryValue>>::value ||
-                          std::is_same<T, Dictionary>::value,
+                          std::is_same<T, Dictionary>::value ||
+                          std::is_same<T, NDArrayView>::value),
                           "Unsupported ValueType");

             AllocateDataPtr(value);
@@ -1706,6 +1820,12 @@ namespace CNTK
             *this = other;
         }

+        DictionaryValue(DictionaryValue&& other) : m_valueType(Type::Bool)
+        {
+            // The m_valueType must have been set to a non-ptr type to prevent an attempt to interpret
+            // the underlying uninitialized value as a ptr and free it.
+            *this = std::move(other);
+        }
+
         DictionaryValue& operator=(const DictionaryValue& other)
         {
             if (this != &other)
@@ -1723,11 +1843,33 @@ namespace CNTK
                     AllocateDataPtr(other.GetValue<std::vector<DictionaryValue>>());
                 else if (other.m_valueType == Type::Dictionary)
                     AllocateDataPtr(other.GetValue<Dictionary>());
+                else if (other.m_valueType == Type::NDArrayView)
+                    AllocateDataPtr(other.GetValue<NDArrayView>());
             }

             return *this;
         }

+        DictionaryValue& operator=(DictionaryValue&& other)
+        {
+            FreeDataPtr();
+
+            m_valueType = other.m_valueType;
+            m_data = other.m_data;
+
+            if (other.m_valueType == Type::String ||
+                other.m_valueType == Type::NDShape ||
+                other.m_valueType == Type::Vector ||
+                other.m_valueType == Type::Dictionary ||
+                other.m_valueType == Type::NDArrayView)
+            {
+                other.m_data.m_ptr = nullptr;
+            }
+
+            other.m_valueType = Type::None;
+
+            return *this;
+        }
+
         ~DictionaryValue()
         {
             FreeDataPtr();
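A hedged sketch of the move semantics just added: the move constructor deliberately seeds m_valueType with a non-pointer type (Bool) so that the FreeDataPtr() call inside operator=(DictionaryValue&&) cannot misread uninitialized storage as a pointer, and the moved-from value ends up as Type::None with a nulled payload pointer.

// Sketch: DictionaryValue move semantics as declared above (assumes this header).
#include "CNTKLibrary.h"
#include <utility>
#include <vector>

void DictionaryValueMoveExample()
{
    using namespace CNTK;

    DictionaryValue v(std::vector<DictionaryValue>{ DictionaryValue(L"checkpoint") });
    DictionaryValue moved(std::move(v)); // payload pointer stolen; v becomes Type::None

    const auto& vec = moved.GetValue<std::vector<DictionaryValue>>();
    // vec.size() == 1 and holds the wide string "checkpoint"; v no longer owns it
}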
@@ -1764,7 +1906,8 @@ namespace CNTK
         template <typename T, typename std::enable_if<std::is_same<T, NDShape>::value ||
                                                       std::is_same<T, std::wstring>::value ||
                                                       std::is_same<T, std::vector<DictionaryValue>>::value ||
-                                                      std::is_same<T, Dictionary>::value>::type* = nullptr>
+                                                      std::is_same<T, Dictionary>::value ||
+                                                      std::is_same<T, NDArrayView>::value>::type* = nullptr>
         const T& GetValue() const
         {
             VerifyType<T>();
@@ -1781,21 +1924,25 @@ namespace CNTK
             return m_valueType;
         }

-        friend CNTK_API Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, DictionaryValue& us);
-        friend CNTK_API Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const DictionaryValue& us);
+        CNTK_API bool operator==(const DictionaryValue& other) const;
+        CNTK_API bool operator!=(const DictionaryValue& other) const;
+
+        friend CNTK_API std::istream& operator>>(std::istream& stream, DictionaryValue& us);
+        friend CNTK_API std::ostream& operator<<(std::ostream& stream, const DictionaryValue& us);

     private:
         template <typename T>
         static Type GetValueType()
         {
-            static_assert(std::is_same<T, bool>::value ||
+            static_assert((std::is_same<T, bool>::value ||
                           std::is_same<T, size_t>::value ||
                           std::is_same<T, float>::value ||
                           std::is_same<T, double>::value ||
                           std::is_same<T, std::wstring>::value ||
                           std::is_same<T, NDShape>::value ||
                           std::is_same<T, std::vector<DictionaryValue>>::value ||
-                          std::is_same<T, Dictionary>::value,
+                          std::is_same<T, Dictionary>::value ||
+                          std::is_same<T, NDArrayView>::value),
                           "Unsupported ValueType");
@@ -1806,6 +1953,7 @@ namespace CNTK
             if (std::is_same<T, NDShape>::value) return Type::NDShape;
             if (std::is_same<T, std::vector<DictionaryValue>>::value) return Type::Vector;
             if (std::is_same<T, Dictionary>::value) return Type::Dictionary;
+            if (std::is_same<T, NDArrayView>::value) return Type::NDArrayView;
         }

         template <typename T>
@@ -1831,6 +1979,8 @@ namespace CNTK
                 FreePtrAsType<std::vector<DictionaryValue>>();
             else if (m_valueType == Type::Dictionary)
                 FreePtrAsType<Dictionary>();
+            else if (m_valueType == Type::NDArrayView)
+                FreePtrAsType<NDArrayView>();
         }

         Type m_valueType;
@@ -1884,9 +2034,11 @@ namespace CNTK
             return Contains(key.c_str());
         }

-        friend CNTK_API Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, Dictionary& us);
-        friend CNTK_API Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const Dictionary& us);
+        CNTK_API bool operator==(const Dictionary& other) const;
+        CNTK_API bool operator!=(const Dictionary& other) const;
+
+        friend CNTK_API std::istream& operator>>(std::istream& stream, Dictionary& us);
+        friend CNTK_API std::ostream& operator<<(std::ostream& stream, const Dictionary& us);

     private:
         std::shared_ptr<std::unordered_map<std::wstring, DictionaryValue>> m_dictionaryData;
@@ -1924,6 +2076,9 @@ namespace CNTK
         ///
         CNTK_API virtual void RestoreFromCheckpoint(const Dictionary& /*checkpoint*/) {}

+        ///
+        /// Destruct this Learner.
+        ///
         virtual ~Learner() {}

     protected:
@@ -1935,37 +2090,127 @@ namespace CNTK
     };

+    ///
+    /// A collection of key-value pairs that represents training parameter schedule in
+    /// terms of the number of processed samples.
+    /// This class provides a number of convenience constructors to allow easy conversion
+    /// from a single value, a vector of values and a list of pairs to the training schedule.
+    ///
+    template <typename T>
+    class TrainingParameterSchedule
+    {
+    public:
+        ///
+        /// Create a schedule with a constant parameter value.
+        ///
+        TrainingParameterSchedule(T value)
+            : m_schedule({ std::make_pair(0, value) }), m_unit(1)
+        {}
+
+        ///
+        /// Create a schedule where the parameter changes its value every 'unit' samples:
+        /// schedule[0] is used for the first 'unit' samples, schedule[1] -- for the second,
+        /// and so on. The last value is then used repeatedly until the end of training.
+        ///
+        TrainingParameterSchedule(const std::vector<T>& schedule, size_t unit = 1)
+            : m_unit(unit)
+        {
+            // TODO: 0 will be used to mean "the entire sweep"
+            if (unit == 0)
+                RuntimeError("TrainingParameterSchedule::constructor : 'unit' cannot be 0.");
+
+            if (schedule.size() == 0)
+                RuntimeError("TrainingParameterSchedule::constructor : schedule is empty.");
+
+            size_t i = 1;
+            for (const auto& value : schedule)
+            {
+                m_schedule[m_unit * i++] = value;
+            }
+        }
+
+        ///
+        /// Create a schedule using the list of key-value pairs, where the key specifies
+        /// the number of 'units' the parameter should maintain the corresponding value.
+        /// The value from the last pair is used repeatedly until the end of training.
+        /// For example, {{1, 0.05}, {2, 0.1}, {1, 0.005}} and unit = 100, corresponds to
+        /// a schedule where the value of '0.05' is used for the first 100 samples, then
+        /// '0.1' is used for the second 200 samples, after which the value is switched
+        /// to '0.005'.
+        ///
+        TrainingParameterSchedule(const std::initializer_list<std::pair<const size_t, T>>& schedule, size_t unit = 1)
+            : m_unit(unit)
+        {
+            // TODO: 0 will be used to mean "the entire sweep"
+            if (unit == 0)
+                RuntimeError("TrainingParameterSchedule::constructor : 'unit' cannot be 0.");
+
+            if (schedule.size() == 0)
+                RuntimeError("TrainingParameterSchedule::constructor : schedule is empty.");
+
+            size_t i = 0;
+            for (const auto& it : schedule)
+            {
+                if (it.first == 0)
+                    RuntimeError("TrainingParameterSchedule::constructor : unit count cannot be 0.");
+
+                i += it.first;
+                m_schedule[m_unit * i] = it.second;
+            }
+        }
+
+        ///
+        /// Returns a value corresponding to the absolute sample count from the beginning of training.
+        ///
+        CNTK_API const T& operator[](size_t sampleCount) const;
+
+    private:
+        std::map<size_t, T> m_schedule;
+        size_t m_unit;
+    };
+
+    typedef TrainingParameterSchedule<double> LearningRatesPerSample;
+    typedef TrainingParameterSchedule<double> MomentumsPerSample;
+
     ///
     /// Create an instance of the CNTK built-in SGD learner.
     ///
-    /// TODO: add additional SGD parameters here (a collection of learning rate values)
-    CNTK_API LearnerPtr SGDLearner(const std::unordered_set<Parameter>& parameters, double learningRatePerSample);
+    CNTK_API LearnerPtr SGDLearner(const std::unordered_set<Parameter>& parameters,
+                                   const LearningRatesPerSample& learningRates);

     ///
     /// Create an instance of the CNTK built-in Momentum SGD learner.
     ///
-    /// TODO: add additional Momentum parameters here (a collection of momentum rate values)
-    CNTK_API LearnerPtr MomentumSGDLearner(const std::unordered_set<Parameter>& parameters);
+    CNTK_API LearnerPtr MomentumSGDLearner(const std::unordered_set<Parameter>& parameters,
+                                           const LearningRatesPerSample& learningRates,
+                                           const MomentumsPerSample& momentums);

     ///
     /// Create an instance of the CNTK built-in Nesterov's accelerated SGD learner.
     ///
-    CNTK_API LearnerPtr NesterovLearner(const std::unordered_set<Parameter>& parameters);
+    CNTK_API LearnerPtr NesterovLearner(const std::unordered_set<Parameter>& parameters,
+                                        const LearningRatesPerSample& learningRates,
+                                        const MomentumsPerSample& momentums);

     ///
     /// Create an instance of the CNTK built-in AdaGrad learner.
     ///
-    CNTK_API LearnerPtr AdaGradLearner(const std::unordered_set<Parameter>& parameters, bool needAveMultiplier = true);
+    CNTK_API LearnerPtr AdaGradLearner(const std::unordered_set<Parameter>& parameters,
+                                       const LearningRatesPerSample& learningRates,
+                                       bool needAveMultiplier = true);

     ///
     /// Create an instance of the CNTK built-in FSAdaGrad (improved AdaGrad) learner.
     ///
-    CNTK_API LearnerPtr FSAdaGradLearner(const std::unordered_set<Parameter>& parameters);
+    CNTK_API LearnerPtr FSAdaGradLearner(const std::unordered_set<Parameter>& parameters,
+                                         const LearningRatesPerSample& learningRates,
+                                         const MomentumsPerSample& momentums);

     ///
     /// Create an instance of the CNTK built-in RMSProp learner.
     ///
     CNTK_API LearnerPtr RMSPropLearner(const std::unordered_set<Parameter>& parameters,
+                                       const LearningRatesPerSample& learningRates,
                                        double gamma,
                                        double inc,
                                        double dec,
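The schedule semantics above follow directly from the doc comments; a minimal sketch (everything used here is declared in this header):

// Sketch: TrainingParameterSchedule as described in the doc comments above.
#include "CNTKLibrary.h"

void ScheduleExample()
{
    using namespace CNTK;

    // A constant learning rate for the whole run.
    LearningRatesPerSample constantRate(0.005);

    // With unit = 100: 0.05 for samples [0, 100), 0.1 for [100, 300),
    // then 0.005 repeated until the end of training.
    LearningRatesPerSample rates({ { 1, 0.05 }, { 2, 0.1 }, { 1, 0.005 } }, 100);

    double early = rates[50];    // 0.05
    double middle = rates[150];  // 0.1
    double late = rates[99999];  // 0.005 (last value repeats)
}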
@@ -1975,7 +2220,7 @@ namespace CNTK

     ///
     /// Trainer is the top-level abstraction responsible for the orchestration of the training of a model
-    /// using the specified learners and training data either explicilty supplied as Value objects or from
+    /// using the specified learners and training data either explicitly supplied as Value objects or from
     /// a MinibatchSource object.
     ///
     class Trainer
@@ -2063,7 +2308,7 @@ namespace CNTK
     };

     ///
-    /// Abstraction for generating minbatches of samples for training/evaluation.
+    /// Abstraction for generating minibatches of samples for training/evaluation.
     ///
     class MinibatchSource : public std::enable_shared_from_this<MinibatchSource>
     {
@@ -2079,10 +2324,14 @@ namespace CNTK
         /// #samples or both. In case the size is specified in terms of both #sequences and #samples, the smaller of the 2 is taken. The actual
         /// returned size of the minibatch is the min across all streams. Also the requested MB size fields in the maps are updated by the
         /// MinibatchSource to contain the actual #sequences and #samples in the returned minibatch for the corresponding stream.
-        /// The return value indciates if the MinibatchSource will return any further data in subsequent calls of this function.
+        /// The return value indicates if the MinibatchSource will return any further data in subsequent calls of this function.
         ///
-        virtual std::unordered_map<StreamInfo, MinibatchData> GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
-                                                                               const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()) = 0;
+        virtual const std::unordered_map<StreamInfo, MinibatchData>& GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
+                                                                                      const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()) = 0;
+
+        ///
+        /// Destruct this MinibatchSource.
+        ///
+        virtual ~MinibatchSource() {}

         // TODO: Methods to save and restore from checkpoints
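A hedged read-loop sketch against the revised GetNextMinibatch above. The per-stream limits pair is (#sequences, #samples) per the doc comment; treating an empty result map as end-of-data is an assumption made for illustration only.

// Sketch: draining a MinibatchSource (assumes this header; end-of-data
// convention is assumed, not specified by the interface).
#include "CNTKLibrary.h"
#include <unordered_map>
#include <utility>

void ReadAllMinibatches(CNTK::MinibatchSource& source, const CNTK::StreamInfo& features)
{
    using namespace CNTK;
    std::unordered_map<StreamInfo, std::pair<size_t, size_t>> limits =
        { { features, { 0, 1024 } } };   // no sequence cap, up to 1024 samples

    for (;;)
    {
        const auto& mb = source.GetNextMinibatch(limits);
        if (mb.empty())
            break;                       // assumed end-of-data convention
        // ... feed mb.at(features) to a Trainer here
    }
}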

View file

@@ -7,6 +7,12 @@

 #pragma once

+#ifdef SWIG
+#define final
+#define explicit
+#define static_assert(condition, message)
+#endif
+
 #ifdef _WIN32
 #ifdef CNTKV2LIBRARYDLL
 #define CNTK_API __declspec(dllexport)
@@ -47,8 +53,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     template <typename ElementType>
     class ComputationNode;

-    class File;
-
 }}}

 // TODO: The following should be reconciled with the equivalent code in the CNTK implementation
@@ -133,7 +137,7 @@ namespace CNTK
 #define NOT_IMPLEMENTED \
 { \
     fprintf(stderr, "Inside File: %s Line: %d Function: %s -> Feature Not Implemented.\n", __FILE__, __LINE__, __FUNCTION__); \
-    LogicError("Inside File: %s Line: %d Function: %s -> Feature Not Implemented.\n", __FILE__, __LINE__, __FUNCTION__); \
+    CNTK::LogicError("Inside File: %s Line: %d Function: %s -> Feature Not Implemented.\n", __FILE__, __LINE__, __FUNCTION__); \
 }
 #endif
 }
@@ -144,6 +148,7 @@ namespace CNTK
     class CompositeFunction;
     class Function;
     class Variable;
+    class Axis;

     // Similar to make_shared except that it associates a custom deleter with the shared_ptr to ensure
     // that objects are deleted on the same side of the library DLL where they are allocated
@@ -174,4 +179,15 @@ namespace CNTK
     class MinibatchSource;
     typedef std::shared_ptr<MinibatchSource> MinibatchSourcePtr;

+    namespace Internal
+    {
+        CNTK_API FunctionPtr PackedIndex(const Variable& operand, const Variable& index, const std::wstring& name = L"");
+        CNTK_API FunctionPtr GatherPacked(const Variable& operand, const Variable& packedIndex, const std::wstring& name = L"");
+        CNTK_API FunctionPtr IsWithin(const Variable& operand, int offset, const std::wstring& name = L"");
+        CNTK_API FunctionPtr Where(const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name = L"");
+        CNTK_API FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name = L"");
+        CNTK_API FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name = L"");
+        CNTK_API FunctionPtr ReduceElements(const Variable& operand, const std::wstring& reductionOpName, const Axis& axis, const std::wstring& name = L"");
+    }
 }

View file

@@ -15,6 +15,7 @@
 #include "RecurrentNodes.h"
 #include "EvaluationNodes.h"
 #include "TrainingNodes.h"
+#include "ReshapingNodes.h"

 using namespace Microsoft::MSR::CNTK;
@@ -32,6 +33,7 @@ namespace CNTK
             Variable var;
             NDShape varShape = AsNDShape(node->GetSampleLayout());
+
             // The CNTK sample layouts may have trailing axes with dimension size of 1 which are automatically
             // added when converting from NDShape to CNTK internal TensorShapes and are not present in the original
             // shapes specified by the user. These should be truncated.
@@ -57,11 +59,10 @@ namespace CNTK
             if (node->HasMBLayout())
             {
                 // TODO: Currently only default dynamic axis is supported
-                const std::wstring defaultCNTKDynamicAxisName = L"";
-                if (inputNode->GetRequestedDynamicAxis() != defaultCNTKDynamicAxisName)
-                    LogicError("Found dynamic axis named '%S' while currently only default dynamic axis named '%S' is supported!", node->GetMBLayout()->GetAxisName(), defaultCNTKDynamicAxisName.c_str());
+                auto inputNodeInternalDynamicAxisName = inputNode->GetRequestedDynamicAxis();
+                std::vector<Axis> inputVarDynamicAxes = DynamicAxesFromInternalDynamicAxisName(inputNodeInternalDynamicAxisName);

-                var = Variable(varShape, isSparse, AsDataType<ElementType>(), node->GetLearningRateMultiplier() != 0, node->GetName());
+                var = Variable(varShape, isSparse, AsDataType<ElementType>(), node->GetLearningRateMultiplier() != 0, node->GetName(), inputVarDynamicAxes);
             }
             else
             {
@@ -121,6 +122,40 @@ namespace CNTK
                 opType = PrimitiveOpType::Reciprocal;
             else if (node->OperationName() == OperationNameOf(SoftmaxNode))
                 opType = PrimitiveOpType::Softmax;
+            else if (node->OperationName() == OperationNameOf(HardmaxNode))
+                opType = PrimitiveOpType::Hardmax;
+            else if (node->OperationName() == OperationNameOf(TransposeDimensionsNode))
+            {
+                auto transposeDimensionsNode = node->As<TransposeDimensionsNode<ElementType>>();
+                primitiveFunctionConfigParameters[L"axis1"] = (size_t)transposeDimensionsNode->Axis1();
+                primitiveFunctionConfigParameters[L"axis2"] = (size_t)transposeDimensionsNode->Axis2();
+
+                opType = PrimitiveOpType::TransposeAxes;
+            }
+            else if (node->OperationName() == OperationNameOf(WhereNode))
+            {
+                auto whereNode = node->As<WhereNode<ElementType>>();
+                auto internalDynamicAxisName = whereNode->DynamicAxisName();
+                std::vector<Axis> dynamicAxes = DynamicAxesFromInternalDynamicAxisName(internalDynamicAxisName);
+                std::vector<std::wstring> dynamicAxesNames;
+                for (auto axis : dynamicAxes)
+                    dynamicAxesNames.push_back(axis.Name());
+
+                primitiveFunctionConfigParameters[L"newDynamicAxes"] = AsDictionaryValueVector(dynamicAxesNames);
+
+                opType = PrimitiveOpType::Where;
+            }
+            else if (node->OperationName() == OperationNameOf(SliceNode))
+            {
+                auto sliceNode = node->As<SliceNode<ElementType>>();
+                primitiveFunctionConfigParameters[L"axis"] = Axis(sliceNode->Axis() - 1).Name();
+                primitiveFunctionConfigParameters[L"beginIndex"] = sliceNode->BeginIndex();
+                primitiveFunctionConfigParameters[L"endIndex"] = sliceNode->EndIndex();
+
+                opType = PrimitiveOpType::Slice;
+            }
+            else if (node->OperationName() == OperationNameOf(SumElementsNode))
+                opType = PrimitiveOpType::SumAll;
             else if (node->OperationName() == OperationNameOf(PlusNode))
                 opType = PrimitiveOpType::Plus;
             else if (node->OperationName() == OperationNameOf(MinusNode))
@@ -139,11 +174,23 @@ namespace CNTK
                 opType = PrimitiveOpType::Greater;
             else if (node->OperationName() == OperationNameOf(GreaterEqualNode))
                 opType = PrimitiveOpType::GreaterEqual;
+            else if (node->OperationName() == OperationNameOf(PackedIndexNode))
+                opType = PrimitiveOpType::PackedIndex;
+            else if (node->OperationName() == OperationNameOf(GatherPackedNode))
+            {
+                std::swap(inputVars[0], inputVars[1]);
+                opType = PrimitiveOpType::GatherPacked;
+            }
             else if (node->OperationName() == OperationNameOf(TimesNode))
             {
-                primitiveFunctionConfigParameters[L"numOutputAxes"] = DictionaryValue((size_t)node->As<TimesNode<ElementType>>()->OutputRank());
+                primitiveFunctionConfigParameters[L"numOutputAxes"] = (size_t)node->As<TimesNode<ElementType>>()->OutputRank();
                 opType = PrimitiveOpType::Times;
             }
+            else if (node->OperationName() == OperationNameOf(TransposeTimesNode))
+            {
+                primitiveFunctionConfigParameters[L"numOutputAxes"] = (size_t)node->As<TransposeTimesNode<ElementType>>()->OutputRank();
+                opType = PrimitiveOpType::TransposeTimes;
+            }
             else if (node->OperationName() == OperationNameOf(PastValueNode))
             {
                 if (inputVars.size() == 1)
@@ -151,7 +198,7 @@ namespace CNTK
                     auto initialStateVar = Constant({}, node->As<PastValueNode<ElementType>>()->InitialActivationValue(), AsDeviceDescriptor(node->GetDeviceId()));
                     inputVars.insert(inputVars.begin(), initialStateVar);
                 }
-                primitiveFunctionConfigParameters[L"stepSize"] = DictionaryValue((size_t)node->As<PastValueNode<ElementType>>()->TimeStep());
+                primitiveFunctionConfigParameters[L"stepSize"] = (size_t)node->As<PastValueNode<ElementType>>()->TimeStep();
                 opType = PrimitiveOpType::PastValue;
             }
             else if (node->OperationName() == OperationNameOf(FutureValueNode))
@@ -161,7 +208,7 @@ namespace CNTK
                     auto initialStateVar = Constant({}, node->As<FutureValueNode<ElementType>>()->InitialActivationValue(), AsDeviceDescriptor(node->GetDeviceId()));
                     inputVars.insert(inputVars.begin(), initialStateVar);
                 }
-                primitiveFunctionConfigParameters[L"stepSize"] = DictionaryValue((size_t)node->As<FutureValueNode<ElementType>>()->TimeStep());
+                primitiveFunctionConfigParameters[L"stepSize"] = (size_t)node->As<FutureValueNode<ElementType>>()->TimeStep();
                 opType = PrimitiveOpType::FutureValue;
             }
             else if (node->OperationName() == OperationNameOf(SquareErrorNode))
@@ -176,8 +223,14 @@ namespace CNTK
                 std::swap(inputVars[0], inputVars[1]);
                 opType = PrimitiveOpType::ClassificationError;
             }
-            else if (node->OperationName() == OperationNameOf(SumElementsNode))
-                opType = PrimitiveOpType::ReduceSum;
+            else if (node->OperationName() == OperationNameOf(ReduceElementsNode))
+            {
+                auto reduceElementsNode = node->As<ReduceElementsNode<ElementType>>();
+                primitiveFunctionConfigParameters[L"CNTKInternalReductionAxisIndex"] = (size_t)reduceElementsNode->ReductionAxis();
+                primitiveFunctionConfigParameters[L"ReductionOpName"] = reduceElementsNode->ReductionOpName();
+
+                opType = PrimitiveOpType::ReduceElements;
+            }
             else if (node->OperationName() == OperationNameOf(ConvolutionNode))
             {
                 auto convolutionNode = node->As<ConvolutionNode<ElementType>>();


@ -14,21 +14,17 @@ namespace CNTK
return GPUDevice(0); return GPUDevice(0);
} }
/*static*/ const std::wstring Axis::s_staticAxisNamePrefix = L"staticAxis_";
/*static*/ const Axis& Axis::DefaultDynamicAxis() /*static*/ const Axis& Axis::DefaultDynamicAxis()
{ {
static Axis s_defaultDynamicAxis(L"defaultDynamicAxis"); static Axis s_defaultDynamicAxis(L"defaultDynamicAxis");
return s_defaultDynamicAxis; return s_defaultDynamicAxis;
} }
/*static*/ const Axis& Axis::BatchAxis() /*static*/ const Axis& Axis::DefaultBatchAxis()
{ {
static Axis s_batchAxis(L"batchAxis"); static Axis s_batchAxis(L"defaultBatchAxis");
return s_batchAxis; return s_batchAxis;
} }
/*static*/ const Axis& Axis::AllAxes()
{
static Axis s_allAxes(L"allAxes");
return s_allAxes;
}
} }


@ -10,6 +10,10 @@
#include "Utils.h" #include "Utils.h"
#include "ComputationNode.h" #include "ComputationNode.h"
#include "ReshapingNodes.h" #include "ReshapingNodes.h"
#include "EvaluationNodes.h"
#include "TrainingNodes.h"
#include "LinearAlgebraNodes.h"
#include "InputAndParamNodes.h"
using namespace Microsoft::MSR::CNTK; using namespace Microsoft::MSR::CNTK;
@ -72,6 +76,17 @@ namespace CNTK
} }
} }
/*static*/ const std::wstring PrimitiveFunction::InternalSumReductionOpName = L"Sum";
/*static*/ const std::wstring PrimitiveFunction::InternalLogSumReductionOpName = L"LogSum";
/*static*/ const std::wstring PrimitiveFunction::InternalMeanReductionOpName = L"Mean";
/*static*/ const std::wstring PrimitiveFunction::InternalMaxReductionOpName = L"Max";
/*static*/ const std::wstring PrimitiveFunction::InternalMinReductionOpName = L"Min";
/*static*/ const std::wstring PrimitiveFunction::InternalAllReductionOpName = L"All";
/*static*/ const std::wstring PrimitiveFunction::InternalAnyReductionOpName = L"Any";
/*static*/ std::wstring CompositeFunction::s_internalDefaultDynamicAxisName = L"";
/*static*/ std::wstring CompositeFunction::s_internalNoSequenceAxisName = L"noSequenceAxis";
// Replace any PlaceHolder Variables in the graph of Functions underlying 'this' CompositeFunction. All PlaceHolder variables // Replace any PlaceHolder Variables in the graph of Functions underlying 'this' CompositeFunction. All PlaceHolder variables
// should have been replaced before performing any Forward compute of 'this' Function. // should have been replaced before performing any Forward compute of 'this' Function.
/*virtual*/ void CompositeFunction::ReplacePlaceholders(const std::unordered_map<Placeholder, Variable>& placeholderReplacements, /*virtual*/ void CompositeFunction::ReplacePlaceholders(const std::unordered_map<Placeholder, Variable>& placeholderReplacements,
@ -122,22 +137,46 @@ namespace CNTK
computationNodePtr->SetLearningRateMultiplier(0.0); computationNodePtr->SetLearningRateMultiplier(0.0);
NDArrayViewPtr value = variable.IsConstant() ? Constant(variable).Value() : Parameter(variable).Value(); NDArrayViewPtr value = variable.IsConstant() ? Constant(variable).Value() : Parameter(variable).Value();
auto matrix = variable.IsConstant() ? value->GetMatrix<ElementType>()->AsReference() : value->GetWritableMatrix<ElementType>()->AsReference(); std::shared_ptr<const Matrix<ElementType>> valueMatrix = variable.IsConstant() ? value->GetMatrix<ElementType>() : value->GetWritableMatrix<ElementType>();
computationNodePtr->Value() = std::move(matrix); if (variable.IsParameter() || (valueMatrix->GetDeviceId() == network->GetDeviceId()))
computationNodePtr->Value() = valueMatrix->AsReference();
else
{
Matrix<ElementType> clonedMatrix(valueMatrix->GetNumRows(), valueMatrix->GetNumCols(), network->GetDeviceId(), valueMatrix->GetMatrixType(), valueMatrix->GetFormat());
clonedMatrix.AssignValuesOf(*valueMatrix);
computationNodePtr->Value() = std::move(clonedMatrix);
}
} }
else if (variable.IsInput()) else if (variable.IsInput())
{ {
// TODO: Support inputs with > 1 dynamic axes // TODO: Input variables currently are required to have the default batch axis
if (variable.DynamicAxes().size() != 1) auto dynamicAxes = variable.DynamicAxes();
LogicError("Currently only Input variables with one dynamic axis are supported"); auto foundDefaultBatchAxis = std::find(dynamicAxes.begin(), dynamicAxes.end(), Axis::DefaultBatchAxis());
if (foundDefaultBatchAxis == dynamicAxes.end())
LogicError("Currently Input Variables are required to have the DefaultBatchAxis as one of their dynamic axes");
auto dynamicAxis = variable.DynamicAxes()[0]; if (dynamicAxes.back() != Axis::DefaultBatchAxis())
if (dynamicAxis != Axis::DefaultDynamicAxis()) LogicError("Currently Input Variables are required to have the DefaultBatchAxis as their last dynamic axis");
LogicError("Currently only Input variables with DefaultDynamicAxis are supported");
if (IsSparseInput(variable)) // TODO: Support inputs with > 1 dynamic axes
computationNodePtr = builder.CreateSparseInputNode(variable.Name(), AsTensorShape(variable.Shape())); if ((dynamicAxes.size() < 1) || (dynamicAxes.size() > 2))
LogicError("Currently only Input variables with 1 or 2 dynamic axis are supported");
std::wstring internalDynamicAxisName;
if (dynamicAxes.size() == 1)
internalDynamicAxisName = s_internalNoSequenceAxisName;
else if (dynamicAxes[0] == Axis::DefaultDynamicAxis())
internalDynamicAxisName = s_internalDefaultDynamicAxisName;
else else
computationNodePtr = builder.CreateInputNode(variable.Name(), AsTensorShape(variable.Shape())); internalDynamicAxisName = dynamicAxes[0].Name();
if (!internalDynamicAxisName.empty())
network->AddNodeToNetAndAttachInputs(New<DynamicAxisNode<ElementType>>(network->GetDeviceId(), internalDynamicAxisName), {});
if (IsSparseInput(variable))
computationNodePtr = builder.CreateSparseInputNode(variable.Name(), AsTensorShape(variable.Shape()), internalDynamicAxisName);
else
computationNodePtr = builder.CreateInputNode(variable.Name(), AsTensorShape(variable.Shape()), internalDynamicAxisName);
if (variable.NeedsGradient()) if (variable.NeedsGradient())
{ {
@ -219,11 +258,29 @@ namespace CNTK
computationNodePtr = builder.Reciprocal(input0Node, function->Name()); computationNodePtr = builder.Reciprocal(input0Node, function->Name());
break; break;
case PrimitiveOpType::Softmax: case PrimitiveOpType::Softmax:
if (functionInputs[0].Shape().NumAxes() > 1)
InvalidArgument("Softmax operation can only be applied to a 1D input");
computationNodePtr = builder.Softmax(input0Node, function->Name()); computationNodePtr = builder.Softmax(input0Node, function->Name());
break; break;
case PrimitiveOpType::Hardmax:
computationNodePtr = builder.Hardmax(input0Node, function->Name());
break;
case PrimitiveOpType::TransposeAxes:
{
auto axis1 = Axis(functionConfig[L"axis1"].GetValue<std::wstring>());
auto axis2 = Axis(functionConfig[L"axis2"].GetValue<std::wstring>());
// The axis ids passed to the internal CNTK TransposeDimensionsNode are 1-based instead of 0-based
computationNodePtr = New<TransposeDimensionsNode<ElementType>>(network->GetDeviceId(), function->Name(), (int)(axis1.StaticAxisIndex() + 1), (int)(axis2.StaticAxisIndex() + 1));
network->AddNodeToNetAndAttachInputs(computationNodePtr, { input0Node });
break;
}
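// Worked example of the conversion above: transposing static axes 0 and 2 of a
// rank-3 input passes axis ids 1 and 3 to the internal TransposeDimensionsNode.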
case PrimitiveOpType::Where:
{
auto dynamicAxes = variable.DynamicAxes();
auto internalCNTKWhereNodeDynamicAxisName = (dynamicAxes == std::vector<Axis>({ Axis::DefaultBatchAxis() })) ? CompositeFunction::s_internalNoSequenceAxisName : dynamicAxes[0].Name();
computationNodePtr = New<WhereNode<ElementType>>(network->GetDeviceId(), function->Name(), internalCNTKWhereNodeDynamicAxisName);
network->AddNodeToNetAndAttachInputs(computationNodePtr, { input0Node });
break;
}
case PrimitiveOpType::Pooling: case PrimitiveOpType::Pooling:
{ {
PoolingType poolingType = (PoolingType)(functionConfig[L"poolingType"].GetValue<size_t>()); PoolingType poolingType = (PoolingType)(functionConfig[L"poolingType"].GetValue<size_t>());
@ -235,6 +292,9 @@ namespace CNTK
computationNodePtr = builder.Pooling(input0Node, AsCNTKPoolKind(poolingType), AsTensorShape(poolingWindowsShape, true), AsTensorShape(strides, true), autoPadding, AsTensorShape(lowerPad, true), AsTensorShape(upperPad, true), ImageLayoutKind::CHW, function->Name()); computationNodePtr = builder.Pooling(input0Node, AsCNTKPoolKind(poolingType), AsTensorShape(poolingWindowsShape, true), AsTensorShape(strides, true), autoPadding, AsTensorShape(lowerPad, true), AsTensorShape(upperPad, true), ImageLayoutKind::CHW, function->Name());
break; break;
} }
case PrimitiveOpType::SumAll:
computationNodePtr = builder.Sum(input0Node, function->Name());
break;
case PrimitiveOpType::Plus: case PrimitiveOpType::Plus:
computationNodePtr = builder.Plus(input0Node, input1Node, function->Name()); computationNodePtr = builder.Plus(input0Node, input1Node, function->Name());
break; break;
@ -268,6 +328,12 @@ namespace CNTK
computationNodePtr = builder.Times(input0Node, input1Node, numOutputAxes, function->Name()); computationNodePtr = builder.Times(input0Node, input1Node, numOutputAxes, function->Name());
break; break;
} }
case PrimitiveOpType::TransposeTimes:
{
size_t numOutputAxes = functionConfig[L"numOutputAxes"].GetValue<size_t>();
computationNodePtr = network->AddNodeToNetAndAttachInputs(New<TransposeTimesNode<ElementType>>(network->GetDeviceId(), function->Name(), numOutputAxes), { input0Node, input1Node });
break;
}
case PrimitiveOpType::Convolution: case PrimitiveOpType::Convolution:
{ {
NDShape outputMapCount, kernelShape; NDShape outputMapCount, kernelShape;
@ -296,35 +362,25 @@ namespace CNTK
{ {
Variable initialStateVar = functionInputs[0]; Variable initialStateVar = functionInputs[0];
Variable inputOperandVar = functionInputs[1]; Variable inputOperandVar = functionInputs[1];
// TODO: Current we only support a scalar initial state
if (!initialStateVar.IsConstant() || (initialStateVar.Shape().NumAxes() > 0))
LogicError("Currently PastValue/FutureValue Function only supports scalar initial state");
// TODO: We currently only support input operand with 1 static axis for PastValue/FutureValue
if (inputOperandVar.Shape().NumAxes() != 1)
LogicError("Currently PastValue/FutureValue Function only supports input operand with 1 static axis");
// TODO: We currently only support input operand with 1 dynamic axis for PastValue/FutureValue
if (inputOperandVar.DynamicAxes().size() != 1)
LogicError("Currently PastValue/FutureValue Function only supports input operand with 1 dynamic axis");
// Get the initial state of the PastValue/FutureValue operation // Get the initial state of the PastValue/FutureValue operation
ElementType initStateValue; ElementType initStateValue;
NDArrayView tempView({}, &initStateValue, 1, DeviceDescriptor::CPUDevice()); NDArrayView tempView({}, &initStateValue, 1, DeviceDescriptor::CPUDevice());
tempView.CopyFrom(*Constant(initialStateVar).Value()); tempView.CopyFrom(*Constant(initialStateVar).Value());
size_t stepSize = primitiveFunction->FunctionConfig()[L"stepSize"].GetValue<size_t>();
if (op == PrimitiveOpType::PastValue) if (op == PrimitiveOpType::PastValue)
computationNodePtr = builder.PastValue(input1Node, (float)initStateValue, inputOperandVar.Shape()[0], primitiveFunction->FunctionConfig()[L"stepSize"].GetValue<size_t>(), function->Name()); computationNodePtr = builder.PastValue(input1Node, (float)initStateValue, inputOperandVar.Shape().TotalSize(), stepSize, function->Name());
else else
computationNodePtr = builder.FutureValue(input1Node, (float)initStateValue, inputOperandVar.Shape()[0], primitiveFunction->FunctionConfig()[L"stepSize"].GetValue<size_t>(), function->Name()); computationNodePtr = builder.FutureValue(input1Node, (float)initStateValue, inputOperandVar.Shape().TotalSize(), stepSize, function->Name());
break; break;
} }
case PrimitiveOpType::ReduceSum: case PrimitiveOpType::ReduceElements:
{ {
// TODO: Use the new ReduceElements node instead of the legacy SumElements node for reduction. Currently ReduceElements has incorrect MBLayout inference. auto CNTKInternalReductionAxisIndex = (int)functionConfig[L"CNTKInternalReductionAxisIndex"].GetValue<size_t>();
//computationNodePtr = network->AddNodeToNetAndAttachInputs(New<ReduceElementsNode<ElementType>>(network->GetDeviceId(), function->Name(), L"Sum", 0), { input0Node }); auto reductionOpName = functionConfig[L"ReductionOpName"].GetValue<std::wstring>();
computationNodePtr = builder.Sum(input0Node, function->Name()); computationNodePtr = network->AddNodeToNetAndAttachInputs(New<ReduceElementsNode<ElementType>>(network->GetDeviceId(), function->Name(), reductionOpName, CNTKInternalReductionAxisIndex), { input0Node });
break; break;
} }
case PrimitiveOpType::BatchNormalization: case PrimitiveOpType::BatchNormalization:
@ -353,6 +409,25 @@ namespace CNTK
computationNodePtr = variableToNodeMap[variable]; computationNodePtr = variableToNodeMap[variable];
break; break;
case PrimitiveOpType::PackedIndex:
computationNodePtr = New<PackedIndexNode<ElementType>>(network->GetDeviceId(), function->Name());
network->AddNodeToNetAndAttachInputs(computationNodePtr, { input0Node, input1Node });
break;
case PrimitiveOpType::GatherPacked:
computationNodePtr = New<GatherPackedNode<ElementType>>(network->GetDeviceId(), function->Name());
network->AddNodeToNetAndAttachInputs(computationNodePtr, { input1Node, input0Node });
break;
case PrimitiveOpType::Slice:
{
auto axis = Axis(functionConfig[L"axis"].GetValue<std::wstring>());
int beginIndex = functionConfig[L"beginIndex"].GetValue<size_t>();
int endIndex = functionConfig[L"endIndex"].GetValue<size_t>();
// Internal CNTK SliceNode takes 1-based axis indices instead of 0-based
computationNodePtr = New<SliceNode<ElementType>>(network->GetDeviceId(), function->Name(), beginIndex, endIndex, (int)(axis.StaticAxisIndex() + 1));
network->AddNodeToNetAndAttachInputs(computationNodePtr, { input0Node });
break;
}
default: default:
LogicError("Specified op %s not yet supported", PrimitiveOpTypeName(op)); LogicError("Specified op %s not yet supported", PrimitiveOpTypeName(op));
break; break;
@ -486,11 +561,11 @@ namespace CNTK
if (value->Data()->Shape().NumAxes() == var.Shape().NumAxes()) if (value->Data()->Shape().NumAxes() == var.Shape().NumAxes())
return{ value->Data()->GetMatrix<ElementType>(), nullptr }; return{ value->Data()->GetMatrix<ElementType>(), nullptr };
if (value->Data()->Shape().NumAxes() != (var.Shape().NumAxes() + var.DynamicAxes().size() + 1)) if (value->Data()->Shape().NumAxes() < (var.Shape().NumAxes() + var.DynamicAxes().size()))
InvalidArgument("Value's number of axes should be larger than the Variable's number of axes by 1 + number of dynamic axes"); InvalidArgument("Value's number of axes should be larger than the Variable's number of axes by number of dynamic axes");
if (var.DynamicAxes().size() > 1) if (var.DynamicAxes().size() > 2)
LogicError("More than one dynamic axis for a variable is currently unsupported"); LogicError("More than 2 dynamic axis for a variable is currently unsupported");
size_t maxNumTimeSteps = value->Data()->Shape()[var.Shape().NumAxes()]; size_t maxNumTimeSteps = value->Data()->Shape()[var.Shape().NumAxes()];
size_t numSequences = value->Data()->Shape()[var.Shape().NumAxes() + 1]; size_t numSequences = value->Data()->Shape()[var.Shape().NumAxes() + 1];
@ -618,9 +693,9 @@ namespace CNTK
sequenceLengths.push_back(sequenceInfo.GetNumTimeSteps()); sequenceLengths.push_back(sequenceInfo.GetNumTimeSteps());
} }
// Reshuffle to data to unpack and uninterleave the CNTK form data // Reshuffle the data to unpack and uninterleave the CNTK form packed data
// Now generate the gather indices // Now generate the scatter indices
auto shuffledMatrixData = std::make_shared<Matrix<ElementType>>(matrix.GetNumRows(), maxNumTimeSteps * numSequences, matrix.GetDeviceId()); auto shuffledMatrixData = std::make_shared<Matrix<ElementType>>(matrix.GetNumRows(), maxNumTimeSteps * numSequences, matrix.GetDeviceId(), matrix.GetMatrixType(), matrix.GetFormat());
std::vector<size_t> sequencesShorterThanLongestSequence; std::vector<size_t> sequencesShorterThanLongestSequence;
for (size_t i = 0; i < numSequences; ++i) for (size_t i = 0; i < numSequences; ++i)
@ -659,15 +734,15 @@ namespace CNTK
} }
auto tensorView = new TensorView<ElementType>(shuffledMatrixData, AsTensorShape(valueDataShape)); auto tensorView = new TensorView<ElementType>(shuffledMatrixData, AsTensorShape(valueDataShape));
auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), StorageFormat::Dense, valueDataShape, readOnly, tensorView); auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(shuffledMatrixData->GetFormat()), valueDataShape, readOnly, tensorView);
return MakeSharedObject<Value>(data, mask); return MakeSharedObject<Value>(data, mask);
} }
template <typename ElementType> template <typename ElementType>
/*static*/ ValuePtr CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout(Variable var, const Matrix<ElementType>& matrix, const MBLayoutPtr& layout, bool readOnly /*= true*/) /*static*/ ValuePtr CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout(Variable var, const Matrix<ElementType>& matrix, const MBLayoutPtr& layout, bool readOnly /*= true*/)
{ {
if (var.DynamicAxes().size() > 1) if (var.DynamicAxes().size() > 2)
LogicError("More than one dynamic axis for a variable is currently unsupported"); LogicError("More than 2 dynamic axis for a variable is currently unsupported");
if (AsDataType<ElementType>() != var.GetDataType()) if (AsDataType<ElementType>() != var.GetDataType())
LogicError("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(var.GetDataType())); LogicError("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(var.GetDataType()));
@ -732,7 +807,7 @@ namespace CNTK
MBLayoutPtr layout = CNTKMatrixAndMBLayout.second; MBLayoutPtr layout = CNTKMatrixAndMBLayout.second;
auto nodeLayout = computationNode->GetMBLayout(); auto nodeLayout = computationNode->GetMBLayout();
if (((layout == nullptr) != (nodeLayout == nullptr)) || ((layout != nullptr) && (*layout != *nodeLayout))) if (((layout == nullptr) != (nodeLayout == nullptr)) || ((layout != nullptr) && (*layout != *nodeLayout)))
InvalidArgument("The layout of the specified gradient Value in incompatible with the layout of the corresponding Variable computed during Forward call"); InvalidArgument("The layout of the specified gradient Value is incompatible with the layout of the corresponding Variable computed during Forward call");
computationNode->As<ComputationNode<ElementType>>()->AssignGradient(*CNTKMatrixAndMBLayout.first); computationNode->As<ComputationNode<ElementType>>()->AssignGradient(*CNTKMatrixAndMBLayout.first);
} }
@ -814,12 +889,9 @@ namespace CNTK
} }
if (varValue == nullptr) if (varValue == nullptr)
{ varValue = nodeValue->DeepClone();
auto data = MakeSharedObject<NDArrayView>(var.GetDataType(), valueShape, AsDeviceDescriptor(computationNode->ValuePtr()->GetDeviceId())); else
auto mask = (nodeValue->Mask() != nullptr) ? MakeSharedObject<NDMask>(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr; varValue->CopyFrom(*nodeValue);
varValue = MakeSharedObject<Value>(data, mask);
}
varValue->CopyFrom(*nodeValue);
} }
void CompositeFunction::GetNetworkOutputs(std::unordered_map<Variable, ValuePtr>& outputs) void CompositeFunction::GetNetworkOutputs(std::unordered_map<Variable, ValuePtr>& outputs)
@ -984,7 +1056,7 @@ namespace CNTK
FunctionPtr Round(const Variable& operand, const std::wstring& name/* = L""*/) FunctionPtr Round(const Variable& operand, const std::wstring& name/* = L""*/)
{ {
return Floor(Plus(operand, Constant(NDShape({}), 0.5f)), name); return Floor(Plus(operand, ScalarConstant(operand.GetDataType(), 0.5f)), name);
} }
FunctionPtr Floor(const Variable& operand, const std::wstring& name/* = L""*/) FunctionPtr Floor(const Variable& operand, const std::wstring& name/* = L""*/)
@ -1012,6 +1084,71 @@ namespace CNTK
return UnaryOp(PrimitiveOpType::Softmax, operand, Dictionary(), name); return UnaryOp(PrimitiveOpType::Softmax, operand, Dictionary(), name);
} }
FunctionPtr Hardmax(const Variable& operand, const std::wstring& name/* = L""*/)
{
return UnaryOp(PrimitiveOpType::Hardmax, operand, Dictionary(), name);
}
FunctionPtr TransposeAxes(const Variable& operand, const Axis& axis1, const Axis& axis2, const std::wstring& name /*= L""*/)
{
if (!axis1.IsStaticAxis() || !axis2.IsStaticAxis())
LogicError("TransposeAxes currently does not support transposing dynamic axes");
auto additionalProperties = Dictionary();
additionalProperties[L"axis1"] = axis1.Name();
additionalProperties[L"axis2"] = axis2.Name();
return UnaryOp(PrimitiveOpType::TransposeAxes, operand, std::move(additionalProperties), name);
}
FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name /*= L""*/)
{
if ((endIndex - beginIndex) <= 0)
InvalidArgument("CNTK::Slice: endIndex (%d) - beginIndex (%d) must be a positive number", endIndex, beginIndex);
if (axis == Axis::DefaultBatchAxis())
LogicError("Slice is currently unsupported along the batch axis");
if (axis.IsStaticAxis())
return Internal::Slice(operand, axis, beginIndex, endIndex, name);
auto operandAxes = operand.DynamicAxes();
auto findAxis = std::find(operandAxes.begin(), operandAxes.end(), axis);
if (findAxis == operandAxes.end())
InvalidArgument("The specified dynamic axis named %S does not match any of the dynamic axes of the operand", axis.Name().c_str());
auto beginFlagsLambda = [beginIndex, operand]() {
return (beginIndex > 0) ? Minus(ScalarConstant(operand.GetDataType(), 1.0f), Internal::IsWithin(operand, beginIndex)) : Internal::IsWithin(operand, beginIndex);
};
auto endFlagsLambda = [endIndex, operand]() {
return (endIndex > 0) ? Internal::IsWithin(operand, endIndex) : Minus(ScalarConstant(operand.GetDataType(), 1.0f), Internal::IsWithin(operand, endIndex));
};
FunctionPtr flags;
if (beginIndex == 0)
flags = endFlagsLambda();
else if (endIndex == 0)
flags = beginFlagsLambda();
else
flags = ElementTimes(beginFlagsLambda(), endFlagsLambda());
// Since we are slicing along a dynamic axis, the output variable's dynamic axes will be different from the operand's
std::vector<Axis> newDynamicAxes;
for (auto operandAxis : operandAxes)
{
if (operandAxis == axis)
{
// If we are selecting just one frame from the dynamic axis, we can remove that axis
if ((endIndex - beginIndex) > 1)
newDynamicAxes.push_back(CompositeFunction::NextAutoGeneratedDynamicAxis());
}
else
newDynamicAxes.push_back(operandAxis);
}
return Internal::Gather(operand, flags, newDynamicAxes);
}
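// A minimal usage sketch of the dynamic-axis Slice path above (illustration only, not
// part of the API). The shape and the Placeholder-based operand are hypothetical, and
// the sketch assumes the operand carries the default dynamic axes as a regular Input
// would; keeping frames [0,3) of the sequence axis then routes through
// IsWithin/Where/GatherPacked and yields an output with a fresh auto-generated axis.
inline FunctionPtr SliceFirstThreeFramesExample()
{
    auto operand = Placeholder(NDShape({ 10 })); // hypothetical 10-dimensional sequence input
    return Slice(operand, Axis::DefaultDynamicAxis(), 0, 3);
}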
FunctionPtr BinaryOp(PrimitiveOpType op, const Variable& leftOperand, const Variable& rightOperand, Dictionary&& opConfig, const std::wstring& name) FunctionPtr BinaryOp(PrimitiveOpType op, const Variable& leftOperand, const Variable& rightOperand, Dictionary&& opConfig, const std::wstring& name)
{ {
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(op, std::vector<Variable>({ leftOperand, rightOperand }), std::move(opConfig), name), name); return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(op, std::vector<Variable>({ leftOperand, rightOperand }), std::move(opConfig), name), name);
@ -1074,6 +1211,13 @@ namespace CNTK
return BinaryOp(PrimitiveOpType::Times, leftOperand, rightOperand, std::move(additionalProperties), name); return BinaryOp(PrimitiveOpType::Times, leftOperand, rightOperand, std::move(additionalProperties), name);
} }
FunctionPtr TransposeTimes(const Variable& leftOperand, const Variable& rightOperand, size_t numOutputAxes /*= 1*/, const std::wstring& name/* = L""*/)
{
auto additionalProperties = Dictionary();
additionalProperties[L"numOutputAxes"] = numOutputAxes;
return BinaryOp(PrimitiveOpType::TransposeTimes, leftOperand, rightOperand, std::move(additionalProperties), name);
}
FunctionPtr SquaredError(const Variable& prediction, const Variable& targets, const std::wstring& name/* = L""*/) FunctionPtr SquaredError(const Variable& prediction, const Variable& targets, const std::wstring& name/* = L""*/)
{ {
return BinaryOp(PrimitiveOpType::SquaredError, prediction, targets, Dictionary(), name); return BinaryOp(PrimitiveOpType::SquaredError, prediction, targets, Dictionary(), name);
@ -1081,18 +1225,20 @@ namespace CNTK
FunctionPtr CrossEntropyWithSoftmax(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/) FunctionPtr CrossEntropyWithSoftmax(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/)
{ {
return BinaryOp(PrimitiveOpType::CrossEntropyWithSoftmax, prediction, labels, Dictionary(), name); return ReduceSum(Minus(ReduceLogSum(prediction, Axis(0)), TransposeTimes(labels, prediction)), name);
//return BinaryOp(PrimitiveOpType::CrossEntropyWithSoftmax, prediction, labels, Dictionary(), name);
} }
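// Derivation note for the composition above: with one-hot labels y and prediction
// logits z, TransposeTimes(labels, prediction) picks out z_y and ReduceLogSum computes
// log(sum_j exp(z_j)), so the difference is exactly -log(softmax(z)_y), the
// cross entropy with softmax.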
FunctionPtr ClassificationError(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/) FunctionPtr ClassificationError(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/)
{ {
return BinaryOp(PrimitiveOpType::ClassificationError, prediction, labels, Dictionary(), name); return ReduceSum(Minus(ScalarConstant(prediction.GetDataType(), 1.0f), TransposeTimes(labels, Hardmax(prediction))), name);
//return BinaryOp(PrimitiveOpType::ClassificationError, prediction, labels, Dictionary(), name);
} }
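// Likewise, TransposeTimes(labels, Hardmax(prediction)) is 1 when the argmax of the
// prediction matches the one-hot label and 0 otherwise, so the expression above
// evaluates to the per-sample 0/1 classification error.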
FunctionPtr PastValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name/* = L""*/) FunctionPtr PastValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name/* = L""*/)
{ {
if (operand.DynamicAxes().size() != 1) if (operand.DynamicAxes().size() != 2)
InvalidArgument("PastValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic axis"); InvalidArgument("PastValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic sequence-axis");
auto additionalProperties = Dictionary(); auto additionalProperties = Dictionary();
additionalProperties[L"stepSize"] = DictionaryValue(stepSize); additionalProperties[L"stepSize"] = DictionaryValue(stepSize);
@ -1101,8 +1247,8 @@ namespace CNTK
FunctionPtr FutureValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name/* = L""*/) FunctionPtr FutureValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name/* = L""*/)
{ {
if (operand.DynamicAxes().size() != 1) if (operand.DynamicAxes().size() != 2)
InvalidArgument("FutureValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic axis"); InvalidArgument("FutureValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic sequence-axis");
auto additionalProperties = Dictionary(); auto additionalProperties = Dictionary();
additionalProperties[L"stepSize"] = DictionaryValue(stepSize); additionalProperties[L"stepSize"] = DictionaryValue(stepSize);
@ -1111,7 +1257,17 @@ namespace CNTK
FunctionPtr ReduceSum(const Variable& operand, const std::wstring& name/* = L""*/) FunctionPtr ReduceSum(const Variable& operand, const std::wstring& name/* = L""*/)
{ {
return UnaryOp(PrimitiveOpType::ReduceSum, operand, Dictionary(), name); return UnaryOp(PrimitiveOpType::SumAll, operand, Dictionary(), name);
}
FunctionPtr ReduceSum(const Variable& operand, const Axis& axis, const std::wstring& name/* = L""*/)
{
return Internal::ReduceElements(operand, PrimitiveFunction::InternalSumReductionOpName, axis, name);
}
FunctionPtr ReduceLogSum(const Variable& operand, const Axis& axis, const std::wstring& name/* = L""*/)
{
return Internal::ReduceElements(operand, PrimitiveFunction::InternalLogSumReductionOpName, axis, name);
} }
FunctionPtr PerDimMeanVarianceNormalize(const Variable& operand, const NDArrayViewPtr& mean, const NDArrayViewPtr& invStdDev, const std::wstring& name /*= L""*/) FunctionPtr PerDimMeanVarianceNormalize(const Variable& operand, const NDArrayViewPtr& mean, const NDArrayViewPtr& invStdDev, const std::wstring& name /*= L""*/)
@ -1207,4 +1363,94 @@ namespace CNTK
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Combine, inputs, Dictionary(), name), name); return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Combine, inputs, Dictionary(), name), name);
} }
namespace Internal
{
FunctionPtr PackedIndex(const Variable& operand, const Variable& index, const std::wstring& name /*= L""*/)
{
return BinaryOp(PrimitiveOpType::PackedIndex, operand, index, Dictionary(), name);
}
FunctionPtr GatherPacked(const Variable& operand, const Variable& packedIndex, const std::wstring& name /*= L""*/)
{
return BinaryOp(PrimitiveOpType::GatherPacked, operand, packedIndex, Dictionary(), name);
}
FunctionPtr ZeroesLike(const Variable& operand)
{
if (operand.Shape().NumAxes() > 1)
LogicError("ZerosLike currently does not support operands with more than 1 static axes");
auto rowSliceFunc = Internal::Slice(operand, Axis(0), 0, 1);
return Minus(rowSliceFunc, rowSliceFunc);
}
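// Note on the construction above: slicing out one row and subtracting it from itself
// yields an all-zero [1]-shaped output that still carries the operand's dynamic axes
// and sequence layout, which is what IsWithin below needs as a recurrence input.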
FunctionPtr IsWithin(const Variable& operand, int offset, const std::wstring& name /*= L""*/)
{
if (offset == 0)
InvalidArgument("Internal::CNTK::IsWithin: The offset must be positive");
if (offset > 0)
return PastValue(ScalarConstant(operand.GetDataType(), 1.0f), ZeroesLike(operand), offset, name);
else
return FutureValue(ScalarConstant(operand.GetDataType(), 1.0f), ZeroesLike(operand), -offset, name);
}
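// Worked example: for a sequence of length 5 and offset = 2, the PastValue recurrence
// (initial state 1 over an all-zero input) emits the flags 1 1 0 0 0, i.e. it marks
// positions t < 2; a negative offset marks the trailing |offset| positions via
// FutureValue instead.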
FunctionPtr Where(const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name /*= L""*/)
{
auto additionalProperties = Dictionary();
std::vector<std::wstring> newDynamicAxesNames;
for (auto axis : newDynamicAxes)
newDynamicAxesNames.push_back(axis.Name());
additionalProperties[L"newDynamicAxes"] = AsDictionaryValueVector(newDynamicAxesNames);
return UnaryOp(PrimitiveOpType::Where, condition, std::move(additionalProperties), name);
}
FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name /*= L""*/)
{
return Internal::GatherPacked(operand, Internal::PackedIndex(operand, Where(condition, newDynamicAxes)));
}
FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name /*= L""*/)
{
auto additionalProperties = Dictionary();
additionalProperties[L"axis"] = axis.Name();
additionalProperties[L"beginIndex"] = (size_t)beginIndex;
additionalProperties[L"endIndex"] = (size_t)endIndex;
return UnaryOp(PrimitiveOpType::Slice, operand, std::move(additionalProperties), name);
}
FunctionPtr ReduceElements(const Variable& operand, const std::wstring& reductionOpName, const Axis& axis, const std::wstring& name /*= L""*/)
{
using namespace std::placeholders;
if (axis.IsStaticAxis())
{
auto additionalProperties = Dictionary();
additionalProperties[L"CNTKInternalReductionAxisIndex"] = (size_t)(axis.StaticAxisIndex() + 1);
additionalProperties[L"ReductionOpName"] = reductionOpName;
return UnaryOp(PrimitiveOpType::ReduceElements, operand, std::move(additionalProperties), name);
}
if (axis == Axis::DefaultBatchAxis())
LogicError("Reduction is currently unsupported along the batch axis");
if (reductionOpName != PrimitiveFunction::InternalSumReductionOpName)
LogicError("%S reduction along dynamic axis is currently unsupported", reductionOpName.c_str());
std::function<FunctionPtr(const Variable& leftOperand, const Variable& rightOperand)> reductionFunctor;
if (reductionOpName == PrimitiveFunction::InternalSumReductionOpName)
reductionFunctor = std::bind(Plus, _1, _2, L"");
// We are reducing over a dynamic axis which is currently implemented using recurrence
auto cumulativeSumFunctionPlaceholder = Placeholder(operand.Shape());
auto prevAccumulatedValuesFunction = PastValue(ScalarConstant(operand.GetDataType(), 0.0f), cumulativeSumFunctionPlaceholder, 1);
auto cumulativeSumFunction = reductionFunctor(prevAccumulatedValuesFunction, operand);
cumulativeSumFunction->ReplacePlaceholders({ { cumulativeSumFunctionPlaceholder, cumulativeSumFunction } });
return CNTK::Slice(cumulativeSumFunction, axis, -1, 0);
}
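// Worked illustration of the recurrence above: for frames x_0, x_1, x_2 the cumulative
// sum s_t = s_{t-1} + x_t (with s_{-1} = 0) produces x_0, x_0+x_1, x_0+x_1+x_2, and the
// trailing Slice(..., -1, 0) keeps only the last frame, i.e. the sum over the dynamic axis.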
}
} }


@ -27,7 +27,12 @@ namespace CNTK
Abs, Abs,
Reciprocal, Reciprocal,
Softmax, Softmax,
Hardmax,
TransposeAxes,
Where,
Slice,
Pooling, Pooling,
SumAll,
Plus, Plus,
Minus, Minus,
ElementTimes, ElementTimes,
@ -37,14 +42,17 @@ namespace CNTK
LessEqual, LessEqual,
Greater, Greater,
GreaterEqual, GreaterEqual,
PackedIndex,
GatherPacked,
Times, Times,
TransposeTimes,
Convolution, Convolution,
SquaredError, SquaredError,
CrossEntropyWithSoftmax, CrossEntropyWithSoftmax,
ClassificationError, ClassificationError,
PastValue, PastValue,
FutureValue, FutureValue,
ReduceSum, ReduceElements,
BatchNormalization, BatchNormalization,
Combine, Combine,
}; };
@ -77,7 +85,12 @@ namespace CNTK
{ PrimitiveOpType::Abs, "Abs" }, { PrimitiveOpType::Abs, "Abs" },
{ PrimitiveOpType::Reciprocal, "Reciprocal" }, { PrimitiveOpType::Reciprocal, "Reciprocal" },
{ PrimitiveOpType::Softmax, "Softmax" }, { PrimitiveOpType::Softmax, "Softmax" },
{ PrimitiveOpType::Hardmax, "Hardmax" },
{ PrimitiveOpType::TransposeAxes, "TransposeAxes" },
{ PrimitiveOpType::Where, "Where" },
{ PrimitiveOpType::Slice, "Slice" },
{ PrimitiveOpType::Pooling, "Pooling" }, { PrimitiveOpType::Pooling, "Pooling" },
{ PrimitiveOpType::SumAll, "SumAll" },
{ PrimitiveOpType::Plus, "Plus" }, { PrimitiveOpType::Plus, "Plus" },
{ PrimitiveOpType::Minus, "Minus" }, { PrimitiveOpType::Minus, "Minus" },
{ PrimitiveOpType::ElementTimes, "ElementTimes" }, { PrimitiveOpType::ElementTimes, "ElementTimes" },
@ -87,14 +100,17 @@ namespace CNTK
{ PrimitiveOpType::LessEqual, "LessEqual" }, { PrimitiveOpType::LessEqual, "LessEqual" },
{ PrimitiveOpType::Greater, "Greater" }, { PrimitiveOpType::Greater, "Greater" },
{ PrimitiveOpType::GreaterEqual, "GreaterEqual" }, { PrimitiveOpType::GreaterEqual, "GreaterEqual" },
{ PrimitiveOpType::PackedIndex, "PackedIndex" },
{ PrimitiveOpType::GatherPacked, "GatherPacked" },
{ PrimitiveOpType::Times, "Times" }, { PrimitiveOpType::Times, "Times" },
{ PrimitiveOpType::TransposeTimes, "TransposeTimes" },
{ PrimitiveOpType::Convolution, "Convolution" }, { PrimitiveOpType::Convolution, "Convolution" },
{ PrimitiveOpType::SquaredError, "SquaredError" }, { PrimitiveOpType::SquaredError, "SquaredError" },
{ PrimitiveOpType::CrossEntropyWithSoftmax, "CrossEntropyWithSoftmax" }, { PrimitiveOpType::CrossEntropyWithSoftmax, "CrossEntropyWithSoftmax" },
{ PrimitiveOpType::ClassificationError, "ClassificationError" }, { PrimitiveOpType::ClassificationError, "ClassificationError" },
{ PrimitiveOpType::PastValue, "PastValue" }, { PrimitiveOpType::PastValue, "PastValue" },
{ PrimitiveOpType::FutureValue, "FutureValue" }, { PrimitiveOpType::FutureValue, "FutureValue" },
{ PrimitiveOpType::ReduceSum, "ReduceSum" }, { PrimitiveOpType::ReduceElements, "ReduceElements" },
{ PrimitiveOpType::BatchNormalization, "BatchNormalization" }, { PrimitiveOpType::BatchNormalization, "BatchNormalization" },
{ PrimitiveOpType::Combine, "Combine" } { PrimitiveOpType::Combine, "Combine" }
}; };
@ -107,6 +123,15 @@ namespace CNTK
class PrimitiveFunction final : public Function class PrimitiveFunction final : public Function
{ {
public:
static const std::wstring InternalSumReductionOpName;
static const std::wstring InternalLogSumReductionOpName;
static const std::wstring InternalMeanReductionOpName;
static const std::wstring InternalMaxReductionOpName;
static const std::wstring InternalMinReductionOpName;
static const std::wstring InternalAllReductionOpName;
static const std::wstring InternalAnyReductionOpName;
public: public:
PrimitiveFunction(PrimitiveOpType op, const std::vector<Variable>& inputs, Dictionary&& functionConfig, const std::wstring& functionName = L"") PrimitiveFunction(PrimitiveOpType op, const std::vector<Variable>& inputs, Dictionary&& functionConfig, const std::wstring& functionName = L"")
: Function(inputs, GetOutputVariables(op, inputs, this, functionConfig), nullptr, functionName), m_op(op), m_functionConfig(std::move(functionConfig)) : Function(inputs, GetOutputVariables(op, inputs, this, functionConfig), nullptr, functionName), m_op(op), m_functionConfig(std::move(functionConfig))
@ -242,16 +267,26 @@ namespace CNTK
DataType outputDataType = inputs[0].GetDataType(); DataType outputDataType = inputs[0].GetDataType();
// We currently require that the inputs' dynamic axes if any match // We currently require that the inputs' dynamic axes if any match
std::vector<Axis> outputDynamicAxes = inputs[0].DynamicAxes(); std::vector<Axis> outputDynamicAxes;
for (auto inputVar : inputs) if (op == PrimitiveOpType::Where)
;
else if ((op == PrimitiveOpType::PackedIndex) || (op == PrimitiveOpType::GatherPacked))
{ {
auto currentInputDynamicAxes = inputVar.DynamicAxes(); outputDynamicAxes = inputs[1].DynamicAxes();
if (outputDynamicAxes.empty()) }
outputDynamicAxes = currentInputDynamicAxes; else
else {
outputDynamicAxes = inputs[0].DynamicAxes();
for (auto inputVar : inputs)
{ {
if (!currentInputDynamicAxes.empty() && (currentInputDynamicAxes != outputDynamicAxes)) auto currentInputDynamicAxes = inputVar.DynamicAxes();
LogicError("Currently if an operand of a binary elementwise operation has any dynamic axes, those must match the dynamic axes of the other operand"); if (outputDynamicAxes.empty())
outputDynamicAxes = currentInputDynamicAxes;
else
{
if (!currentInputDynamicAxes.empty() && (currentInputDynamicAxes != outputDynamicAxes))
LogicError("Currently if an operand of a binary elementwise operation has any dynamic axes, those must match the dynamic axes of the other operand");
}
} }
} }
@ -268,9 +303,38 @@ namespace CNTK
case PrimitiveOpType::Abs: case PrimitiveOpType::Abs:
case PrimitiveOpType::Reciprocal: case PrimitiveOpType::Reciprocal:
case PrimitiveOpType::Softmax: case PrimitiveOpType::Softmax:
case PrimitiveOpType::Hardmax:
assert(inputs.size() == 1); assert(inputs.size() == 1);
if (((op == PrimitiveOpType::Softmax) || (op == PrimitiveOpType::Hardmax)) && (inputs[0].Shape().NumAxes() > 1))
InvalidArgument("Softmax/Hardmax operation can only be applied to a 1D input");
outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[0].Shape()), outputDataType, owner, outputDynamicAxes)); outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[0].Shape()), outputDataType, owner, outputDynamicAxes));
break; break;
case PrimitiveOpType::TransposeAxes:
{
assert(inputs.size() == 1);
auto axis1 = Axis(functionConfig[L"axis1"].GetValue<std::wstring>());
auto axis2 = Axis(functionConfig[L"axis2"].GetValue<std::wstring>());
if (!axis1.IsStaticAxis() || !axis2.IsStaticAxis())
LogicError("TransposeAxes operation currently does not support transposing dynamic axes");
auto transposedTensorShape = AsTensorShape(inputs[0].Shape(), true);
transposedTensorShape.SwapDimsInPlace(axis1.StaticAxisIndex(), axis2.StaticAxisIndex());
outputs.push_back(Variable(AsNDShape(transposedTensorShape), outputDataType, owner, outputDynamicAxes));
break;
}
case PrimitiveOpType::Where:
{
assert(inputs.size() == 1);
std::vector<Axis> newDynamicAxes;
auto newDynamicAxesNames = AsBasicElementTypeVector<std::wstring>(functionConfig[L"newDynamicAxes"].GetValue<std::vector<DictionaryValue>>());
for (auto axisName : newDynamicAxesNames)
newDynamicAxes.push_back(Axis(axisName));
outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[0].Shape()), outputDataType, owner, newDynamicAxes));
break;
}
case PrimitiveOpType::Pooling: case PrimitiveOpType::Pooling:
{ {
assert(inputs.size() == 1); assert(inputs.size() == 1);
@ -282,6 +346,10 @@ namespace CNTK
outputs.push_back(Variable(ConvolutionOpOutputShape(inputs[0].Shape(), poolingWindowsShape, { 1 }, strides, { true }, autoPadding, lowerPad, upperPad, false), outputDataType, owner, outputDynamicAxes)); outputs.push_back(Variable(ConvolutionOpOutputShape(inputs[0].Shape(), poolingWindowsShape, { 1 }, strides, { true }, autoPadding, lowerPad, upperPad, false), outputDataType, owner, outputDynamicAxes));
break; break;
} }
case PrimitiveOpType::SumAll:
assert(inputs.size() == 1);
outputs.push_back(Variable({}, outputDataType, owner, std::vector<Axis>({})));
break;
case PrimitiveOpType::Plus: case PrimitiveOpType::Plus:
case PrimitiveOpType::Minus: case PrimitiveOpType::Minus:
case PrimitiveOpType::ElementTimes: case PrimitiveOpType::ElementTimes:
@ -297,15 +365,26 @@ namespace CNTK
case PrimitiveOpType::Times: case PrimitiveOpType::Times:
{ {
assert(inputs.size() == 2); assert(inputs.size() == 2);
// TODO: Support dynamic axes on the left operand
if (!inputs[0].DynamicAxes().empty())
LogicError("Dynamic axes are currently unsupported for left operand of a Times operation");
size_t numOutputAxes = functionConfig[L"numOutputAxes"].GetValue<size_t>(); size_t numOutputAxes = functionConfig[L"numOutputAxes"].GetValue<size_t>();
outputs.push_back(Variable(TimesOpOutputShape(inputs[0].Shape(), inputs[1].Shape(), numOutputAxes), outputDataType, owner, outputDynamicAxes)); outputs.push_back(Variable(TimesOpOutputShape(inputs[0].Shape(), inputs[1].Shape(), numOutputAxes), outputDataType, owner, outputDynamicAxes));
break; break;
} }
case PrimitiveOpType::TransposeTimes:
{
assert(inputs.size() == 2);
auto numLeftOperandAxes = inputs[0].Shape().NumAxes();
if (numLeftOperandAxes > 2)
InvalidArgument("TransposeTimes operation only supports left operands of rank 1 or 2");
NDShape transposedLeftOperandShape(2, 1);
for (size_t i = 0; i < numLeftOperandAxes; ++i)
transposedLeftOperandShape[transposedLeftOperandShape.NumAxes() - i - 1] = inputs[0].Shape()[i];
size_t numOutputAxes = functionConfig[L"numOutputAxes"].GetValue<size_t>();
outputs.push_back(Variable(TimesOpOutputShape(transposedLeftOperandShape, inputs[1].Shape(), numOutputAxes), outputDataType, owner, outputDynamicAxes));
break;
}
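// Shape sketch for the inference above: a [K x N] left operand becomes [N x K] (a
// rank-1 [K] operand is padded to [1 x K]), so TransposeTimes of a [K x N] matrix with
// a [K] vector infers a [N]-shaped output, matching Times on the explicitly
// transposed matrix.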
case PrimitiveOpType::Convolution: case PrimitiveOpType::Convolution:
{ {
assert(inputs.size() == 2); assert(inputs.size() == 2);
@ -341,26 +420,45 @@ namespace CNTK
for (size_t i = 0; i < inputs[0].Shape().NumAxes(); ++i) for (size_t i = 0; i < inputs[0].Shape().NumAxes(); ++i)
reductionAxes.push_back(i); reductionAxes.push_back(i);
outputs.push_back(Variable(ReductionOpOutputShape(op, predictionShape, reductionAxes), outputDataType, owner, {})); outputs.push_back(Variable(ReductionOpOutputShape(op, predictionShape, reductionAxes), outputDataType, owner, std::vector<Axis>({})));
break; break;
} }
case PrimitiveOpType::PastValue: case PrimitiveOpType::PastValue:
case PrimitiveOpType::FutureValue: case PrimitiveOpType::FutureValue:
{
assert(inputs.size() == 2); assert(inputs.size() == 2);
Variable initialStateVar = inputs[0];
Variable inputOperandVar = inputs[1];
// TODO: Currently we only support a scalar initial state
if (!initialStateVar.IsConstant() || (initialStateVar.Shape().NumAxes() > 0))
LogicError("Currently PastValue/FutureValue Function only supports scalar initial state");
// TODO: We currently only support input operand with 1 static axis for PastValue/FutureValue
if (inputOperandVar.Shape().NumAxes() > 1)
LogicError("Currently PastValue/FutureValue Function only supports input operand with <= 1 static axis");
// TODO: We currently only support input operands with 2 dynamic axes (1 sequence-axis and 1 batch-axis) for PastValue/FutureValue
if (inputOperandVar.DynamicAxes().size() != 2)
LogicError("Currently PastValue/FutureValue Function only supports input operand with with 2 dynamic axis (1 sequence-axis and 1 batch-axis)");
outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[1].Shape()), outputDataType, owner, outputDynamicAxes)); outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[1].Shape()), outputDataType, owner, outputDynamicAxes));
break; break;
case PrimitiveOpType::ReduceSum: }
case PrimitiveOpType::ReduceElements:
{ {
assert(inputs.size() == 1); assert(inputs.size() == 1);
auto CNTKInternalReductionAxisIndex = functionConfig[L"CNTKInternalReductionAxisIndex"].GetValue<size_t>();
// TODO: For reductions, we should remove any of the dynamic axes from 'outputDynamicAxes' that are being reduced over.
// Currently we only support reductions that reduce over all axes
std::vector<Axis> reductionOutputDynamicAxes = {};
std::vector<size_t> reductionAxes; std::vector<size_t> reductionAxes;
for (size_t i = 0; i < inputs[0].Shape().NumAxes(); ++i) // TODO: Do not use an integer literal for the special value of axis id that indicates all static axes
reductionAxes.push_back(i); if (CNTKInternalReductionAxisIndex == 0)
{
for (size_t i = 0; i < inputs[0].Shape().NumAxes(); ++i)
reductionAxes.push_back(i);
}
else
reductionAxes.push_back(CNTKInternalReductionAxisIndex - 1);
outputs.push_back(Variable(ReductionOpOutputShape(op, inputs[0].Shape(), reductionAxes), outputDataType, owner, reductionOutputDynamicAxes)); outputs.push_back(Variable(ReductionOpOutputShape(op, inputs[0].Shape(), reductionAxes), outputDataType, owner, inputs[0].DynamicAxes()));
break; break;
} }
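// Mapping used above (matching the convention in Internal::ReduceElements): a stored
// index of 0 reduces over all static axes, while a stored index i > 0 reduces over
// static axis i - 1, e.g. 1 -> static axis 0.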
case PrimitiveOpType::BatchNormalization: case PrimitiveOpType::BatchNormalization:
@ -369,6 +467,60 @@ namespace CNTK
case PrimitiveOpType::Combine: case PrimitiveOpType::Combine:
outputs = inputs; outputs = inputs;
break; break;
case PrimitiveOpType::PackedIndex:
outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[1].Shape()), outputDataType, owner, outputDynamicAxes));
break;
case PrimitiveOpType::GatherPacked:
{
bool sourceHasDynamicAxis = !inputs[0].DynamicAxes().empty();
NDShape outputShape;
// inherit tensor dimension from sourceData, minus the last (column or time) dimension. TODO this needs to become simpler...
if (sourceHasDynamicAxis)
outputShape = inputs[0].Shape();
else
{
if (inputs[0].Shape().NumAxes() > 1)
outputShape = inputs[0].Shape().SubShape(0, inputs[0].Shape().NumAxes() - 1);
else
outputShape = {};
}
outputs.push_back(Variable(outputShape, outputDataType, owner, outputDynamicAxes));
break;
}
case PrimitiveOpType::Slice:
{
auto axis = Axis(functionConfig[L"axis"].GetValue<std::wstring>());
int beginIndex = functionConfig[L"beginIndex"].GetValue<size_t>();
int endIndex = functionConfig[L"endIndex"].GetValue<size_t>();
if (!axis.IsStaticAxis())
LogicError("Built-in Slice operation currently does not support slicing along dynamic axis");
if (axis.StaticAxisIndex() >= inputs[0].Shape().NumAxes())
InvalidArgument("The specified axis index (%d) for the Slice operation is outside the bounds of the available axes of the input", (int)axis.StaticAxisIndex());
size_t sliceAxisDim = inputs[0].Shape()[axis.StaticAxisIndex()];
int realBeginIndex = (beginIndex >= 0) ? beginIndex : beginIndex + sliceAxisDim;
int realEndIndex = (endIndex > 0) ? endIndex : endIndex + sliceAxisDim;
if ((sliceAxisDim < realEndIndex) || (realEndIndex < realBeginIndex) || (realBeginIndex < 0))
RuntimeError("Slice operation: Index range [%d,%d), interpreted as [%d,%d), is invalid for input ([%S]).",
beginIndex,
endIndex,
realBeginIndex,
realEndIndex,
inputs[0].Shape().AsString().c_str());
auto outputTensorShape = AsTensorShape(inputs[0].Shape(), true);
// propagate as much as we can
if ((axis.StaticAxisIndex() < outputTensorShape.GetRank()) && (0 <= realBeginIndex) && (realBeginIndex <= realEndIndex) && (realEndIndex <= sliceAxisDim))
outputTensorShape.NarrowTo(axis.StaticAxisIndex(), realBeginIndex, realEndIndex);
outputs.push_back(Variable(AsNDShape(outputTensorShape), outputDataType, owner, outputDynamicAxes));
break;
}
default: default:
LogicError("Specified op %s not yet supported", PrimitiveOpTypeName(op)); LogicError("Specified op %s not yet supported", PrimitiveOpTypeName(op));
break; break;
@ -417,6 +569,17 @@ namespace CNTK
std::unordered_map<StreamInfo, std::pair<NDArrayViewPtr, NDArrayViewPtr>>& computedMeanAndInvStdDevs, std::unordered_map<StreamInfo, std::pair<NDArrayViewPtr, NDArrayViewPtr>>& computedMeanAndInvStdDevs,
const DeviceDescriptor& device /*= DeviceDescriptor::CPUDevice()*/); const DeviceDescriptor& device /*= DeviceDescriptor::CPUDevice()*/);
public:
static std::wstring s_internalDefaultDynamicAxisName;
static std::wstring s_internalNoSequenceAxisName;
static Axis NextAutoGeneratedDynamicAxis()
{
static std::atomic<unsigned int> nextAutoGeneratedDynamicAxis(0);
static const std::wstring autoGeneratedDynamicAxisNamePrefix = L"autoGeneratedDynamicAxis_";
return Axis(autoGeneratedDynamicAxisNamePrefix + std::to_wstring(nextAutoGeneratedDynamicAxis++));
}
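// Sketch: successive calls mint unique axes, e.g. the first two calls yield
// Axis(L"autoGeneratedDynamicAxis_0") and Axis(L"autoGeneratedDynamicAxis_1"); the
// dynamic-axis Slice overload relies on this to name the reduced sequence axis.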
public: public:
static CompositeFunctionPtr Create(const FunctionPtr& rootFunction, const std::wstring& name = L"") static CompositeFunctionPtr Create(const FunctionPtr& rootFunction, const std::wstring& name = L"")
{ {
@ -524,4 +687,17 @@ namespace CNTK
// the next 'Backward' call. // the next 'Backward' call.
std::unordered_set<Variable> m_currentBackpropRoots; std::unordered_set<Variable> m_currentBackpropRoots;
}; };
inline std::vector<CNTK::Axis> DynamicAxesFromInternalDynamicAxisName(const std::wstring& internalDynamicAxisName)
{
std::vector<CNTK::Axis> inputVarDynamicAxes;
if (internalDynamicAxisName == CNTK::CompositeFunction::s_internalDefaultDynamicAxisName)
inputVarDynamicAxes = { CNTK::Axis::DefaultDynamicAxis(), CNTK::Axis::DefaultBatchAxis() };
else if (internalDynamicAxisName == CNTK::CompositeFunction::s_internalNoSequenceAxisName)
inputVarDynamicAxes = { CNTK::Axis::DefaultBatchAxis() };
else
inputVarDynamicAxes = { CNTK::Axis(internalDynamicAxisName), CNTK::Axis::DefaultBatchAxis() };
return inputVarDynamicAxes;
}
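// Examples of the mapping above (the first two names are the internal constants):
//   s_internalDefaultDynamicAxisName (L"") -> { DefaultDynamicAxis(), DefaultBatchAxis() }
//   s_internalNoSequenceAxisName           -> { DefaultBatchAxis() }
//   any other name, e.g. L"myAxis"         -> { Axis(L"myAxis"), DefaultBatchAxis() }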
} }


@ -8,19 +8,18 @@
#include "Utils.h" #include "Utils.h"
#define UPDATE_FUNCTION \ #define UPDATE_FUNCTION \
switch (smoothedGradientValue->GetDataType()) \ switch (smoothedGradientValue->GetDataType()) \
{ \ { \
case DataType::Float: \ case DataType::Float: \
Update<float>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \ Update<float>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
break; \ break; \
case DataType::Double: \ case DataType::Double: \
Update<double>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \ Update<double>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
break; \ break; \
default: \ default: \
NOT_IMPLEMENTED; \ NOT_IMPLEMENTED; \
} }
using namespace Microsoft::MSR::CNTK; using namespace Microsoft::MSR::CNTK;
using namespace std; using namespace std;
@ -141,7 +140,7 @@ namespace CNTK
// L1 regularizer with proximal gradient descent method // L1 regularizer with proximal gradient descent method
if (m_additionalOptions.l1RegularizationWeight > 0) if (m_additionalOptions.l1RegularizationWeight > 0)
{ {
auto learningRate = ElementType(ParameterDependentLearningRate(parameter)); auto learningRate = ElementType(m_learningRates[m_sampleCount]);
// multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample // multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample
auto weight = ElementType(learningRate * m_additionalOptions.l1RegularizationWeight * actualMBSize); auto weight = ElementType(learningRate * m_additionalOptions.l1RegularizationWeight * actualMBSize);
parameterValue->GetWritableMatrix<ElementType>()->InplaceSoftThreshold(weight); parameterValue->GetWritableMatrix<ElementType>()->InplaceSoftThreshold(weight);
@ -154,48 +153,49 @@ namespace CNTK
return arrayView->GetWritableTensorView<ElementType>(); return arrayView->GetWritableTensorView<ElementType>();
} }
LearnerBase::LearnerBase(const unordered_set<Parameter>& parameters) LearnerBase::LearnerBase(const unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool allocateSmoothGradients /* = true */)
: Learner(parameters), : Learner(parameters),
m_learningRatePerSample(0.0), m_learningRates(learningRates),
m_sampleCount(0) m_sampleCount(0),
m_minibatchCount(0)
{ {
const unordered_set<Parameter>& parameterSet = parameters; for (const auto& parameter : parameters)
for (const auto& parameter : parameterSet)
{ {
// TODO: using the same device to allocate data for all smoothed gradients. Is this correct? if (!allocateSmoothGradients)
// Should the device be specified on the per-parameter basis?
NDArrayViewPtr view;
if (parameter.GetDataType() == DataType::Float)
{ {
view = MakeSharedObject<NDArrayView>(0.0f, parameter.Shape(), parameter.Value()->Device()); continue;
} }
else
{ NDArrayViewPtr view = AllocateNDArrayView(parameter, parameter.Shape());
view = MakeSharedObject<NDArrayView>(0.0, parameter.Shape(), parameter.Value()->Device());
}
m_smoothedGradientValues.insert(make_pair(parameter, view)); m_smoothedGradientValues.insert(make_pair(parameter, view));
m_additionalOptions.learningRateMultipliers.insert(make_pair(parameter, 1.0));
} }
} }
void LearnerBase::ResetSmoothedGradients() /*static*/ NDArrayViewPtr LearnerBase::AllocateNDArrayView(const Parameter& parameter, const NDShape& shape)
{ {
for (const auto& parameter : Parameters()) if (parameter.GetDataType() == DataType::Float)
{ {
-            const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
-            const auto& data = smoothedGradientValue;
-            switch (data->GetDataType())
-            {
-            case DataType::Float:
-                data->SetValue(0.0f);
-                break;
-            case DataType::Double:
-                data->SetValue(0.0);
-                break;
-            default:
-                LogicError("Unsupported DataType %s", ::CNTK::DataTypeName(data->GetDataType()));
-            }
+            return MakeSharedObject<NDArrayView>(float(0.0), shape, parameter.Value()->Device());
+        }
+        else
+        {
+            return MakeSharedObject<NDArrayView>(0.0, shape, parameter.Value()->Device());
+        }
+    }
+
+    /*static*/ NDShape LearnerBase::GetMatrixShape(const Parameter& parameter)
+    {
+        if (parameter.GetDataType() == DataType::Float)
+        {
+            auto matrix = GetMatrix<float>(parameter.Value());
+            return { matrix->GetNumRows(), matrix->GetNumCols() };
+        }
+        else
+        {
+            auto matrix = GetMatrix<double>(parameter.Value());
+            return { matrix->GetNumRows(), matrix->GetNumCols() };
+        }
     }
 }
@@ -219,17 +219,19 @@ namespace CNTK
 #endif
 #if DUMPOUTPUT
+            auto learningRate = ElementType(m_learningRates[m_sampleCount]);
+            auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
             LOGPRINTF(stderr, "learnRatePerSample=%0.8f, momentum=%0.8f, actualMBSize=%ld\n",
-                      m_learningRatePerSample, m_momentumPerSample, trainingSampleCount);
+                      learningRate, momentum, trainingSampleCount);
             LOGPRINTF(stderr, "GradUpdateType()=%s, GradientUpdateNoiseStd()=%0.8f\n",
-                      LearnerType().c_str(), m_GaussianNoiseInjectStd);
+                      LearnerType().c_str(), m_additionalOptions.gaussianNoiseInjectionStdDev);
             Print(gradientValue, "Gradient Update");
             Print(smoothedGradientValue, "Smoothed Gradient Input");
 #endif
             UPDATE_FUNCTION;
 #if DUMPOUTPUT
-            Print(parameterValue, "Parameter Update");
+            Print(parameter.Value(), "Parameter Update");
 #endif
 #ifdef _DEBUG
@@ -239,6 +241,7 @@ namespace CNTK
 #endif
         }
         m_sampleCount += trainingSampleCount;
+        m_minibatchCount++;
         return false;
     }
@@ -265,9 +268,16 @@ namespace CNTK
     /*virtual*/ Dictionary LearnerBase::GetCheckpointState() const /*override*/
     {
-        NOT_IMPLEMENTED; // Until the new checkpointing is fully fleshed out, nobody should be calling this.
         Dictionary checkpoint;
+        checkpoint[L"checkpointVersion"] = checkpointVersion;
+        checkpoint[L"sampleCount"] = m_sampleCount;
+        checkpoint[L"minibatchCount"] = m_minibatchCount;
+
+        // TODO: should we also save learning rate schedule into the checkpoint?
+        // If that is the case, need to be able to override this method in subclasses
+        // and save momentum schedule as well.
         for (const auto& parameter : Parameters())
         {
             // TODO: parameter name is not guaranteed to be unique. Instead, all serializable objects
@@ -277,31 +287,48 @@ namespace CNTK
             {
                 LogicError("Parameter names must be unique");
             }
-            const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
-            // Potentially, could store things like dimensions, element size, format, etc., but
-            // that seems to be redundant, since all of that is passed in the constructor.
-            checkpoint[parameter.Name()] = SerializeToVector(smoothedGradientValue);
+            const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
+            checkpoint[parameter.Name()] = *smoothedGradientValue;
         }
         return checkpoint;
     }

     /*virtual*/ void LearnerBase::RestoreFromCheckpoint(const Dictionary& checkpoint) /*override*/
     {
-        NOT_IMPLEMENTED; // Until the new checkpointing is fully fleshed out, nobody should be calling this.
+        m_sampleCount = checkpoint[L"sampleCount"].GetValue<size_t>();
+        m_minibatchCount = checkpoint[L"minibatchCount"].GetValue<size_t>();
+
+        size_t version = checkpoint[L"checkpointVersion"].GetValue<size_t>();
+        if (checkpointVersion != version)
+        {
+            // At the moment, we only support one version, so this should never happen.
+            LogicError("Unsupported checkpoint version.");
+        }
+
         for (const auto& parameter : Parameters())
         {
             if (!checkpoint.Contains(parameter.Name()))
             {
                 LogicError("Checkpoint does not contain state for parameter %ls", parameter.Name().c_str());
             }
             const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
-            const DictionaryValue& state = checkpoint[parameter.Name()];
-            const auto& data = smoothedGradientValue;
-            DeserializeFromVector(data, state.GetValue<vector<DictionaryValue>>());
+            const NDArrayView& checkpointedValue = checkpoint[parameter.Name()].GetValue<NDArrayView>();
+
+            if (smoothedGradientValue->GetDataType() != checkpointedValue.GetDataType())
+            {
+                LogicError("A value restored from a checkpoint for the smoothed gradient data type for parameter %ls does not match the expected value",
+                           parameter.Name().c_str());
+            }
+
+            if (smoothedGradientValue->Shape() != checkpointedValue.Shape())
+            {
+                LogicError("A value restored from a checkpoint for the smoothed gradient shape for parameter %ls does not match the expected value",
+                           parameter.Name().c_str());
+            }
+
+            smoothedGradientValue->CopyFrom(checkpointedValue);
         }
     }
@@ -313,23 +340,25 @@ namespace CNTK
     template <typename ElementType>
     void LearnerSGD::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
     {
-        UNUSED(trainingSampleCount);
-
         const auto& parameterValue = parameter.Value();
         const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
         const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
         const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
-        const auto& learningRate = ElementType(ParameterDependentLearningRate(parameter));
+        auto learningRate = ElementType(m_learningRates[m_sampleCount]);
+        auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));

         // TODO: break up the NormalGrad into 3 different functions, each with its own set of parameters
         // (one for vanilla SGD, the other for momentum SGD, and the third one for NAG).
         smoothedGradientMatrix->NormalGrad(*gradientMatrix, *parameterMatrix,
-                                           learningRate, ElementType(m_momentumPerSample), m_useNesterovAcceleration);
+                                           learningRate, momentum, m_useNesterovAcceleration);
     }

-    LearnerAdaGrad::LearnerAdaGrad(const unordered_set<Parameter>& parameters, bool needAveMultiplier)
-        : LearnerBase(parameters), m_needAveMultiplier(needAveMultiplier)
+    LearnerAdaGrad::LearnerAdaGrad(const unordered_set<Parameter>& parameters,
+                                   const LearningRatesPerSample& learningRates,
+                                   bool needAveMultiplier)
+        : LearnerBase(parameters, learningRates),
+        m_needAveMultiplier(needAveMultiplier)
     {
     }
@@ -348,15 +377,23 @@ namespace CNTK
         const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
         const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);

-        auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
+        auto learningRate = ElementType(m_learningRates[m_sampleCount]);

         auto aveMultiplier = smoothedGradientMatrix->Adagrad(*gradientMatrix, m_needAveMultiplier);
         Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
     }

-    LearnerFSAdaGrad::LearnerFSAdaGrad(const unordered_set<Parameter>& parameters)
-        : LearnerMomentumSGD(parameters)
+    LearnerFSAdaGrad::LearnerFSAdaGrad(const unordered_set<Parameter>& parameters,
+                                       const LearningRatesPerSample& learningRates,
+                                       const MomentumsPerSample& momentums)
+        : LearnerMomentumSGD(parameters, learningRates, momentums, /*allocateSmoothGradients*/ false)
     {
+        for (const auto& parameter : parameters)
+        {
+            auto shape = GetMatrixShape(parameter);
+            NDArrayViewPtr view = AllocateNDArrayView(parameter, {shape[0], 2 * shape[1]});
+            m_smoothedGradientValues.insert(make_pair(parameter, view));
+        }
     }

     /*virtual*/ void LearnerFSAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
@@ -373,21 +410,33 @@ namespace CNTK
         const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
         const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
         const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);

-        //const double momentum = MomentumPerMB(m_momentumPerSample, trainingSampleCount);
-        auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
-        smoothedGradientMatrix->FSAdagrad(trainingSampleCount, *gradientMatrix, *parameterMatrix,
-                                          learningRate, ElementType(m_momentumPerSample));
+        auto learningRate = ElementType(m_learningRates[m_sampleCount]);
+        auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
+
+        smoothedGradientMatrix->FSAdagrad(trainingSampleCount, *gradientMatrix, *parameterMatrix, learningRate, momentum);
     }

-    LearnerRMSProp::LearnerRMSProp(const unordered_set<Parameter>& parameters,
+    LearnerRMSProp::LearnerRMSProp(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates,
                                    double gamma, double inc, double dec, double max, double min, bool needAveMultiplier)
-        : LearnerBase(parameters),
+        : LearnerBase(parameters, learningRates, /*allocateSmoothGradients*/ false),
         m_gamma(gamma), m_inc(inc), m_dec(dec), m_max(max), m_min(min),
         m_needAveMultiplier(needAveMultiplier)
     {
+        for (const auto& parameter : parameters)
+        {
+            // When needAveMultiplier == true, CPU and GPU implementations of RMSProp require different number of columns.
+            // TODO: verify that this is correct.
+            size_t factor = 3;
+            if (needAveMultiplier && parameter.Value()->Device().Type() == DeviceKind::GPU)
+            {
+                factor = 4;
+            }
+
+            auto shape = GetMatrixShape(parameter);
+            NDArrayViewPtr view = AllocateNDArrayView(parameter, {shape[0], factor * shape[1]});
+            m_smoothedGradientValues.insert(make_pair(parameter, view));
+        }
     }

     /*virtual*/ void LearnerRMSProp::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
@@ -405,12 +454,12 @@ namespace CNTK
         const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
         const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);

-        auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
+        auto learningRate = ElementType(m_learningRates[m_sampleCount]);

         auto aveMultiplier = smoothedGradientMatrix->RmsProp(*gradientMatrix,
                                                              ElementType(m_gamma), ElementType(m_inc),
                                                              ElementType(m_max), ElementType(m_dec),
                                                              ElementType(m_min), m_needAveMultiplier);
         Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
     }
@@ -418,34 +467,35 @@ namespace CNTK
     template shared_ptr<Matrix<float>> LearnerBase::GetWritableMatrix<float>(const NDArrayViewPtr& arrayView);
     template shared_ptr<Matrix<double>> LearnerBase::GetWritableMatrix<double>(const NDArrayViewPtr& arrayView);

-    LearnerPtr SGDLearner(const unordered_set<Parameter>& parameters, double learningRatePerSample)
+    LearnerPtr SGDLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates)
     {
-        return MakeSharedObject<LearnerSGD>(parameters, learningRatePerSample);
+        return MakeSharedObject<LearnerSGD>(parameters, learningRates);
     }

-    LearnerPtr MomentumSGDLearner(const unordered_set<Parameter>& parameters)
+    LearnerPtr MomentumSGDLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, const MomentumsPerSample& momentums)
     {
-        return MakeSharedObject<LearnerMomentumSGD>(parameters);
+        return MakeSharedObject<LearnerMomentumSGD>(parameters, learningRates, momentums);
     }

-    LearnerPtr NesterovLearner(const unordered_set<Parameter>& parameters)
+    LearnerPtr NesterovLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, const MomentumsPerSample& momentums)
     {
-        return MakeSharedObject<LearnerNesterov>(parameters);
+        return MakeSharedObject<LearnerNesterov>(parameters, learningRates, momentums);
     }

-    LearnerPtr AdaGradLearner(const unordered_set<Parameter>& parameters, bool needAveMultiplier)
+    LearnerPtr AdaGradLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, bool needAveMultiplier)
     {
-        return MakeSharedObject<LearnerAdaGrad>(parameters, needAveMultiplier);
+        return MakeSharedObject<LearnerAdaGrad>(parameters, learningRates, needAveMultiplier);
     }

-    LearnerPtr FSAdaGradLearner(const unordered_set<Parameter>& parameters)
+    LearnerPtr FSAdaGradLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, const MomentumsPerSample& momentums)
     {
-        return MakeSharedObject<LearnerFSAdaGrad>(parameters);
+        return MakeSharedObject<LearnerFSAdaGrad>(parameters, learningRates, momentums);
     }

-    LearnerPtr RMSPropLearner(const unordered_set<Parameter>& parameters,
-                              double gamma, double inc, double dec, double max, double min, bool needAveMultiplier)
+    LearnerPtr RMSPropLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates,
+                              double gamma, double inc, double dec, double max, double min,
+                              bool needAveMultiplier)
     {
-        return MakeSharedObject<LearnerRMSProp>(parameters, gamma, inc, dec, max, min, needAveMultiplier);
+        return MakeSharedObject<LearnerRMSProp>(parameters, learningRates, gamma, inc, dec, max, min, needAveMultiplier);
     }
 }

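As context for the schedule lookups above (m_learningRates[m_sampleCount], m_momentums[m_sampleCount]): the lookup resolves the hyperparameter in effect at the current cumulative sample count via upper_bound (the actual TrainingParameterSchedule::operator[] appears later in this commit, under Utils.cpp). A standalone sketch of the same semantics with a plain std::map (illustrative names, not the CNTK types):

    #include <cassert>
    #include <cstdio>
    #include <map>

    // Keys are cumulative sample-count thresholds: the value at key K applies to
    // samples [previous key, K); the last entry applies to everything beyond it.
    static double ScheduleLookup(const std::map<size_t, double>& schedule, size_t sampleCount)
    {
        assert(!schedule.empty());
        auto it = schedule.upper_bound(sampleCount); // first threshold strictly greater
        if (it == schedule.end())
            --it;                                    // past the last threshold: reuse the final value
        return it->second;
    }

    int main()
    {
        std::map<size_t, double> learningRates{ { 1000, 0.05 }, { 5000, 0.01 }, { 10000, 0.001 } };
        std::printf("%f\n", ScheduleLookup(learningRates, 0));      // 0.05
        std::printf("%f\n", ScheduleLookup(learningRates, 4999));   // 0.01
        std::printf("%f\n", ScheduleLookup(learningRates, 999999)); // 0.001
        return 0;
    }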
View file

@@ -9,6 +9,7 @@
 namespace CNTK
 {
+    // TODO: Move this to Trainer along with Pre-, PostProcess and ClipGradient.
     // A collection of additional options that are applicable for all standard learners
     // (after these options are set, they retain their value for the entire lifespan of a learner).
     struct AdditionalLearningOptions
@@ -18,7 +19,6 @@ namespace CNTK
         double gaussianNoiseInjectionStdDev = 0.0;
         bool gradientClippingWithTruncation = true;
         double gradientClippingThresholdPerSample = std::numeric_limits<double>::infinity();
-        std::unordered_map<Parameter, double> learningRateMultipliers;
     };

     // An abstract base class at the root of the standard learners hierarchy
@@ -33,32 +33,16 @@ namespace CNTK
         virtual void RestoreFromCheckpoint(const Dictionary& checkpoint) override final;

-        void SetAdditionalOptions(const AdditionalLearningOptions& additionalOptions)
-        {
-            m_additionalOptions = additionalOptions;
-        }
-
-        // TODO: should this be called ResetMomentum?
-        // needed for BlockMomemtumSGD to reset SGD momentum after aggregation.
-        void ResetSmoothedGradients();
-
-        // TODO: move learning rate and momentum scheduling and adjustment functionality
-        // inside the learner and drop these setters.
-        void SetLearningRate(double value) { m_learningRatePerSample = value; }
-
     protected:
-        LearnerBase(const std::unordered_set<Parameter>& parameters);
+        LearnerBase(const std::unordered_set<Parameter>& parameters,
+                    const LearningRatesPerSample& learningRates,
+                    bool allocateSmoothGradients = true);

         virtual void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const = 0;

-        double ParameterDependentLearningRate(const Parameter& parameter) const
-        {
-            return m_learningRatePerSample * m_additionalOptions.learningRateMultipliers.at(parameter);
-        }
-
         std::string LearnerType() const;

-        double m_learningRatePerSample;
+        LearningRatesPerSample m_learningRates;

         AdditionalLearningOptions m_additionalOptions;
@@ -91,6 +75,16 @@ namespace CNTK
         template <typename ElementType>
         void PostProcess(const Parameter& parameter, const NDArrayViewPtr& gradientValue, size_t actualMBSize) const;

+        // Returns an NDArrayView with the required shape, with the same data type as parameter value
+        // and allocated on the same device.
+        static NDArrayViewPtr AllocateNDArrayView(const Parameter& parameter, const NDShape& shape);
+
+        // Retrieves the shape of the matrix corresponding to the parameter value.
+        static NDShape GetMatrixShape(const Parameter& parameter);
+
+        size_t m_sampleCount;
+        size_t m_minibatchCount;
+
     private:
         // Templatized update function, it invokes preprocess and postprocess using the provided
         // template parameter and also invokes virtual Update method implemented in one of the subclasses.
@@ -101,18 +95,20 @@ namespace CNTK
         static bool HasNan(const NDArrayViewPtr& value, const char* name);
         static void Print(const NDArrayViewPtr& value, const char* msg);

-        size_t m_sampleCount;
+        static const size_t checkpointVersion = 1;
     };

     // Vanilla gradient descent optimization algorithm.
     class LearnerSGD : public LearnerBase
     {
     public:
-        LearnerSGD(const std::unordered_set<Parameter>& parameters, double learningRatePerSample = 0)
-            : LearnerBase(parameters), m_momentumPerSample(0.0), m_useNesterovAcceleration(false)
-        {
-            SetLearningRate(learningRatePerSample);
-        }
+        LearnerSGD(const std::unordered_set<Parameter>& parameters,
+                   const LearningRatesPerSample& learningRates,
+                   bool allocateSmoothGradients = true)
+            : LearnerBase(parameters, learningRates, allocateSmoothGradients),
+            m_momentums(0.0),
+            m_useNesterovAcceleration(false)
+        { }

     protected:
@@ -121,7 +117,8 @@ namespace CNTK
         template <typename ElementType>
         void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const;

-        double m_momentumPerSample;
+        // TODO: Move m_momentums to LearnerMomentumSGD as soon as NormalGrad is refactored.
+        MomentumsPerSample m_momentums;
         bool m_useNesterovAcceleration;
     };
@@ -129,20 +126,25 @@ namespace CNTK
     class LearnerMomentumSGD : public LearnerSGD
     {
     public:
-        LearnerMomentumSGD(const std::unordered_set<Parameter>& parameters)
-            : LearnerSGD(parameters)
-        {}
-
-        void SetMomentum(double value) { m_momentumPerSample = value; }
+        LearnerMomentumSGD(const std::unordered_set<Parameter>& parameters,
+                           const LearningRatesPerSample& learningRates,
+                           const MomentumsPerSample& momentums,
+                           bool allocateSmoothGradients = true)
+            : LearnerSGD(parameters, learningRates, allocateSmoothGradients)
+        {
+            m_momentums = momentums;
+        }
     };

     // Nesterov's accelerated gradient descent.
-    class LearnerNesterov : public LearnerSGD
+    class LearnerNesterov : public LearnerMomentumSGD
     {
     public:
-        LearnerNesterov(const std::unordered_set<Parameter>& parameters)
-            : LearnerSGD(parameters)
+        LearnerNesterov(const std::unordered_set<Parameter>& parameters,
+                        const LearningRatesPerSample& learningRates,
+                        const MomentumsPerSample& momentums)
+            : LearnerMomentumSGD(parameters, learningRates, momentums)
         {
             m_useNesterovAcceleration = true;
         }
@@ -152,7 +154,9 @@ namespace CNTK
     {
     public:
-        LearnerAdaGrad(const std::unordered_set<Parameter>& parameters, bool needAveMultiplier);
+        LearnerAdaGrad(const std::unordered_set<Parameter>& parameters,
+                       const LearningRatesPerSample& learningRates,
+                       bool needAveMultiplier);

     protected:
         bool m_needAveMultiplier;
@@ -167,7 +171,9 @@ namespace CNTK
     {
     public:
-        LearnerFSAdaGrad(const std::unordered_set<Parameter>& parameters);
+        LearnerFSAdaGrad(const std::unordered_set<Parameter>& parameters,
+                         const LearningRatesPerSample& learningRates,
+                         const MomentumsPerSample& momentums);

     protected:
@@ -182,7 +188,9 @@ namespace CNTK
     public:
         LearnerRMSProp(const std::unordered_set<Parameter>& parameters,
-                       double gamma, double inc, double dec, double max, double min, bool needAveMultiplier);
+                       const LearningRatesPerSample& learningRates,
+                       double gamma, double inc, double dec, double max, double min,
+                       bool needAveMultiplier);

     protected:

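The reshaped hierarchy above chains constructors: LearnerNesterov now derives from LearnerMomentumSGD, which forwards to LearnerSGD, and subclasses that lay out custom smoothed-gradient buffers (FSAdaGrad, RMSProp) pass allocateSmoothGradients = false up the chain. A minimal standalone sketch of that pattern (stand-in types, not the CNTK classes):

    #include <iostream>

    struct SGD
    {
        SGD(double rate, bool allocateSmoothGradients = true)
            : m_rate(rate)
        {
            if (allocateSmoothGradients)
                std::cout << "allocating default smoothed gradients\n";
        }
        double m_rate;
        double m_momentum = 0.0;
        bool m_useNesterov = false;
    };

    struct MomentumSGD : SGD
    {
        MomentumSGD(double rate, double momentum, bool allocateSmoothGradients = true)
            : SGD(rate, allocateSmoothGradients)
        {
            m_momentum = momentum;
        }
    };

    struct Nesterov : MomentumSGD
    {
        Nesterov(double rate, double momentum)
            : MomentumSGD(rate, momentum)
        {
            m_useNesterov = true; // same update path as momentum SGD, NAG flag on
        }
    };

    int main()
    {
        Nesterov learner(0.01, 0.9);
        std::cout << learner.m_rate << " " << learner.m_momentum << " " << learner.m_useNesterov << "\n";
        return 0;
    }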
View file

@@ -49,10 +49,12 @@ namespace CNTK
         m_streamInfos.insert({ streamDesc->m_name, streamDesc->m_id, AsStorageFormat(streamDesc->m_storageType), AsDataType(streamDesc->m_elementType), AsNDShape(*(streamDesc->m_sampleLayout)) });
     }

-    /*virtual*/ std::unordered_map<StreamInfo, MinibatchData> CompositeMinibatchSource::GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
-                                                                                                         const DeviceDescriptor& device /*= DeviceDescriptor::DefaultDevice()*/) /*override*/
+    /*virtual*/ const std::unordered_map<StreamInfo, MinibatchData>&
+    CompositeMinibatchSource::GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
+                                               const DeviceDescriptor& device /*= DeviceDescriptor::DefaultDevice()*/) /*override*/
     {
-        std::unordered_map<StreamInfo, MinibatchData> minibatchData;
+        m_minibatchData.clear();
+
         if (!m_epochEndReached)
         {
             // TODO: Support different minibatch sizes for different streams
@@ -117,7 +119,9 @@ namespace CNTK
                 auto currentStreamMinibatchData = compositeReaderMinibatchData.m_data[i];
                 if (currentStreamDesc->m_elementType == ElementType::tfloat)
                 {
-                    auto dataMatrix = std::make_shared<Matrix<float>>(CPUDEVICE);
+                    auto CNTKMatrixType = (currentStreamDesc->m_storageType == StorageType::dense) ? DENSE : SPARSE;
+                    auto CNTKMatrixFormat = (currentStreamDesc->m_storageType == StorageType::dense) ? matrixFormatDense : matrixFormatSparseCSC;
+                    auto dataMatrix = std::make_shared<Matrix<float>>(0, 0, CPUDEVICE, CNTKMatrixType, CNTKMatrixFormat);
                     size_t sampleSize = currentStreamDesc->m_sampleLayout->GetNumElements();

                     // TODO: Eliminate the unnecessary CPU to CPU copy
@@ -127,14 +131,14 @@ namespace CNTK
                     size_t numSamples = currentStreamMinibatchData->m_layout->GetActualNumSamples();
                     size_t numSequences = currentStreamMinibatchData->m_layout->GetNumSequences();

-                    minibatchData[currentStreamInfo] = { numSequences, numSamples, minibatchValuePtr };
+                    m_minibatchData[currentStreamInfo] = { numSequences, numSamples, minibatchValuePtr };
                 }
                 else
                     LogicError("Input data of type other than DataType::Float is currently unsupported by the CNTK built-in composite MinibatchSource!");
             }
         }

-        return minibatchData;
+        return m_minibatchData;
     }

     void ComputeInputPerDimMeansAndInvStdDevs(const MinibatchSourcePtr& minibatchSource,

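GetNextMinibatch now hands back a const reference to the member map m_minibatchData instead of a by-value copy; the trade-off is that the reference stays valid only until the next call, which clears and refills the map. A standalone sketch of the pattern (illustrative types):

    #include <iostream>
    #include <string>
    #include <unordered_map>

    class Source
    {
    public:
        const std::unordered_map<std::string, int>& Next()
        {
            m_data.clear();          // invalidates contents handed out previously
            m_data["samples"] = ++m_call;
            return m_data;           // no per-call copy of the whole map
        }

    private:
        std::unordered_map<std::string, int> m_data;
        int m_call = 0;
    };

    int main()
    {
        Source src;
        const auto& first = src.Next();
        std::cout << first.at("samples") << "\n"; // 1
        src.Next();                               // refreshes the shared map
        std::cout << first.at("samples") << "\n"; // 2: "first" aliases the same map
        return 0;
    }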
View file

@@ -19,8 +19,8 @@ namespace CNTK
         virtual const std::unordered_set<StreamInfo>& StreamInfos() override { return m_streamInfos; }

-        virtual std::unordered_map<StreamInfo, MinibatchData> GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
-                                                                               const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()) override;
+        virtual const std::unordered_map<StreamInfo, MinibatchData>& GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
+                                                                                      const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()) override;

     private:
         std::unordered_set<StreamInfo> m_streamInfos;
@@ -28,5 +28,6 @@ namespace CNTK
         bool m_epochEndReached;
         size_t m_prevMinibatchSize;
         size_t m_epochSize;
+        std::unordered_map<StreamInfo, MinibatchData> m_minibatchData;
     };
 }

View file

@@ -61,11 +61,12 @@ namespace CNTK
             LogicError("The gradient value for a Parameter cannot have an associated mask!");
         }

-        auto trainingLossArguments = m_trainingLossVar.Owner()->Arguments();
-        auto labelsVar = *(std::find_if(trainingLossArguments.begin(), trainingLossArguments.end(), [](const Variable& var) {
-            return var.IsInput();
-        }));
-        auto argumentValue = arguments.at(labelsVar);
+        auto trainingLossArgument = *(m_trainingLossVar.Owner()->Arguments().begin());
+
+        // Find the argument whose dynamic axes match the criterion operation's dynamic axes (i.e. label dynamic axes)
+        auto argumentValue = std::find_if(arguments.begin(), arguments.end(), [trainingLossArgument](const std::pair<Variable, ValuePtr>& currentPair) {
+            return (currentPair.first.DynamicAxes() == trainingLossArgument.DynamicAxes());
+        })->second;
         auto argumentData = argumentValue->Data();
         auto argumentDataShape = argumentData->Shape();
         auto mask = argumentValue->Mask();

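The new lookup above selects the label argument by comparing dynamic axes rather than assuming a particular input variable. A standalone sketch of that find-by-attribute pattern over a map (illustrative types, not the CNTK Variable/Value API):

    #include <algorithm>
    #include <iostream>
    #include <string>
    #include <unordered_map>

    struct Var { std::string axes; };
    struct VarHash { size_t operator()(const Var& v) const { return std::hash<std::string>()(v.axes); } };
    struct VarEq { bool operator()(const Var& a, const Var& b) const { return a.axes == b.axes; } };

    int main()
    {
        Var criterion{ "batch+sequence" };
        std::unordered_map<Var, int, VarHash, VarEq> arguments{ { { "batch" }, 10 }, { { "batch+sequence" }, 42 } };

        // Scan for the entry whose axis tag matches the criterion's axis tag.
        auto it = std::find_if(arguments.begin(), arguments.end(),
                               [&](const std::pair<const Var, int>& kv) { return kv.first.axes == criterion.axes; });
        if (it != arguments.end())
            std::cout << it->second << "\n"; // 42
        return 0;
    }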
View file

@@ -6,20 +6,100 @@
 #include "stdafx.h"
 #include "CNTKLibrary.h"
 #include "Utils.h"
-#include "File.h"
+#include <istream>
+#include <ostream>

 using namespace std;

 namespace CNTK
 {
+    // This wrapper redefines operator<< in terms of unformatted (binary) write operation.
+    struct BinaryOStreamWrapper
+    {
+        BinaryOStreamWrapper(ostream& s) : m_stream(s) {}
+
+        template<typename T>
+        typename std::enable_if<std::is_pod<T>::value, BinaryOStreamWrapper&>::type
+        operator<<(const T& value)
+        {
+            m_stream.write(reinterpret_cast<const char*>(&value), sizeof(T));
+            return *this;
+        }
+
+        BinaryOStreamWrapper& operator<<(const wstring& str)
+        {
+            *this << str.length();
+            m_stream.write(reinterpret_cast<const char*>(str.c_str()), str.length() * sizeof(wchar_t));
+            return *this;
+        }
+
+        operator ostream& () { return m_stream; }
+
+        ostream& m_stream;
+        BinaryOStreamWrapper(const BinaryOStreamWrapper&) = delete; BinaryOStreamWrapper(BinaryOStreamWrapper&&) = delete; BinaryOStreamWrapper& operator=(const BinaryOStreamWrapper&) = delete; BinaryOStreamWrapper& operator=(BinaryOStreamWrapper&&) = delete;
+    };
+
+    // This wrapper redefines operator>> in terms of unformatted (binary) read operation.
+    struct BinaryIStreamWrapper
+    {
+        BinaryIStreamWrapper(istream& s) : m_stream(s) {}
+
+        template<typename T>
+        typename std::enable_if<std::is_pod<T>::value, BinaryIStreamWrapper&>::type
+        operator>>(T& value)
+        {
+            static_assert(sizeof(T) <= sizeof(size_t), "size_t is the largest supported type.");
+            m_stream.read(buf, sizeof(T));
+            value = *(reinterpret_cast<T*>(buf));
+            return *this;
+        }
+
+        BinaryIStreamWrapper& operator>>(wstring& str)
+        {
+            size_t length;
+            *this >> length;
+            str.resize(length);
+            for (size_t i = 0; i < length; ++i)
+            {
+                m_stream.read(buf, sizeof(wchar_t));
+                str[i] = *(reinterpret_cast<wchar_t*>(buf));
+            }
+            return *this;
+        }
+
+        operator istream& () const { return m_stream; }
+
+        istream& m_stream;
+        char buf[sizeof(size_t)];
+        BinaryIStreamWrapper(const BinaryIStreamWrapper&) = delete; BinaryIStreamWrapper(BinaryIStreamWrapper&&) = delete; BinaryIStreamWrapper& operator=(const BinaryIStreamWrapper&) = delete; BinaryIStreamWrapper& operator=(BinaryIStreamWrapper&&) = delete;
+    };
+
+    template <typename T>
+    T* CreateDataPtr(const T& value)
+    {
+        return new T(value);
+    }
+
+    template <>
+    NDArrayView* CreateDataPtr<NDArrayView>(const NDArrayView& value)
+    {
+        // TODO: replace this copy with an alias to value.
+        NDArrayView* viewPtr = new NDArrayView(value.GetDataType(), value.Shape(), DeviceDescriptor::CPUDevice());
+        viewPtr->CopyFrom(value);
+        return viewPtr;
+    }
+
     template <typename T>
     void DictionaryValue::AllocateDataPtr(const T& value)
     {
         static_assert(is_same<T, NDShape>::value ||
                       is_same<T, wstring>::value ||
                       is_same<T, vector<DictionaryValue>>::value ||
-                      is_same<T, Dictionary>::value, "AllocateDataPtr called with invalid type");
-        m_data.m_ptr = new T(value);
+                      is_same<T, Dictionary>::value ||
+                      is_same<T, NDArrayView>::value,
+                      "AllocateDataPtr called with invalid type");
+        m_data.m_ptr = CreateDataPtr<T>(value);
     }

     template <typename T>
@@ -31,12 +111,163 @@ namespace CNTK
         m_data.m_ptr = nullptr;
     }

-    Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, DictionaryValue& us)
+    template <typename ElementType>
+    bool AreEqual(NDArrayView& view1, NDArrayView& view2)
     {
+        if (view1.GetDataType() != view2.GetDataType() ||
+            view1.Shape() != view2.Shape())
+        {
+            return false;
+        }
+
+        ElementType* data1 = nullptr;
+        ElementType* data2 = nullptr;
+        if (view1.Device().Type() == DeviceKind::CPU)
+        {
+            data1 = view1.WritableDataBuffer<ElementType>();
+            data2 = view2.WritableDataBuffer<ElementType>();
+        }
+        else
+        {
+            NDArrayViewPtr temp1CpuDataView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), view1.Shape(), DeviceDescriptor::CPUDevice());
+            temp1CpuDataView->CopyFrom(view1);
+            data1 = temp1CpuDataView->WritableDataBuffer<ElementType>();
+
+            NDArrayViewPtr temp2CpuDataView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), view2.Shape(), DeviceDescriptor::CPUDevice());
+            temp2CpuDataView->CopyFrom(view2);
+            data2 = temp2CpuDataView->WritableDataBuffer<ElementType>();
+        }
+
+        size_t numElements = view1.Shape().TotalSize();
+        for (size_t i = 0; i < numElements; ++i)
+        {
+            if (data1[i] != data2[i])
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    bool DictionaryValue::operator==(const DictionaryValue& other) const
+    {
+        if (this == &other)
+        {
+            return true;
+        }
+
+        if (m_valueType != other.m_valueType)
+        {
+            return false;
+        }
+
+        switch (m_valueType)
+        {
+        case DictionaryValue::Type::Bool:
+            return (m_data.m_boolean == other.m_data.m_boolean);
+        case DictionaryValue::Type::SizeT:
+            return (m_data.m_sizeT == other.m_data.m_sizeT);
+        case DictionaryValue::Type::Float:
+            return (m_data.m_float == other.m_data.m_float);
+        case DictionaryValue::Type::Double:
+            return (m_data.m_double == other.m_data.m_double);
+        case DictionaryValue::Type::String:
+        {
+            wstring* strPtr1 = reinterpret_cast<wstring*>(m_data.m_ptr);
+            wstring* strPtr2 = reinterpret_cast<wstring*>(other.m_data.m_ptr);
+            return (*strPtr1 == *strPtr2);
+        }
+        case DictionaryValue::Type::NDShape:
+        {
+            NDShape* shapePtr1 = reinterpret_cast<NDShape*>(m_data.m_ptr);
+            NDShape* shapePtr2 = reinterpret_cast<NDShape*>(other.m_data.m_ptr);
+            return (*shapePtr1 == *shapePtr2);
+        }
+        case DictionaryValue::Type::Vector:
+        {
+            vector<DictionaryValue>* vectorPtr1 = reinterpret_cast<vector<DictionaryValue>*>(m_data.m_ptr);
+            vector<DictionaryValue>* vectorPtr2 = reinterpret_cast<vector<DictionaryValue>*>(other.m_data.m_ptr);
+            return (*vectorPtr1 == *vectorPtr2);
+        }
+        case DictionaryValue::Type::Dictionary:
+        {
+            Dictionary* dictPtr1 = reinterpret_cast<Dictionary*>(m_data.m_ptr);
+            Dictionary* dictPtr2 = reinterpret_cast<Dictionary*>(other.m_data.m_ptr);
+            return (*dictPtr1 == *dictPtr2);
+        }
+        case DictionaryValue::Type::NDArrayView:
+        {
+            NDArrayView* viewPtr1 = reinterpret_cast<NDArrayView*>(m_data.m_ptr);
+            NDArrayView* viewPtr2 = reinterpret_cast<NDArrayView*>(other.m_data.m_ptr);
+
+            switch (viewPtr1->GetDataType())
+            {
+            case DataType::Float:
+                return AreEqual<float>(*viewPtr1, *viewPtr2);
+            case DataType::Double:
+                return AreEqual<double>(*viewPtr1, *viewPtr2);
+            default:
+                NOT_IMPLEMENTED;
+            }
+        }
+        default:
+            NOT_IMPLEMENTED;
+        }
+    }
+
+    bool DictionaryValue::operator!=(const DictionaryValue& other) const
+    {
+        return !(*this == other);
+    }
+
+    BinaryOStreamWrapper& operator<<(BinaryOStreamWrapper& stream, const NDShape& us)
+    {
+        auto size = us.NumAxes();
+        stream << size;
+        for (auto i = 0; i < size; i++)
+        {
+            stream << us[i];
+        }
+        return stream;
+    }
+
+    template <typename T>
+    void Write(BinaryOStreamWrapper& stream, const NDArrayView& view)
+    {
+        assert(view.Device().Type() == DeviceKind::CPU);
+
+        auto numElements = view.Shape().TotalSize();
+        const T* buffer = view.DataBuffer<T>();
+        for (auto i = 0; i < numElements; ++i)
+        {
+            stream << buffer[i];
+        }
+    }
+
+    template <typename T>
+    void Read(BinaryIStreamWrapper& stream, NDArrayView& view)
+    {
+        assert(view.Device().Type() == DeviceKind::CPU);
+
+        auto numElements = view.Shape().TotalSize();
+        T* buffer = view.WritableDataBuffer<T>();
+        for (auto i = 0; i < numElements; ++i)
+        {
+            stream >> buffer[i];
+        }
+    }
+
+    istream& operator>>(istream& stdStream, DictionaryValue& us)
+    {
+        BinaryIStreamWrapper stream(stdStream);
         size_t version;
         stream >> version;

-        stream >> us.m_valueType;
+        unsigned int type;
+        stream >> type;
+        us.m_valueType = static_cast<DictionaryValue::Type>(type);

         switch (us.ValueType())
         {
@@ -52,28 +283,72 @@ namespace CNTK
         case DictionaryValue::Type::Double:
             stream >> us.m_data.m_double;
             break;
+        case DictionaryValue::Type::String:
+        {
+            wstring* strPtr = new wstring();
+            stream >> *strPtr;
+            us.m_data.m_ptr = strPtr;
+            break;
+        }
         case DictionaryValue::Type::NDShape:
         {
             size_t size;
             stream >> size;
-            vector<size_t> dims(size);
+            NDShape* shapePtr = new NDShape(size);
             for (auto i = 0; i < size; i++)
             {
-                stream >> dims[i];
+                stream >> shapePtr->operator[](i);
             }
-            us.AllocateDataPtr(NDShape(dims));
+            us.m_data.m_ptr = shapePtr;
             break;
         }
         case DictionaryValue::Type::Vector:
         {
             size_t size;
             stream >> size;
-            vector<DictionaryValue> values(size);
+            vector<DictionaryValue>* vectorPtr = new vector<DictionaryValue>(size);
             for (auto i = 0; i < size; i++)
             {
-                stream >> values[i];
+                stream >> vectorPtr->at(i);
             }
-            us.AllocateDataPtr(values);
+            us.m_data.m_ptr = vectorPtr;
+            break;
+        }
+        case DictionaryValue::Type::Dictionary:
+        {
+            Dictionary* dictPtr = new Dictionary();
+            stream >> *dictPtr;
+            us.m_data.m_ptr = dictPtr;
+            break;
+        }
+        case DictionaryValue::Type::NDArrayView:
+        {
+            unsigned int type;
+            stream >> type;
+            DataType dtype = static_cast<DataType>(type);
+
+            size_t size;
+            stream >> size;
+            NDShape shape(size);
+            for (auto i = 0; i < size; i++)
+            {
+                stream >> shape[i];
+            }
+
+            NDArrayView* viewPtr = new NDArrayView(dtype, shape, DeviceDescriptor::CPUDevice());
+            switch (dtype)
+            {
+            case DataType::Float:
+                Read<float>(stream, *viewPtr);
+                break;
+            case DataType::Double:
+                Read<double>(stream, *viewPtr);
+                break;
+            default:
+                LogicError("Unsupported DataType %s", DataTypeName(dtype));
+            }
+
+            us.m_data.m_ptr = viewPtr;
             break;
         }
         default:
@@ -82,11 +357,13 @@ namespace CNTK
         return stream;
     }

-    Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const DictionaryValue& us)
+    ostream& operator<<(ostream& stdStream, const DictionaryValue& us)
     {
+        BinaryOStreamWrapper stream(stdStream);
+
         stream << us.version;

-        stream << us.ValueType();
+        stream << static_cast<unsigned int>(us.ValueType());

         switch (us.ValueType())
         {
@@ -102,15 +379,16 @@ namespace CNTK
         case DictionaryValue::Type::Double:
             stream << us.m_data.m_double;
             break;
+        case DictionaryValue::Type::String:
+        {
+            wstring* stringPtr = reinterpret_cast<wstring*>(us.m_data.m_ptr);
+            stream << *stringPtr;
+            break;
+        }
         case DictionaryValue::Type::NDShape:
         {
             NDShape* shapePtr = reinterpret_cast<NDShape*>(us.m_data.m_ptr);
-            auto size = shapePtr->NumAxes();
-            stream << size;
-            for (auto i = 0; i < size; i++)
-            {
-                stream << shapePtr->operator[](i);
-            }
+            stream << *shapePtr;
             break;
         }
         case DictionaryValue::Type::Vector:
@@ -121,7 +399,31 @@ namespace CNTK
             stream << size;
             for (auto i = 0; i < size; i++)
             {
-                stream << vectorPtr->operator[](i);
+                stream << vectorPtr->at(i);
             }
             break;
         }
+        case DictionaryValue::Type::Dictionary:
+        {
+            Dictionary* dictPtr = reinterpret_cast<Dictionary*>(us.m_data.m_ptr);
+            stream << *dictPtr;
+            break;
+        }
+        case DictionaryValue::Type::NDArrayView:
+        {
+            NDArrayView* viewPtr = reinterpret_cast<NDArrayView*>(us.m_data.m_ptr);
+            stream << static_cast<unsigned int>(viewPtr->GetDataType());
+            stream << viewPtr->Shape();
+
+            switch (viewPtr->GetDataType())
+            {
+            case DataType::Float:
+                Write<float>(stream, *viewPtr);
+                break;
+            case DataType::Double:
+                Write<double>(stream, *viewPtr);
+                break;
+            default:
+                LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
+            }
+            break;
+        }
@@ -148,7 +450,7 @@ namespace CNTK
     Dictionary& Dictionary::operator=(const Dictionary& other)
     {
         assert(this != &other);
-        m_dictionaryData.reset(new std::unordered_map<std::wstring, DictionaryValue>(*(other.m_dictionaryData)));
+        m_dictionaryData.reset(new unordered_map<wstring, DictionaryValue>(*(other.m_dictionaryData)));
         return *this;
     }
@@ -183,20 +485,51 @@ namespace CNTK
         return (m_dictionaryData->find(key) != m_dictionaryData->end());
     }

-    Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const Dictionary& us)
+    bool Dictionary::operator==(const Dictionary& other) const
     {
+        if (this == &other)
+        {
+            return true;
+        }
+
+        if (m_dictionaryData->size() != other.m_dictionaryData->size())
+        {
+            return false;
+        }
+
+        for (auto& kv : *m_dictionaryData)
+        {
+            auto result = other.m_dictionaryData->find(kv.first);
+            if (result == other.m_dictionaryData->end() || kv.second != result->second)
+            {
+                return false;
+            }
+        }
+
+        return true;
+    }
+
+    bool Dictionary::operator!=(const Dictionary& other) const
+    {
+        return !(*this == other);
+    }
+
+    ostream& operator<<(ostream& stdStream, const Dictionary& us)
+    {
+        BinaryOStreamWrapper stream(stdStream);
+
         stream << us.version;
         stream << us.m_dictionaryData->size();
-        for (auto it = us.m_dictionaryData->begin(); it != us.m_dictionaryData->end(); ++it)
+        for (auto& kv : *(us.m_dictionaryData))
         {
-            stream << it->first;
-            stream << it->second;
+            stream << kv.first;
+            stream << kv.second;
         }
         return stream;
     }

-    Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, Dictionary& us)
+    istream& operator>>(istream& stdStream, Dictionary& us)
     {
+        BinaryIStreamWrapper stream(stdStream);
         size_t version;
         stream >> version;
         size_t size;
@@ -206,113 +539,36 @@ namespace CNTK
         {
             wstring key;
             stream >> key;
-            DictionaryValue value;
-            stream >> value;
-            us.m_dictionaryData->insert(make_pair(key, value));
+            stream >> us[key];
         }
         return stream;
     }

+    // Returns the element whose key is greater than the required sample count
+    // or the last element if no such key exists.
     template <typename T>
-    vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr)
+    const T& TrainingParameterSchedule<T>::operator[](size_t sampleCount) const
     {
-        if (viewPtr->IsSparse())
-        {
-            LogicError("Sparse NDArrayView cannot be serialized into a vector.");
-        }
-
-        auto numElements = viewPtr->Shape().TotalSize();
-        vector<DictionaryValue> values(numElements);
-
-        NDArrayViewPtr cpuDataViewPtr = viewPtr;
-        if ((viewPtr->Device().Type() != DeviceKind::CPU))
-        {
-            cpuDataViewPtr = MakeSharedObject<NDArrayView>(viewPtr->GetDataType(), viewPtr->Shape(), DeviceDescriptor::CPUDevice());
-            cpuDataViewPtr->CopyFrom(*viewPtr);
-        }
-
-        const T* buffer = cpuDataViewPtr->DataBuffer<T>();
-        for (auto i = 0; i < numElements; ++i)
-        {
-            T v = buffer[i];
-            values[i] = DictionaryValue(v);
-        }
-
-        return values;
-    }
-
-    template <typename T>
-    void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const vector<DictionaryValue>& values)
-    {
-        if (viewPtr->IsSparse())
-        {
-            LogicError("Sparse NDArrayView cannot be deserialized from a vector.");
-        }
-
-        auto numElements = viewPtr->Shape().TotalSize();
-        if (values.size() != numElements)
-        {
-            LogicError("Number of elements (%lu) in the deserialized representation does not match the expected value (%lu)",
-                       values.size(), numElements);
-        }
-
-        NDArrayViewPtr cpuDataViewPtr = viewPtr;
-        if ((viewPtr->Device().Type() != DeviceKind::CPU))
-        {
-            cpuDataViewPtr = MakeSharedObject<NDArrayView>(viewPtr->GetDataType(), viewPtr->Shape(), DeviceDescriptor::CPUDevice());
-        }
-
-        T* buffer = cpuDataViewPtr->WritableDataBuffer<T>();
-        for (auto i = 0; i < numElements; ++i)
-        {
-            buffer[i] = values[i].GetValue<T>();
-        }
-
-        if ((viewPtr->Device().Type() != DeviceKind::CPU))
-        {
-            viewPtr->CopyFrom(*cpuDataViewPtr);
-        }
-    }
-
-    // TODO: we store the type info for every element in the vector, which is extremely redundant.
-    // Instead, it'd be nice to introduce some sort of DictionaryValueVector.
-    vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr)
-    {
-        switch (viewPtr->GetDataType())
-        {
-        case DataType::Float:
-            return SerializeToVector<float>(viewPtr);
-        case DataType::Double:
-            return SerializeToVector<double>(viewPtr);
-        default:
-            LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
-        }
-    }
-
-    void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const vector<DictionaryValue>& values)
-    {
-        switch (viewPtr->GetDataType())
-        {
-        case DataType::Float:
-            DeserializeFromVector<float>(viewPtr, values);
-            break;
-        case DataType::Double:
-            DeserializeFromVector<double>(viewPtr, values);
-            break;
-        default:
-            LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
-        }
-    }
+        assert(m_schedule.size() > 0);
+        auto it = m_schedule.upper_bound(sampleCount);
+        if (it == m_schedule.end())
+        {
+            --it;
+        }
+        return it->second;
+    }

     template void DictionaryValue::AllocateDataPtr<NDShape>(const NDShape& value);
     template void DictionaryValue::AllocateDataPtr<vector<DictionaryValue>>(const vector<DictionaryValue>& value);
     template void DictionaryValue::AllocateDataPtr<wstring>(const wstring& value);
     template void DictionaryValue::AllocateDataPtr<Dictionary>(const Dictionary& value);
+    template void DictionaryValue::AllocateDataPtr<NDArrayView>(const NDArrayView& value);

     template void DictionaryValue::FreePtrAsType<NDShape>();
     template void DictionaryValue::FreePtrAsType<vector<DictionaryValue>>();
     template void DictionaryValue::FreePtrAsType<wstring>();
     template void DictionaryValue::FreePtrAsType<Dictionary>();
+    template void DictionaryValue::FreePtrAsType<NDArrayView>();
+
+    template const double& TrainingParameterSchedule<double>::operator[](size_t key) const;
 }

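The two wrappers above reduce Dictionary serialization to length-prefixed unformatted reads and writes on standard streams. A simplified standalone round-trip sketch of the same idea (narrow strings instead of wstring; not the CNTK types):

    #include <cassert>
    #include <iostream>
    #include <sstream>
    #include <string>

    struct BinOut
    {
        explicit BinOut(std::ostream& s) : m_stream(s) {}
        BinOut& operator<<(size_t v) { m_stream.write(reinterpret_cast<const char*>(&v), sizeof(v)); return *this; }
        BinOut& operator<<(const std::string& s)
        {
            *this << s.length();                 // length prefix, then raw bytes
            m_stream.write(s.data(), s.length());
            return *this;
        }
        std::ostream& m_stream;
    };

    struct BinIn
    {
        explicit BinIn(std::istream& s) : m_stream(s) {}
        BinIn& operator>>(size_t& v) { m_stream.read(reinterpret_cast<char*>(&v), sizeof(v)); return *this; }
        BinIn& operator>>(std::string& s)
        {
            size_t length = 0;
            *this >> length;
            s.resize(length);
            m_stream.read(&s[0], length);
            return *this;
        }
        std::istream& m_stream;
    };

    int main()
    {
        std::stringstream buffer;
        BinOut out(buffer);
        out << std::string("sampleCount") << size_t(12345);

        BinIn in(buffer);
        std::string key;
        size_t value = 0;
        in >> key >> value;
        assert(key == "sampleCount" && value == 12345);
        std::cout << key << " = " << value << "\n";
        return 0;
    }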
View file

@@ -167,9 +167,6 @@ namespace CNTK
         return var.IsInput() && var.IsSparse();
     }

-    std::vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr);
-
-    void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const std::vector<DictionaryValue>& values);
-
     inline void AddIndentation(std::wstringstream& s, size_t numIndentationSpaces)
     {
@@ -250,7 +247,8 @@ namespace CNTK
         static_assert(std::is_same<T, bool>::value ||
                       std::is_same<T, size_t>::value ||
                       std::is_same<T, float>::value ||
-                      std::is_same<T, double>::value, "Unsupported ValueType");
+                      std::is_same<T, double>::value ||
+                      std::is_same<T, std::wstring>::value, "Unsupported ValueType");

         std::vector<DictionaryValue> dictionaryValueVector;
         for (auto value : basicElementTypeVector)
@@ -265,7 +263,8 @@ namespace CNTK
         static_assert(std::is_same<T, bool>::value ||
                       std::is_same<T, size_t>::value ||
                       std::is_same<T, float>::value ||
-                      std::is_same<T, double>::value, "Unsupported ValueType");
+                      std::is_same<T, double>::value ||
+                      std::is_same<T, std::wstring>::value, "Unsupported ValueType");

         std::vector<T> basicElementTypeVector;
         for (auto value : dictionaryValueVector)
@@ -313,4 +312,19 @@ namespace CNTK
         return{ paddedOutputMapCount, kernelShape };
     }

+    inline CNTK::Constant ScalarConstant(CNTK::DataType dataType, float value, const CNTK::DeviceDescriptor& device = CNTK::DeviceDescriptor::CPUDevice())
+    {
+        if (dataType == CNTK::DataType::Float)
+            return CNTK::Constant({}, value, device);
+        else if (dataType == CNTK::DataType::Double)
+            return CNTK::Constant({}, (double)value, device);
+        else
+            LogicError("CNTK::ScalarConstant: Unsupported DataType %s", DataTypeName(dataType));
+    }
+
+    inline double MomentumPerMB(double momentumPerSample, size_t minibatchSize)
+    {
+        return std::pow(momentumPerSample, minibatchSize);
+    }
 }

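For a feel of MomentumPerMB above: raising a per-sample momentum to the minibatch size shrinks it quickly, so a seemingly high per-sample value decays sharply per minibatch. A quick numeric check:

    #include <cmath>
    #include <cstdio>

    int main()
    {
        double momentumPerSample = 0.99;
        size_t minibatchSize = 256;
        // 0.99^256 is roughly 0.0763.
        std::printf("%f\n", std::pow(momentumPerSample, (double)minibatchSize));
        return 0;
    }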
View file

@@ -7,6 +7,8 @@
 namespace CNTK
 {
+    /*static*/ const std::vector<Axis> Variable::s_defaultInputVariableDynamicAxes = { Axis::DefaultDynamicAxis(), Axis::DefaultBatchAxis() };
+
     Variable::Variable(const FunctionPtr& function)
         : Variable(function->Output())
     {

View file

@@ -11,6 +11,13 @@
 #define __UNIX__
 #endif

+#ifdef _MSC_VER
+// TODO: thread_local is supported in VS2015. Remove this macro when we upgrade to VS2015.
+#define THREAD_LOCAL __declspec(thread)
+#else
+#define THREAD_LOCAL thread_local
+#endif
+
 // ===========================================================================
 // compiler differences
 // ===========================================================================

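A minimal usage sketch for the THREAD_LOCAL macro above (the macro is duplicated here so the snippet is self-contained): each thread observes its own copy of the annotated variable.

    #include <iostream>
    #include <thread>

    #ifdef _MSC_VER
    #define THREAD_LOCAL __declspec(thread)
    #else
    #define THREAD_LOCAL thread_local
    #endif

    THREAD_LOCAL int counter = 0;

    int main()
    {
        // Each thread increments its own counter, so both print 1.
        auto bump = [] { counter++; std::cout << counter << "\n"; };
        std::thread t1(bump), t2(bump);
        t1.join();
        t2.join();
        return 0;
    }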
View file

@@ -11,6 +11,7 @@
 #include <stdio.h>
 #include <vector>
 #include <algorithm>
+#include <random>

 namespace Microsoft { namespace MSR { namespace CNTK {
@@ -24,6 +25,31 @@ static inline size_t rand(const size_t begin, const size_t end)
     return begin + randno % (end - begin);
 }

+// Rand based on Mersenne Twister.
+// We use our own distribution in order to match baselines between different operating systems,
+// because uniform_distribution is not guaranteed to provide the same numbers on different platforms.
+// TODO: Switching to Boost would eliminate this problem.
+static inline size_t RandMT(const size_t begin, const size_t end, std::mt19937_64& rng)
+{
+    const size_t randomNumber = rng();
+    return begin + randomNumber % (end - begin);
+}
+
+// Shuffle based on Mersenne Twister.
+// We use our own distribution in order to match baselines between different operating systems,
+// instead of using std::shuffle which uses uniform_distribution internally.
+// TODO: Switching to Boost would eliminate this problem.
+template <typename TVector>
+inline void RandomShuffleMT(TVector& v, std::mt19937_64& rng)
+{
+    foreach_index(currentLocation, v)
+    {
+        // Pick a random location and swap with the current one.
+        const size_t randomLocation = RandMT(0, v.size(), rng);
+        std::swap(v[currentLocation], v[randomLocation]);
+    }
+}
+
 class RandomOrdering // note: NOT thread-safe at all
 {
     // constants for randomization

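A usage sketch for RandMT/RandomShuffleMT above, re-implemented standalone because foreach_index is a CNTK-local macro. Note that the swap-with-random-location loop matches the source and trades the exact uniformity of Fisher-Yates for cross-platform reproducibility:

    #include <cstdio>
    #include <random>
    #include <vector>

    static size_t RandMT(size_t begin, size_t end, std::mt19937_64& rng)
    {
        return begin + rng() % (end - begin);
    }

    int main()
    {
        std::mt19937_64 rng(42); // fixed seed: identical shuffle on every platform
        std::vector<int> v{ 0, 1, 2, 3, 4, 5, 6, 7 };
        for (size_t i = 0; i < v.size(); ++i)
            std::swap(v[i], v[RandMT(0, v.size(), rng)]);
        for (int x : v)
            std::printf("%d ", x);
        std::printf("\n");
        return 0;
    }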
View file

@@ -258,13 +258,20 @@ public:
         m_evalOrders[rootNode] = nodes;
     }

+    bool EvalOrderExists(const ComputationNodeBasePtr& rootNode) const
+    {
+        return m_evalOrders.find(rootNode) != m_evalOrders.end();
+    }
+
     // get depth-first traversal order
     // TODO: This is currently not immutable because it gets patched w.r.t. recurrent loops. Ideally we don't patch. Need to review and verify that it is sufficient.
     const std::list<ComputationNodeBasePtr>& GetEvalOrder(const ComputationNodeBasePtr& rootNode) const
     {
         auto iter = m_evalOrders.find(rootNode);
         if (iter == m_evalOrders.end())
+        {
             LogicError("GetEvalOrder: Called without prior call to FormEvalOrder() for %ls %ls operation", rootNode->NodeName().c_str(), rootNode->OperationName().c_str());
+        }
         return iter->second;
     }

View file

@@ -76,6 +76,9 @@ void ComputationNetwork::CopySubTree(const ComputationNetwork& fromNet,
     ComputationNodeBasePtr fromRoot = fromNet.GetNodeFromName(fromName);

+    if (!fromNet.EvalOrderExists(fromRoot))
+        const_cast<ComputationNetwork&>(fromNet).FormEvalOrder(fromRoot);
+
     for (const auto& fromNode : fromNet.GetEvalOrder(fromRoot)) // BUGBUG: This probably will fail because the precomputed eval orders are invalid at this point.
     {
         wstring fromNodeName = fromNode->NodeName();
@@ -353,6 +356,9 @@ void ComputationNetwork::SetLearnableNodesBelowLearningRateMultiplier(const floa
     else
     {
         // for calculating a specific node
+        if (!EvalOrderExists(rootNode))
+            const_cast<ComputationNetwork&>(*this).FormEvalOrder(rootNode);
+
         for (const auto& node : GetAllNodesForRoot(rootNode))
         {
             if (node->OperationName() == OperationNameOf(LearnableParameter))

View file

@@ -32,15 +32,16 @@
 #define CNTK_MODEL_VERSION_1 1
 #define CNTK_MODEL_VERSION_2 2
 #define CNTK_MODEL_VERSION_3 3
 #define CNTK_MODEL_VERSION_4 4  // PastValue
 #define CNTK_MODEL_VERSION_5 5  // ND convolution and pooling
 #define CNTK_MODEL_VERSION_6 6  // batch-norm blending
 #define CNTK_MODEL_VERSION_7 7  // ElemType tag in model file
 #define CNTK_MODEL_VERSION_8 8  // DynamicAxis for inputs
 #define CNTK_MODEL_VERSION_9 9  // transpose flag in ConvolutionNode to support deconvolution
 #define CNTK_MODEL_VERSION_10 10 // learning-rate multiplier for input nodes
-#define CNTK_MODEL_VERSION_11 11 // Times() m_inputRank to support parameter-rank inference
-#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_11
+#define CNTK_MODEL_VERSION_11 11 // dynamic axis name for where nodes
+#define CNTK_MODEL_VERSION_12 12 // Times() m_inputRank to support parameter-rank inference
+#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_12

 extern bool g_shareNodeValueMatrices;

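A hedged sketch of how a version bump like CNTK_MODEL_VERSION_12 typically gates reading of newly added fields on load (illustrative only, not the actual CNTK model-loading code): fields introduced in a newer version are read only when the file's recorded version is high enough.

    #include <cstdint>
    #include <iostream>
    #include <sstream>
    #include <string>

    const uint32_t MODEL_VERSION_11 = 11; // dynamic axis name for Where nodes
    const uint32_t MODEL_VERSION_12 = 12; // Times() input rank

    int main()
    {
        std::istringstream file("12 myAxis");
        uint32_t fileVersion;
        file >> fileVersion;

        std::string dynamicAxisName = "WhereNodeAxis"; // pre-v11 default
        if (fileVersion >= MODEL_VERSION_11)
            file >> dynamicAxisName;                   // only newer files carry the field

        std::cout << dynamicAxisName << "\n"; // myAxis
        return 0;
    }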
View file

@@ -365,6 +365,7 @@ public:
         TensorShape outputShape;
         // If 2D convolution syntax is used then some of the tensor dimensions need to be inferred.
         if (m_convolution2D)
+        // NOTE: when m_convolution2D is true, it's a legacy branch. Code should not enter here any more.
         {
             // Need to update some tensors with correct input dims.
             auto inDims = ImageDimensions(GetInputSampleLayout(inputIdx), m_imageLayout);
@@ -396,6 +397,8 @@ public:
                 outputShape = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
                                                                    m_sharing, m_autoPad, m_lowerPad, m_upperPad);
+            // ConvolveGeometry always uses CHW.
+            SetDims(ImageDimensions(outputShape, ImageLayoutKind::CHW).AsTensorShape(m_imageLayout), HasMBLayout());
         }
         else
         {
@@ -414,9 +417,12 @@ public:
                 outputShape = ConvolveGeometry::ComputeInputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
                                                                   m_sharing, m_autoPad, m_lowerPad, m_upperPad);
             }
+            if (m_imageLayout == ImageLayoutKind::CHW)
+                SetDims(outputShape, HasMBLayout());
+            else // legacy format
+                SetDims(ImageDimensions(outputShape, ImageLayoutKind::CHW).AsTensorShape(m_imageLayout), HasMBLayout());
         }

-        // ConvolveGeometry always uses CHW.
-        SetDims(ImageDimensions(outputShape, ImageLayoutKind::CHW).AsTensorShape(m_imageLayout), HasMBLayout());
-
         // update LearnableParameter if it has 0 dimensions (to be inferred)
         // Typically this would be the #inputChannels (C).

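For reference, the textbook output-size arithmetic that shape computations like ConvolveGeometry::ComputeOutputShape implement per spatial dimension (generic formula, not CNTK's exact code): out = (in + lowerPad + upperPad - kernel) / stride + 1.

    #include <cstddef>
    #include <cstdio>

    static size_t ConvOutDim(size_t in, size_t kernel, size_t stride, size_t lowerPad, size_t upperPad)
    {
        return (in + lowerPad + upperPad - kernel) / stride + 1;
    }

    int main()
    {
        // 28x28 input, 5x5 kernel, stride 1, no padding -> 24x24 feature map.
        std::printf("%zu\n", ConvOutDim(28, 5, 1, 0, 0));
        // Same input with stride 2 and symmetric padding of 2 -> 14x14.
        std::printf("%zu\n", ConvOutDim(28, 5, 2, 2, 2));
        return 0;
    }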
View file

@@ -450,9 +450,9 @@ public:
             assert(dimsA.size() == m_outputRank + numReductionDims);
             while (numReductionDims < inputRank)
             {
                 dimsA.push_back(0);
                 numReductionDims++;
             }
         }

         // fill in the missing ones
@@ -561,8 +561,8 @@ class TransposeTimesNode : public TimesNodeBase<ElemType, true>
 public:
     DeclareConstructorFromConfigWithNumInputs(TransposeTimesNode);
-    TransposeTimesNode(DEVICEID_TYPE deviceId, const wstring& name)
-        : Base(deviceId, name, /*outputRank=*/1, /*inputRank=*/1)
+    TransposeTimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1)
+        : Base(deviceId, name, outputRank, /*inputRank=*/1)
     {
     }
 };
@@ -665,6 +665,9 @@ public:
             m_axis1 = 1, m_axis2 = 2; // default
     }

+    int Axis1() const { return m_axis1; }
+    int Axis2() const { return m_axis2; }
+
 private:
     // compute the transposed tensor shape (in-place)
     void TransposeShape(TensorShape& shape) const

View file

@@ -300,7 +300,7 @@ template <class ElemType>
     if (!m_pMBLayout)
     {
         m_pMBLayout = make_shared<MBLayout>(); // this generates a new layout
-        m_pMBLayout->SetUniqueAxisName(L"WhereNodeAxis");
+        m_pMBLayout->SetUniqueAxisName(m_dynamicAxisName);
     }
     // we map scalars to scalars
     if (isFinalValidationPass && Input(0)->GetSampleLayout().GetNumElements() != 1)

View file

@@ -217,6 +217,9 @@ public:
     virtual bool /*ComputationNodeBase::*/ InputUsedInComputingInputNodesGradients(size_t childIndex) const override;
     virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override;

+    std::wstring ReductionOpName() const { return m_operation; }
+    int ReductionAxis() const { return m_axis; }
+
 private:
     // operation attributes
     int m_axis;
@@ -341,11 +344,12 @@ public:
         fstream << m_axis;
     }

-private:
     // these implement numpy-style negative bound values to index from the end
     size_t BeginIndex() const { return m_beginIndex >= 0 ? (size_t)m_beginIndex : (size_t)(m_beginIndex + Input(0)->GetSampleLayout()[m_axis - 1]); }
     size_t EndIndex() const { return m_endIndex > 0 ? (size_t)m_endIndex : (size_t)(m_endIndex + Input(0)->GetSampleLayout()[m_axis - 1]); }
+    int Axis() const { return m_axis; }
+
+private:
     // determine the tensor shape that represents slice of the input that we are taking
     TensorShape GetInputSlice(size_t rank, const FrameRange & fr) const
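Editor's note: BeginIndex/EndIndex fold numpy-style negative bounds into absolute positions along the slice axis, with end <= 0 counting from the back. The same arithmetic as a standalone sketch, where dim stands for Input(0)->GetSampleLayout()[m_axis - 1]:

    // For dim = 10:  begin = -3 -> 7;  end = 0 -> 10;  end = -2 -> 8
    size_t ResolveBegin(int begin, size_t dim) { return begin >= 0 ? (size_t)begin : (size_t)(begin + (int)dim); }
    size_t ResolveEnd(int end, size_t dim)     { return end   >  0 ? (size_t)end   : (size_t)(end   + (int)dim); }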
@@ -655,10 +659,11 @@ class WhereNode : public ComputationNodeNonLooping<ElemType>, public NumInputs<1
     typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembersBoilerplate;
     static const std::wstring TypeName() { return L"Where"; }
+    static const std::wstring DefaultWhereNodeDynamicAxisName() { return L"WhereNodeAxis"; }
 public:
     DeclareConstructorFromConfigWithNumInputs(WhereNode);
-    WhereNode(DEVICEID_TYPE deviceId, const wstring& name) :
-        Base(deviceId, name)
+    WhereNode(DEVICEID_TYPE deviceId, const wstring& name, const wstring& dynamicAxisName = DefaultWhereNodeDynamicAxisName()) :
+        Base(deviceId, name), m_dynamicAxisName(dynamicAxisName)
     {
         MarkValueNonSharable();
     }
@@ -669,11 +674,29 @@ public:
     virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }
     virtual void Validate(bool isFinalValidationPass) override;

+    virtual void Load(File& fstream, size_t modelVersion) override
+    {
+        Base::Load(fstream, modelVersion);
+        if (modelVersion >= CNTK_MODEL_VERSION_11)
+            fstream >> m_dynamicAxisName;
+        else
+            m_dynamicAxisName = DefaultWhereNodeDynamicAxisName();
+    }
+
+    virtual void Save(File& fstream) const override
+    {
+        Base::Save(fstream);
+        fstream << m_dynamicAxisName;
+    }
+
+    std::wstring DynamicAxisName() const { return m_dynamicAxisName; }
+
 private:
     // buffers for creating the result sequences (kept as object state to avoid memory allocations)
     std::vector<std::vector<size_t>> m_indexSequenceBuffer; // [sequenceIndex][t] for creating the result sequences
     std::vector<size_t> m_rowAllocationsBuffer; // [row] for determining new MBLayout packing
     std::vector<std::pair<size_t, size_t>> m_placementBuffer; // [sequenceIndex] assigned location for a sequence
+    std::wstring m_dynamicAxisName;
 };
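Editor's note: making the axis name a constructor argument lets callers place several Where nodes on one shared dynamic axis, while the default argument preserves the old per-node behavior. A hedged usage sketch (node names illustrative):

    auto whereA = make_shared<WhereNode<float>>(deviceId, L"whereA", L"myAxis");
    auto whereB = make_shared<WhereNode<float>>(deviceId, L"whereB", L"myAxis");
    // whereA->DynamicAxisName() == whereB->DynamicAxisName(), so validation can treat
    // their outputs as living on the same dynamic axis; omitting the argument falls
    // back to L"WhereNodeAxis" as before.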
 // -----------------------------------------------------------------------

View file

@@ -37,14 +37,8 @@
 #pragma warning(disable : 4244) // unreachable code; triggered for unknown reasons
 #pragma warning(disable : 4702) // conversion from 'double' to 'float'

-#ifdef USE_ACML
-// Download ACML 5.3.1 (e.g., acml5.3.1-ifort64.exe) or above
-// from http://developer.amd.com/tools/cpu-development/amd-core-math-library-acml/acml-downloads-resources/
-// Install the ifort64_mp variant (compiled with intel compiler) of the library
-// Set Environment variable ACML_PATH to C:\AMD\acml5.3.1\ifort64_mp or the folder you installed acml
-// to point to your folder for the include file and link library
-#include <acml.h> // requires ACML 5.3.1 and above
-#elif defined(USE_MKL)
+#ifdef USE_MKL
 // requires MKL 10.0 and above
 #include <mkl.h>
 #else
@@ -57,12 +51,6 @@
 #include <lapacke.h>
 #endif

-#ifdef USE_ACML // MKL has one additional parameter for different matrix order
-#define BLAS_COLMAJOR
-#else
-#define BLAS_COLMAJOR (int) MatrixOrder::ColMajor,
-#endif
-
 #define SWAP(a, b) \
     {              \
         (a) ^= (b); \
@@ -912,11 +900,7 @@ void CPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, E
 #pragma omp parallel for
             foreach_column (j, us)
             {
-#ifdef USE_ACML
-                dcopy((int) numRows, reinterpret_cast<double*>(pArray + j), (int) numCols, reinterpret_cast<double*>(bufPtr + LocateColumn(j)), 1);
-#else
                 cblas_dcopy((int) numRows, reinterpret_cast<double*>(pArray + j), (int) numCols, reinterpret_cast<double*>(bufPtr + LocateColumn(j)), 1);
-#endif
             }
         }
         else
@@ -926,11 +910,7 @@ void CPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, E
             {
                 {
 #pragma warning(suppress : 4244)
-#ifdef USE_ACML
-                    scopy((int) numRows, reinterpret_cast<float*>(pArray + j), (int) numCols, reinterpret_cast<float*>(bufPtr + LocateColumn(j)), 1);
-#else
                     cblas_scopy((int) numRows, reinterpret_cast<float*>(pArray + j), (int) numCols, reinterpret_cast<float*>(bufPtr + LocateColumn(j)), 1);
-#endif
                 }
             }
         }
@@ -2844,20 +2824,12 @@ ElemType CPUMatrix<ElemType>::SumOfAbsElements() const
     if (sizeof(ElemType) == sizeof(double))
     {
-#ifdef USE_ACML
-        return (ElemType) dasum((int) GetNumElements(), reinterpret_cast<double*>(Data()), 1);
-#else
         return (ElemType) cblas_dasum((int) GetNumElements(), reinterpret_cast<double*>(Data()), 1);
-#endif
     }
     else
     {
 #pragma warning(suppress : 4244)
-#ifdef USE_ACML
-        return sasum((int) GetNumElements(), reinterpret_cast<float*>(Data()), 1);
-#else
         return cblas_sasum((int) GetNumElements(), reinterpret_cast<float*>(Data()), 1);
-#endif
     }
 }
@@ -3028,11 +3000,7 @@ void CPUMatrix<ElemType>::VectorNorm2(CPUMatrix<ElemType>& c, const bool isColWi
 #pragma omp parallel for
         foreach_column (j, c)
         {
-#ifdef USE_ACML
-            c(0, j) = (ElemType) dnrm2(m, reinterpret_cast<double*>(bufPtr + us.LocateColumn(j)), 1);
-#else
             c(0, j) = (ElemType) cblas_dnrm2(m, reinterpret_cast<double*>(bufPtr + us.LocateColumn(j)), 1);
-#endif
         }
     }
     else
@@ -3041,11 +3009,7 @@ void CPUMatrix<ElemType>::VectorNorm2(CPUMatrix<ElemType>& c, const bool isColWi
         foreach_column (j, c)
         {
 #pragma warning(suppress : 4244)
-#ifdef USE_ACML
-            c(0, j) = snrm2(m, reinterpret_cast<float*>(bufPtr + us.LocateColumn(j)), 1);
-#else
             c(0, j) = cblas_snrm2(m, reinterpret_cast<float*>(bufPtr + us.LocateColumn(j)), 1);
-#endif
         }
     }
 }
@@ -3058,11 +3022,7 @@ void CPUMatrix<ElemType>::VectorNorm2(CPUMatrix<ElemType>& c, const bool isColWi
 #pragma omp parallel for
         foreach_row (i, c)
         {
-#ifdef USE_ACML
-            c(i, 0) = dnrm2(n, reinterpret_cast<double*>(bufPtr + i), m);
-#else
             c(i, 0) = cblas_dnrm2(n, reinterpret_cast<double*>(bufPtr + i), m);
-#endif
         }
     }
     else
@@ -3071,11 +3031,7 @@ void CPUMatrix<ElemType>::VectorNorm2(CPUMatrix<ElemType>& c, const bool isColWi
         foreach_row (i, c)
         {
 #pragma warning(suppress : 4244)
-#ifdef USE_ACML
-            c(i, 0) = snrm2(n, reinterpret_cast<float*>(bufPtr + i), m);
-#else
             c(i, 0) = cblas_snrm2(n, reinterpret_cast<float*>(bufPtr + i), m);
-#endif
         }
     }
 }
@@ -4486,34 +4442,22 @@ void CPUMatrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix
     int m, n, k, l;
     int lda, ldb, ldc;
-#ifdef USE_ACML
-    char transA, transB;
-#else
     CBLAS_TRANSPOSE mklTransA;
     CBLAS_TRANSPOSE mklTransB;
-#endif

     if (transposeA)
     {
         m = (int) a.GetNumCols();
         k = (int) a.GetNumRows();
         lda = k;
-#ifdef USE_ACML
-        transA = (char) MatrixTranspose::Trans;
-#else
         mklTransA = CBLAS_TRANSPOSE::CblasTrans;
-#endif
     }
     else
     {
         m = (int) a.GetNumRows();
         k = (int) a.GetNumCols();
         lda = m;
-#ifdef USE_ACML
-        transA = (char) MatrixTranspose::NoTrans;
-#else
         mklTransA = CBLAS_TRANSPOSE::CblasNoTrans;
-#endif
     }

     if (transposeB)
@@ -4521,22 +4465,14 @@ void CPUMatrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix
         l = (int) b.GetNumCols();
         n = (int) b.GetNumRows();
         ldb = n;
-#ifdef USE_ACML
-        transB = (char) MatrixTranspose::Trans;
-#else
         mklTransB = CBLAS_TRANSPOSE::CblasTrans;
-#endif
     }
     else
     {
         l = (int) b.GetNumRows();
         n = (int) b.GetNumCols();
         ldb = l;
-#ifdef USE_ACML
-        transB = (char) MatrixTranspose::NoTrans;
-#else
         mklTransB = CBLAS_TRANSPOSE::CblasNoTrans;
-#endif
     }

     assert(m > 0 && k > 0 && l > 0 && n > 0); // converting from size_t to int may cause overflow
@@ -4553,20 +4489,12 @@ void CPUMatrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix
     if (sizeof(ElemType) == sizeof(double))
     {
-#ifdef USE_ACML
-        dgemm(transA, transB, m, n, k, alpha, reinterpret_cast<double*>(a.Data()), lda, reinterpret_cast<double*>(b.Data()), ldb, beta, reinterpret_cast<double*>(c.Data()), ldc);
-#else
-        cblas_dgemm((CBLAS_ORDER) BLAS_COLMAJOR mklTransA, mklTransB, m, n, k, alpha, reinterpret_cast<double*>(a.Data()), lda, reinterpret_cast<double*>(b.Data()), ldb, beta, reinterpret_cast<double*>(c.Data()), ldc);
-#endif
+        cblas_dgemm((CBLAS_ORDER) (int)MatrixOrder::ColMajor, mklTransA, mklTransB, m, n, k, alpha, reinterpret_cast<double*>(a.Data()), lda, reinterpret_cast<double*>(b.Data()), ldb, beta, reinterpret_cast<double*>(c.Data()), ldc);
     }
     else
     {
 #pragma warning(suppress : 4244)
-#ifdef USE_ACML
-        sgemm(BLAS_COLMAJOR transA, transB, m, n, k, alpha, reinterpret_cast<float*>(a.Data()), lda, reinterpret_cast<float*>(b.Data()), ldb, beta, reinterpret_cast<float*>(c.Data()), ldc);
-#else
-        cblas_sgemm((CBLAS_ORDER) BLAS_COLMAJOR mklTransA, mklTransB, m, n, k, alpha, reinterpret_cast<float*>(a.Data()), lda, reinterpret_cast<float*>(b.Data()), ldb, beta, reinterpret_cast<float*>(c.Data()), ldc);
-#endif
+        cblas_sgemm((CBLAS_ORDER) (int)MatrixOrder::ColMajor, mklTransA, mklTransB, m, n, k, alpha, reinterpret_cast<float*>(a.Data()), lda, reinterpret_cast<float*>(b.Data()), ldb, beta, reinterpret_cast<float*>(c.Data()), ldc);
     }
 }
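Editor's note: with the ACML char-style transpose flags gone, every GEMM goes through the CBLAS interface, whose first argument fixes the storage order explicitly. A self-contained illustration of the call shape (MKL header assumed; OpenBLAS's <cblas.h> exposes the identical signature):

    #include <mkl.h>

    int main()
    {
        // Column-major 2x2 product: C = 1.0 * A * B + 0.0 * C
        double A[] = {1, 3, 2, 4}; // [[1 2],[3 4]] stored column-major
        double B[] = {5, 7, 6, 8}; // [[5 6],[7 8]]
        double C[4] = {0};
        cblas_dgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
                    2, 2, 2, 1.0, A, 2, B, 2, 0.0, C, 2);
        // C now holds [[19 22],[43 50]] in column-major order
        return 0;
    }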
@@ -4611,9 +4539,7 @@ void CPUMatrix<ElemType>::SVD(const CPUMatrix<ElemType>& A, CPUMatrix<ElemType>&
     if (sizeof(ElemType) == sizeof(double))
     {
-#ifdef USE_ACML
-        dgesvd('A', 'A', (int) m, (int) n, reinterpret_cast<double*>(A.Data()), (int) lda, reinterpret_cast<double*>(SIGMA.Data()), reinterpret_cast<double*>(U.Data()), (int) ldu, reinterpret_cast<double*>(VT.Data()), (int) ldvt, &info);
-#elif defined(USE_MKL)
+#ifdef USE_MKL
         double wkopt;
         int lwork = -1;
         dgesvd("All", "All", &m, &n, reinterpret_cast<double*>(A.Data()), &lda, reinterpret_cast<double*>(SIGMA.Data()), reinterpret_cast<double*>(U.Data()), &ldu, reinterpret_cast<double*>(VT.Data()), &ldvt, &wkopt, &lwork, &info);
@@ -4622,16 +4548,13 @@ void CPUMatrix<ElemType>::SVD(const CPUMatrix<ElemType>& A, CPUMatrix<ElemType>&
         dgesvd("All", "All", &m, &n, reinterpret_cast<double*>(A.Data()), &lda, reinterpret_cast<double*>(SIGMA.Data()), reinterpret_cast<double*>(U.Data()), &ldu, reinterpret_cast<double*>(VT.Data()), &ldvt, reinterpret_cast<double*>(W.Data()), &lwork, &info);
 #else
         std::vector<double> superb(std::max(std::min(m, n) - 1, 1));
-        info = LAPACKE_dgesvd(BLAS_COLMAJOR 'A', 'A', (int) m, (int) n, reinterpret_cast<double*>(A.Data()), (int) lda, reinterpret_cast<double*>(SIGMA.Data()),
+        info = LAPACKE_dgesvd((int) MatrixOrder::ColMajor, 'A', 'A', (int) m, (int) n, reinterpret_cast<double*>(A.Data()), (int) lda, reinterpret_cast<double*>(SIGMA.Data()),
                               reinterpret_cast<double*>(U.Data()), (int) ldu, reinterpret_cast<double*>(VT.Data()), (int) ldvt, &superb[0]);
 #endif
     }
     else
     {
-#ifdef USE_ACML
-#pragma warning(suppress : 4244)
-        sgesvd('A', 'A', (int) m, (int) n, reinterpret_cast<float*>(A.Data()), (int) lda, reinterpret_cast<float*>(SIGMA.Data()), reinterpret_cast<float*>(U.Data()), (int) ldu, reinterpret_cast<float*>(VT.Data()), (int) ldvt, &info);
-#elif defined(USE_MKL)
+#ifdef USE_MKL
         float wkopt;
         int lwork = -1;
         sgesvd("All", "All", &m, &n, reinterpret_cast<float*>(A.Data()), &lda, reinterpret_cast<float*>(SIGMA.Data()), reinterpret_cast<float*>(U.Data()), &ldu, reinterpret_cast<float*>(VT.Data()), &ldvt, &wkopt, &lwork, &info);
@@ -4640,7 +4563,7 @@ void CPUMatrix<ElemType>::SVD(const CPUMatrix<ElemType>& A, CPUMatrix<ElemType>&
         sgesvd("All", "All", &m, &n, reinterpret_cast<float*>(A.Data()), &lda, reinterpret_cast<float*>(SIGMA.Data()), reinterpret_cast<float*>(U.Data()), &ldu, reinterpret_cast<float*>(VT.Data()), &ldvt, reinterpret_cast<float*>(W.Data()), &lwork, &info);
 #else
         std::vector<float> superb(std::max(std::min(m, n) - 1, 1));
-        info = LAPACKE_sgesvd(BLAS_COLMAJOR 'A', 'A', (int) m, (int) n, reinterpret_cast<float*>(A.Data()), (int) lda, reinterpret_cast<float*>(SIGMA.Data()),
+        info = LAPACKE_sgesvd((int) MatrixOrder::ColMajor, 'A', 'A', (int) m, (int) n, reinterpret_cast<float*>(A.Data()), (int) lda, reinterpret_cast<float*>(SIGMA.Data()),
                               reinterpret_cast<float*>(U.Data()), (int) ldu, reinterpret_cast<float*>(VT.Data()), (int) ldvt, &superb[0]);
 #endif
     }
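Editor's note: the MKL branch above uses LAPACK's two-call convention: pass lwork = -1 so the routine only reports its optimal workspace size, allocate that much, then call again for the real factorization. The protocol in isolation (Fortran-style dgesvd from MKL, as in the diff; array arguments are assumed to be sized by the caller):

    #include <mkl.h>
    #include <vector>

    void SvdSketch(double* a, int m, int n, double* s, double* u, double* vt)
    {
        int lda = m, ldu = m, ldvt = n, info = 0;
        double wkopt;
        int lwork = -1; // -1 = "only report the optimal workspace size in wkopt"
        dgesvd("All", "All", &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, &wkopt, &lwork, &info);
        lwork = (int) wkopt;
        std::vector<double> work(lwork);
        dgesvd("All", "All", &m, &n, a, &lda, s, u, &ldu, vt, &ldvt, work.data(), &lwork, &info); // real run
    }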
@@ -4837,20 +4760,12 @@ void CPUMatrix<ElemType>::ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>&
         if (sizeof(ElemType) == sizeof(double))
         {
-#ifdef USE_ACML
-            daxpy(len, alpha, reinterpret_cast<double*>(a.Data()), incx, reinterpret_cast<double*>(c.Data()), incy);
-#else
             cblas_daxpy(len, alpha, reinterpret_cast<double*>(a.Data()), incx, reinterpret_cast<double*>(c.Data()), incy);
-#endif
         }
         else
         {
 #pragma warning(suppress : 4244)
-#ifdef USE_ACML
-            saxpy(len, alpha, reinterpret_cast<float*>(a.Data()), incx, reinterpret_cast<float*>(c.Data()), incy);
-#else
             cblas_saxpy(len, alpha, reinterpret_cast<float*>(a.Data()), incx, reinterpret_cast<float*>(c.Data()), incy);
-#endif
         }
     }
     else if (a.GetNumElements() == 1) // scalar, add to all elements
@@ -4889,11 +4804,7 @@ void CPUMatrix<ElemType>::ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>&
 #pragma omp parallel for
             foreach_column (j, c)
             {
-#ifdef USE_ACML
-                daxpy(m, alpha, reinterpret_cast<double*>(aBufPtr), 1, reinterpret_cast<double*>(cBufPtr + c.LocateColumn(j)), 1);
-#else
                 cblas_daxpy(m, alpha, reinterpret_cast<double*>(aBufPtr), 1, reinterpret_cast<double*>(cBufPtr + c.LocateColumn(j)), 1);
-#endif
             }
         }
         else
@@ -4902,11 +4813,7 @@ void CPUMatrix<ElemType>::ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>&
             foreach_column (j, c)
             {
 #pragma warning(suppress : 4244)
-#ifdef USE_ACML
-                saxpy(m, alpha, reinterpret_cast<float*>(aBufPtr), 1, reinterpret_cast<float*>(cBufPtr + c.LocateColumn(j)), 1);
-#else
                 cblas_saxpy(m, alpha, reinterpret_cast<float*>(aBufPtr), 1, reinterpret_cast<float*>(cBufPtr + c.LocateColumn(j)), 1);
-#endif
             }
         }
     }
@@ -4925,11 +4832,7 @@ void CPUMatrix<ElemType>::ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>&
 #pragma omp parallel for
             foreach_row (i, c)
            {
-#ifdef USE_ACML
-                daxpy(n, alpha, reinterpret_cast<double*>(aBufPtr), 1, reinterpret_cast<double*>(cBufPtr + i), m);
-#else
                 cblas_daxpy(n, alpha, reinterpret_cast<double*>(aBufPtr), 1, reinterpret_cast<double*>(cBufPtr + i), m);
-#endif
             }
         }
         else
@@ -4938,11 +4841,7 @@ void CPUMatrix<ElemType>::ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>&
             foreach_row (i, c)
             {
 #pragma warning(suppress : 4244)
-#ifdef USE_ACML
-                saxpy(n, alpha, reinterpret_cast<float*>(aBufPtr), 1, reinterpret_cast<float*>(cBufPtr + i), m);
-#else
                 cblas_saxpy(n, alpha, reinterpret_cast<float*>(aBufPtr), 1, reinterpret_cast<float*>(cBufPtr + i), m);
-#endif
             }
         }
     }
@@ -5163,20 +5062,12 @@ template <class ElemType>
     }
     else if (sizeof(ElemType) == sizeof(double))
     {
-#ifdef USE_ACML
-        dscal(len, alpha, reinterpret_cast<double*>(a.Data()), incx); // TODO: Use overloads.
-#else
         cblas_dscal(len, alpha, reinterpret_cast<double*>(a.Data()), incx);
-#endif
     }
     else
     {
 #pragma warning(suppress : 4244)
-#ifdef USE_ACML
-        sscal(len, alpha, reinterpret_cast<float*>(a.Data()), incx);
-#else
         cblas_sscal(len, alpha, reinterpret_cast<float*>(a.Data()), incx);
-#endif
     }
 }
@@ -5224,11 +5115,7 @@ void CPUMatrix<ElemType>::InnerProduct(const CPUMatrix<ElemType>& a, const CPUMa
 #pragma omp parallel for
         foreach_column (j, c)
         {
-#ifdef USE_ACML
-            c(0, j) = (ElemType) ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn(j)), 1);
-#else
             c(0, j) = (ElemType) cblas_ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn(j)), 1);
-#endif
         }
     }
     else
@@ -5237,11 +5124,7 @@ void CPUMatrix<ElemType>::InnerProduct(const CPUMatrix<ElemType>& a, const CPUMa
         foreach_column (j, c)
         {
 #pragma warning(suppress : 4244)
-#ifdef USE_ACML
-            c(0, j) = (ElemType) sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn(j)), 1);
-#else
             c(0, j) = (ElemType) cblas_sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn(j)), 1);
-#endif
         }
     }
 }
@@ -5256,11 +5139,7 @@ void CPUMatrix<ElemType>::InnerProduct(const CPUMatrix<ElemType>& a, const CPUMa
 #pragma omp parallel for
         foreach_row (i, c)
         {
-#ifdef USE_ACML
-            c(i, 0) = ddot(n, reinterpret_cast<double*>(aBufPtr + i), m, reinterpret_cast<double*>(bBufPtr + i), m);
-#else
             c(i, 0) = cblas_ddot(n, reinterpret_cast<double*>(aBufPtr + i), m, reinterpret_cast<double*>(bBufPtr + i), m);
-#endif
         }
     }
     else
@@ -5269,11 +5148,7 @@ void CPUMatrix<ElemType>::InnerProduct(const CPUMatrix<ElemType>& a, const CPUMa
         foreach_row (i, c)
         {
 #pragma warning(suppress : 4244)
-#ifdef USE_ACML
-            c(i, 0) = sdot(n, reinterpret_cast<float*>(aBufPtr + i), m, reinterpret_cast<float*>(bBufPtr + i), m);
-#else
             c(i, 0) = cblas_sdot(n, reinterpret_cast<float*>(aBufPtr + i), m, reinterpret_cast<float*>(bBufPtr + i), m);
-#endif
         }
     }
 }
@@ -5298,20 +5173,12 @@ ElemType CPUMatrix<ElemType>::InnerProductOfMatrices(const CPUMatrix<ElemType>&
     if (sizeof(ElemType) == sizeof(double))
     {
-#ifdef USE_ACML
-        return (ElemType) ddot((int) a.GetNumElements(), reinterpret_cast<double*>(a.Data()), 1, reinterpret_cast<double*>(b.Data()), 1);
-#else
         return (ElemType) cblas_ddot((int) a.GetNumElements(), reinterpret_cast<double*>(a.Data()), 1, reinterpret_cast<double*>(b.Data()), 1);
-#endif
     }
     else
     {
 #pragma warning(suppress : 4244)
-#ifdef USE_ACML
-        return (ElemType) sdot((int) a.GetNumElements(), reinterpret_cast<float*>(a.Data()), 1, reinterpret_cast<float*>(b.Data()), 1);
-#else
         return (ElemType) cblas_sdot((int) a.GetNumElements(), reinterpret_cast<float*>(a.Data()), 1, reinterpret_cast<float*>(b.Data()), 1);
-#endif
     }
 }
@@ -5539,21 +5406,13 @@ void CPUMatrix<ElemType>::InnerProductWithShiftNeg(const CPUMatrix<ElemType>& a,
     {
         for (long j = 0; j < n; j++)
         {
-#ifdef USE_ACML
-            c(0, j) = (ElemType) ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn(j)), 1);
-#else
             c(0, j) = (ElemType) cblas_ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn(j)), 1);
-#endif
         }
         for (long j = 0; j < n; j++)
         {
             for (long i = 1; i < negnumber + 1; i++)
             {
-#ifdef USE_ACML
-                c(i, j) = (ElemType) ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn((j + shift + i - 1) % n)), 1);
-#else
                 c(i, j) = (ElemType) cblas_ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn((j + shift + i - 1) % n)), 1);
-#endif
             }
         }
     }
@@ -5561,21 +5420,13 @@ void CPUMatrix<ElemType>::InnerProductWithShiftNeg(const CPUMatrix<ElemType>& a,
     {
         for (long j = 0; j < n; j++)
         {
-#ifdef USE_ACML
-            c(0, j) = (ElemType) sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn(j)), 1);
-#else
             c(0, j) = (ElemType) cblas_sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn(j)), 1);
-#endif
         }
         for (long j = 0; j < n; j++)
         {
             for (long i = 1; i < negnumber + 1; i++)
             {
-#ifdef USE_ACML
-                c(i, j) = (ElemType) sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn((j + shift + i - 1) % n)), 1);
-#else
                 c(i, j) = (ElemType) cblas_sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn((j + shift + i - 1) % n)), 1);
-#endif
             }
         }
     }
@@ -5593,11 +5444,7 @@ void CPUMatrix<ElemType>::InnerProductWithShiftNeg(const CPUMatrix<ElemType>& a,
 #pragma omp parallel for
         foreach_row (i, c)
         {
-#ifdef USE_ACML
-            c(i, 0) = (ElemType) ddot(n, reinterpret_cast<double*>(aBufPtr + i), m, reinterpret_cast<double*>(bBufPtr + i), m);
-#else
             c(i, 0) = (ElemType) cblas_ddot(n, reinterpret_cast<double*>(aBufPtr + i), m, reinterpret_cast<double*>(bBufPtr + i), m);
-#endif
         }
     }
     else
@@ -5606,11 +5453,7 @@ void CPUMatrix<ElemType>::InnerProductWithShiftNeg(const CPUMatrix<ElemType>& a,
         foreach_row (i, c)
         {
 #pragma warning(suppress : 4244)
-#ifdef USE_ACML
-            c(i, 0) = sdot(n, reinterpret_cast<float*>(aBufPtr + i), m, reinterpret_cast<float*>(bBufPtr + i), m);
-#else
             c(i, 0) = cblas_sdot(n, reinterpret_cast<float*>(aBufPtr + i), m, reinterpret_cast<float*>(bBufPtr + i), m);
-#endif
         }
     }
 }
@@ -6025,13 +5868,11 @@ int CPUMatrix<ElemType>::SetNumThreads(int numThreads)
     omp_set_num_threads(numThreads);
     numThreads = omp_get_max_threads();

-#ifdef USE_ACML
-    acmlsetnumthreads(numThreads);
-#elif defined(USE_MKL)
+#ifdef USE_MKL
     mkl_set_num_threads(numThreads);
 #elif defined(USE_OPENBLAS)
     openblas_set_num_threads(numThreads);
 #endif
 #endif
     return numThreads;
 }
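Editor's note: after the cleanup, SetNumThreads only fans the request out to OpenMP plus whichever BLAS the build selected. A hedged usage sketch (assuming the usual static declaration on CPUMatrix):

    int granted = Microsoft::MSR::CNTK::CPUMatrix<float>::SetNumThreads(4);
    fprintf(stderr, "CPU math using %d threads\n", granted);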

View file

@@ -23,15 +23,7 @@
 #pragma warning(disable : 4127) // conditional expression is constant; "if (sizeof(ElemType)==sizeof(float))" triggers this

-#ifdef USE_ACML
-// use ACML as default.
-// Download ACML 5.3.0 (e.g., acml5.3.0-ifort64.exe) or above
-// from http://developer.amd.com/tools/cpu-development/amd-core-math-library-acml/acml-downloads-resources/
-// Install the ifort64 variant (compiled with intel compiler) of the library
-// Set Environment variable ACML_PATH to C:\AMD\acml5.3.0\ifort64_mp or the folder you installed acml
-// to point to your folder for the include file and link library
-#include <acml.h> // requires ACML 5.3.0 and above
-#elif defined(USE_MKL)
+#ifdef USE_MKL
 // requires MKL 10.0 and above
 #include <mkl.h>
 #else
@@ -53,12 +45,6 @@
 // return 42;
 //}

-#ifdef USE_ACML // MKL has one additional parameter for different matrix order
-#define BLAS_COLMAJOR
-#else
-#define BLAS_COLMAJOR (int) MatrixOrder::ColMajor,
-#endif
-
 // TODO: Move to CommonMatrix.h
 #define IDX2C(i, j, ld) (((j) * (ld)) + (i)) // 0 based indexing
@@ -261,11 +247,23 @@ void CPUSparseMatrix<ElemType>::SetValue(const CPUSparseMatrix<ElemType>& v)
     RequireSizeAndAllocate(v.GetNumRows(), v.GetNumCols(), v.NzSize());
     let nz = v.NzCount();

+    auto matrixFormat = v.GetFormat();
+    if (((matrixFormat == matrixFormatSparseBlockCol) || (matrixFormat == matrixFormatSparseBlockRow)) && (v.GetBlockIdShift() > 0))
+        NOT_IMPLEMENTED;
+
     if (nz > 0)
     {
         memcpy(NzValues(), v.NzValues(), v.NzSize());
-        memcpy(RowLocation(), v.RowLocation(), v.RowSize());
-        memcpy(ColLocation(), v.ColLocation(), v.ColSize());
+
+        if ((matrixFormat == matrixFormatSparseCSC) || (matrixFormat == matrixFormatSparseCSR))
+        {
+            memcpy(RowLocation(), v.RowLocation(), v.RowSize());
+            memcpy(ColLocation(), v.ColLocation(), v.ColSize());
+        }
+        else
+        {
+            memcpy(GetBlockIds(), v.GetBlockIds(), v.GetBlockSize());
+        }
     }
     if (v.m_sliceViewOffset > 0)
     {
@@ -384,6 +382,66 @@ CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::DoGatherColumnsOf(ElemType
     return *this;
 }

+// *this[:,idx[j]] = a[:,j] * alpha + *this[:,idx[j]] * beta
+template <class ElemType>
+CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::DoScatterColumnsOf(ElemType beta, const CPUMatrix<ElemType>& idx, const CPUSparseMatrix<ElemType>& a, ElemType alpha)
+{
+    VerifyWritable(__func__);
+
+    if ((a.GetFormat() != matrixFormatSparseCSC) || (GetFormat() != matrixFormatSparseCSC))
+        NOT_IMPLEMENTED;
+
+    if (idx.GetNumRows() != 1) // index is 1-dimensional only
+        InvalidArgument("DoScatterColumnsOf: Map must be a row vector.");
+
+    if (beta != 0)
+        NOT_IMPLEMENTED;
+
+    if (NzCount() != 0)
+        InvalidArgument("CPUSparseMatrix::DoScatterColumnsOf: The target matrix cannot have pre-existing non-zero values when being scattered into");
+
+    size_t numNonZeroElements = a.NzCount();
+
+    if (beta == 0)
+        RequireSizeAndAllocate(GetNumRows(), GetNumCols(), numNonZeroElements);
+
+    // Setup the Secondary index
+    std::vector<int> columnElementCounts(GetNumCols(), 0);
+    size_t numColsToWrite = idx.GetNumCols();
+    for (long j = 0; j < numColsToWrite; j++)
+    {
+        auto jOutF = idx(0, j); // this is the column we need to write to
+        if (::isnan(jOutF) || (jOutF < 0)) // negative index means gap
+            continue;
+        size_t jOut = (size_t)jOutF;
+        columnElementCounts[jOut] = a.SecondaryIndexLocation()[j + 1] - a.SecondaryIndexLocation()[j];
+    }
+
+    // TODO: Replace with std::exclusive_scan when we switch to C++17
+    for (size_t i = 1; i <= GetNumCols(); ++i)
+        SecondaryIndexLocation()[i] = SecondaryIndexLocation()[i - 1] + columnElementCounts[i - 1];
+
+    size_t offset = a.SecondaryIndexLocation()[0];
+    // TODO: Does it make sense to parallelize this?
+    for (long j = 0; j < numColsToWrite; j++)
+    {
+        auto jOutF = idx(0, j); // this is the column we need to write to
+        if (::isnan(jOutF) || (jOutF < 0)) // negative index means gap
+            continue;
+        size_t jOut = (size_t)jOutF;
+
+        auto start = SecondaryIndexLocation()[jOut];
+        auto end = SecondaryIndexLocation()[jOut + 1];
+        for (auto p = start; p < end; p++, offset++)
+        {
+            GetUnCompIndex()[p] = a.GetUnCompIndex()[offset];
+            Buffer()[p] = a.Buffer()[offset] * alpha;
+        }
+    }
+
+    return *this;
+}
+
 template <class ElemType>
 void CPUSparseMatrix<ElemType>::Print(const char* matrixName) const
 {
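Editor's note: DoScatterColumnsOf rebuilds the CSC compressed column index with an exclusive prefix sum over per-column nonzero counts; column j's values then live at [colIdx[j], colIdx[j+1]). The construction in isolation, with made-up counts:

    #include <cstdio>
    #include <vector>

    int main()
    {
        std::vector<int> counts = {2, 0, 3, 1};        // nonzeros per column
        std::vector<int> colIdx(counts.size() + 1, 0); // CSC secondary index
        for (size_t j = 0; j < counts.size(); ++j)
            colIdx[j + 1] = colIdx[j] + counts[j];     // exclusive scan
        for (int v : colIdx)
            printf("%d ", v);                          // prints: 0 2 2 5 6
        return 0;
    }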
@@ -587,13 +645,7 @@ void CPUSparseMatrix<ElemType>::SetMatrixFromCSCFormat(const CPUSPARSE_INDEX_TYP
 }

 template <class ElemType>
 ElemType* CPUSparseMatrix<ElemType>::Data() const
-{
-    return Buffer() + GetCompIndex()[m_sliceViewOffset];
-}
-
-template <class ElemType>
-ElemType* CPUSparseMatrix<ElemType>::Data()
 {
     return (Buffer() +
         ((GetFormat() == matrixFormatSparseCSC || GetFormat() == matrixFormatSparseCSR) ? GetCompIndex()[m_sliceViewOffset] : 0));
@@ -1340,20 +1392,12 @@ ElemType CPUSparseMatrix<ElemType>::SumOfAbsElements() const
     if (sizeof(ElemType) == sizeof(double))
     {
-#ifdef USE_ACML
-        return (ElemType) dasum((int) this->NzCount(), reinterpret_cast<double*>(Data()), 1);
-#else
         return (ElemType) cblas_dasum((int) this->NzCount(), reinterpret_cast<double*>(Data()), 1);
-#endif
     }
     else
     {
 #pragma warning(suppress : 4244)
-#ifdef USE_ACML
-        return sasum((int) this->NzCount(), reinterpret_cast<float*>(Data()), 1);
-#else
         return cblas_sasum((int) this->NzCount(), reinterpret_cast<float*>(Data()), 1);
-#endif
     }
 }
@@ -1495,7 +1539,6 @@ template void CPUSparseMatrix<char>::SetValue(size_t, size_t, char);
 template void CPUSparseMatrix<char>::SetValue(CPUSparseMatrix<char> const&);
 //template void CPUSparseMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
 template char* CPUSparseMatrix<char>::Data() const;
-template char* CPUSparseMatrix<char>::Data();
 template void CPUSparseMatrix<char>::Reset(void);
 template void CPUSparseMatrix<char>::Resize(const size_t, const size_t, const size_t, const bool);
 template void CPUSparseMatrix<char>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, bool);
@@ -1518,7 +1561,6 @@ template void CPUSparseMatrix<short>::SetValue(size_t, size_t, short);
 template void CPUSparseMatrix<short>::SetValue(CPUSparseMatrix<short> const&);
 //template void CPUSparseMatrix<short>::SetValue(GPUSparseMatrix<short> const&);
 template short* CPUSparseMatrix<short>::Data() const;
-template short* CPUSparseMatrix<short>::Data();
 template void CPUSparseMatrix<short>::Reset(void);
 template void CPUSparseMatrix<short>::Resize(const size_t, const size_t, const size_t, const bool);
 template void CPUSparseMatrix<short>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, bool);

View file

@@ -92,13 +92,13 @@ public:
     void MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val);

     CPUSparseMatrix<ElemType>& DoGatherColumnsOf(ElemType beta, const CPUMatrix<ElemType>& idx, const CPUSparseMatrix<ElemType>& a, ElemType alpha);
+    CPUSparseMatrix<ElemType>& DoScatterColumnsOf(ElemType beta, const CPUMatrix<ElemType>& idx, const CPUSparseMatrix<ElemType>& a, ElemType alpha);

     size_t BufferSize() const
     {
         return GetSizeAllocated() * sizeof(ElemType);
     }

     ElemType* Data() const;
-    ElemType* Data();

     inline size_t GetNumElemAllocated() const
     {
         return GetSizeAllocated();
@@ -262,7 +262,8 @@ public:
     CPUSPARSE_INDEX_TYPE* MajorIndexLocation() const
     {
-        return GetUnCompIndex() + GetCompIndex()[m_sliceViewOffset];
+        return (GetUnCompIndex() +
+            ((GetFormat() == matrixFormatSparseCSC || GetFormat() == matrixFormatSparseCSR) ? GetCompIndex()[m_sliceViewOffset] : 0));
     } // this is the major index, row/col ids in CSC/CSR format

     size_t MajorIndexCount() const

View file

@@ -237,7 +237,7 @@ std::pair<size_t, size_t> TracingGPUMemoryAllocator::GetFreeAndTotalMemoryInMBs(
 // deviceId - the device on which the operation will take place
 void PrepareDevice(DEVICEID_TYPE deviceId)
 {
-    static DEVICEID_TYPE currentDevice = DEVICEID_NOTYETDETERMINED;
+    THREAD_LOCAL static DEVICEID_TYPE currentDevice = DEVICEID_NOTYETDETERMINED;
     // and if we last set the device to be this device we are good
     if (deviceId == currentDevice)
         return;
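Editor's note: cudaSetDevice state is per host thread, so a process-wide static cache of the current device is wrong as soon as multiple threads drive different GPUs; THREAD_LOCAL is presumably CNTK's portability macro for thread-local storage. The idiom in plain C++11:

    #include <cuda_runtime.h>

    void PrepareDeviceSketch(int deviceId)
    {
        thread_local int currentDevice = -1; // -1 = not yet determined on this thread
        if (deviceId == currentDevice)
            return;                          // this thread already targets deviceId
        currentDevice = deviceId;
        cudaSetDevice(deviceId);             // sets per-thread CUDA runtime state
    }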

View file

@@ -227,6 +227,5 @@
   <Target Name="CheckDependencies">
     <Error Condition="'$(MathLibrary)' == 'MKL' And '$(CNTK_MKL_PATH)' == ''" Text="CNTK custom MKL location not specified, see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#optional-mkl for instructions." />
     <Error Condition="'$(MathLibrary)' == 'MKL' And !Exists('$(CNTKCustomMKLPath)')" Text="CNTK custom MKL not found. See https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#optional-mkl for instructions." />
-    <Error Condition="'$(MathLibrary)' == 'ACML' And !Exists('$(ACML_PATH)')" Text="ACML not found. See https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#acml for instructions." />
   </Target>
 </Project>

View file

@@ -1081,7 +1081,7 @@ Matrix<ElemType>& Matrix<ElemType>::DoScatterColumnsOf(ElemType beta, const Matr
     DISPATCH_MATRIX_ON_FLAG(&a, this,
                             { m_CPUMatrix->DoScatterColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUMatrix, alpha); },
                             { m_GPUMatrix->DoScatterColumnsOf(beta, *idx.m_GPUMatrix, *a.m_GPUMatrix, alpha); },
-                            { NOT_IMPLEMENTED; },
+                            { m_CPUSparseMatrix->DoScatterColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUSparseMatrix, alpha); },
                             { NOT_IMPLEMENTED; });

     return *this;

View file

@@ -377,8 +377,8 @@ void HTKMLFReader<ElemType>::PrepareForTrainingOrTesting(const ConfigRecordType&
         // second, remove trailing slash if there is any
         // TODO: when gcc -v is 4.9 or greater, this should be: std::regex_replace(rootpath, L"\\/+$", wstring());
-        size_t stringPos = 0;
-        for (stringPos = rootpath.length() - 1; stringPos >= 0; stringPos--)
+        int stringPos = 0;
+        for (stringPos = (int) (rootpath.length() - 1); stringPos >= 0; stringPos--)
         {
             if (rootpath[stringPos] != L'/')
             {
@@ -517,11 +517,11 @@ void HTKMLFReader<ElemType>::PrepareForTrainingOrTesting(const ConfigRecordType&
             m_lattices->setverbosity(m_verbosity);

         // now get the frame source. This has better randomization and doesn't create temp files
-        bool minimizeReaderMemoryFootprint = readerConfig(L"minimizeReaderMemoryFootprint", true);
-        m_frameSource.reset(new msra::dbn::minibatchutterancesourcemulti(infilesmulti, labelsmulti, m_featDims, m_labelDims,
+        bool useMersenneTwisterRand = readerConfig(L"useMersenneTwisterRand", false);
+        m_frameSource.reset(new msra::dbn::minibatchutterancesourcemulti(useMersenneTwisterRand, infilesmulti, labelsmulti, m_featDims, m_labelDims,
                                                                          numContextLeft, numContextRight, randomize,
                                                                          *m_lattices, m_latticeMap, m_frameMode,
-                                                                         minimizeReaderMemoryFootprint, m_expandToUtt));
+                                                                         m_expandToUtt));
         m_frameSource->setverbosity(m_verbosity);
     }
     else if (EqualCI(readMethod, L"rollingWindow"))

View file

@@ -12,7 +12,8 @@
 #include "latticearchive.h" // for reading HTK phoneme lattices (MMI training)
 #include "minibatchsourcehelpers.h"
 #include "minibatchiterator.h"
-#include "unordered_set"
+#include <unordered_set>
+#include <random>

 namespace msra { namespace dbn {

@@ -38,6 +39,10 @@ class minibatchutterancesourcemulti : public minibatchsource
     // const std::vector<unique_ptr<latticesource>> &lattices;
     const latticesource &lattices;

+    // Flag indicating whether to use Mersenne Twister random generator.
+    bool m_useMersenneTwister;
+    std::mt19937_64 m_rng;
+
     // std::vector<latticesource> lattices;
     // word-level transcripts (for MMI mode when adding best path to lattices)
     const map<wstring, msra::lattices::lattice::htkmlfwordsequence> &allwordtranscripts; // (used for getting word-level transcripts)
@@ -413,6 +418,7 @@ class minibatchutterancesourcemulti : public minibatchsource
         // When true we use a rolling window of randomized framerefs to minimize memory
         // footprint, instead of using a large vector listing all frames in the training corpus
         // Functionally, the 2 methods are identical.
+        // When it is true, we also use Mersenne Twister random generator for randomization.
         const bool m_minimizeMemoryFootprint;

         // [globalt-sweepts] -> (chunk, utt, frame) lookup table for randomized frames --this can be REALLY big!
@@ -429,6 +435,10 @@ class minibatchutterancesourcemulti : public minibatchsource
         size_t m_currentRangeEndChunkIdx;
         size_t m_nextFramePosNotYetRandomized;

+        // If m_minimizeMemoryFootprint is true, Mersenne Twister is used for randomization
+        // because rand has problems in distributed case.
+        std::mt19937_64 m_rng;
+
     public:
         framerandomizer(const std::vector<std::vector<chunk>>& randomizedChunks, bool minimizeMemoryFootprint)
             : m_randomizedChunks(randomizedChunks), m_minimizeMemoryFootprint(minimizeMemoryFootprint), m_currentRangeBeginChunkIdx(0), m_currentRangeEndChunkIdx(0), m_nextFramePosNotYetRandomized(0)
@@ -496,7 +506,9 @@ class minibatchutterancesourcemulti : public minibatchsource
                 for (;;) // (randomization retry loop)
                 {
-                    size_t tswap = Microsoft::MSR::CNTK::rand(postbegin, postend); // random frame position within allowed range
+                    size_t tswap = m_minimizeMemoryFootprint ?
+                        Microsoft::MSR::CNTK::RandMT(postbegin, postend, m_rng) :
+                        Microsoft::MSR::CNTK::rand(postbegin, postend); // random frame position within allowed range

                     // We want to swap 't' to 'tswap' and 'tswap' to 't'.
                     //  - Both may have been swapped before.
                     //  - Both must stay within the randomization window of their respective position.
@@ -542,11 +554,11 @@ class minibatchutterancesourcemulti : public minibatchsource
         void reset(unsigned int randSeed)
         {
-            srand(randSeed);
             size_t sweepts = m_randomizedChunks[0][0].globalts;
             size_t totalFrames = m_randomizedChunks[0].back().globalte() - sweepts;
             if (m_minimizeMemoryFootprint)
             {
+                m_rng.seed(randSeed);
                 m_randomizedframerefsWindow.clear();
                 m_currentRangeBeginChunkIdx = m_randomizedChunks[0][0].windowbegin;
                 m_currentRangeEndChunkIdx = m_currentRangeBeginChunkIdx;
@@ -554,6 +566,7 @@ class minibatchutterancesourcemulti : public minibatchsource
             }
             else
             {
+                srand(randSeed + 1);
                 if (m_randomizedframerefs.size() != totalFrames)
                     m_randomizedframerefs.resize(totalFrames);
@@ -866,10 +879,11 @@ public:
     // constructor
     // Pass empty labels to denote unsupervised training (so getbatch() will not return uids).
     // This mode requires utterances with time stamps.
-    minibatchutterancesourcemulti(const std::vector<std::vector<wstring>> &infiles, const std::vector<map<wstring, std::vector<msra::asr::htkmlfentry>>> &labels,
+    minibatchutterancesourcemulti(bool useMersenneTwister, const std::vector<std::vector<wstring>> &infiles, const std::vector<map<wstring, std::vector<msra::asr::htkmlfentry>>> &labels,
                                   std::vector<size_t> vdim, std::vector<size_t> udim, std::vector<size_t> leftcontext, std::vector<size_t> rightcontext, size_t randomizationrange,
-                                  const latticesource &lattices, const map<wstring, msra::lattices::lattice::htkmlfwordsequence> &allwordtranscripts, const bool framemode, bool minimizeMemoryFootprint, std::vector<bool> expandToUtt)
-        : vdim(vdim), leftcontext(leftcontext), rightcontext(rightcontext), sampperiod(0), featdim(0), randomizationrange(randomizationrange), currentsweep(SIZE_MAX), lattices(lattices), allwordtranscripts(allwordtranscripts), framemode(framemode), chunksinram(0), timegetbatch(0), verbosity(2), m_generatePhoneBoundaries(!lattices.empty()), m_frameRandomizer(randomizedchunks, minimizeMemoryFootprint), expandToUtt(expandToUtt)
+                                  const latticesource &lattices, const map<wstring, msra::lattices::lattice::htkmlfwordsequence> &allwordtranscripts, const bool framemode, std::vector<bool> expandToUtt)
+        : vdim(vdim), leftcontext(leftcontext), rightcontext(rightcontext), sampperiod(0), featdim(0), randomizationrange(randomizationrange), currentsweep(SIZE_MAX), lattices(lattices), allwordtranscripts(allwordtranscripts), framemode(framemode), chunksinram(0), timegetbatch(0), verbosity(2), m_generatePhoneBoundaries(!lattices.empty()), m_frameRandomizer(randomizedchunks, useMersenneTwister), expandToUtt(expandToUtt),
+          m_useMersenneTwister(useMersenneTwister)
     // [v-hansu] change framemode (lattices.empty()) into framemode (false) to run utterance mode without lattice
     // you also need to change another line, search : [v-hansu] comment out to run utterance mode without lattice
     {
@@ -1251,8 +1265,16 @@ private:
                 randomizedchunkrefs[i].push_back(allchunks[i].begin() + j);
             assert(randomizedchunkrefs[i].size() == allchunks[i].size());

-            // note that sincew randomshuffle() uses sweep as seed, this will keep the randomization common across all feature streams
-            randomshuffle(randomizedchunkrefs[i], sweep); // bring into random order (with random seed depending on sweep)
+            if (m_useMersenneTwister)
+            {
+                m_rng.seed((unsigned long)sweep);
+                Microsoft::MSR::CNTK::RandomShuffleMT(randomizedchunkrefs[i], m_rng); // bring into random order (with random seed depending on sweep)
+            }
+            else
+            {
+                // note that sincew randomshuffle() uses sweep as seed, this will keep the randomization common across all feature streams
+                randomshuffle(randomizedchunkrefs[i], sweep); // bring into random order (with random seed depending on sweep)
+            }
         }

         // place them onto the global timeline -> randomizedchunks[]
@@ -1348,7 +1370,7 @@ private:
         // check we got those setup right

         // we now randomly shuffle randomizedutterancerefs[pos], while considering the constraints of what chunk range needs to be in memory
-        srand((unsigned int) sweep + 1);
+        m_useMersenneTwister ? m_rng.seed((unsigned long)sweep) : srand((unsigned int)sweep + 1);
         for (size_t i = 0; i < randomizedutterancerefs.size(); i++)
         {
             // get valid randomization range, expressed in chunks
@@ -1364,7 +1386,9 @@ private:
             for (;;)
             {
                 // pick a random location
-                const size_t j = Microsoft::MSR::CNTK::rand(posbegin, posend); // a random number within the window
+                const size_t j = m_useMersenneTwister ?
+                    Microsoft::MSR::CNTK::RandMT(posbegin, posend, m_rng) :
+                    Microsoft::MSR::CNTK::rand(posbegin, posend); // a random number within the window
                 if (i == j)
                     break; // the random gods say "this one points to its original position"... nothing wrong about that, but better not try to swap
@@ -1416,7 +1440,7 @@ private:
         }
         else // frame mode
        {
-            m_frameRandomizer.reset((unsigned int)sweep + 1);
+            m_frameRandomizer.reset((unsigned int)sweep);
         }

         return sweep;
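Editor's note: the reason for threading std::mt19937_64 through the randomizers is determinism: rand()/srand() are implementation-defined and process-global, so distributed workers can diverge, while a Mersenne Twister seeded with the sweep index produces the same permutation everywhere. A standalone sketch of the assumed behavior of the RandMT/RandomShuffleMT helpers:

    #include <cstdint>
    #include <random>
    #include <vector>

    // Value in [begin, end) drawn from a caller-owned engine (modulo bias ignored in this sketch).
    size_t RandMTSketch(size_t begin, size_t end, std::mt19937_64& rng)
    {
        return begin + (size_t)(rng() % (end - begin));
    }

    // Deterministic Fisher-Yates shuffle: same seed -> same order on every worker.
    template <typename T>
    void ShuffleMTSketch(std::vector<T>& v, uint64_t seed)
    {
        std::mt19937_64 rng(seed); // e.g., seed = sweep index
        for (size_t i = v.size(); i > 1; --i)
            std::swap(v[i - 1], v[RandMTSketch(0, i, rng)]);
    }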

View file

@@ -21,7 +21,7 @@ public:
     ByteReader() = default;
     virtual ~ByteReader() = default;

-    virtual void Register(size_t seqId, const std::string& path) = 0;
+    virtual void Register(const std::map<std::string, size_t>& sequences) = 0;
     virtual cv::Mat Read(size_t seqId, const std::string& path, bool grayscale) = 0;

     DISABLE_COPY_AND_MOVE(ByteReader);
@@ -30,7 +30,7 @@ public:
 class FileByteReader : public ByteReader
 {
 public:
-    void Register(size_t, const std::string&) override {}
+    void Register(const std::map<std::string, size_t>&) override {}
     cv::Mat Read(size_t seqId, const std::string& path, bool grayscale) override;
 };
@@ -40,7 +40,7 @@ class ZipByteReader : public ByteReader
 public:
     ZipByteReader(const std::string& zipPath);

-    void Register(size_t seqId, const std::string& path) override;
+    void Register(const std::map<std::string, size_t>& sequences) override;
     cv::Mat Read(size_t seqId, const std::string& path, bool grayscale) override;

 private:
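Editor's note: moving Register from one (seqId, path) call per image to a single path-to-seqId map lets a reader resolve everything in one pass over its index; for the zip reader that is one scan of the archive directory instead of a per-file lookup. A hedged usage sketch:

    std::map<std::string, size_t> sequences; // path -> sequence id (values illustrative)
    sequences["images/cat_001.jpg"] = 0;
    sequences["images/dog_042.jpg"] = 1;
    reader->Register(sequences);             // one batch call instead of one per image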

View file

@@ -13,6 +13,7 @@
 #include "ImageConfigHelper.h"
 #include "StringUtil.h"
 #include "ConfigUtil.h"
+#include "TimerUtility.h"
 
 namespace Microsoft { namespace MSR { namespace CNTK {
 
@@ -135,6 +136,7 @@ ImageDataDeserializer::ImageDataDeserializer(CorpusDescriptorPtr corpus, const C
     }
 
     string precision = (ConfigValue)config("precision", "float");
+    m_verbosity = config(L"verbosity", 0);
 
     // Feature stream.
     ConfigParameters featureSection = inputs(featureNames[0]);
@@ -144,6 +146,7 @@ ImageDataDeserializer::ImageDataDeserializer(CorpusDescriptorPtr corpus, const C
     features->m_storageType = StorageType::dense;
     features->m_elementType = AreEqualIgnoreCase(precision, "float") ? ElementType::tfloat : ElementType::tdouble;
     m_streams.push_back(features);
+    m_featureElementType = features->m_elementType;
 
     // Label stream.
     ConfigParameters label = inputs(labelNames[0]);
@@ -179,6 +182,8 @@ ImageDataDeserializer::ImageDataDeserializer(const ConfigParameters& config)
     const auto& label = m_streams[configHelper.GetLabelStreamId()];
     const auto& feature = m_streams[configHelper.GetFeatureStreamId()];
 
+    m_verbosity = config(L"verbosity", 0);
+
     // Expect data in HWC.
     ImageDimensions dimensions(*feature->m_sampleLayout, configHelper.GetDataFormat());
     feature->m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(HWC));
@@ -240,9 +245,13 @@ void ImageDataDeserializer::CreateSequenceDescriptions(CorpusDescriptorPtr corpu
     size_t curId = 0;
     std::string line;
     PathReaderMap knownReaders;
+    ReaderSequenceMap readerSequences;
     ImageSequenceDescription description;
     description.m_numberOfSamples = 1;
 
+    Timer timer;
+    timer.Start();
+
     auto& stringRegistry = corpus->GetStringRegistry();
     for (size_t lineIndex = 0; std::getline(mapFile, line); ++lineIndex)
     {
@@ -296,9 +305,20 @@ void ImageDataDeserializer::CreateSequenceDescriptions(CorpusDescriptorPtr corpu
             m_keyToSequence[description.m_key.m_sequence] = m_imageSequences.size();
             m_imageSequences.push_back(description);
-            RegisterByteReader(description.m_id, description.m_path, knownReaders);
+            RegisterByteReader(description.m_id, description.m_path, knownReaders, readerSequences);
         }
     }
 
+    for (auto& reader : knownReaders)
+    {
+        reader.second->Register(readerSequences[reader.first]);
+    }
+
+    timer.Stop();
+    if (m_verbosity > 1)
+    {
+        fprintf(stderr, "ImageDeserializer: Read information about %d images in %.6g seconds\n", (int)m_imageSequences.size(), timer.ElapsedSeconds());
+    }
 }
 
 ChunkPtr ImageDataDeserializer::GetChunk(ChunkIdType chunkId)
@@ -307,7 +327,7 @@ ChunkPtr ImageDataDeserializer::GetChunk(ChunkIdType chunkId)
     return std::make_shared<ImageChunk>(sequenceDescription, *this);
 }
 
-void ImageDataDeserializer::RegisterByteReader(size_t seqId, const std::string& path, PathReaderMap& knownReaders)
+void ImageDataDeserializer::RegisterByteReader(size_t seqId, const std::string& path, PathReaderMap& knownReaders, ReaderSequenceMap& readerSequences)
 {
     assert(!path.empty());
 
@@ -330,16 +350,19 @@ void ImageDataDeserializer::RegisterByteReader(size_t seqId, const std::string&
     {
         reader = std::make_shared<ZipByteReader>(containerPath);
         knownReaders[containerPath] = reader;
+        readerSequences[containerPath] = std::map<std::string, size_t>();
     }
     else
     {
         reader = (*r).second;
     }
-    reader->Register(seqId, itemPath);
+    readerSequences[containerPath][itemPath] = seqId;
     m_readers[seqId] = reader;
 #else
     UNUSED(seqId);
     UNUSED(knownReaders);
+    UNUSED(readerSequences);
     RuntimeError("The code is built without zip container support. Only plain image files are supported.");
 #endif
 }

View file

@@ -72,7 +72,8 @@ private:
     // Not using nocase_compare here as it's not correct on Linux.
     using PathReaderMap = std::unordered_map<std::string, std::shared_ptr<ByteReader>>;
-    void RegisterByteReader(size_t seqId, const std::string& path, PathReaderMap& knownReaders);
+    using ReaderSequenceMap = std::map<std::string, std::map<std::string, size_t>>;
+    void RegisterByteReader(size_t seqId, const std::string& path, PathReaderMap& knownReaders, ReaderSequenceMap& readerSequences);
     cv::Mat ReadImage(size_t seqId, const std::string& path, bool grayscale);
 
     // REVIEW alexeyk: can potentially use vector instead of map. Need to handle default reader and resizing though.
@@ -80,6 +81,7 @@ private:
     SeqReaderMap m_readers;
 
     FileByteReader m_defaultReader;
+    int m_verbosity;
 };
 }}}

View file

@@ -44,16 +44,46 @@ ZipByteReader::ZipPtr ZipByteReader::OpenZip()
     });
 }
 
-void ZipByteReader::Register(size_t seqId, const std::string& path)
+void ZipByteReader::Register(const std::map<std::string, size_t>& sequences)
 {
     auto zipFile = m_zips.pop_or_create([this]() { return OpenZip(); });
     zip_stat_t stat;
     zip_stat_init(&stat);
-    int err = zip_stat(zipFile.get(), path.c_str(), 0, &stat);
-    if (ZIP_ER_OK != err)
-        RuntimeError("Failed to get file info of %s, zip library error: %s", path.c_str(), GetZipError(err).c_str());
-    m_seqIdToIndex[seqId] = std::make_pair(stat.index, stat.size);
+
+    size_t numberOfEntries = 0;
+    size_t numEntries = zip_get_num_entries(zipFile.get(), 0);
+    for (size_t i = 0; i < numEntries; ++i) {
+        int err = zip_stat_index(zipFile.get(), i, 0, &stat);
+        if (ZIP_ER_OK != err)
+            RuntimeError("Failed to get file info for index %d, zip library error: %s", (int)i, GetZipError(err).c_str());
+        auto sequenceId = sequences.find(std::string(stat.name));
+        if (sequenceId == sequences.end())
+        {
+            continue;
+        }
+        else
+        {
+            m_seqIdToIndex[sequenceId->second] = std::make_pair(stat.index, stat.size);
+            numberOfEntries++;
+        }
+    }
     m_zips.push(std::move(zipFile));
+
+    if (numberOfEntries != sequences.size())
+    {
+        // Not all sequences have been found. Let's print them out and throw.
+        for (const auto& s : sequences)
+        {
+            auto index = m_seqIdToIndex.find(s.second);
+            if (index == m_seqIdToIndex.end())
+            {
+                fprintf(stderr, "Sequence %s is not found in container %s.\n", s.first.c_str(), m_zipPath.c_str());
+            }
+        }
+
+        RuntimeError("Cannot retrieve image data for some sequences. For more detail, please see the log file.");
+    }
 }
 
 cv::Mat ZipByteReader::Read(size_t seqId, const std::string& path, bool grayscale)
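The rewritten Register() flips the lookup direction: instead of one zip_stat() per requested path, it walks the archive's entry list once and matches names against the map, so cost scales with the archive size rather than with per-request directory lookups. A hedged sketch of the same pattern against the plain libzip C API (error handling trimmed; not the CNTK code):

    #include <string>
    #include <unordered_map>
    #include <zip.h>

    // Build a name -> entry-index map by enumerating the archive once.
    std::unordered_map<std::string, zip_uint64_t> BuildZipIndex(zip_t* archive)
    {
        std::unordered_map<std::string, zip_uint64_t> index;
        zip_int64_t numEntries = zip_get_num_entries(archive, 0);
        for (zip_uint64_t i = 0; i < (zip_uint64_t)numEntries; ++i)
        {
            zip_stat_t st;
            zip_stat_init(&st);
            if (zip_stat_index(archive, i, 0, &st) == 0 && (st.valid & ZIP_STAT_NAME))
                index[st.name] = i; // later per-image lookups become O(1)
        }
        return index;
    }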

View file

@@ -22,17 +22,10 @@
 == Preliminaries ==
 
 To build the cpu version, you have to install intel MKL blas library
-or ACML library first. Note that ACML is free, whereas MKL may not be.
 
 for MKL:
 1. Download from https://software.intel.com/en-us/intel-mkl
-for ACML:
-1. Download from
-http://developer.amd.com/tools-and-sdks/archive/amd-core-math-library-acml/acml-downloads-resources/
-We have seen some problems with some versions of the library on Intel
-processors, but have had success with acml-5-3-1-ifort-64bit.tgz
 
 for Kaldi:
 1. In kaldi-trunk/tools/Makefile, uncomment # OPENFST_VERSION = 1.4.1, and
 re-install OpenFst using the makefile.
@@ -54,8 +47,7 @@ build in the directory "build" type
 (For an in source build, just run configure in the $CNTK directory).
 You will see various options for configure, as well as their default
-values. CNTK needs a CPU math directory, either acml or mkl. If you
-do not specify one and both are available, acml will be used. For GPU
+values. CNTK needs a CPU math library (mkl). For GPU
 use, a cuda and gdk directory are also required. Similarly, to build
 the kaldi plugin a kaldi directory is required. You may also specify
 whether you want a debug or release build, as well as add additional

View file

@@ -171,6 +171,12 @@ public:
     // setup all the state variables and state tables for state machine
     void Init();
 
+    // convenience function for setting the flags
+    inline unsigned int SetSequenceFlags()
+    {
+        return (m_beginSequence ? seqFlagStartLabel : 0) | (m_endSequence ? seqFlagStopLabel : 0) | seqFlagLineBreak;
+    }
+
     // Parser destructor
     ~SequenceParser();
@@ -334,8 +340,7 @@ public:
         case EndOfLine:
             if (seqPos)
             {
-                SequencePosition sequencePos(numbers->size(), labels->size(),
-                                             (m_beginSequence ? seqFlagStartLabel : 0) | (m_endSequence ? seqFlagStopLabel : 0) | seqFlagLineBreak);
+                SequencePosition sequencePos(numbers->size(), labels->size(), SetSequenceFlags());
                 // add a sequence element to the list
                 seqPos->push_back(sequencePos);
                 sequencePositionLast = sequencePos;
@@ -429,8 +434,7 @@ public:
         // this could probably be fixed by taking another pass through the loop above, but this is easier
         if (seqPos)
         {
-            SequencePosition sequencePos(numbers->size(), labels->size(),
-                                         m_beginSequence ? seqFlagStartLabel : 0 | m_endSequence ? seqFlagStopLabel : 0 | seqFlagLineBreak);
+            SequencePosition sequencePos(numbers->size(), labels->size(), SetSequenceFlags());
             // add the final sequence element if needed
             if (!(sequencePos.labelPos == sequencePositionLast.labelPos && sequencePos.numberPos == sequencePositionLast.numberPos))
             {
@@ -510,6 +514,7 @@ public:
     using SequenceParser<NumType, LabelType>::m_totalNumbersConverted;
     using SequenceParser<NumType, LabelType>::m_dimLabelsOut;
     using SequenceParser<NumType, LabelType>::m_bufferStart;
+    using SequenceParser<NumType, LabelType>::SetSequenceFlags;
     LMSequenceParser()
     {
         mFile = nullptr;
@@ -594,8 +599,7 @@ public:
                 labels->push_back(std::move(vstr[i])); // TODO: is this an entire sequence, or multiple columns describing a single token?
 
             // add a sequence element to the list
-            SequencePosition sequencePos(numbers->size(), labels->size(),
-                                         m_beginSequence ? seqFlagStartLabel : 0 | m_endSequence ? seqFlagStopLabel : 0 | seqFlagLineBreak);
+            SequencePosition sequencePos(numbers->size(), labels->size(), SetSequenceFlags());
             seqPos->push_back(sequencePos);
 
             lineCount++;
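The SetSequenceFlags() refactor above also fixes a latent operator-precedence bug: in the two call sites that lacked parentheses, '|' binds tighter than '?:', so the conditionals nested instead of OR-ing the flags. A standalone demonstration (illustrative values only, not CNTK code):

    #include <cassert>

    int main()
    {
        const unsigned start = 1, stop = 2, lineBreak = 4;
        const bool begin = true, end = false;

        // Old spelling parses as: begin ? start : ((0 | end) ? stop : (0 | lineBreak))
        unsigned buggy = begin ? start : 0 | end ? stop : 0 | lineBreak;

        // Intended meaning, as the new helper writes it with parentheses:
        unsigned fixed = (begin ? start : 0) | (end ? stop : 0) | lineBreak;

        assert(buggy == 1); // lineBreak flag silently lost when begin is true
        assert(fixed == 5); // start | lineBreak
        return 0;
    }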

View file

@@ -80,8 +80,8 @@ void BlockRandomizer::StartEpoch(const EpochConfiguration& config)
 #ifdef _DEBUG
     size_t epochStartFrame = config.m_epochIndex * m_epochSize;
-    fprintf(stderr, "BlockRandomizer::StartEpoch: epoch %" PRIu64 ": frames [%" PRIu64 "..%" PRIu64 "] (first sequence at sample %" PRIu64 "), data subset %" PRIu64 " of %" PRIu64 "\n",
-            config.m_epochIndex,
+    fprintf(stderr, "BlockRandomizer::StartEpoch: epoch %" PRIu64 ": samples [%" PRIu64 "..%" PRIu64 "] (first sequence at sample %" PRIu64 "), worker rank %" PRIu64 ", total workers %" PRIu64 "\n",
+            config.m_epochIndex + 1,
             epochStartFrame,
             epochStartFrame + m_epochSize,
             m_globalSamplePosition,
@@ -107,7 +107,7 @@ void BlockRandomizer::PrepareNewSweepIfNeeded(size_t samplePosition)
         m_chunkRandomizer->Randomize((unsigned int)m_sweep);
 
         // Resetting sequence randomizer.
-        m_sequenceRandomizer->Reset(m_sweep + 1);
+        m_sequenceRandomizer->Reset(m_sweep);
         m_lastSeenChunkId = CHUNKID_MAX;
     }
 }
@@ -138,8 +138,8 @@ Sequences BlockRandomizer::GetNextSequences(size_t sampleCount)
     if (m_verbosity >= Debug)
         fprintf(stderr, "BlockRandomizer::GetNextSequences(): getting %" PRIu64 " out of %" PRIu64 " sequences for %" PRIu64 " requested samples in sweep %" PRIu64 "\n",
-                sequences.size(),
                 decimated.size(),
+                sequences.size(),
                 sampleCount,
                 m_sweep);

View file

@@ -10,25 +10,6 @@
 
 namespace Microsoft { namespace MSR { namespace CNTK {
 
-    // NOTE: This is an old code, used for legacy randomization to make sure we preserve the same behavior for the tests.
-    // TODO: Deprecate when the new randomizer is in place.
-    template <typename TVector>
-    void RandomShuffle(TVector& v, size_t randomSeed)
-    {
-        if (v.size() > RAND_MAX * static_cast<size_t>(RAND_MAX))
-        {
-            RuntimeError("RandomShuffle: too large set: need to change to different random generator!");
-        }
-
-        srand(static_cast<unsigned int>(randomSeed));
-        foreach_index(currentLocation, v)
-        {
-            // Pick a random location a location and swap with current
-            const size_t randomLocation = rand(0, v.size());
-            std::swap(v[currentLocation], v[randomLocation]);
-        }
-    }
-
     ChunkRandomizer::ChunkRandomizer(IDataDeserializerPtr deserializer, size_t randomizationRangeInSamples, bool legacy) :
         m_deserializer(deserializer), m_legacy(legacy), m_randomizationRangeInSamples(randomizationRangeInSamples)
     {
@@ -52,15 +33,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             randomizedChunkIndices.push_back(i);
         }
 
-        if (m_legacy)
-        {
-            RandomShuffle(randomizedChunkIndices, seed);
-        }
-        else
-        {
-            std::mt19937 m_rng(static_cast<int>(seed));
-            std::shuffle(randomizedChunkIndices.begin(), randomizedChunkIndices.end(), m_rng);
-        }
+        m_rng.seed(seed);
+        RandomShuffleMT(randomizedChunkIndices, m_rng);
 
         // Place randomized chunks on the timeline
         m_randomizedChunks.clear();
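RandomShuffleMT presumably performs a Fisher-Yates shuffle driven by the externally seeded engine; the call shape above is the only contract this commit shows. A minimal sketch under that assumption (not the CNTK source):

    #include <cstddef>
    #include <random>
    #include <utility>
    #include <vector>

    template <typename TVector, typename TEngine>
    void RandomShuffleMTSketch(TVector& v, TEngine& rng)
    {
        // Backward Fisher-Yates: swap each position with a uniform pick from
        // the not-yet-fixed prefix.
        for (size_t i = v.size(); i > 1; --i)
        {
            // Uniform index in [0, i): unbiased, unlike rand() % i.
            std::uniform_int_distribution<size_t> dist(0, i - 1);
            std::swap(v[i - 1], v[dist(rng)]);
        }
    }

    int main()
    {
        std::vector<int> chunkIndices = {0, 1, 2, 3, 4};
        std::mt19937_64 rng(42); // seeded once per sweep, as in Randomize() above
        RandomShuffleMTSketch(chunkIndices, rng);
        return 0;
    }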

View file

@@ -7,6 +7,7 @@
 
 #include <vector>
 #include "DataDeserializer.h"
+#include <random>
 
 namespace Microsoft { namespace MSR { namespace CNTK {
 
@@ -68,6 +69,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
         bool m_legacy;
         // Randomization range in samples.
         size_t m_randomizationRangeInSamples;
+
+        std::mt19937_64 m_rng;
     };
 
     typedef std::shared_ptr<ChunkRandomizer> ChunkRandomizerPtr;

View file

@@ -45,7 +45,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
     // Resets the current sweep according to the randomization seed provided.
     void SequenceRandomizer::Reset(size_t randSeed)
     {
-        srand((unsigned int)randSeed);
+        m_rng.seed((unsigned long)randSeed);
 
         m_sequenceWindow.clear();
         m_chunkWindow.clear();
@@ -197,7 +197,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
             for (;;)
             {
                 // Pick a sequence position from [posBegin, posEnd)
-                const size_t j = rand(posBegin, posEnd);
+                const size_t j = RandMT(posBegin, posEnd, m_rng);
 
                 // Pick up j sequence.
                 ChunkIdType jChunkIndex = GetChunkIndexForSequencePosition(j);
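RandMT(posBegin, posEnd, m_rng) is assumed to return a uniformly distributed index in the half-open window [posBegin, posEnd); a sketch under that assumption (not the CNTK implementation):

    #include <cstddef>
    #include <random>

    inline size_t RandMTSketch(size_t begin, size_t end, std::mt19937_64& rng)
    {
        // uniform_int_distribution takes inclusive bounds, hence end - 1.
        std::uniform_int_distribution<size_t> dist(begin, end - 1);
        return dist(rng);
    }

    int main()
    {
        std::mt19937_64 rng(0);
        size_t j = RandMTSketch(10, 20, rng); // 10 <= j < 20
        (void)j;
        return 0;
    }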

View file

@@ -11,6 +11,7 @@
 #include "DataDeserializer.h"
 #include "ChunkRandomizer.h"
 #include <deque>
+#include <random>
 
 namespace Microsoft { namespace MSR { namespace CNTK {
 
@@ -164,6 +165,8 @@ private:
 
     // General configuration
     int m_verbosity;
+
+    std::mt19937_64 m_rng;
 };
 
 typedef std::shared_ptr<SequenceRandomizer> SequenceRandomizerPtr;

View file

@@ -40,30 +40,10 @@ template SGD<double>::SGD(const ScriptableObjects::IConfigRecord&);
 // -----------------------------------------------------------------------
 
 template <class ElemType>
-void SGD<ElemType>::Train(function<ComputationNetworkPtr(DEVICEID_TYPE)> createNetworkFn, DEVICEID_TYPE deviceId,
+void SGD<ElemType>::Train(shared_ptr<ComputationNetwork> net, DEVICEID_TYPE deviceId,
                           IDataReader* trainSetDataReader,
-                          IDataReader* validationSetDataReader,
-                          const bool makeMode)
+                          IDataReader* validationSetDataReader, int startEpoch, bool loadNetworkFromCheckpoint)
 {
-    // determine which epoch to start with, including recovering a checkpoint if any and 'makeMode' enabled
-    int startEpoch = DetermineStartEpoch(makeMode);
-    if (startEpoch == m_maxEpochs)
-    {
-        LOGPRINTF(stderr, "No further training is necessary.\n");
-        return;
-    }
-
-    wstring modelFileName = GetModelNameForEpoch(int(startEpoch) - 1);
-    bool loadNetworkFromCheckpoint = startEpoch >= 0;
-    fprintf(stderr, "\n");
-    if (loadNetworkFromCheckpoint)
-        LOGPRINTF(stderr, "Starting from checkpoint. Loading network from '%ls'.\n", modelFileName.c_str());
-    else
-        LOGPRINTF(stderr, "Creating virgin network.\n");
-
-    // create or load from checkpoint
-    shared_ptr<ComputationNetwork> net = !loadNetworkFromCheckpoint ? createNetworkFn(deviceId) : ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName);
-
     // log the device we are computing on
     LOGPRINTF(stderr, "%s model with %d nodes", loadNetworkFromCheckpoint ? "Loaded" : "Created", (int)net->GetTotalNumberOfNodes());
     if (net->GetDeviceId() < 0)
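With the checkpoint-recovery logic hoisted out of Train(), the caller now owns it; DetermineStartEpoch(), GetModelNameForEpoch() and GetMaxEpochs() are made public in SGD.h below to support exactly this. A sketch of the assumed call sequence (the driver side is not part of this excerpt, so this is an illustration, not the actual caller):

    // Assumes the CNTK headers that declare SGD, ComputationNetwork, IDataReader.
    template <class ElemType>
    static void TrainOrRestoreSketch(SGD<ElemType>& sgd,
                                     function<ComputationNetworkPtr(DEVICEID_TYPE)> createNetworkFn,
                                     DEVICEID_TYPE deviceId,
                                     IDataReader* trainReader,
                                     IDataReader* validationReader,
                                     bool makeMode)
    {
        int startEpoch = sgd.DetermineStartEpoch(makeMode); // -1 if no checkpoint exists
        if (startEpoch == (int)sgd.GetMaxEpochs())
            return; // nothing left to train

        bool loadNetworkFromCheckpoint = startEpoch >= 0;
        wstring modelFileName = sgd.GetModelNameForEpoch(startEpoch - 1);
        shared_ptr<ComputationNetwork> net = !loadNetworkFromCheckpoint
            ? createNetworkFn(deviceId)
            : ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName);

        sgd.Train(net, deviceId, trainReader, validationReader, startEpoch, loadNetworkFromCheckpoint);
    }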

View file

@@ -110,6 +110,8 @@ struct SGDParams : public ScriptableObjects::Object
     // SGDParams(SGDParams&&) = default; // (does not compile in VS 2013; not critical)
 
+    size_t GetMaxEpochs() { return m_maxEpochs; }
+
 protected:
     // learning rate per sample provided outside
     floatargvector m_learningRatesParam;
@@ -342,10 +344,9 @@ public:
         m_parallelizationMethod = ParallelizationMethod::none;
     }
 
-    void Train(function<ComputationNetworkPtr(DEVICEID_TYPE)> createNetworkFn, DEVICEID_TYPE deviceId,
+    void Train(shared_ptr<ComputationNetwork> net, DEVICEID_TYPE deviceId,
                IDataReader* trainSetDataReader,
-               IDataReader* validationSetDataReader,
-               const bool makeMode = true);
+               IDataReader* validationSetDataReader, int startEpoch, bool loadNetworkFromCheckpoint);
     void Adapt(wstring origModelFileName, wstring refNodeName,
                IDataReader* trainSetDataReader,
                IDataReader* validationSetDataReader,
@@ -483,6 +484,10 @@ public:
                         const double L1RegWeight,
                         const bool needAveMultiplier,
                         const bool useNesterovMomentum);
+
+    // return -1 if nothing exists
+    int DetermineStartEpoch(const bool makeMode);
+    wstring GetModelNameForEpoch(const int epoch, bool bLastModel = false);
 
 protected:
     // UpdateWeights - update the weights in
@@ -517,10 +522,6 @@ protected:
                               /*out*/ size_t& minibatchSize);
 
     wstring GetCheckPointFileNameForEpoch(const int epoch);
-    wstring GetModelNameForEpoch(const int epoch, bool bLastModel = false);
-
-    // return -1 if nothing exists
-    int DetermineStartEpoch(const bool makeMode);
 
     GradientsUpdateType GradUpdateType() const
     {

View file

@@ -180,7 +180,7 @@ public:
             m_gradHeader.reset(DistGradHeader::Create(evalNodes.size()), [](DistGradHeader* ptr) {
                 DistGradHeader::Destroy(ptr);
             });
-            m_distGradAgg = make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, false, m_traceLevel);
+            m_distGradAgg = make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, false /*useAsyncAggregation*/, 0 /*syncStatsTrace*/);
         }
 
         m_gradHeader->numEvalNode = evalNodes.size();

View file

@@ -21,6 +21,7 @@ mkdir $DataDir
 cp -R $DataSourceDir/MNIST/v0/Train-28x28_cntk_text.txt $DataDir || exit $?
 cp -R $DataSourceDir/CIFAR/v0/cifar-10-batches-py $DataDir || exit $?
 cp -R $TEST_DIR/../../../../Examples/Other/Simple2d/Data/SimpleDataTrain_cntk_text.txt $DataDir || exit $?
+cp -R $TEST_DIR/../../Text/SequenceClassification/Data/Train.ctf $DataDir || exit $?
 
 pushd $DataDir

View file

@@ -272,8 +272,8 @@ Post-processing network...
 4 roots:
     ce = CrossEntropyWithSoftmax()
-    err = ClassificationError()
-    errTop1 = ClassificationError()
+    err = ErrorPrediction()
+    errTop1 = ErrorPrediction()
     ol.z = Plus()
 
 Validating network. 17 nodes to process in pass 1.
@@ -292,9 +292,9 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *] -> [10 x 1
 Validating --> ol.b = LearnableParameter() : -> [10 x 1]
 Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
 Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
-Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
 Validating --> unnamed81 = LearnableParameter() : -> [1 x 1]
-Validating --> errTop1 = ClassificationError (labels, ol.z, unnamed81) : [10 x *], [10 x 1 x *], [1 x 1] -> [1]
+Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *], [10 x 1 x *], [1 x 1] -> [1]
 
 Validating network. 9 nodes to process in pass 2.
@@ -314,8 +314,8 @@ Post-processing network complete.
 
 05/13/2016 15:10:02: Evaluation criterion node(s):
-05/13/2016 15:10:02:     errTop1 = ClassificationError
-05/13/2016 15:10:02:     err = ClassificationError
+05/13/2016 15:10:02:     errTop1 = ErrorPrediction
+05/13/2016 15:10:02:     err = ErrorPrediction
 
 Allocating matrices for forward and/or backward propagation.
@@ -390,8 +390,8 @@ Post-processing network...
 4 roots:
     ce = CrossEntropyWithSoftmax()
-    err = ClassificationError()
-    errTop1 = ClassificationError()
+    err = ErrorPrediction()
+    errTop1 = ErrorPrediction()
     ol.z = Plus()
 
 Validating network. 17 nodes to process in pass 1.
@@ -410,9 +410,9 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *1] -> [10 x 1
 Validating --> ol.b = LearnableParameter() : -> [10 x 1]
 Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
 Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
-Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
 Validating --> unnamed81 = LearnableParameter() : -> [1 x 1]
-Validating --> errTop1 = ClassificationError (labels, ol.z, unnamed81) : [10 x *1], [10 x 1 x *1], [1 x 1] -> [1]
+Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *1], [10 x 1 x *1], [1 x 1] -> [1]
 
 Validating network. 9 nodes to process in pass 2.

View file

@@ -270,8 +270,8 @@ Post-processing network...
 4 roots:
     ce = CrossEntropyWithSoftmax()
-    err = ClassificationError()
-    errTop1 = ClassificationError()
+    err = ErrorPrediction()
+    errTop1 = ErrorPrediction()
     ol.z = Plus()
 
 Validating network. 17 nodes to process in pass 1.
@@ -290,9 +290,9 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *] -> [10 x 1
 Validating --> ol.b = LearnableParameter() : -> [10 x 1]
 Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
 Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
-Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
 Validating --> unnamed81 = LearnableParameter() : -> [1 x 1]
-Validating --> errTop1 = ClassificationError (labels, ol.z, unnamed81) : [10 x *], [10 x 1 x *], [1 x 1] -> [1]
+Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *], [10 x 1 x *], [1 x 1] -> [1]
 
 Validating network. 9 nodes to process in pass 2.
@@ -312,8 +312,8 @@ Post-processing network complete.
 
 05/13/2016 08:15:53: Evaluation criterion node(s):
-05/13/2016 08:15:53:     errTop1 = ClassificationError
-05/13/2016 08:15:53:     err = ClassificationError
+05/13/2016 08:15:53:     errTop1 = ErrorPrediction
+05/13/2016 08:15:53:     err = ErrorPrediction
 
 Allocating matrices for forward and/or backward propagation.
@@ -388,8 +388,8 @@ Post-processing network...
 4 roots:
     ce = CrossEntropyWithSoftmax()
-    err = ClassificationError()
-    errTop1 = ClassificationError()
+    err = ErrorPrediction()
+    errTop1 = ErrorPrediction()
     ol.z = Plus()
 
 Validating network. 17 nodes to process in pass 1.
@@ -408,9 +408,9 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *1] -> [10 x 1
 Validating --> ol.b = LearnableParameter() : -> [10 x 1]
 Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
 Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
-Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
 Validating --> unnamed81 = LearnableParameter() : -> [1 x 1]
-Validating --> errTop1 = ClassificationError (labels, ol.z, unnamed81) : [10 x *1], [10 x 1 x *1], [1 x 1] -> [1]
+Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *1], [10 x 1 x *1], [1 x 1] -> [1]
 
 Validating network. 9 nodes to process in pass 2.

View file

@@ -284,7 +284,7 @@ Post-processing network...
 3 roots:
     ce = CrossEntropyWithSoftmax()
-    err = ClassificationError()
+    err = ErrorPrediction()
     ol.z = Plus()
 
 Validating network. 27 nodes to process in pass 1.
@@ -315,7 +315,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *] -> [10 x 1
 Validating --> ol.b = LearnableParameter() : -> [10 x 1]
 Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
 Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
-Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
 
 Validating network. 16 nodes to process in pass 2.
@@ -343,7 +343,7 @@ Post-processing network complete.
 
 05/13/2016 15:10:11: Evaluation criterion node(s):
-05/13/2016 15:10:11:     err = ClassificationError
+05/13/2016 15:10:11:     err = ErrorPrediction
 
 Allocating matrices for forward and/or backward propagation.
@@ -429,7 +429,7 @@ Post-processing network...
 3 roots:
     ce = CrossEntropyWithSoftmax()
-    err = ClassificationError()
+    err = ErrorPrediction()
     ol.z = Plus()
 
 Validating network. 27 nodes to process in pass 1.
@@ -460,7 +460,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *1] -> [10 x 1
 Validating --> ol.b = LearnableParameter() : -> [10 x 1]
 Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
 Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
-Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
 
 Validating network. 16 nodes to process in pass 2.

View file

@@ -282,7 +282,7 @@ Post-processing network...
 3 roots:
     ce = CrossEntropyWithSoftmax()
-    err = ClassificationError()
+    err = ErrorPrediction()
     ol.z = Plus()
 
 Validating network. 27 nodes to process in pass 1.
@@ -313,7 +313,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *] -> [10 x 1
 Validating --> ol.b = LearnableParameter() : -> [10 x 1]
 Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
 Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
-Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
 
 Validating network. 16 nodes to process in pass 2.
@@ -341,7 +341,7 @@ Post-processing network complete.
 
 05/13/2016 08:16:18: Evaluation criterion node(s):
-05/13/2016 08:16:18:     err = ClassificationError
+05/13/2016 08:16:18:     err = ErrorPrediction
 
 Allocating matrices for forward and/or backward propagation.
@@ -427,7 +427,7 @@ Post-processing network...
 3 roots:
     ce = CrossEntropyWithSoftmax()
-    err = ClassificationError()
+    err = ErrorPrediction()
     ol.z = Plus()
 
 Validating network. 27 nodes to process in pass 1.
@@ -458,7 +458,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *1] -> [10 x 1
 Validating --> ol.b = LearnableParameter() : -> [10 x 1]
 Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
 Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
-Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
 
 Validating network. 16 nodes to process in pass 2.

View file

@@ -287,7 +287,7 @@ Post-processing network...
 3 roots:
     ce = CrossEntropyWithSoftmax()
-    err = ClassificationError()
+    err = ErrorPrediction()
     ol.z = Plus()
 
 Validating network. 36 nodes to process in pass 1.
@@ -329,7 +329,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *] -> [10 x *]
 Validating --> ol.b = LearnableParameter() : -> [10 x 1]
 Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *], [10 x 1] -> [10 x 1 x *]
 Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
-Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
 
 Validating network. 16 nodes to process in pass 2.
@@ -363,7 +363,7 @@ Post-processing network complete.
 
 05/13/2016 15:10:29: Evaluation criterion node(s):
-05/13/2016 15:10:29:     err = ClassificationError
+05/13/2016 15:10:29:     err = ErrorPrediction
 
 Allocating matrices for forward and/or backward propagation.
@@ -462,7 +462,7 @@ Post-processing network...
 3 roots:
     ce = CrossEntropyWithSoftmax()
-    err = ClassificationError()
+    err = ErrorPrediction()
     ol.z = Plus()
 
 Validating network. 36 nodes to process in pass 1.
@@ -502,7 +502,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *1] -> [10 x *1]
 Validating --> ol.b = LearnableParameter() : -> [10 x 1]
 Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *1], [10 x 1] -> [10 x 1 x *1]
 Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
-Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
 
 Validating network. 16 nodes to process in pass 2.

View file

@@ -285,7 +285,7 @@ Post-processing network...
 3 roots:
     ce = CrossEntropyWithSoftmax()
-    err = ClassificationError()
+    err = ErrorPrediction()
     ol.z = Plus()
 
 Validating network. 36 nodes to process in pass 1.
@@ -327,7 +327,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *] -> [10 x *]
 Validating --> ol.b = LearnableParameter() : -> [10 x 1]
 Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *], [10 x 1] -> [10 x 1 x *]
 Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
-Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
 
 Validating network. 16 nodes to process in pass 2.
@@ -361,7 +361,7 @@ Post-processing network complete.
 
 05/13/2016 08:16:58: Evaluation criterion node(s):
-05/13/2016 08:16:58:     err = ClassificationError
+05/13/2016 08:16:58:     err = ErrorPrediction
 
 Allocating matrices for forward and/or backward propagation.
@@ -460,7 +460,7 @@ Post-processing network...
 3 roots:
     ce = CrossEntropyWithSoftmax()
-    err = ClassificationError()
+    err = ErrorPrediction()
     ol.z = Plus()
 
 Validating network. 36 nodes to process in pass 1.
@@ -500,7 +500,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *1] -> [10 x *1]
 Validating --> ol.b = LearnableParameter() : -> [10 x 1]
 Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *1], [10 x 1] -> [10 x 1 x *1]
 Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
-Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
+Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
 
 Validating network. 16 nodes to process in pass 2.

View file

@@ -1,49 +1,62 @@
-=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/Config/01_Conv.cntk currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10 OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DeviceId=0 timestamping=true Train=[SGD=[maxEpochs=10]] Train=[SGD=[epochSize=100]] stderr=-
+CPU info:
+    CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
+    Hardware threads: 24
+    Total Memory: 264172964 kB
+-------------------------------------------------------------------
+=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Conv.cntk currentDirectory=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData RunDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DataDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10 OutputDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DeviceId=0 timestamping=true Train=[SGD=[maxEpochs=10]] Train=[SGD=[epochSize=100]] stderr=-
 -------------------------------------------------------------------
 Build info: 
 
-    Built time: May 13 2016 14:50:25
-    Last modified date: Thu May 12 14:00:37 2016
+    Built time: Aug 16 2016 09:41:56
+    Last modified date: Fri Aug 12 07:32:43 2016
     Build type: release
     Build target: GPU
     With 1bit-SGD: no
-    Math lib: acml
+    Math lib: mkl
     CUDA_PATH: /usr/local/cuda-7.5
     CUB_PATH: /usr/local/cub-1.4.1
     CUDNN_PATH: /usr/local/cudnn-4.0
     Build Branch: HEAD
-    Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
-    Built by philly on d8dc82703b0f
+    Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
+    Built by philly on f67b30a647de
     Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
 -------------------------------------------------------------------
-Changed current directory to /tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
+Changed current directory to /tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
-05/13/2016 15:10:47: Redirecting stderr to file -_Train_Test.log
-05/13/2016 15:10:47: -------------------------------------------------------------------
-05/13/2016 15:10:47: Build info: 
-05/13/2016 15:10:47: Built time: May 13 2016 14:50:25
-05/13/2016 15:10:47: Last modified date: Thu May 12 14:00:37 2016
-05/13/2016 15:10:47: Build type: release
-05/13/2016 15:10:47: Build target: GPU
-05/13/2016 15:10:47: With 1bit-SGD: no
-05/13/2016 15:10:47: Math lib: acml
-05/13/2016 15:10:47: CUDA_PATH: /usr/local/cuda-7.5
-05/13/2016 15:10:47: CUB_PATH: /usr/local/cub-1.4.1
-05/13/2016 15:10:47: CUDNN_PATH: /usr/local/cudnn-4.0
-05/13/2016 15:10:47: Build Branch: HEAD
-05/13/2016 15:10:47: Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
-05/13/2016 15:10:47: Built by philly on d8dc82703b0f
-05/13/2016 15:10:47: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
-05/13/2016 15:10:47: -------------------------------------------------------------------
-
-05/13/2016 15:10:47: Running on localhost at 2016/05/13 15:10:47
-05/13/2016 15:10:47: Command line: 
-/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/Config/01_Conv.cntk currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10 OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DeviceId=0 timestamping=true Train=[SGD=[maxEpochs=10]] Train=[SGD=[epochSize=100]] stderr=-
+08/16/2016 10:50:36: Redirecting stderr to file -_Train_Test.log
+08/16/2016 10:50:36: -------------------------------------------------------------------
+08/16/2016 10:50:36: Build info: 
+08/16/2016 10:50:36: Built time: Aug 16 2016 09:41:56
+08/16/2016 10:50:36: Last modified date: Fri Aug 12 07:32:43 2016
+08/16/2016 10:50:36: Build type: release
+08/16/2016 10:50:36: Build target: GPU
+08/16/2016 10:50:36: With 1bit-SGD: no
+08/16/2016 10:50:36: Math lib: mkl
+08/16/2016 10:50:36: CUDA_PATH: /usr/local/cuda-7.5
+08/16/2016 10:50:36: CUB_PATH: /usr/local/cub-1.4.1
+08/16/2016 10:50:36: CUDNN_PATH: /usr/local/cudnn-4.0
+08/16/2016 10:50:36: Build Branch: HEAD
+08/16/2016 10:50:36: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
+08/16/2016 10:50:36: Built by philly on f67b30a647de
+08/16/2016 10:50:36: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
+08/16/2016 10:50:36: -------------------------------------------------------------------
+08/16/2016 10:50:37: -------------------------------------------------------------------
+08/16/2016 10:50:37: GPU info:
+08/16/2016 10:50:37: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
+08/16/2016 10:50:37: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
+08/16/2016 10:50:37: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
+08/16/2016 10:50:37: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
+08/16/2016 10:50:37: -------------------------------------------------------------------
+
+08/16/2016 10:50:37: Running on localhost at 2016/08/16 10:50:37
+08/16/2016 10:50:37: Command line: 
+/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Conv.cntk currentDirectory=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData RunDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DataDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10 OutputDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DeviceId=0 timestamping=true Train=[SGD=[maxEpochs=10]] Train=[SGD=[epochSize=100]] stderr=-
 
-05/13/2016 15:10:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
+08/16/2016 10:50:37: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
-05/13/2016 15:10:47: RootDir = "."
+08/16/2016 10:50:37: RootDir = "."
 ConfigDir = "$RootDir$"
 DataDir = "$RootDir$"
 OutputDir = "$RootDir$/Output"
@@ -53,7 +66,6 @@ precision = "float"
 deviceId = 0
 imageLayout = "cudnn"
 initOnCPUOnly=true
-prefetch = "true"
 command = Train:Test
 modelPath = "$ModelDir$/01_Convolution"
 stderr = "$OutputDir$/01_Conv"
@@ -86,7 +98,7 @@ Train = [
             format = "dense"
         ]
     ]
 ]
 ]
 
 Test = [
     action = "test"
@@ -104,42 +116,41 @@ Test = [
             format = "dense"
         ]
     ]
 ]
 ]
-currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
-RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
-DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
-ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
-OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
+currentDirectory=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
+RunDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
+DataDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
+ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
+OutputDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
 DeviceId=0
 timestamping=true
 Train=[SGD=[maxEpochs=10]]
 Train=[SGD=[epochSize=100]]
 stderr=-
-05/13/2016 15:10:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
+08/16/2016 10:50:37: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
 
-05/13/2016 15:10:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+08/16/2016 10:50:37: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
-05/13/2016 15:10:47: RootDir = "."
+08/16/2016 10:50:37: RootDir = "."
 ConfigDir = "."
 DataDir = "."
 OutputDir = "./Output"
-ModelDir = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models"
-ndlMacros = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl"
+ModelDir = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models"
+ndlMacros = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl"
 precision = "float"
 deviceId = 0
 imageLayout = "cudnn"
 initOnCPUOnly=true
-prefetch = "true"
 command = Train:Test
-modelPath = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution"
-stderr = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/01_Conv"
+modelPath = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution"
+stderr = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/01_Conv"
 traceLevel = 1
 numMBsToShowResult = 500
 Train = [
     action = "train"
     NDLNetworkBuilder = [
-        networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl"
+        networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl"
     ]
     SGD = [
         epochSize = 49984
@@ -152,7 +163,7 @@ Train = [
     ]
     reader = [
         readerType = "CNTKTextFormatReader"
-        file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Train_cntk_text.txt"
+        file = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Train_cntk_text.txt"
         input = [
             features = [
                 dim = 3072
@@ -163,14 +174,14 @@ Train = [
                 format = "dense"
             ]
         ]
     ]
 ]
 
 Test = [
     action = "test"
     minibatchSize = 16
     reader = [
         readerType = "CNTKTextFormatReader"
-        file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Test_cntk_text.txt"
+        file = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Test_cntk_text.txt"
         input = [
             features = [
                 dim = 3072
@@ -181,45 +192,44 @@ Test = [
                 format = "dense"
             ]
         ]
     ]
 ]
-currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
-RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
-DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
-ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
-OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
+currentDirectory=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
+RunDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
+DataDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
+ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
+OutputDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
 DeviceId=0
 timestamping=true
 Train=[SGD=[maxEpochs=10]]
 Train=[SGD=[epochSize=100]]
 stderr=-
-05/13/2016 15:10:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+08/16/2016 10:50:37: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
 
-05/13/2016 15:10:47: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+08/16/2016 10:50:37: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
 configparameters: 01_Conv.cntk:command=Train:Test
-configparameters: 01_Conv.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
-configparameters: 01_Conv.cntk:currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
+configparameters: 01_Conv.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
+configparameters: 01_Conv.cntk:currentDirectory=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
configparameters: 01_Conv.cntk:DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData configparameters: 01_Conv.cntk:DataDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
configparameters: 01_Conv.cntk:deviceId=0 configparameters: 01_Conv.cntk:deviceId=0
configparameters: 01_Conv.cntk:imageLayout=cudnn configparameters: 01_Conv.cntk:imageLayout=cudnn
configparameters: 01_Conv.cntk:initOnCPUOnly=true configparameters: 01_Conv.cntk:initOnCPUOnly=true
configparameters: 01_Conv.cntk:ModelDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models configparameters: 01_Conv.cntk:ModelDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models
configparameters: 01_Conv.cntk:modelPath=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution configparameters: 01_Conv.cntk:modelPath=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution
configparameters: 01_Conv.cntk:ndlMacros=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl configparameters: 01_Conv.cntk:ndlMacros=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl
configparameters: 01_Conv.cntk:numMBsToShowResult=500 configparameters: 01_Conv.cntk:numMBsToShowResult=500
configparameters: 01_Conv.cntk:OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu configparameters: 01_Conv.cntk:OutputDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
configparameters: 01_Conv.cntk:precision=float configparameters: 01_Conv.cntk:precision=float
configparameters: 01_Conv.cntk:prefetch=true
configparameters: 01_Conv.cntk:RootDir=. configparameters: 01_Conv.cntk:RootDir=.
configparameters: 01_Conv.cntk:RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu configparameters: 01_Conv.cntk:RunDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
configparameters: 01_Conv.cntk:stderr=- configparameters: 01_Conv.cntk:stderr=-
configparameters: 01_Conv.cntk:Test=[ configparameters: 01_Conv.cntk:Test=[
action = "test" action = "test"
minibatchSize = 16 minibatchSize = 16
reader = [ reader = [
readerType = "CNTKTextFormatReader" readerType = "CNTKTextFormatReader"
file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Test_cntk_text.txt" file = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Test_cntk_text.txt"
input = [ input = [
features = [ features = [
dim = 3072 dim = 3072
@ -230,7 +240,7 @@ configparameters: 01_Conv.cntk:Test=[
format = "dense" format = "dense"
] ]
] ]
] ]
] ]
configparameters: 01_Conv.cntk:timestamping=true configparameters: 01_Conv.cntk:timestamping=true
@ -238,7 +248,7 @@ configparameters: 01_Conv.cntk:traceLevel=1
configparameters: 01_Conv.cntk:Train=[ configparameters: 01_Conv.cntk:Train=[
action = "train" action = "train"
NDLNetworkBuilder = [ NDLNetworkBuilder = [
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl" networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl"
] ]
SGD = [ SGD = [
epochSize = 49984 epochSize = 49984
@ -251,7 +261,7 @@ configparameters: 01_Conv.cntk:Train=[
] ]
reader = [ reader = [
readerType = "CNTKTextFormatReader" readerType = "CNTKTextFormatReader"
file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Train_cntk_text.txt" file = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Train_cntk_text.txt"
input = [ input = [
features = [ features = [
dim = 3072 dim = 3072
@ -262,33 +272,57 @@ configparameters: 01_Conv.cntk:Train=[
format = "dense" format = "dense"
] ]
] ]
] ]
] [SGD=[maxEpochs=10]] [SGD=[epochSize=100]] ] [SGD=[maxEpochs=10]] [SGD=[epochSize=100]]
05/13/2016 15:10:47: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< 08/16/2016 10:50:37: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
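The trailing [SGD=[maxEpochs=10]] [SGD=[epochSize=100]] blocks above are the command-line overrides echoed back as appended to the Train section; when a section repeats, later assignments win. A minimal sketch of that merge semantics (illustrative Python, not CNTK's actual config parser):

    def merge(base: dict, *overrides: dict) -> dict:
        """Recursively apply override sections; later values win (illustrative only)."""
        out = dict(base)
        for ov in overrides:
            for key, val in ov.items():
                if isinstance(val, dict) and isinstance(out.get(key), dict):
                    out[key] = merge(out[key], val)
                else:
                    out[key] = val
        return out

    train = {"SGD": {"epochSize": 49984}}
    print(merge(train, {"SGD": {"maxEpochs": 10}}, {"SGD": {"epochSize": 100}}))
    # {'SGD': {'epochSize': 100, 'maxEpochs': 10}} -- matching the effective
    # epochSize=100 and "CNTKCommandTrainInfo: Train : 10" below.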
05/13/2016 15:10:47: Commands: Train Test 08/16/2016 10:50:37: Commands: Train Test
05/13/2016 15:10:47: Precision = "float" 08/16/2016 10:50:37: Precision = "float"
05/13/2016 15:10:47: CNTKModelPath: /tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution 08/16/2016 10:50:37: CNTKModelPath: /tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution
05/13/2016 15:10:47: CNTKCommandTrainInfo: Train : 10 08/16/2016 10:50:37: CNTKCommandTrainInfo: Train : 10
05/13/2016 15:10:47: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 10 08/16/2016 10:50:37: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 10
05/13/2016 15:10:47: ############################################################################## 08/16/2016 10:50:37: ##############################################################################
05/13/2016 15:10:47: # # 08/16/2016 10:50:37: # #
05/13/2016 15:10:47: # Action "train" # 08/16/2016 10:50:37: # Action "train" #
05/13/2016 15:10:47: # # 08/16/2016 10:50:37: # #
05/13/2016 15:10:47: ############################################################################## 08/16/2016 10:50:37: ##############################################################################
05/13/2016 15:10:47: CNTKCommandTrainBegin: Train 08/16/2016 10:50:37: CNTKCommandTrainBegin: Train
NDLBuilder Using GPU 0 NDLBuilder Using GPU 0
05/13/2016 15:10:47: Creating virgin network. 08/16/2016 10:50:37: Creating virgin network.
Node 'featOffs' (LearnableParameter operation): Initializing Parameter[1 x 1] <- 0.000000.
Node 'conv1_act.W' (LearnableParameter operation): Initializing Parameter[32 x 75] <- 0.000000.
Node 'conv1_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 32] <- 0.000000.
Node 'conv2_act.W' (LearnableParameter operation): Initializing Parameter[32 x 800] <- 0.000000.
Node 'conv2_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 32] <- 0.000000.
Node 'conv3_act.W' (LearnableParameter operation): Initializing Parameter[64 x 800] <- 0.000000.
Node 'conv3_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 64] <- 0.000000.
Node 'h1.W' (LearnableParameter operation): Initializing Parameter[64 x 3 x 3 x 64] <- 0.000000.
Node 'h1.b' (LearnableParameter operation): Initializing Parameter[64 x 1] <- 0.000000.
Node 'OutputNodes.W' (LearnableParameter operation): Initializing Parameter[10 x 64] <- 0.000000.
Node 'OutputNodes.b' (LearnableParameter operation): Initializing Parameter[10] <- 0.000000.
Node 'featOffs' (LearnableParameter operation): Initializing Parameter[1 x 1] <- 128.000000.
Node 'featOffs' (LearnableParameter operation): Initializing Parameter[1 x 1] <- 128.000000.
Node 'featOffs' (LearnableParameter operation): Initializing Parameter[1 x 1] <- 128.000000.
Node 'conv1_act.W' (LearnableParameter operation): Initializing Parameter[32 x 75] <- gaussian(seed=1, range=0.023094*0.004300, onCPU=false).
SetGaussianRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4 SetGaussianRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4
Node 'conv1_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 32] <- 0.000000.
Node 'conv2_act.W' (LearnableParameter operation): Initializing Parameter[32 x 800] <- gaussian(seed=2, range=0.007071*1.414000, onCPU=false).
Node 'conv2_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 32] <- 0.000000.
Node 'conv3_act.W' (LearnableParameter operation): Initializing Parameter[64 x 800] <- gaussian(seed=3, range=0.007071*1.414000, onCPU=false).
Node 'conv3_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 64] <- 0.000000.
Node 'h1.W' (LearnableParameter operation): Initializing Parameter[64 x 3 x 3 x 64] <- gaussian(seed=4, range=0.008333*12.000000, onCPU=false).
Node 'h1.b' (LearnableParameter operation): Initializing Parameter[64 x 1] <- 0.000000.
Node 'OutputNodes.W' (LearnableParameter operation): Initializing Parameter[10 x 64] <- gaussian(seed=5, range=0.025000*1.500000, onCPU=false).
Node 'OutputNodes.b' (LearnableParameter operation): Initializing Parameter[10] <- 0.000000.
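Each gaussian(range=a*b, ...) line above factors the initialization scale into two terms; the logged numbers are consistent with the first factor being CNTK's default 0.2/sqrt(fanIn) and the second the layer's initValueScale from the NDL (an assumption here, since the NDL file itself is not shown). A quick check:

    import math

    # fan-in per output unit, read off the tensor shapes above (assumed mapping)
    fan_in = {"conv1_act.W": 75, "conv2_act.W": 800, "conv3_act.W": 800,
              "h1.W": 3 * 3 * 64, "OutputNodes.W": 64}
    for name, n in fan_in.items():
        print(f"{name}: 0.2/sqrt({n}) = {0.2 / math.sqrt(n):.6f}")
    # conv1_act.W: 0.023094, conv2_act.W/conv3_act.W: 0.007071,
    # h1.W: 0.008333, OutputNodes.W: 0.025000 -- the first factor in each line.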
Post-processing network... Post-processing network...
3 roots: 3 roots:
CE = CrossEntropyWithSoftmax() CE = CrossEntropyWithSoftmax()
Err = ClassificationError() Err = ErrorPrediction()
OutputNodes.z = Plus() OutputNodes.z = Plus()
Validating network. 34 nodes to process in pass 1. Validating network. 34 nodes to process in pass 1.
@ -326,7 +360,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1_d) : [10 x 64], [64 x 1
Validating --> OutputNodes.b = LearnableParameter() : -> [10] Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *], [10] -> [10 x 1 x *] Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *], [10] -> [10 x 1 x *]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1] Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1] Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
Validating network. 21 nodes to process in pass 2. Validating network. 21 nodes to process in pass 2.
@ -334,165 +368,183 @@ Validating network. 21 nodes to process in pass 2.
Validating network, final pass. Validating network, final pass.
Using cuDNN convolution engine for geometry: Input: 32 x 32 x 3, Output: 32 x 32 x 32, Kernel: 5 x 5 x 3, Map: 1 x 1 x 32, Stride: 1 x 1 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. conv1_act.c: using cuDNN convolution engine for geometry: Input: 32 x 32 x 3, Output: 32 x 32 x 32, Kernel: 5 x 5 x 3, Map: 1 x 1 x 32, Stride: 1 x 1 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 32 x 32 x 32, Output: 15 x 15 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. pool1: using cuDNN convolution engine for geometry: Input: 32 x 32 x 32, Output: 15 x 15 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 15 x 15 x 32, Kernel: 5 x 5 x 32, Map: 1 x 1 x 32, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. conv2_act.c: using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 15 x 15 x 32, Kernel: 5 x 5 x 32, Map: 1 x 1 x 32, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 7 x 7 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. pool2: using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 7 x 7 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 7 x 7 x 32, Output: 7 x 7 x 64, Kernel: 5 x 5 x 32, Map: 1 x 1 x 64, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. conv3_act.c: using cuDNN convolution engine for geometry: Input: 7 x 7 x 32, Output: 7 x 7 x 64, Kernel: 5 x 5 x 32, Map: 1 x 1 x 64, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 7 x 7 x 64, Output: 3 x 3 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. pool3: using cuDNN convolution engine for geometry: Input: 7 x 7 x 64, Output: 3 x 3 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
13 out of 34 nodes do not share the minibatch layout with the input data. 13 out of 34 nodes do not share the minibatch layout with the input data.
Post-processing network complete. Post-processing network complete.
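Each "using cuDNN convolution engine for geometry" line above is plain shape arithmetic; a small helper (an illustrative sketch, not CNTK code) reproduces the 32 -> 15 -> 7 -> 3 pooling chain with kernel 3, stride 2, and no auto-padding:

    def out_dim(n: int, kernel: int, stride: int, pad: int = 0) -> int:
        """Output spatial size: floor((n + 2*pad - kernel) / stride) + 1."""
        return (n + 2 * pad - kernel) // stride + 1

    n = 32
    for name in ("pool1", "pool2", "pool3"):
        n = out_dim(n, kernel=3, stride=2)
        print(name, n)   # pool1 15, pool2 7, pool3 3 -- as in the log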
05/13/2016 15:10:48: Created model with 34 nodes on GPU 0. 08/16/2016 10:50:38: Created model with 34 nodes on GPU 0.
05/13/2016 15:10:48: Training criterion node(s): 08/16/2016 10:50:38: Training criterion node(s):
05/13/2016 15:10:48: CE = CrossEntropyWithSoftmax 08/16/2016 10:50:38: CE = CrossEntropyWithSoftmax
05/13/2016 15:10:48: Evaluation criterion node(s): 08/16/2016 10:50:38: Evaluation criterion node(s):
08/16/2016 10:50:38: Err = ErrorPrediction
05/13/2016 15:10:48: Err = ClassificationError
Allocating matrices for forward and/or backward propagation. Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure: Memory Sharing: Out of 63 matrices, 38 are shared as 17, and 25 are not shared.
(nil): {[Err Gradient[1]] [featOffs Gradient[1 x 1]] [featScaled Gradient[32 x 32 x 3 x *]] [features Gradient[32 x 32 x 3 x *]] [labels Gradient[10 x *]] } { conv1_act.W : [32 x 75] (gradient)
0x2485d28: {[OutputNodes.z Value[10 x 1 x *]] } conv1_act.p : [32 x 32 x 32 x *] }
0x2485ee8: {[CE Value[1]] } { conv1_act.c : [32 x 32 x 32 x *] (gradient)
0x2486168: {[conv1_act.W Gradient[32 x 75]] [conv1_act.p Value[32 x 32 x 32 x *]] } conv1_act.y : [32 x 32 x 32 x *] }
0x2486328: {[conv1_act.c Gradient[32 x 32 x 32 x *]] [conv1_act.y Value[32 x 32 x 32 x *]] } { conv1_act.p : [32 x 32 x 32 x *] (gradient)
0x24864e8: {[conv1_act.p Gradient[32 x 32 x 32 x *]] [pool1 Value[15 x 15 x 32 x *]] } pool1 : [15 x 15 x 32 x *] }
0x249a638: {[features Value[32 x 32 x 3 x *]] } { conv1_act.b : [1 x 1 x 32] (gradient)
0x2975298: {[conv1_act.b Value[1 x 1 x 32]] } conv1_act.y : [32 x 32 x 32 x *] (gradient) }
0x2976b48: {[conv2_act.W Value[32 x 800]] } { conv2_act.W : [32 x 800] (gradient)
0x2977ae8: {[conv2_act.b Value[1 x 1 x 32]] } conv2_act.p : [15 x 15 x 32 x *] }
0x2979668: {[conv3_act.W Value[64 x 800]] } { conv2_act.c : [15 x 15 x 32 x *] (gradient)
0x2979f08: {[conv3_act.b Value[1 x 1 x 64]] } conv2_act.y : [15 x 15 x 32 x *] }
0x297bae8: {[h1.W Value[64 x 3 x 3 x 64]] } { conv2_act.p : [15 x 15 x 32 x *] (gradient)
0x297c538: {[h1.b Value[64 x 1]] } pool1 : [15 x 15 x 32 x *] (gradient)
0x297d5c8: {[OutputNodes.W Value[10 x 64]] } pool2 : [7 x 7 x 32 x *] }
0x297ea98: {[OutputNodes.b Value[10]] } { conv2_act.b : [1 x 1 x 32] (gradient)
0x2dd1458: {[featOffs Value[1 x 1]] } conv2_act.y : [15 x 15 x 32 x *] (gradient) }
0x2dd2678: {[labels Value[10 x *]] } { conv3_act.W : [64 x 800] (gradient)
0x2dd2eb8: {[conv1_act.W Value[32 x 75]] } conv3_act.p : [7 x 7 x 64 x *] }
0x7a59dd8: {[Err Value[1]] } { conv3_act.c : [7 x 7 x 64 x *] (gradient)
0x7a5d378: {[featScaled Value[32 x 32 x 3 x *]] } conv3_act.y : [7 x 7 x 64 x *] }
0x7a5d6d8: {[conv1_act.c Value[32 x 32 x 32 x *]] } { conv3_act.p : [7 x 7 x 64 x *] (gradient)
0x7a5e478: {[conv2_act.c Value[15 x 15 x 32 x *]] } pool2 : [7 x 7 x 32 x *] (gradient)
0x7a5e638: {[conv1_act.b Gradient[1 x 1 x 32]] [conv1_act.y Gradient[32 x 32 x 32 x *]] } pool3 : [3 x 3 x 64 x *] }
0x7a5e7f8: {[conv2_act.W Gradient[32 x 800]] [conv2_act.p Value[15 x 15 x 32 x *]] } { conv3_act.b : [1 x 1 x 64] (gradient)
0x7a7ade8: {[conv2_act.c Gradient[15 x 15 x 32 x *]] [conv2_act.y Value[15 x 15 x 32 x *]] } conv3_act.y : [7 x 7 x 64 x *] (gradient)
0x7a7afa8: {[conv2_act.p Gradient[15 x 15 x 32 x *]] [pool1 Gradient[15 x 15 x 32 x *]] [pool2 Value[7 x 7 x 32 x *]] } h1.t : [64 x *] }
0x7a7b168: {[conv3_act.c Value[7 x 7 x 64 x *]] } { h1.W : [64 x 3 x 3 x 64] (gradient)
0x7a7b328: {[conv2_act.b Gradient[1 x 1 x 32]] [conv2_act.y Gradient[15 x 15 x 32 x *]] } h1.z : [64 x 1 x *] }
0x7a7b4e8: {[conv3_act.W Gradient[64 x 800]] [conv3_act.p Value[7 x 7 x 64 x *]] } { h1.t : [64 x *] (gradient)
0x7a7b6a8: {[conv3_act.c Gradient[7 x 7 x 64 x *]] [conv3_act.y Value[7 x 7 x 64 x *]] } h1.y : [64 x 1 x *] }
0x7a7b868: {[conv3_act.p Gradient[7 x 7 x 64 x *]] [pool2 Gradient[7 x 7 x 32 x *]] [pool3 Value[3 x 3 x 64 x *]] } { h1.z : [64 x 1 x *] (gradient)
0x7a7ba28: {[conv3_act.b Gradient[1 x 1 x 64]] [conv3_act.y Gradient[7 x 7 x 64 x *]] [h1.t Value[64 x *]] } pool3 : [3 x 3 x 64 x *] (gradient) }
0x7a7bbe8: {[h1.W Gradient[64 x 3 x 3 x 64]] [h1.z Value[64 x 1 x *]] } { OutputNodes.t : [10 x 1 x *]
0x7a7bda8: {[h1.t Gradient[64 x *]] [h1.y Value[64 x 1 x *]] } h1.b : [64 x 1] (gradient)
0x7a7bf68: {[h1_d Value[64 x 1 x *]] } h1.y : [64 x 1 x *] (gradient) }
0x7a7c128: {[h1.z Gradient[64 x 1 x *]] [pool3 Gradient[3 x 3 x 64 x *]] } { OutputNodes.W : [10 x 64] (gradient)
0x7a7c2e8: {[OutputNodes.t Value[10 x 1 x *]] [h1.b Gradient[64 x 1]] [h1.y Gradient[64 x 1 x *]] } OutputNodes.z : [10 x 1 x *] (gradient) }
0x7a7cdc8: {[CE Gradient[1]] }
0x7a7cf88: {[OutputNodes.W Gradient[10 x 64]] [OutputNodes.z Gradient[10 x 1 x *]] }
0x7a7d148: {[OutputNodes.t Gradient[10 x 1 x *]] }
0x7a7d308: {[OutputNodes.b Gradient[10]] }
0x7a7d4c8: {[h1_d Gradient[64 x 1 x *]] }
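"38 are shared as 17" above means 38 of the 63 matrices alias only 17 underlying buffers: whenever two matrices' live ranges do not overlap (for example a weight gradient and a forward value needed only at a different phase), they can reuse the same storage. A conceptual sketch of such lifetime-based sharing (not CNTK's actual allocator):

    def share(live_ranges: dict) -> list:
        """Greedily pack names with non-overlapping (start, end) ranges into buffers."""
        pools = []                          # each pool: [last_end, member_names]
        for name, (start, end) in sorted(live_ranges.items(), key=lambda kv: kv[1]):
            for pool in pools:
                if pool[0] <= start:        # buffer is free again before this range
                    pool[0] = end
                    pool[1].append(name)
                    break
            else:
                pools.append([end, [name]])
        return [members for _, members in pools]

    # hypothetical step indices, mirroring one shared pair from the log above
    print(share({"conv1_act.W (gradient)": (5, 6), "conv1_act.p (value)": (7, 9)}))
    # [['conv1_act.W (gradient)', 'conv1_act.p (value)']] -- one shared buffer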
05/13/2016 15:10:48: No PreCompute nodes found, skipping PreCompute step.
05/13/2016 15:10:48: Starting Epoch 1: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples 08/16/2016 10:50:38: Training 116906 parameters in 10 out of 10 parameter tensors and 29 nodes with gradient:
05/13/2016 15:10:48: Starting minibatch loop. 08/16/2016 10:50:38: Node 'OutputNodes.W' (LearnableParameter operation) : [10 x 64]
05/13/2016 15:10:51: Finished Epoch[ 1 of 10]: [Training] CE = 2.30242050 * 100; Err = 0.88000000 * 100; totalSamplesSeen = 100; learningRatePerSample = 0.00015625; epochTime=3.55904s 08/16/2016 10:50:38: Node 'OutputNodes.b' (LearnableParameter operation) : [10]
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.1' 08/16/2016 10:50:38: Node 'conv1_act.W' (LearnableParameter operation) : [32 x 75]
08/16/2016 10:50:38: Node 'conv1_act.b' (LearnableParameter operation) : [1 x 1 x 32]
08/16/2016 10:50:38: Node 'conv2_act.W' (LearnableParameter operation) : [32 x 800]
08/16/2016 10:50:38: Node 'conv2_act.b' (LearnableParameter operation) : [1 x 1 x 32]
08/16/2016 10:50:38: Node 'conv3_act.W' (LearnableParameter operation) : [64 x 800]
08/16/2016 10:50:38: Node 'conv3_act.b' (LearnableParameter operation) : [1 x 1 x 64]
08/16/2016 10:50:38: Node 'h1.W' (LearnableParameter operation) : [64 x 3 x 3 x 64]
08/16/2016 10:50:38: Node 'h1.b' (LearnableParameter operation) : [64 x 1]
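The "Training 116906 parameters in 10 out of 10 parameter tensors" figure is simply the sum of the tensor shapes listed above:

    from math import prod

    shapes = {
        "OutputNodes.W": (10, 64), "OutputNodes.b": (10,),
        "conv1_act.W": (32, 75),   "conv1_act.b": (1, 1, 32),
        "conv2_act.W": (32, 800),  "conv2_act.b": (1, 1, 32),
        "conv3_act.W": (64, 800),  "conv3_act.b": (1, 1, 64),
        "h1.W": (64, 3, 3, 64),    "h1.b": (64, 1),
    }
    print(sum(prod(s) for s in shapes.values()))   # 116906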
05/13/2016 15:10:51: Starting Epoch 2: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples 08/16/2016 10:50:38: No PreCompute nodes found, or all already computed. Skipping pre-computation step.
05/13/2016 15:10:51: Starting minibatch loop. 08/16/2016 10:50:38: Starting Epoch 1: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
05/13/2016 15:10:51: Finished Epoch[ 2 of 10]: [Training] CE = 2.30175842 * 100; Err = 0.94000000 * 100; totalSamplesSeen = 200; learningRatePerSample = 0.00015625; epochTime=0.011903s BlockRandomizer::StartEpoch: epoch 0: frames [0..100] (first sequence at sample 0), data subset 0 of 1
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.2'
05/13/2016 15:10:51: Starting Epoch 3: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples 08/16/2016 10:50:38: Starting minibatch loop.
08/16/2016 10:50:41: Finished Epoch[ 1 of 10]: [Training] CE = 2.30223602 * 100; Err = 0.90000000 * 100; totalSamplesSeen = 100; learningRatePerSample = 0.00015625; epochTime=3.51082s
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.1'
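The per-epoch header's "momentum as time constant = 607.4 samples" follows from the effective per-minibatch momentum of 0.9. Assuming a training minibatch size of 64 (the SGD block is truncated above, but learningRatePerSample = 0.00015625 = 0.01/64 points the same way), the time constant is minibatchSize / -ln(momentum):

    import math

    momentum = 0.9        # effective momentum per minibatch, from the log
    minibatch_size = 64   # assumption: not visible in the truncated SGD section
    print(minibatch_size / -math.log(momentum))   # 607.44... samples
    print(0.01 / minibatch_size)                  # 0.00015625 per-sample rate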
05/13/2016 15:10:51: Starting minibatch loop. 08/16/2016 10:50:41: Starting Epoch 2: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
05/13/2016 15:10:51: Finished Epoch[ 3 of 10]: [Training] CE = 2.30054413 * 100; Err = 0.90000000 * 100; totalSamplesSeen = 300; learningRatePerSample = 0.00015625; epochTime=0.012701s BlockRandomizer::StartEpoch: epoch 1: frames [100..200] (first sequence at sample 100), data subset 0 of 1
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.3'
05/13/2016 15:10:51: Starting Epoch 4: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples 08/16/2016 10:50:41: Starting minibatch loop.
08/16/2016 10:50:41: Finished Epoch[ 2 of 10]: [Training] CE = 2.30189240 * 100; Err = 0.87000000 * 100; totalSamplesSeen = 200; learningRatePerSample = 0.00015625; epochTime=0.012555s
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.2'
05/13/2016 15:10:51: Starting minibatch loop. 08/16/2016 10:50:41: Starting Epoch 3: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
05/13/2016 15:10:51: Finished Epoch[ 4 of 10]: [Training] CE = 2.30022812 * 100; Err = 0.88000000 * 100; totalSamplesSeen = 400; learningRatePerSample = 0.00015625; epochTime=0.01144s BlockRandomizer::StartEpoch: epoch 2: frames [200..300] (first sequence at sample 200), data subset 0 of 1
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.4'
05/13/2016 15:10:51: Starting Epoch 5: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples 08/16/2016 10:50:41: Starting minibatch loop.
08/16/2016 10:50:41: Finished Epoch[ 3 of 10]: [Training] CE = 2.29965256 * 100; Err = 0.86000000 * 100; totalSamplesSeen = 300; learningRatePerSample = 0.00015625; epochTime=0.012394s
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.3'
05/13/2016 15:10:51: Starting minibatch loop. 08/16/2016 10:50:41: Starting Epoch 4: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
05/13/2016 15:10:51: Finished Epoch[ 5 of 10]: [Training] CE = 2.29579636 * 100; Err = 0.87000000 * 100; totalSamplesSeen = 500; learningRatePerSample = 0.00015625; epochTime=0.011529s BlockRandomizer::StartEpoch: epoch 3: frames [300..400] (first sequence at sample 300), data subset 0 of 1
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.5'
08/16/2016 10:50:41: Starting minibatch loop.
08/16/2016 10:50:41: Finished Epoch[ 4 of 10]: [Training] CE = 2.29966064 * 100; Err = 0.91000000 * 100; totalSamplesSeen = 400; learningRatePerSample = 0.00015625; epochTime=0.0124s
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.4'
08/16/2016 10:50:41: Starting Epoch 5: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 4: frames [400..500] (first sequence at sample 400), data subset 0 of 1
08/16/2016 10:50:41: Starting minibatch loop.
08/16/2016 10:50:41: Finished Epoch[ 5 of 10]: [Training] CE = 2.30450394 * 100; Err = 0.94000000 * 100; totalSamplesSeen = 500; learningRatePerSample = 0.00015625; epochTime=0.012302s
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.5'
Setting dropout rate to 0.5. Setting dropout rate to 0.5.
05/13/2016 15:10:51: Starting Epoch 6: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples 08/16/2016 10:50:41: Starting Epoch 6: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 5: frames [500..600] (first sequence at sample 500), data subset 0 of 1
05/13/2016 15:10:51: Starting minibatch loop. 08/16/2016 10:50:41: Starting minibatch loop.
(GPU): creating curand object with seed 5 (GPU): creating curand object with seed 5
05/13/2016 15:10:51: Finished Epoch[ 6 of 10]: [Training] CE = 2.30121231 * 100; Err = 0.84000000 * 100; totalSamplesSeen = 600; learningRatePerSample = 0.00015625; epochTime=0.012276s 08/16/2016 10:50:41: Finished Epoch[ 6 of 10]: [Training] CE = 2.29013916 * 100; Err = 0.81000000 * 100; totalSamplesSeen = 600; learningRatePerSample = 0.00015625; epochTime=0.012412s
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.6' 08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.6'
05/13/2016 15:10:51: Starting Epoch 7: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples 08/16/2016 10:50:41: Starting Epoch 7: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 6: frames [600..700] (first sequence at sample 600), data subset 0 of 1
05/13/2016 15:10:51: Starting minibatch loop. 08/16/2016 10:50:41: Starting minibatch loop.
(GPU): creating curand object with seed 6 (GPU): creating curand object with seed 6
05/13/2016 15:10:52: Finished Epoch[ 7 of 10]: [Training] CE = 2.28975647 * 100; Err = 0.93000000 * 100; totalSamplesSeen = 700; learningRatePerSample = 0.00015625; epochTime=0.011495s 08/16/2016 10:50:41: Finished Epoch[ 7 of 10]: [Training] CE = 2.29815765 * 100; Err = 0.93000000 * 100; totalSamplesSeen = 700; learningRatePerSample = 0.00015625; epochTime=0.012303s
05/13/2016 15:10:52: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.7' 08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.7'
05/13/2016 15:10:52: Starting Epoch 8: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples 08/16/2016 10:50:41: Starting Epoch 8: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 7: frames [700..800] (first sequence at sample 700), data subset 0 of 1
05/13/2016 15:10:52: Starting minibatch loop. 08/16/2016 10:50:41: Starting minibatch loop.
(GPU): creating curand object with seed 7 (GPU): creating curand object with seed 7
05/13/2016 15:10:52: Finished Epoch[ 8 of 10]: [Training] CE = 2.29035095 * 100; Err = 0.91000000 * 100; totalSamplesSeen = 800; learningRatePerSample = 0.00015625; epochTime=0.012157s 08/16/2016 10:50:41: Finished Epoch[ 8 of 10]: [Training] CE = 2.28805603 * 100; Err = 0.89000000 * 100; totalSamplesSeen = 800; learningRatePerSample = 0.00015625; epochTime=0.012517s
05/13/2016 15:10:52: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.8' 08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.8'
05/13/2016 15:10:52: Starting Epoch 9: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples 08/16/2016 10:50:41: Starting Epoch 9: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 8: frames [800..900] (first sequence at sample 800), data subset 0 of 1
05/13/2016 15:10:52: Starting minibatch loop. 08/16/2016 10:50:41: Starting minibatch loop.
(GPU): creating curand object with seed 8 (GPU): creating curand object with seed 8
05/13/2016 15:10:52: Finished Epoch[ 9 of 10]: [Training] CE = 2.29797729 * 100; Err = 0.87000000 * 100; totalSamplesSeen = 900; learningRatePerSample = 0.00015625; epochTime=0.011451s 08/16/2016 10:50:41: Finished Epoch[ 9 of 10]: [Training] CE = 2.29380524 * 100; Err = 0.88000000 * 100; totalSamplesSeen = 900; learningRatePerSample = 0.00015625; epochTime=0.012463s
05/13/2016 15:10:52: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.9' 08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.9'
05/13/2016 15:10:52: Starting Epoch 10: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples 08/16/2016 10:50:41: Starting Epoch 10: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 9: frames [900..1000] (first sequence at sample 900), data subset 0 of 1
05/13/2016 15:10:52: Starting minibatch loop. 08/16/2016 10:50:41: Starting minibatch loop.
(GPU): creating curand object with seed 9 (GPU): creating curand object with seed 9
05/13/2016 15:10:52: Finished Epoch[10 of 10]: [Training] CE = 2.29764435 * 100; Err = 0.87000000 * 100; totalSamplesSeen = 1000; learningRatePerSample = 0.00015625; epochTime=0.012689s 08/16/2016 10:50:41: Finished Epoch[10 of 10]: [Training] CE = 2.27814423 * 100; Err = 0.87000000 * 100; totalSamplesSeen = 1000; learningRatePerSample = 0.00015625; epochTime=0.012432s
05/13/2016 15:10:52: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution' 08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution'
05/13/2016 15:10:52: CNTKCommandTrainEnd: Train 08/16/2016 10:50:41: CNTKCommandTrainEnd: Train
05/13/2016 15:10:52: Action "train" complete. 08/16/2016 10:50:41: Action "train" complete.
05/13/2016 15:10:52: ############################################################################## 08/16/2016 10:50:41: ##############################################################################
05/13/2016 15:10:52: # # 08/16/2016 10:50:41: # #
05/13/2016 15:10:52: # Action "test" # 08/16/2016 10:50:41: # Action "test" #
05/13/2016 15:10:52: # # 08/16/2016 10:50:41: # #
05/13/2016 15:10:52: ############################################################################## 08/16/2016 10:50:41: ##############################################################################
Post-processing network... Post-processing network...
3 roots: 3 roots:
CE = CrossEntropyWithSoftmax() CE = CrossEntropyWithSoftmax()
Err = ClassificationError() Err = ErrorPrediction()
OutputNodes.z = Plus() OutputNodes.z = Plus()
Validating network. 34 nodes to process in pass 1. Validating network. 34 nodes to process in pass 1.
@ -530,7 +582,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1_d) : [10 x 64], [64 x 1
Validating --> OutputNodes.b = LearnableParameter() : -> [10] Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *1], [10] -> [10 x 1 x *1] Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *1], [10] -> [10 x 1 x *1]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1] Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1] Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating network. 21 nodes to process in pass 2. Validating network. 21 nodes to process in pass 2.
@ -538,17 +590,17 @@ Validating network. 21 nodes to process in pass 2.
Validating network, final pass. Validating network, final pass.
Using cuDNN convolution engine for geometry: Input: 32 x 32 x 3, Output: 32 x 32 x 32, Kernel: 5 x 5 x 3, Map: 1 x 1 x 32, Stride: 1 x 1 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. conv1_act.c: using cuDNN convolution engine for geometry: Input: 32 x 32 x 3, Output: 32 x 32 x 32, Kernel: 5 x 5 x 3, Map: 1 x 1 x 32, Stride: 1 x 1 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 32 x 32 x 32, Output: 15 x 15 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. pool1: using cuDNN convolution engine for geometry: Input: 32 x 32 x 32, Output: 15 x 15 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 15 x 15 x 32, Kernel: 5 x 5 x 32, Map: 1 x 1 x 32, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. conv2_act.c: using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 15 x 15 x 32, Kernel: 5 x 5 x 32, Map: 1 x 1 x 32, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 7 x 7 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. pool2: using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 7 x 7 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 7 x 7 x 32, Output: 7 x 7 x 64, Kernel: 5 x 5 x 32, Map: 1 x 1 x 64, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0. conv3_act.c: using cuDNN convolution engine for geometry: Input: 7 x 7 x 32, Output: 7 x 7 x 64, Kernel: 5 x 5 x 32, Map: 1 x 1 x 64, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 7 x 7 x 64, Output: 3 x 3 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0. pool3: using cuDNN convolution engine for geometry: Input: 7 x 7 x 64, Output: 3 x 3 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
13 out of 34 nodes do not share the minibatch layout with the input data. 13 out of 34 nodes do not share the minibatch layout with the input data.
@ -560,46 +612,14 @@ evalNodeNames are not specified, using all the default evalnodes and training cr
Allocating matrices for forward and/or backward propagation. Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure: Memory Sharing: Out of 34 matrices, 0 are shared as 0, and 34 are not shared.
(nil): {[CE Gradient[1]] [Err Gradient[1]] [OutputNodes.W Gradient[10 x 64]] [OutputNodes.b Gradient[10]] [OutputNodes.t Gradient[10 x 1 x *1]] [OutputNodes.z Gradient[10 x 1 x *1]] [conv1_act.W Gradient[32 x 75]] [conv1_act.b Gradient[1 x 1 x 32]] [conv1_act.c Gradient[32 x 32 x 32 x *1]] [conv1_act.p Gradient[32 x 32 x 32 x *1]] [conv1_act.y Gradient[32 x 32 x 32 x *1]] [conv2_act.W Gradient[32 x 800]] [conv2_act.b Gradient[1 x 1 x 32]] [conv2_act.c Gradient[15 x 15 x 32 x *1]] [conv2_act.p Gradient[15 x 15 x 32 x *1]] [conv2_act.y Gradient[15 x 15 x 32 x *1]] [conv3_act.W Gradient[64 x 800]] [conv3_act.b Gradient[1 x 1 x 64]] [conv3_act.c Gradient[7 x 7 x 64 x *1]] [conv3_act.p Gradient[7 x 7 x 64 x *1]] [conv3_act.y Gradient[7 x 7 x 64 x *1]] [featOffs Gradient[1 x 1]] [featScaled Gradient[32 x 32 x 3 x *1]] [features Gradient[32 x 32 x 3 x *1]] [h1.W Gradient[64 x 3 x 3 x 64]] [h1.b Gradient[64 x 1]] [h1.t Gradient[64 x *1]] [h1.y Gradient[64 x 1 x *1]] [h1.z Gradient[64 x 1 x *1]] [h1_d Gradient[64 x 1 x *1]] [labels Gradient[10 x *1]] [pool1 Gradient[15 x 15 x 32 x *1]] [pool2 Gradient[7 x 7 x 32 x *1]] [pool3 Gradient[3 x 3 x 64 x *1]] }
0x7fc883e04ba8: {[conv1_act.b Value[1 x 1 x 32]] }
0x7fc883e05fc8: {[conv1_act.W Value[32 x 75]] }
0x7fc883e06768: {[conv2_act.b Value[1 x 1 x 32]] }
0x7fc883e06928: {[conv2_act.W Value[32 x 800]] }
0x7fc883e085b8: {[conv3_act.b Value[1 x 1 x 64]] }
0x7fc883e09528: {[conv3_act.W Value[64 x 800]] }
0x7fc883e0b568: {[featOffs Value[1 x 1]] }
0x7fc883e0c1e8: {[features Value[32 x 32 x 3 x *1]] }
0x7fc883e0cc38: {[h1.b Value[64 x 1]] }
0x7fc883e0cf08: {[h1.W Value[64 x 3 x 3 x 64]] }
0x7fc883e0eb48: {[labels Value[10 x *1]] }
0x7fc883e0f558: {[OutputNodes.b Value[10]] }
0x7fc883e10068: {[OutputNodes.W Value[10 x 64]] }
0x7fc883e286b8: {[Err Value[1]] }
0x7fc883e2bd28: {[CE Value[1]] }
0x7fc883e2bfa8: {[conv1_act.y Value[32 x 32 x 32 x *1]] }
0x7fc883e54728: {[conv1_act.c Value[32 x 32 x 32 x *1]] }
0x7fc883e54a88: {[featScaled Value[32 x 32 x 3 x *1]] }
0x7fc883e54c18: {[conv1_act.p Value[32 x 32 x 32 x *1]] }
0x7fc883e71a78: {[pool1 Value[15 x 15 x 32 x *1]] }
0x7fc883e71c38: {[conv2_act.c Value[15 x 15 x 32 x *1]] }
0x7fc883e71fb8: {[conv2_act.p Value[15 x 15 x 32 x *1]] }
0x7fc883e72178: {[conv2_act.y Value[15 x 15 x 32 x *1]] }
0x7fc883e72338: {[pool2 Value[7 x 7 x 32 x *1]] }
0x7fc883e724f8: {[conv3_act.c Value[7 x 7 x 64 x *1]] }
0x7fc883e72878: {[conv3_act.p Value[7 x 7 x 64 x *1]] }
0x7fc883e72a38: {[conv3_act.y Value[7 x 7 x 64 x *1]] }
0x7fc883e72bf8: {[pool3 Value[3 x 3 x 64 x *1]] }
0x7fc883e72db8: {[h1.t Value[64 x *1]] }
0x7fc883e72f78: {[h1.z Value[64 x 1 x *1]] }
0x7fc883e73138: {[h1.y Value[64 x 1 x *1]] }
0x7fc883e732f8: {[h1_d Value[64 x 1 x *1]] }
0x7fc883e73678: {[OutputNodes.t Value[10 x 1 x *1]] }
0x7fc883e73838: {[OutputNodes.z Value[10 x 1 x *1]] }
05/13/2016 15:10:58: Final Results: Minibatch[1-625]: Err = 0.86430000 * 10000; CE = 2.28476029 * 10000; perplexity = 9.82333117 BlockRandomizer::StartEpoch: epoch 0: frames [0..10000] (first sequence at sample 0), data subset 0 of 1
08/16/2016 10:50:43: Minibatch[1-500]: Err = 0.86125000 * 8000; CE = 2.28389484 * 8000
08/16/2016 10:50:43: Minibatch[501-625]: Err = 0.86350000 * 2000; CE = 2.28027481 * 2000
08/16/2016 10:50:43: Final Results: Minibatch[1-625]: Err = 0.86170000 * 10000; CE = 2.28317084 * 10000; perplexity = 9.80772986
05/13/2016 15:10:58: Action "test" complete. 08/16/2016 10:50:43: Action "test" complete.
05/13/2016 15:10:58: __COMPLETED__ 08/16/2016 10:50:43: __COMPLETED__
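The final test numbers are the sample-weighted average of the two logged minibatch ranges (8000 and 2000 samples), and perplexity is exp(CE):

    import math

    err = (0.86125000 * 8000 + 0.86350000 * 2000) / 10000
    ce  = (2.28389484 * 8000 + 2.28027481 * 2000) / 10000
    print(err)            # 0.8617      -> Err = 0.86170000 * 10000
    print(ce)             # 2.283170... -> CE  = 2.28317084 * 10000
    print(math.exp(ce))   # 9.8077...   -> perplexity = 9.80772986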
View file
@ -286,7 +286,7 @@ Post-processing network...
3 roots: 3 roots:
CE = CrossEntropyWithSoftmax() CE = CrossEntropyWithSoftmax()
Err = ClassificationError() Err = ErrorPrediction()
OutputNodes.z = Plus() OutputNodes.z = Plus()
Validating network. 34 nodes to process in pass 1. Validating network. 34 nodes to process in pass 1.
@ -324,7 +324,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1_d) : [10 x 64], [64 x 1
Validating --> OutputNodes.b = LearnableParameter() : -> [10] Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *], [10] -> [10 x 1 x *] Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *], [10] -> [10 x 1 x *]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1] Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1] Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
Validating network. 21 nodes to process in pass 2. Validating network. 21 nodes to process in pass 2.
@ -356,7 +356,7 @@ Post-processing network complete.
05/13/2016 08:17:53: Evaluation criterion node(s): 05/13/2016 08:17:53: Evaluation criterion node(s):
05/13/2016 08:17:53: Err = ClassificationError 05/13/2016 08:17:53: Err = ErrorPrediction
Allocating matrices for forward and/or backward propagation. Allocating matrices for forward and/or backward propagation.
@ -490,7 +490,7 @@ Post-processing network...
3 roots: 3 roots:
CE = CrossEntropyWithSoftmax() CE = CrossEntropyWithSoftmax()
Err = ClassificationError() Err = ErrorPrediction()
OutputNodes.z = Plus() OutputNodes.z = Plus()
Validating network. 34 nodes to process in pass 1. Validating network. 34 nodes to process in pass 1.
@ -528,7 +528,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1_d) : [10 x 64], [64 x 1
Validating --> OutputNodes.b = LearnableParameter() : -> [10] Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *1], [10] -> [10 x 1 x *1] Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *1], [10] -> [10 x 1 x *1]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1] Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1] Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating network. 21 nodes to process in pass 2. Validating network. 21 nodes to process in pass 2.
View file
@ -291,7 +291,7 @@ Post-processing network...
3 roots: 3 roots:
CE = CrossEntropyWithSoftmax() CE = CrossEntropyWithSoftmax()
Err = ClassificationError() Err = ErrorPrediction()
OutputNodes.z = Plus() OutputNodes.z = Plus()
Validating network. 45 nodes to process in pass 1. Validating network. 45 nodes to process in pass 1.
@ -340,7 +340,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1.y) : [10 x 64], [64 x *]
Validating --> OutputNodes.b = LearnableParameter() : -> [10] Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *] Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1] Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1] Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating network. 20 nodes to process in pass 2. Validating network. 20 nodes to process in pass 2.
@ -380,7 +380,7 @@ Post-processing network complete.
05/13/2016 15:10:59: Evaluation criterion node(s): 05/13/2016 15:10:59: Evaluation criterion node(s):
05/13/2016 15:10:59: Err = ClassificationError 05/13/2016 15:10:59: Err = ErrorPrediction
Allocating matrices for forward and/or backward propagation. Allocating matrices for forward and/or backward propagation.
@ -491,7 +491,7 @@ Post-processing network...
3 roots: 3 roots:
CE = CrossEntropyWithSoftmax() CE = CrossEntropyWithSoftmax()
Err = ClassificationError() Err = ErrorPrediction()
OutputNodes.z = Plus() OutputNodes.z = Plus()
Validating network. 45 nodes to process in pass 1. Validating network. 45 nodes to process in pass 1.
@ -540,7 +540,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1.y) : [10 x 64], [64 x *1
Validating --> OutputNodes.b = LearnableParameter() : -> [10] Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1] Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1] Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1] Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating network. 20 nodes to process in pass 2. Validating network. 20 nodes to process in pass 2.
View file
@ -289,7 +289,7 @@ Post-processing network...
3 roots: 3 roots:
CE = CrossEntropyWithSoftmax() CE = CrossEntropyWithSoftmax()
Err = ClassificationError() Err = ErrorPrediction()
OutputNodes.z = Plus() OutputNodes.z = Plus()
Validating network. 45 nodes to process in pass 1. Validating network. 45 nodes to process in pass 1.
@ -338,7 +338,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1.y) : [10 x 64], [64 x *]
Validating --> OutputNodes.b = LearnableParameter() : -> [10] Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *] Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1] Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1] Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating network. 20 nodes to process in pass 2. Validating network. 20 nodes to process in pass 2.
@ -378,7 +378,7 @@ Post-processing network complete.
05/13/2016 08:18:26: Evaluation criterion node(s): 05/13/2016 08:18:26: Evaluation criterion node(s):
05/13/2016 08:18:26: Err = ClassificationError 05/13/2016 08:18:26: Err = ErrorPrediction
Allocating matrices for forward and/or backward propagation. Allocating matrices for forward and/or backward propagation.
@ -489,7 +489,7 @@ Post-processing network...
3 roots: 3 roots:
CE = CrossEntropyWithSoftmax() CE = CrossEntropyWithSoftmax()
Err = ClassificationError() Err = ErrorPrediction()
OutputNodes.z = Plus() OutputNodes.z = Plus()
Validating network. 45 nodes to process in pass 1. Validating network. 45 nodes to process in pass 1.
@ -538,7 +538,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1.y) : [10 x 64], [64 x *1
Validating --> OutputNodes.b = LearnableParameter() : -> [10] Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1] Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1] Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1] Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating network. 20 nodes to process in pass 2. Validating network. 20 nodes to process in pass 2.
View file
@ -356,7 +356,7 @@ Post-processing network...
3 roots: 3 roots:
CE = CrossEntropyWithSoftmax() CE = CrossEntropyWithSoftmax()
Err = ClassificationError() Err = ErrorPrediction()
OutputNodes.z = Plus() OutputNodes.z = Plus()
Validating network. 184 nodes to process in pass 1. Validating network. 184 nodes to process in pass 1.
@ -546,7 +546,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
Validating --> OutputNodes.b = LearnableParameter() : -> [10] Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *] Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1] Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1] Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating network. 75 nodes to process in pass 2. Validating network. 75 nodes to process in pass 2.
@ -652,7 +652,7 @@ Post-processing network complete.
05/03/2016 18:13:08: Evaluation criterion node(s): 05/03/2016 18:13:08: Evaluation criterion node(s):
05/03/2016 18:13:08: Err = ClassificationError 05/03/2016 18:13:08: Err = ErrorPrediction
Allocating matrices for forward and/or backward propagation. Allocating matrices for forward and/or backward propagation.
@ -907,7 +907,7 @@ Post-processing network...
3 roots: 3 roots:
CE = CrossEntropyWithSoftmax() CE = CrossEntropyWithSoftmax()
Err = ClassificationError() Err = ErrorPrediction()
OutputNodes.z = Plus() OutputNodes.z = Plus()
Validating network. 184 nodes to process in pass 1. Validating network. 184 nodes to process in pass 1.
@ -1095,7 +1095,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
Validating --> OutputNodes.b = LearnableParameter() : -> [10] Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1] Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1] Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1] Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating network. 75 nodes to process in pass 2. Validating network. 75 nodes to process in pass 2.
View file
@@ -354,7 +354,7 @@ Post-processing network...
 3 roots:
 CE = CrossEntropyWithSoftmax()
-Err = ClassificationError()
+Err = ErrorPrediction()
 OutputNodes.z = Plus()
 Validating network. 184 nodes to process in pass 1.
@@ -544,7 +544,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
 Validating --> OutputNodes.b = LearnableParameter() : -> [10]
 Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
 Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
-Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
+Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
 Validating network. 75 nodes to process in pass 2.
@@ -650,7 +650,7 @@ Post-processing network complete.
 05/03/2016 14:04:12: Evaluation criterion node(s):
-05/03/2016 14:04:12: Err = ClassificationError
+05/03/2016 14:04:12: Err = ErrorPrediction
 Allocating matrices for forward and/or backward propagation.
@@ -905,7 +905,7 @@ Post-processing network...
 3 roots:
 CE = CrossEntropyWithSoftmax()
-Err = ClassificationError()
+Err = ErrorPrediction()
 OutputNodes.z = Plus()
 Validating network. 184 nodes to process in pass 1.
@@ -1093,7 +1093,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
 Validating --> OutputNodes.b = LearnableParameter() : -> [10]
 Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
 Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
-Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
+Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
 Validating network. 75 nodes to process in pass 2.

View file

@@ -356,7 +356,7 @@ Post-processing network...
 3 roots:
 CE = CrossEntropyWithSoftmax()
-Err = ClassificationError()
+Err = ErrorPrediction()
 OutputNodes.z = Plus()
 Validating network. 949 nodes to process in pass 1.
@@ -1311,7 +1311,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
 Validating --> OutputNodes.b = LearnableParameter() : -> [10]
 Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
 Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
-Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
+Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
 Validating network. 390 nodes to process in pass 2.
@@ -1777,7 +1777,7 @@ Post-processing network complete.
 05/03/2016 18:17:55: Evaluation criterion node(s):
-05/03/2016 18:17:55: Err = ClassificationError
+05/03/2016 18:17:55: Err = ErrorPrediction
 Allocating matrices for forward and/or backward propagation.
@@ -2932,7 +2932,7 @@ Post-processing network...
 3 roots:
 CE = CrossEntropyWithSoftmax()
-Err = ClassificationError()
+Err = ErrorPrediction()
 OutputNodes.z = Plus()
 Validating network. 949 nodes to process in pass 1.
@@ -3885,7 +3885,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
 Validating --> OutputNodes.b = LearnableParameter() : -> [10]
 Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
 Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
-Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
+Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
 Validating network. 390 nodes to process in pass 2.

View file

@@ -354,7 +354,7 @@ Post-processing network...
 3 roots:
 CE = CrossEntropyWithSoftmax()
-Err = ClassificationError()
+Err = ErrorPrediction()
 OutputNodes.z = Plus()
 Validating network. 949 nodes to process in pass 1.
@@ -1309,7 +1309,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
 Validating --> OutputNodes.b = LearnableParameter() : -> [10]
 Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
 Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
-Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
+Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
 Validating network. 390 nodes to process in pass 2.
@@ -1775,7 +1775,7 @@ Post-processing network complete.
 05/03/2016 14:05:00: Evaluation criterion node(s):
-05/03/2016 14:05:00: Err = ClassificationError
+05/03/2016 14:05:00: Err = ErrorPrediction
 Allocating matrices for forward and/or backward propagation.
@@ -2930,7 +2930,7 @@ Post-processing network...
 3 roots:
 CE = CrossEntropyWithSoftmax()
-Err = ClassificationError()
+Err = ErrorPrediction()
 OutputNodes.z = Plus()
 Validating network. 949 nodes to process in pass 1.
@@ -3883,7 +3883,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
 Validating --> OutputNodes.b = LearnableParameter() : -> [10]
 Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
 Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
-Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
+Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
 Validating network. 390 nodes to process in pass 2.

View file

@@ -282,7 +282,7 @@ Post-processing network...
 3 roots:
 CE = CrossEntropyWithSoftmax()
-Err = ClassificationError()
+Err = ErrorPrediction()
 OutputNodes.z = Plus()
 Validating network. 32 nodes to process in pass 1.
@@ -318,7 +318,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, conv4.y) : [10 x 7 x 7 x 32
 Validating --> OutputNodes.b = LearnableParameter() : -> [10]
 Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
 Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
-Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
+Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
 Validating network. 19 nodes to process in pass 2.
@@ -350,7 +350,7 @@ Post-processing network complete.
 05/13/2016 15:11:11: Evaluation criterion node(s):
-05/13/2016 15:11:11: Err = ClassificationError
+05/13/2016 15:11:11: Err = ErrorPrediction
 Allocating matrices for forward and/or backward propagation.
@@ -446,7 +446,7 @@ Post-processing network...
 3 roots:
 CE = CrossEntropyWithSoftmax()
-Err = ClassificationError()
+Err = ErrorPrediction()
 OutputNodes.z = Plus()
 Validating network. 32 nodes to process in pass 1.
@@ -482,7 +482,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, conv4.y) : [10 x 7 x 7 x 32
 Validating --> OutputNodes.b = LearnableParameter() : -> [10]
 Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
 Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
-Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
+Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
 Validating network. 19 nodes to process in pass 2.

View file

@@ -280,7 +280,7 @@ Post-processing network...
 3 roots:
 CE = CrossEntropyWithSoftmax()
-Err = ClassificationError()
+Err = ErrorPrediction()
 OutputNodes.z = Plus()
 Validating network. 32 nodes to process in pass 1.
@@ -316,7 +316,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, conv4.y) : [10 x 7 x 7 x 32
 Validating --> OutputNodes.b = LearnableParameter() : -> [10]
 Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
 Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
-Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
+Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
 Validating network. 19 nodes to process in pass 2.
@@ -348,7 +348,7 @@ Post-processing network complete.
 05/13/2016 08:19:02: Evaluation criterion node(s):
-05/13/2016 08:19:02: Err = ClassificationError
+05/13/2016 08:19:02: Err = ErrorPrediction
 Allocating matrices for forward and/or backward propagation.
@@ -444,7 +444,7 @@ Post-processing network...
 3 roots:
 CE = CrossEntropyWithSoftmax()
-Err = ClassificationError()
+Err = ErrorPrediction()
 OutputNodes.z = Plus()
 Validating network. 32 nodes to process in pass 1.
@@ -480,7 +480,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, conv4.y) : [10 x 7 x 7 x 32
 Validating --> OutputNodes.b = LearnableParameter() : -> [10]
 Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
 Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
-Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
+Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
 Validating network. 19 nodes to process in pass 2.

View file

@@ -68,7 +68,7 @@ Multigpu_Demo_Train=[
 SimpleNetworkBuilder = [
 layerSizes = 2:50*2:2
 trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
 layerTypes = "Sigmoid"
 initValueScale = 1.0
 applyMeanVarNorm = true
@@ -169,7 +169,7 @@ Multigpu_Demo_Train=[
 SimpleNetworkBuilder = [
 layerSizes = 2:50*2:2
 trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
 layerTypes = "Sigmoid"
 initValueScale = 1.0
 applyMeanVarNorm = true
@@ -302,7 +302,7 @@ configparameters: Multigpu.cntk:Multigpu_Demo_Train=[
 SimpleNetworkBuilder = [
 layerSizes = 2:50*2:2
 trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
 layerTypes = "Sigmoid"
 initValueScale = 1.0
 applyMeanVarNorm = true
@@ -370,7 +370,7 @@ Post-processing network...
 7 roots:
 CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
-EvalClassificationError = ClassificationError()
+EvalErrorPrediction = ErrorPrediction()
 InvStdOfFeatures = InvStdDev()
 MeanOfFeatures = Mean()
 PosteriorProb = Softmax()
@@ -399,7 +399,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
 Validating --> B2 = LearnableParameter() : -> [2 x 1]
 Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
 Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
-Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
+Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
 Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
 Validating --> Prior = Mean (labels) : [2 x *] -> [2]
 Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -423,14 +423,14 @@ Post-processing network complete.
 05/03/2016 15:21:43: Evaluation criterion node(s):
-05/03/2016 15:21:43: EvalClassificationError = ClassificationError
+05/03/2016 15:21:43: EvalErrorPrediction = ErrorPrediction
 Allocating matrices for forward and/or backward propagation.
 Memory Sharing Structure:
-(nil): {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
+(nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
 0x1abc7c8: {[InvStdOfFeatures Value[2]] }
 0x1b40348: {[features Value[2 x *]] }
 0x1b408b8: {[MeanOfFeatures Value[2]] }
@@ -443,7 +443,7 @@ Memory Sharing Structure:
 0x1b46708: {[labels Value[2 x *]] }
 0x1b473e8: {[Prior Value[2]] }
 0x1b4b138: {[ScaledLogLikelihood Value[2 x 1 x *]] }
-0x1b4cc28: {[EvalClassificationError Value[1]] }
+0x1b4cc28: {[EvalErrorPrediction Value[1]] }
 0x1b4cea8: {[CrossEntropyWithSoftmax Value[1]] }
 0x1b4d388: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
 0x1b4d548: {[W0*features+B0 Gradient[50 x 1 x *]] [W1*H1 Value[50 x 1 x *]] }
@@ -473,139 +473,139 @@ Memory Sharing Structure:
 05/03/2016 15:21:44: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
 05/03/2016 15:21:44: Starting minibatch loop.
-05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69966235 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0538s; samplesPerSecond = 4647.4
+05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69966235 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0538s; samplesPerSecond = 4647.4
-05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70639648 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.1073s; samplesPerSecond = 2329.6
+05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70639648 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.1073s; samplesPerSecond = 2329.6
-05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70470264 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0631s; samplesPerSecond = 3961.3
+05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70470264 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0631s; samplesPerSecond = 3961.3
-05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69813501 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0747s; samplesPerSecond = 3346.9
+05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69813501 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0747s; samplesPerSecond = 3346.9
-05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73551416 * 250; EvalClassificationError = 0.57600000 * 250; time = 0.0900s; samplesPerSecond = 2778.4
+05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73551416 * 250; EvalErrorPrediction = 0.57600000 * 250; time = 0.0900s; samplesPerSecond = 2778.4
-05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72432324 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0605s; samplesPerSecond = 4135.0
+05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72432324 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0605s; samplesPerSecond = 4135.0
-05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73327588 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0619s; samplesPerSecond = 4039.0
+05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73327588 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0619s; samplesPerSecond = 4039.0
-05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70092627 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0769s; samplesPerSecond = 3249.9
+05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70092627 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0769s; samplesPerSecond = 3249.9
-05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72354980 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0799s; samplesPerSecond = 3129.0
+05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72354980 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0799s; samplesPerSecond = 3129.0
-05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72148096 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0620s; samplesPerSecond = 4031.5
+05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72148096 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0620s; samplesPerSecond = 4031.5
-05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69814941 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.1278s; samplesPerSecond = 1955.9
+05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69814941 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.1278s; samplesPerSecond = 1955.9
-05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70699121 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0821s; samplesPerSecond = 3044.1
+05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70699121 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0821s; samplesPerSecond = 3044.1
-05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69898437 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0755s; samplesPerSecond = 3312.4
+05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69898437 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0755s; samplesPerSecond = 3312.4
-05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71712695 * 250; EvalClassificationError = 0.54000000 * 250; time = 0.0657s; samplesPerSecond = 3804.8
+05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71712695 * 250; EvalErrorPrediction = 0.54000000 * 250; time = 0.0657s; samplesPerSecond = 3804.8
-05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69470703 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.1049s; samplesPerSecond = 2382.9
+05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69470703 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.1049s; samplesPerSecond = 2382.9
-05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71375879 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.1180s; samplesPerSecond = 2117.9
+05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71375879 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.1180s; samplesPerSecond = 2117.9
-05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70381641 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.1065s; samplesPerSecond = 2347.9
+05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70381641 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.1065s; samplesPerSecond = 2347.9
-05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71748633 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.2709s; samplesPerSecond = 922.9
+05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71748633 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.2709s; samplesPerSecond = 922.9
-05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71863281 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1375s; samplesPerSecond = 1818.4
+05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71863281 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.1375s; samplesPerSecond = 1818.4
-05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70715234 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.1143s; samplesPerSecond = 2186.6
+05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70715234 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.1143s; samplesPerSecond = 2186.6
-05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70401074 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.1079s; samplesPerSecond = 2317.1
+05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70401074 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.1079s; samplesPerSecond = 2317.1
-05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70599414 * 250; EvalClassificationError = 0.48400000 * 250; time = 0.0917s; samplesPerSecond = 2727.7
+05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70599414 * 250; EvalErrorPrediction = 0.48400000 * 250; time = 0.0917s; samplesPerSecond = 2727.7
-05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69628711 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0923s; samplesPerSecond = 2707.6
+05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69628711 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0923s; samplesPerSecond = 2707.6
-05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75920898 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0887s; samplesPerSecond = 2819.0
+05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75920898 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0887s; samplesPerSecond = 2819.0
-05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70542578 * 250; EvalClassificationError = 0.43600000 * 250; time = 0.0634s; samplesPerSecond = 3945.8
+05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70542578 * 250; EvalErrorPrediction = 0.43600000 * 250; time = 0.0634s; samplesPerSecond = 3945.8
-05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70643945 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0885s; samplesPerSecond = 2823.7
+05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70643945 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0885s; samplesPerSecond = 2823.7
-05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72481641 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0601s; samplesPerSecond = 4162.6
+05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72481641 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0601s; samplesPerSecond = 4162.6
-05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71133594 * 250; EvalClassificationError = 0.55600000 * 250; time = 0.0630s; samplesPerSecond = 3968.1
+05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71133594 * 250; EvalErrorPrediction = 0.55600000 * 250; time = 0.0630s; samplesPerSecond = 3968.1
-05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68605664 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0849s; samplesPerSecond = 2944.1
+05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68605664 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0849s; samplesPerSecond = 2944.1
-05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69535352 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0879s; samplesPerSecond = 2844.6
+05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69535352 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0879s; samplesPerSecond = 2844.6
-05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.68741797 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0752s; samplesPerSecond = 3325.7
+05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.68741797 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0752s; samplesPerSecond = 3325.7
-05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.67916406 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0958s; samplesPerSecond = 2610.3
+05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.67916406 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0958s; samplesPerSecond = 2610.3
-05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.67841992 * 250; EvalClassificationError = 0.44800000 * 250; time = 0.1009s; samplesPerSecond = 2478.7
+05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.67841992 * 250; EvalErrorPrediction = 0.44800000 * 250; time = 0.1009s; samplesPerSecond = 2478.7
-05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68038477 * 250; EvalClassificationError = 0.49200000 * 250; time = 0.1607s; samplesPerSecond = 1555.6
+05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68038477 * 250; EvalErrorPrediction = 0.49200000 * 250; time = 0.1607s; samplesPerSecond = 1555.6
-05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.61937109 * 250; EvalClassificationError = 0.30400000 * 250; time = 0.1131s; samplesPerSecond = 2211.4
+05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.61937109 * 250; EvalErrorPrediction = 0.30400000 * 250; time = 0.1131s; samplesPerSecond = 2211.4
-05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.57844141 * 250; EvalClassificationError = 0.27200000 * 250; time = 0.1047s; samplesPerSecond = 2388.5
+05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.57844141 * 250; EvalErrorPrediction = 0.27200000 * 250; time = 0.1047s; samplesPerSecond = 2388.5
-05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.49124023 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0896s; samplesPerSecond = 2791.5
+05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.49124023 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0896s; samplesPerSecond = 2791.5
-05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.39071289 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0727s; samplesPerSecond = 3438.8
+05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.39071289 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0727s; samplesPerSecond = 3438.8
-05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.27650586 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.2624s; samplesPerSecond = 952.6
+05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.27650586 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.2624s; samplesPerSecond = 952.6
-05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.26430078 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0842s; samplesPerSecond = 2967.7
+05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.26430078 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0842s; samplesPerSecond = 2967.7
-05/03/2016 15:21:47: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.66664150 * 10000; EvalClassificationError = 0.44430000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=3.93174s
+05/03/2016 15:21:47: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.66664150 * 10000; EvalErrorPrediction = 0.44430000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=3.93174s
 05/03/2016 15:21:47: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn.1'
 05/03/2016 15:21:47: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
 05/03/2016 15:21:47: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
-05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.20720006 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0545s; samplesPerSecond = 4583.4
+05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.20720006 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0545s; samplesPerSecond = 4583.4
-05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.19690290 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0641s; samplesPerSecond = 3899.7
+05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.19690290 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0641s; samplesPerSecond = 3899.7
-05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.16064646 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0770s; samplesPerSecond = 3247.1
+05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.16064646 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0770s; samplesPerSecond = 3247.1
-05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13547171 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0640s; samplesPerSecond = 3904.2
+05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13547171 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0640s; samplesPerSecond = 3904.2
-05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.18000261 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0732s; samplesPerSecond = 3413.6
+05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.18000261 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0732s; samplesPerSecond = 3413.6
-05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17787841 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0790s; samplesPerSecond = 3164.0
+05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17787841 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0790s; samplesPerSecond = 3164.0
-05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16821879 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0880s; samplesPerSecond = 2839.4
+05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16821879 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0880s; samplesPerSecond = 2839.4
-05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16363456 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0854s; samplesPerSecond = 2926.8
+05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16363456 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0854s; samplesPerSecond = 2926.8
-05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.19533907 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0774s; samplesPerSecond = 3228.6
+05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.19533907 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0774s; samplesPerSecond = 3228.6
-05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19318692 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0820s; samplesPerSecond = 3049.5
+05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19318692 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0820s; samplesPerSecond = 3049.5
-05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.12726279 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0766s; samplesPerSecond = 3261.6
+05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.12726279 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0766s; samplesPerSecond = 3261.6
-05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.18620067 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0773s; samplesPerSecond = 3235.5
+05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.18620067 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0773s; samplesPerSecond = 3235.5
-05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.11547500 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0797s; samplesPerSecond = 3136.6
+05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.11547500 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0797s; samplesPerSecond = 3136.6
-05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16675950 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0833s; samplesPerSecond = 2999.8
+05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16675950 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0833s; samplesPerSecond = 2999.8
-05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.15807389 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0822s; samplesPerSecond = 3042.5
+05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.15807389 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0822s; samplesPerSecond = 3042.5
-05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18389093 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0726s; samplesPerSecond = 3443.0
+05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18389093 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0726s; samplesPerSecond = 3443.0
-05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18269750 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0897s; samplesPerSecond = 2787.7
+05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18269750 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0897s; samplesPerSecond = 2787.7
-05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18737841 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0963s; samplesPerSecond = 2597.3
+05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18737841 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0963s; samplesPerSecond = 2597.3
-05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20174757 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0811s; samplesPerSecond = 3081.1
+05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20174757 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0811s; samplesPerSecond = 3081.1
-05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13336708 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0732s; samplesPerSecond = 3414.6
+05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13336708 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0732s; samplesPerSecond = 3414.6
-05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13851332 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0879s; samplesPerSecond = 2843.0
+05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13851332 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0879s; samplesPerSecond = 2843.0
-05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15422288 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0821s; samplesPerSecond = 3044.3
+05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15422288 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0821s; samplesPerSecond = 3044.3
-05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15478799 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0815s; samplesPerSecond = 3069.2
+05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15478799 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0815s; samplesPerSecond = 3069.2
-05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14530201 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0810s; samplesPerSecond = 3086.3
+05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14530201 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0810s; samplesPerSecond = 3086.3
-05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12192809 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.2596s; samplesPerSecond = 962.9
+05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12192809 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.2596s; samplesPerSecond = 962.9
-05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.13975597 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0569s; samplesPerSecond = 4394.5
+05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.13975597 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0569s; samplesPerSecond = 4394.5
-05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12566363 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0911s; samplesPerSecond = 2744.6
+05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12566363 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0911s; samplesPerSecond = 2744.6
-05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18963051 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0765s; samplesPerSecond = 3267.2
+05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18963051 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0765s; samplesPerSecond = 3267.2
-05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17955467 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0914s; samplesPerSecond = 2736.4
+05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17955467 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0914s; samplesPerSecond = 2736.4
-05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18862103 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0772s; samplesPerSecond = 3236.7
+05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18862103 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0772s; samplesPerSecond = 3236.7
-05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17503073 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0775s; samplesPerSecond = 3225.8
+05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17503073 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0775s; samplesPerSecond = 3225.8
-05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14741998 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0774s; samplesPerSecond = 3230.1
+05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14741998 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0774s; samplesPerSecond = 3230.1
-05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13803981 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0726s; samplesPerSecond = 3443.0
+05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13803981 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0726s; samplesPerSecond = 3443.0
-05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.14139232 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0820s; samplesPerSecond = 3048.4
+05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.14139232 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0820s; samplesPerSecond = 3048.4
-05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13886877 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0766s; samplesPerSecond = 3264.1
+05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13886877 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0766s; samplesPerSecond = 3264.1
-05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.15025864 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0852s; samplesPerSecond = 2933.5
+05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.15025864 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0852s; samplesPerSecond = 2933.5
-05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14659342 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0903s; samplesPerSecond = 2767.4
+05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14659342 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0903s; samplesPerSecond = 2767.4
-05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13078795 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0784s; samplesPerSecond = 3187.6
+05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13078795 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0784s; samplesPerSecond = 3187.6
-05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19832882 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0772s; samplesPerSecond = 3240.4
+05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19832882 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0772s; samplesPerSecond = 3240.4
-05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15828904 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0721s; samplesPerSecond = 3468.7
+05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15828904 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0721s; samplesPerSecond = 3468.7
-05/03/2016 15:21:51: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16210811 * 10000; EvalClassificationError = 0.07480000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=3.34279s
+05/03/2016 15:21:51: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16210811 * 10000; EvalErrorPrediction = 0.07480000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=3.34279s
 05/03/2016 15:21:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn.2'
 05/03/2016 15:21:51: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
 05/03/2016 15:21:51: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
-05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.19031988 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0960s; samplesPerSecond = 2604.5
+05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.19031988 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0960s; samplesPerSecond = 2604.5
-05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13920714 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0967s; samplesPerSecond = 2585.3
+05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13920714 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0967s; samplesPerSecond = 2585.3
-05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14595162 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0869s; samplesPerSecond = 2877.8
+05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14595162 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0869s; samplesPerSecond = 2877.8
-05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13324012 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0817s; samplesPerSecond = 3060.5
+05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13324012 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0817s; samplesPerSecond = 3060.5
-05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17358728 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0804s; samplesPerSecond = 3109.2
+05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17358728 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0804s; samplesPerSecond = 3109.2
-05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17949159 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0660s; samplesPerSecond = 3788.1
+05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17949159 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0660s; samplesPerSecond = 3788.1
-05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.15009323 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0653s; samplesPerSecond = 3829.5
+05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.15009323 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0653s; samplesPerSecond = 3829.5
-05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17060954 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0660s; samplesPerSecond = 3787.3
+05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17060954 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0660s; samplesPerSecond = 3787.3
-05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10410764 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0762s; samplesPerSecond = 3280.0
+05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10410764 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0762s; samplesPerSecond = 3280.0
-05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20572259 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.2571s; samplesPerSecond = 972.5
+05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20572259 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.2571s; samplesPerSecond = 972.5
-05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16519130 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0640s; samplesPerSecond = 3906.2
+05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16519130 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0640s; samplesPerSecond = 3906.2
-05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.14908187 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0593s; samplesPerSecond = 4213.2
+05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.14908187 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0593s; samplesPerSecond = 4213.2
-05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19227612 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0688s; samplesPerSecond = 3632.8
+05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19227612 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0688s; samplesPerSecond = 3632.8
-05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13670934 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0532s; samplesPerSecond = 4700.3
+05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13670934 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0532s; samplesPerSecond = 4700.3
-05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.21113164 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0693s; samplesPerSecond = 3609.4
+05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.21113164 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0693s; samplesPerSecond = 3609.4
-05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.13129944 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0882s; samplesPerSecond = 2833.6
+05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.13129944 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0882s; samplesPerSecond = 2833.6
-05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17304376 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0840s; samplesPerSecond = 2975.2
+05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17304376 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0840s; samplesPerSecond = 2975.2
-05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16479250 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0685s; samplesPerSecond = 3648.5
+05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16479250 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0685s; samplesPerSecond = 3648.5
-05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14591786 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0976s; samplesPerSecond = 2561.0
+05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14591786 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0976s; samplesPerSecond = 2561.0
-05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12562012 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0969s; samplesPerSecond = 2580.7
+05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12562012 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0969s; samplesPerSecond = 2580.7
-05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13442773 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0959s; samplesPerSecond = 2607.8
+05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13442773 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0959s; samplesPerSecond = 2607.8
-05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.17125328 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0754s; samplesPerSecond = 3314.6
+05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.17125328 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0754s; samplesPerSecond = 3314.6
-05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22482522 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.1037s; samplesPerSecond = 2410.8
+05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22482522 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.1037s; samplesPerSecond = 2410.8
-05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18291792 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0650s; samplesPerSecond = 3844.3
+05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18291792 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0650s; samplesPerSecond = 3844.3
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.20296558 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0823s; samplesPerSecond = 3038.9 05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.20296558 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0823s; samplesPerSecond = 3038.9
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22849719 * 250; EvalClassificationError = 0.12400000 * 250; time = 0.0828s; samplesPerSecond = 3020.2 05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22849719 * 250; EvalErrorPrediction = 0.12400000 * 250; time = 0.0828s; samplesPerSecond = 3020.2
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12500068 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0864s; samplesPerSecond = 2894.1 05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12500068 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0864s; samplesPerSecond = 2894.1
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15719802 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0840s; samplesPerSecond = 2976.4 05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15719802 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0840s; samplesPerSecond = 2976.4
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11520810 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0687s; samplesPerSecond = 3636.7 05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11520810 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0687s; samplesPerSecond = 3636.7
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14159592 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0974s; samplesPerSecond = 2567.1 05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14159592 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0974s; samplesPerSecond = 2567.1
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18509569 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0721s; samplesPerSecond = 3465.4 05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18509569 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0721s; samplesPerSecond = 3465.4
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15008345 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0905s; samplesPerSecond = 2763.6 05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15008345 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0905s; samplesPerSecond = 2763.6
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12866435 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0902s; samplesPerSecond = 2770.5 05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12866435 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0902s; samplesPerSecond = 2770.5
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17640526 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0896s; samplesPerSecond = 2789.2 05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17640526 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0896s; samplesPerSecond = 2789.2
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14982110 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.2845s; samplesPerSecond = 878.8 05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14982110 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.2845s; samplesPerSecond = 878.8
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11472753 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0867s; samplesPerSecond = 2882.5 05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11472753 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0867s; samplesPerSecond = 2882.5
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16524783 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0755s; samplesPerSecond = 3312.4 05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16524783 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0755s; samplesPerSecond = 3312.4
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14961037 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0958s; samplesPerSecond = 2608.8 05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14961037 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0958s; samplesPerSecond = 2608.8
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.15972387 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0972s; samplesPerSecond = 2572.7 05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.15972387 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0972s; samplesPerSecond = 2572.7
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17867958 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0969s; samplesPerSecond = 2581.0 05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17867958 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0969s; samplesPerSecond = 2581.0
05/03/2016 15:21:54: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.16073358 * 10000; EvalClassificationError = 0.07780000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=3.65495s 05/03/2016 15:21:54: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.16073358 * 10000; EvalErrorPrediction = 0.07780000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=3.65495s
05/03/2016 15:21:54: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn' 05/03/2016 15:21:54: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn'
05/03/2016 15:21:54: CNTKCommandTrainEnd: Multigpu_Demo_Train 05/03/2016 15:21:54: CNTKCommandTrainEnd: Multigpu_Demo_Train
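For reference, each "Finished Epoch" summary above is the sample-weighted average of the per-minibatch figures (each minibatch line reports "average * sample count"). A minimal sketch in Python of that aggregation; the (value, count) pairs are read off the log lines, not produced by CNTK:

def epoch_average(minibatches):
    # minibatches: (average_value, sample_count) pairs as printed in the log,
    # e.g. (0.17358728, 250) from "CrossEntropyWithSoftmax = 0.17358728 * 250".
    weighted = sum(value * count for value, count in minibatches)
    samples = sum(count for _, count in minibatches)
    return weighted / samples

# The epoch-3 summary "CrossEntropyWithSoftmax = 0.16073358 * 10000" is
# epoch_average over all 400 minibatches; the error figure likewise.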
@@ -623,7 +623,7 @@ Post-processing network...
7 roots:
	CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
-	EvalClassificationError = ClassificationError()
+	EvalErrorPrediction = ErrorPrediction()
	InvStdOfFeatures = InvStdDev()
	MeanOfFeatures = Mean()
	PosteriorProb = Softmax()
@@ -652,7 +652,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
Validating --> B2 = LearnableParameter() :  -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
-Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
+Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
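The validation trace pins down the whole forward computation: 2 input features, two 50-unit sigmoid hidden layers, a 2-unit output HLast, and a softmax posterior with cross entropy and error rate on top. A rough NumPy rendering of the same graph, as an illustration only (variable names follow the node names in the trace; the hidden-layer steps sit outside this excerpt but follow the same pattern, and the softmax here is numerically naive):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def forward(features, W0, B0, W1, B1, W2, B2, labels):
    H1 = sigmoid(W0 @ features + B0)                 # [50 x n]
    H2 = sigmoid(W1 @ H1 + B1)                       # [50 x n]
    HLast = W2 @ H2 + B2                             # [2 x n], cf. Plus (W2*H1, B2)
    PosteriorProb = np.exp(HLast) / np.exp(HLast).sum(axis=0)   # Softmax (HLast)
    # per-sample -sum(labels * log softmax), averaged over the minibatch:
    CrossEntropyWithSoftmax = -(labels * np.log(PosteriorProb)).sum(axis=0).mean()
    EvalErrorPrediction = (PosteriorProb.argmax(axis=0) != labels.argmax(axis=0)).mean()
    return CrossEntropyWithSoftmax, EvalErrorPrediction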
@@ -676,7 +676,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
-(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
+(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
0x1abbf28: {[B0 Value[50 x 1]] }
0x1b47908: {[W1 Value[50 x 50]] }
0x1b48278: {[W2 Value[2 x 50]] }
@@ -688,7 +688,7 @@ Memory Sharing Structure:
0x1b50cd8: {[Prior Value[2]] }
0x1b514f8: {[W0 Value[50 x 2]] }
0x1b53938: {[B1 Value[50 x 1]] }
-0x1c0fd98: {[EvalClassificationError Value[1]] }
+0x1c0fd98: {[EvalErrorPrediction Value[1]] }
0x1c0fef8: {[CrossEntropyWithSoftmax Value[1]] }
0x1c10438: {[LogOfPrior Value[2]] }
0x1c11f48: {[MVNormalizedFeatures Value[2 x *1]] }
@@ -701,7 +701,7 @@ Memory Sharing Structure:
0x1c12d78: {[W2*H1 Value[2 x 1 x *1]] }
0x1c12f38: {[HLast Value[2 x 1 x *1]] }
-05/03/2016 15:21:55: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05804312 * 603; CrossEntropyWithSoftmax = 0.12790061 * 603; perplexity = 1.13644005
+05/03/2016 15:21:55: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05804312 * 603; CrossEntropyWithSoftmax = 0.12790061 * 603; perplexity = 1.13644005
05/03/2016 15:21:55: Action "test" complete.
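The perplexity in the final results line is just exp of the per-sample cross entropy, so the two figures are mutually consistent; a one-line check:

import math
print(math.exp(0.12790061))  # 1.13644005..., matching the logged perplexity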

View file

@@ -68,7 +68,7 @@ Multigpu_Demo_Train=[
    SimpleNetworkBuilder = [
        layerSizes = 2:50*2:2
        trainingCriterion = "CrossEntropyWithSoftmax"
-       evalCriterion = "ClassificationError"
+       evalCriterion = "ErrorPrediction"
        layerTypes = "Sigmoid"
        initValueScale = 1.0
        applyMeanVarNorm = true
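In this configuration, layerSizes = 2:50*2:2 is SimpleNetworkBuilder shorthand for a 2-50-50-2 network: the *2 repeats the preceding 50-unit hidden layer. A hypothetical expansion helper that mimics the notation (illustration only, not CNTK code):

def expand_layer_sizes(spec):
    # "2:50*2:2" -> [2, 50, 50, 2]; "size*n" repeats size n times.
    sizes = []
    for part in spec.split(":"):
        if "*" in part:
            size, times = part.split("*")
            sizes.extend([int(size)] * int(times))
        else:
            sizes.append(int(part))
    return sizes

print(expand_layer_sizes("2:50*2:2"))  # [2, 50, 50, 2]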
@@ -169,7 +169,7 @@ Multigpu_Demo_Train=[
    SimpleNetworkBuilder = [
        layerSizes = 2:50*2:2
        trainingCriterion = "CrossEntropyWithSoftmax"
-       evalCriterion = "ClassificationError"
+       evalCriterion = "ErrorPrediction"
        layerTypes = "Sigmoid"
        initValueScale = 1.0
        applyMeanVarNorm = true
@@ -302,7 +302,7 @@ configparameters: Multigpu.cntk:Multigpu_Demo_Train=[
    SimpleNetworkBuilder = [
        layerSizes = 2:50*2:2
        trainingCriterion = "CrossEntropyWithSoftmax"
-       evalCriterion = "ClassificationError"
+       evalCriterion = "ErrorPrediction"
        layerTypes = "Sigmoid"
        initValueScale = 1.0
        applyMeanVarNorm = true
@@ -371,7 +371,7 @@ Post-processing network...
7 roots:
	CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
-	EvalClassificationError = ClassificationError()
+	EvalErrorPrediction = ErrorPrediction()
	InvStdOfFeatures = InvStdDev()
	MeanOfFeatures = Mean()
	PosteriorProb = Softmax()
@@ -400,7 +400,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
Validating --> B2 = LearnableParameter() :  -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
-Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
+Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -424,14 +424,14 @@ Post-processing network complete.
05/03/2016 15:21:55: Evaluation criterion node(s):
-05/03/2016 15:21:55: 	EvalClassificationError = ClassificationError
+05/03/2016 15:21:55: 	EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
-(nil): {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
+(nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
0x12a62e8: {[features Value[2 x *]] }
0x20202b8: {[MeanOfFeatures Value[2]] }
0x20207c8: {[InvStdOfFeatures Value[2]] }
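The ErrorPrediction criterion named above counts misclassifications: a sample is in error when the argmax of the network output HLast disagrees with the argmax of its one-hot label. A minimal sketch of those semantics, matching how the log reports an average error rate times a sample count (e.g. 0.05804312 * 603):

import numpy as np

def error_prediction(labels, hlast):
    # labels, hlast: arrays of shape [num_classes x num_samples];
    # returns the error *rate*, which the log multiplies back by the count.
    return float(np.mean(hlast.argmax(axis=0) != labels.argmax(axis=0)))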
@@ -444,7 +444,7 @@ Memory Sharing Structure:
0x278ae18: {[Prior Value[2]] }
0x278c158: {[LogOfPrior Value[2]] }
0x27908f8: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
-0x2790a18: {[EvalClassificationError Value[1]] }
+0x2790a18: {[EvalErrorPrediction Value[1]] }
0x2790d18: {[ScaledLogLikelihood Value[2 x 1 x *]] }
0x2790e78: {[CrossEntropyWithSoftmax Value[1]] }
0x27966e8: {[B0 Value[50 x 1]] }
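Two details of the epoch loop that follows are worth decoding. "momentum as time constant = 237.3 samples" restates the per-minibatch momentum 0.9 in per-sample terms for the 25-sample minibatches used here, via T = -mbSize / ln(momentum); and "NumGradientBits = 1" selects CNTK's 1-bit data-parallel SGD, which quantizes each gradient value to a single bit for aggregation and carries the quantization residual into the next minibatch. A quick check of the time constant:

import math

momentum_per_mb, mb_size = 0.9, 25   # 250 samples per 10 minibatches in the log
time_constant = -mb_size / math.log(momentum_per_mb)
print(time_constant)                 # ~237.3 samples, as logged below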
@@ -474,139 +474,139 @@ Memory Sharing Structure:
05/03/2016 15:21:56: Starting Epoch 1: learning rate per sample = 0.020000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
05/03/2016 15:21:56: Starting minibatch loop.
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70004456 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0059s; samplesPerSecond = 42038.0
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70004456 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0059s; samplesPerSecond = 42038.0
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70309900 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0049s; samplesPerSecond = 50525.5
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70309900 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0049s; samplesPerSecond = 50525.5
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70606104 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0050s; samplesPerSecond = 50423.6
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70606104 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0050s; samplesPerSecond = 50423.6
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69845532 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0049s; samplesPerSecond = 50689.4
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69845532 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0049s; samplesPerSecond = 50689.4
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73496533 * 250; EvalClassificationError = 0.57600000 * 250; time = 0.0050s; samplesPerSecond = 50261.4
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73496533 * 250; EvalErrorPrediction = 0.57600000 * 250; time = 0.0050s; samplesPerSecond = 50261.4
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72522827 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0050s; samplesPerSecond = 50454.1
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72522827 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0050s; samplesPerSecond = 50454.1
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73287500 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0049s; samplesPerSecond = 50576.6
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73287500 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0049s; samplesPerSecond = 50576.6
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70135547 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0049s; samplesPerSecond = 50566.3
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70135547 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0049s; samplesPerSecond = 50566.3
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72466504 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0049s; samplesPerSecond = 50515.3
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72466504 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0049s; samplesPerSecond = 50515.3
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72187500 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0049s; samplesPerSecond = 50730.5
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72187500 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0049s; samplesPerSecond = 50730.5
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69799023 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0049s; samplesPerSecond = 50751.1
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69799023 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0049s; samplesPerSecond = 50751.1
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70696387 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0050s; samplesPerSecond = 50454.1
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70696387 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0050s; samplesPerSecond = 50454.1
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69863965 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0050s; samplesPerSecond = 50393.1
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69863965 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0050s; samplesPerSecond = 50393.1
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71772461 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0048s; samplesPerSecond = 51899.5
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71772461 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0048s; samplesPerSecond = 51899.5
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69526270 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0047s; samplesPerSecond = 53544.7
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69526270 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0047s; samplesPerSecond = 53544.7
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71436426 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0047s; samplesPerSecond = 53498.8
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71436426 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0047s; samplesPerSecond = 53498.8
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70399316 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0047s; samplesPerSecond = 53694.2
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70399316 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0047s; samplesPerSecond = 53694.2
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71745508 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 53879.3
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71745508 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 53879.3
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71963184 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0047s; samplesPerSecond = 53521.7
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71963184 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0047s; samplesPerSecond = 53521.7
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70689941 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0047s; samplesPerSecond = 53602.1
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70689941 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0047s; samplesPerSecond = 53602.1
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70425098 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 53890.9
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70425098 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 53890.9
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70622754 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0047s; samplesPerSecond = 53728.8
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70622754 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0047s; samplesPerSecond = 53728.8
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69729492 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 53786.6
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69729492 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 53786.6
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75974219 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54265.2
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75974219 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54265.2
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70631250 * 250; EvalClassificationError = 0.43600000 * 250; time = 0.0047s; samplesPerSecond = 53659.6
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70631250 * 250; EvalErrorPrediction = 0.43600000 * 250; time = 0.0047s; samplesPerSecond = 53659.6
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70705664 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0047s; samplesPerSecond = 53602.1
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70705664 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0047s; samplesPerSecond = 53602.1
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72660352 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54124.3
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72660352 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54124.3
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71369727 * 250; EvalClassificationError = 0.55600000 * 250; time = 0.0047s; samplesPerSecond = 53441.6
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71369727 * 250; EvalErrorPrediction = 0.55600000 * 250; time = 0.0047s; samplesPerSecond = 53441.6
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68916602 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0047s; samplesPerSecond = 53659.6
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68916602 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0047s; samplesPerSecond = 53659.6
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69964844 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0047s; samplesPerSecond = 53339.0
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69964844 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0047s; samplesPerSecond = 53339.0
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.69387891 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 53832.9
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.69387891 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 53832.9
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.68885742 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0047s; samplesPerSecond = 53350.4
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.68885742 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0047s; samplesPerSecond = 53350.4
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69388867 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0047s; samplesPerSecond = 53430.2
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69388867 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0047s; samplesPerSecond = 53430.2
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.70363867 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 53960.7
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.70363867 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 53960.7
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65449219 * 250; EvalClassificationError = 0.44400000 * 250; time = 0.0047s; samplesPerSecond = 53544.7
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65449219 * 250; EvalErrorPrediction = 0.44400000 * 250; time = 0.0047s; samplesPerSecond = 53544.7
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64607031 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0047s; samplesPerSecond = 53453.1
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64607031 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0047s; samplesPerSecond = 53453.1
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.59492969 * 250; EvalClassificationError = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 53972.4
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.59492969 * 250; EvalErrorPrediction = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 53972.4
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.53965820 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0047s; samplesPerSecond = 53636.6
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.53965820 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0047s; samplesPerSecond = 53636.6
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.43681445 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0047s; samplesPerSecond = 52854.1
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.43681445 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0047s; samplesPerSecond = 52854.1
-05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37407422 * 250; EvalClassificationError = 0.12000000 * 250; time = 0.0047s; samplesPerSecond = 53521.7
+05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37407422 * 250; EvalErrorPrediction = 0.12000000 * 250; time = 0.0047s; samplesPerSecond = 53521.7
-05/03/2016 15:21:56: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68409629 * 10000; EvalClassificationError = 0.45780000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.194983s
+05/03/2016 15:21:56: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68409629 * 10000; EvalErrorPrediction = 0.45780000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.194983s
05/03/2016 15:21:56: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn.1'
05/03/2016 15:21:56: Starting Epoch 2: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
05/03/2016 15:21:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27919647 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0093s; samplesPerSecond = 26818.3
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27919647 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0093s; samplesPerSecond = 26818.3
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24468611 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0080s; samplesPerSecond = 31063.6
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24468611 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0080s; samplesPerSecond = 31063.6
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.19639892 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30982.8
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.19639892 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30982.8
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16397861 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0080s; samplesPerSecond = 31222.7
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16397861 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0080s; samplesPerSecond = 31222.7
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.19745002 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30944.4
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.19745002 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30944.4
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.19548896 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0081s; samplesPerSecond = 30871.8
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.19548896 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0081s; samplesPerSecond = 30871.8
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.18230148 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30910.0
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.18230148 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30910.0
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17531255 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0080s; samplesPerSecond = 31059.8
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17531255 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0080s; samplesPerSecond = 31059.8
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20166559 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0081s; samplesPerSecond = 30944.4
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20166559 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0081s; samplesPerSecond = 30944.4
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19749058 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0081s; samplesPerSecond = 31055.9
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19749058 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0081s; samplesPerSecond = 31055.9
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.13463336 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0081s; samplesPerSecond = 30963.6
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.13463336 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0081s; samplesPerSecond = 30963.6
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.19006259 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0080s; samplesPerSecond = 31063.6
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.19006259 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0080s; samplesPerSecond = 31063.6
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.12234776 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0079s; samplesPerSecond = 31605.6
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.12234776 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0079s; samplesPerSecond = 31605.6
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16962922 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32649.9
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16962922 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32649.9
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16091639 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32743.9
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16091639 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32743.9
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18624030 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32748.2
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18624030 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32748.2
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18465726 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18465726 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18514518 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0077s; samplesPerSecond = 32620.0
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18514518 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0077s; samplesPerSecond = 32620.0
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20127224 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20127224 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13418547 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32701.1
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13418547 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32701.1
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13995001 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13995001 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15602538 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32907.7
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15602538 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32907.7
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15448171 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32864.5
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15448171 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32864.5
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14780067 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32894.7
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14780067 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32894.7
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12361633 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0077s; samplesPerSecond = 32628.6
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12361633 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0077s; samplesPerSecond = 32628.6
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14079766 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32632.8
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14079766 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32632.8
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12624363 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12624363 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18913222 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32894.7
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18913222 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32894.7
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17952681 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0076s; samplesPerSecond = 32786.9
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17952681 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0076s; samplesPerSecond = 32786.9
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18825452 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18825452 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17517656 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32942.4
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17517656 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32942.4
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14744161 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14744161 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13888184 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32795.5
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13888184 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32795.5
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.14156678 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32855.8
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.14156678 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32855.8
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13990591 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0077s; samplesPerSecond = 32607.3
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13990591 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0077s; samplesPerSecond = 32607.3
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.15059729 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32855.8
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.15059729 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32855.8
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14720846 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0076s; samplesPerSecond = 32799.8
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14720846 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0076s; samplesPerSecond = 32799.8
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13021243 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32912.1
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13021243 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32912.1
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19704037 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0076s; samplesPerSecond = 33029.5
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19704037 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0076s; samplesPerSecond = 33029.5
-05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15858146 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32860.1
+05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15858146 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32860.1
-05/03/2016 15:21:56: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16938752 * 10000; EvalClassificationError = 0.07430000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.313881s
+05/03/2016 15:21:56: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16938752 * 10000; EvalErrorPrediction = 0.07430000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.313881s
05/03/2016 15:21:56: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn.2'
05/03/2016 15:21:56: Starting Epoch 3: learning rate per sample = 0.008000  effective momentum = 0.900000  momentum as time constant = 237.3 samples
05/03/2016 15:21:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18888809 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0078s; samplesPerSecond = 32129.5
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18888809 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0078s; samplesPerSecond = 32129.5
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.14084978 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32756.8
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.14084978 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32756.8
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14561895 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32666.9
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14561895 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32666.9
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13238169 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32752.5
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13238169 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32752.5
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17465335 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32765.4
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17465335 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32765.4
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17752616 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32821.3
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17752616 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32821.3
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.15030556 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0077s; samplesPerSecond = 32645.6
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.15030556 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0077s; samplesPerSecond = 32645.6
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17118019 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0077s; samplesPerSecond = 32611.5
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17118019 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0077s; samplesPerSecond = 32611.5
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10379908 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0077s; samplesPerSecond = 32637.1
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10379908 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0077s; samplesPerSecond = 32637.1
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20636150 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32782.6
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20636150 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32782.6
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16606704 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0077s; samplesPerSecond = 32543.6
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16606704 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0077s; samplesPerSecond = 32543.6
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.14937580 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32446.5
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.14937580 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32446.5
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19161901 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32731.1
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19161901 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32731.1
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13684752 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32696.8
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13684752 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32696.8
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.21095939 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32688.3
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.21095939 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32688.3
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.13216461 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32769.7
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.13216461 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32769.7
-05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17341094 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0077s; samplesPerSecond = 32586.0
+05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17341094 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0077s; samplesPerSecond = 32586.0
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16532641 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0076s; samplesPerSecond = 32868.8 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16532641 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0076s; samplesPerSecond = 32868.8
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14614740 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32696.8 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14614740 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32696.8
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12551177 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32705.4 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12551177 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32705.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13419939 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32782.6 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13419939 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32782.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.17050096 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32899.1 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.17050096 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22579789 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32838.6 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22579789 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18219666 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0078s; samplesPerSecond = 32220.6 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18219666 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0078s; samplesPerSecond = 32220.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.20347898 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32791.2 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.20347898 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22972656 * 250; EvalClassificationError = 0.12000000 * 250; time = 0.0076s; samplesPerSecond = 32825.6 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22972656 * 250; EvalErrorPrediction = 0.12000000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12621914 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0076s; samplesPerSecond = 32890.4 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12621914 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0076s; samplesPerSecond = 32890.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15674728 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32808.4 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15674728 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32808.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11517532 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0077s; samplesPerSecond = 32658.4 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11517532 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0077s; samplesPerSecond = 32658.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14187870 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32860.1 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14187870 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32860.1
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18496784 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32929.4 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18496784 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32929.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15026403 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32942.4 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15026403 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32942.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12862609 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32925.1 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12862609 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32925.1
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17651362 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32778.3 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17651362 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32778.3
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14975908 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0076s; samplesPerSecond = 32981.5 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14975908 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0076s; samplesPerSecond = 32981.5
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11465866 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0076s; samplesPerSecond = 32838.6 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11465866 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16513610 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32808.4 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16513610 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32808.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14972374 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32977.2 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14972374 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32977.2
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.15995582 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32825.6 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.15995582 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17898927 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32756.8 05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17898927 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32756.8
05/03/2016 15:21:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.16083773 * 10000; EvalClassificationError = 0.07760000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.307973s 05/03/2016 15:21:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.16083773 * 10000; EvalErrorPrediction = 0.07760000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.307973s
05/03/2016 15:21:56: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn' 05/03/2016 15:21:56: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn'
05/03/2016 15:21:56: CNTKCommandTrainEnd: Multigpu_Demo_Train 05/03/2016 15:21:56: CNTKCommandTrainEnd: Multigpu_Demo_Train
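Note: the epoch headers above report the same smoothing constant two ways, an effective momentum of 0.9 per minibatch and a "momentum as time constant" of 237.3 samples. Assuming a minibatch size of 25 (the usual Simple2d setting; it is not printed in this excerpt), the two are related by momentum = exp(-mbSize / timeConstant), which a few lines of Python confirm:

    import math

    mb_size = 25        # assumed minibatch size, not printed in this excerpt
    momentum = 0.9      # "effective momentum" from the epoch header
    time_constant = -mb_size / math.log(momentum)
    print(round(time_constant, 1))   # 237.3, matching the logged time constant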
@@ -624,7 +624,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
-EvalClassificationError = ClassificationError()
+EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -653,7 +653,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
-Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
+Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -677,7 +677,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
-(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
+(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
0x1222268: {[InvStdOfFeatures Value[2]] }
0x1223258: {[W2 Value[2 x 50]] }
0x12a56c8: {[B0 Value[50 x 1]] }
@@ -697,12 +697,12 @@ Memory Sharing Structure:
0x2adcc08: {[W0*features Value[50 x *1]] }
0x2add0a8: {[W0 Value[50 x 2]] }
0x2ae0518: {[W1 Value[50 x 50]] }
-0x68bf228: {[EvalClassificationError Value[1]] }
+0x68bf228: {[EvalErrorPrediction Value[1]] }
0x68bf388: {[CrossEntropyWithSoftmax Value[1]] }
0x68bf988: {[LogOfPrior Value[2]] }
0x68d0438: {[features Value[2 x *1]] }
-05/03/2016 15:21:57: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05804312 * 603; CrossEntropyWithSoftmax = 0.12736577 * 603; perplexity = 1.13583240
+05/03/2016 15:21:57: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05804312 * 603; CrossEntropyWithSoftmax = 0.12736577 * 603; perplexity = 1.13583240
05/03/2016 15:21:57: Action "test" complete.
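Note: each result line in these baselines prints a metric as mean * count. The derived numbers on the "Final Results" line follow directly from those two values; a quick sanity check (constants copied from the log):

    import math

    ce_mean, err_mean, n = 0.12736577, 0.05804312, 603
    print(math.exp(ce_mean))     # ~1.13583240, the reported perplexity
    print(round(err_mean * n))   # 35, i.e. 35 of 603 test samples misclassified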
View file
@@ -66,7 +66,7 @@ Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -167,7 +167,7 @@ Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -300,7 +300,7 @@ configparameters: Multigpu.cntk:Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
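Note: the three configuration hunks above all make the same one-line change, reverting the evaluation criterion in these baselines from the newer ClassificationError name back to ErrorPrediction (the identical metric values elsewhere in this diff suggest the two names denote the same criterion node). For bulk edits of such baselines, a throwaway rename table is enough; the helper below is a hypothetical sketch, not part of CNTK:

    # Hypothetical one-off helper for renaming criteria in config/baseline text.
    ALIASES = {"ClassificationError": "ErrorPrediction"}

    def revert_name(line: str) -> str:
        """Apply the same substitution this diff applies, line by line."""
        for new, old in ALIASES.items():
            line = line.replace(new, old)   # also maps EvalClassificationError
        return line                          # to EvalErrorPrediction

    print(revert_name('evalCriterion = "ClassificationError"'))
    # -> evalCriterion = "ErrorPrediction"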
@@ -368,7 +368,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
-EvalClassificationError = ClassificationError()
+EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -397,7 +397,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
-Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
+Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
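Note: in the validation lines above, * (or *1 in the GPU log) is the variable-length batch/sequence axis that shape inference carries through unchanged. A numpy stand-in for the Times rule [2 x 50], [50 x 1 x *] -> [2 x 1 x *], with an arbitrary 25 standing in for *:

    import numpy as np

    N = 25                                  # illustrative value for *
    W2 = np.random.randn(2, 50)
    H2 = np.random.randn(50, 1, N)
    out = np.einsum("ij,jkn->ikn", W2, H2)  # contract the shared 50-dim axis
    print(out.shape)                        # (2, 1, 25), i.e. [2 x 1 x *]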
@@ -421,14 +421,14 @@ Post-processing network complete.
05/03/2016 15:29:48: Evaluation criterion node(s):
-05/03/2016 15:29:48: EvalClassificationError = ClassificationError
+05/03/2016 15:29:48: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
-0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
+0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
000000CDDFBEECA0: {[features Value[2 x *]] }
000000CDDFC7B170: {[W0*features+B0 Gradient[50 x 1 x *]] [W1*H1 Value[50 x 1 x *]] }
000000CDDFC7B490: {[HLast Value[2 x 1 x *]] [W2 Gradient[2 x 50]] }
@@ -438,7 +438,7 @@ Memory Sharing Structure:
000000CDDFC7B990: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
000000CDDFC7BC10: {[LogOfPrior Value[2]] }
000000CDDFC7BCB0: {[MVNormalizedFeatures Value[2 x *]] }
-000000CDDFC7BD50: {[EvalClassificationError Value[1]] }
+000000CDDFC7BD50: {[EvalErrorPrediction Value[1]] }
000000CDDFC7BDF0: {[W0 Gradient[50 x 2]] [W0*features+B0 Value[50 x 1 x *]] }
000000CDDFC7BF30: {[ScaledLogLikelihood Value[2 x 1 x *]] }
000000CDDFC7C070: {[H2 Value[50 x 1 x *]] [W1*H1 Gradient[50 x 1 x *]] }
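Note: the memory-sharing dump groups every node's value/gradient matrix by the buffer it was assigned. Matrices listed behind the same address reuse one allocation, and the all-zero address ((nil) in the GPU log earlier) collects matrices that never need to be materialized, such as gradients of inputs and of the evaluation criterion. A toy regrouping in Python, using a few entries copied from the dump:

    from collections import defaultdict

    assignments = [                       # (matrix, buffer) pairs from the dump
        ("EvalErrorPrediction Gradient[1]", 0x0),
        ("features Value[2 x *]", 0xCDDFBEECA0),
        ("W0*features+B0 Gradient[50 x 1 x *]", 0xCDDFC7B170),
        ("W1*H1 Value[50 x 1 x *]", 0xCDDFC7B170),   # shares the buffer above
    ]

    by_buffer = defaultdict(list)
    for matrix, addr in assignments:
        by_buffer[addr].append(matrix)

    for addr, matrices in sorted(by_buffer.items()):
        print(f"{addr:016X}: {matrices}")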
@@ -471,139 +471,139 @@ Memory Sharing Structure:
05/03/2016 15:29:48: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:29:48: Starting minibatch loop.
-05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70511987 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0377s; samplesPerSecond = 6637.8
+05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70511987 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0377s; samplesPerSecond = 6637.8
-05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69754895 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0300s; samplesPerSecond = 8341.4
+05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69754895 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0300s; samplesPerSecond = 8341.4
-05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71056921 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0285s; samplesPerSecond = 8758.7
+05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71056921 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0285s; samplesPerSecond = 8758.7
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72951074 * 250; EvalClassificationError = 0.56000000 * 250; time = 0.0290s; samplesPerSecond = 8610.3
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72951074 * 250; EvalErrorPrediction = 0.56000000 * 250; time = 0.0290s; samplesPerSecond = 8610.3
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70946655 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0285s; samplesPerSecond = 8776.9
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70946655 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0285s; samplesPerSecond = 8776.9
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72656787 * 250; EvalClassificationError = 0.54400000 * 250; time = 0.0289s; samplesPerSecond = 8652.6
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72656787 * 250; EvalErrorPrediction = 0.54400000 * 250; time = 0.0289s; samplesPerSecond = 8652.6
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69337402 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0288s; samplesPerSecond = 8670.9
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69337402 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0288s; samplesPerSecond = 8670.9
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73605176 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0277s; samplesPerSecond = 9033.4
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73605176 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0277s; samplesPerSecond = 9033.4
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71453076 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0271s; samplesPerSecond = 9209.5
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71453076 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0271s; samplesPerSecond = 9209.5
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75191992 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0247s; samplesPerSecond = 10134.6
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75191992 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0247s; samplesPerSecond = 10134.6
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75975146 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0270s; samplesPerSecond = 9243.5
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75975146 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0270s; samplesPerSecond = 9243.5
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73172168 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0268s; samplesPerSecond = 9333.9
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73172168 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0268s; samplesPerSecond = 9333.9
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76840820 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0265s; samplesPerSecond = 9435.7
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76840820 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0265s; samplesPerSecond = 9435.7
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70464746 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0269s; samplesPerSecond = 9309.3
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70464746 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0269s; samplesPerSecond = 9309.3
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70557227 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0253s; samplesPerSecond = 9880.3
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70557227 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0253s; samplesPerSecond = 9880.3
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72711816 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0267s; samplesPerSecond = 9357.7
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72711816 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0267s; samplesPerSecond = 9357.7
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70076660 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0270s; samplesPerSecond = 9264.1
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70076660 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0270s; samplesPerSecond = 9264.1
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69409766 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0257s; samplesPerSecond = 9716.3
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69409766 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0257s; samplesPerSecond = 9716.3
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69139941 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0257s; samplesPerSecond = 9742.4
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69139941 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0257s; samplesPerSecond = 9742.4
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73361621 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0295s; samplesPerSecond = 8477.4
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73361621 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0295s; samplesPerSecond = 8477.4
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72225879 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0273s; samplesPerSecond = 9161.9
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72225879 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0273s; samplesPerSecond = 9161.9
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70356348 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0261s; samplesPerSecond = 9562.8
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70356348 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0261s; samplesPerSecond = 9562.8
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69928613 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0254s; samplesPerSecond = 9848.7
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69928613 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0254s; samplesPerSecond = 9848.7
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72360938 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0252s; samplesPerSecond = 9924.6
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72360938 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0252s; samplesPerSecond = 9924.6
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69871875 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0262s; samplesPerSecond = 9530.7
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69871875 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0262s; samplesPerSecond = 9530.7
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69114844 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9720.1
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69114844 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9720.1
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68648047 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0273s; samplesPerSecond = 9161.9
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68648047 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0273s; samplesPerSecond = 9161.9
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69657227 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0270s; samplesPerSecond = 9259.9
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69657227 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0270s; samplesPerSecond = 9259.9
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71585547 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0264s; samplesPerSecond = 9486.2
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71585547 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0264s; samplesPerSecond = 9486.2
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69730664 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0261s; samplesPerSecond = 9595.1
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69730664 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0261s; samplesPerSecond = 9595.1
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70432422 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0244s; samplesPerSecond = 10248.8
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70432422 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0244s; samplesPerSecond = 10248.8
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.69991797 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0220s; samplesPerSecond = 11388.0
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.69991797 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0220s; samplesPerSecond = 11388.0
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.68696875 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0222s; samplesPerSecond = 11277.0
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.68696875 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0222s; samplesPerSecond = 11277.0
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.67331445 * 250; EvalClassificationError = 0.37200000 * 250; time = 0.0245s; samplesPerSecond = 10192.4
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.67331445 * 250; EvalErrorPrediction = 0.37200000 * 250; time = 0.0245s; samplesPerSecond = 10192.4
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65711328 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0240s; samplesPerSecond = 10429.3
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65711328 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0240s; samplesPerSecond = 10429.3
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64534375 * 250; EvalClassificationError = 0.44800000 * 250; time = 0.0243s; samplesPerSecond = 10305.0
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64534375 * 250; EvalErrorPrediction = 0.44800000 * 250; time = 0.0243s; samplesPerSecond = 10305.0
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.61021875 * 250; EvalClassificationError = 0.36400000 * 250; time = 0.0236s; samplesPerSecond = 10606.3
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.61021875 * 250; EvalErrorPrediction = 0.36400000 * 250; time = 0.0236s; samplesPerSecond = 10606.3
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.54191016 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0236s; samplesPerSecond = 10578.4
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.54191016 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0236s; samplesPerSecond = 10578.4
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.45624414 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0232s; samplesPerSecond = 10762.4
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.45624414 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0232s; samplesPerSecond = 10762.4
-05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37636133 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0235s; samplesPerSecond = 10623.8
+05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37636133 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0235s; samplesPerSecond = 10623.8
-05/03/2016 15:29:49: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68695688 * 10000; EvalClassificationError = 0.45550000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=1.06166s
+05/03/2016 15:29:49: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68695688 * 10000; EvalErrorPrediction = 0.45550000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=1.06166s
05/03/2016 15:29:49: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn.1'
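Note: the throughput figures are simply samples divided by wall-clock time for each 250-sample minibatch. Because the printed time is rounded to four decimals, recomputing from the log reproduces the printed rate only approximately:

    samples, t = 250, 0.0377   # first minibatch of the CPU run above
    print(samples / t)         # ~6631.3 vs. the logged 6637.8 (time was rounded)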
05/03/2016 15:29:49: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:29:49: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
-05/03/2016 15:29:49: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.28780429 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0246s; samplesPerSecond = 10181.2
+05/03/2016 15:29:49: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.28780429 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0246s; samplesPerSecond = 10181.2
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.28222478 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0246s; samplesPerSecond = 10178.3
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.28222478 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0246s; samplesPerSecond = 10178.3
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.23589864 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0255s; samplesPerSecond = 9796.2
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.23589864 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0255s; samplesPerSecond = 9796.2
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.21209458 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0242s; samplesPerSecond = 10312.3
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.21209458 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0242s; samplesPerSecond = 10312.3
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.20285913 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10283.0
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.20285913 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10283.0
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.21300948 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0252s; samplesPerSecond = 9928.5
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.21300948 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0252s; samplesPerSecond = 9928.5
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.17835594 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0256s; samplesPerSecond = 9753.8
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.17835594 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0256s; samplesPerSecond = 9753.8
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.18830077 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0257s; samplesPerSecond = 9740.1
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.18830077 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0257s; samplesPerSecond = 9740.1
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.14198478 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0250s; samplesPerSecond = 10019.2
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.14198478 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0250s; samplesPerSecond = 10019.2
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.15895022 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0237s; samplesPerSecond = 10566.8
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.15895022 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0237s; samplesPerSecond = 10566.8
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.21062646 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0238s; samplesPerSecond = 10517.9
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.21062646 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0238s; samplesPerSecond = 10517.9
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.16081948 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0223s; samplesPerSecond = 11186.7
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.16081948 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0223s; samplesPerSecond = 11186.7
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15635713 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10700.2
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15635713 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10700.2
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13008516 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0239s; samplesPerSecond = 10453.7
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13008516 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0239s; samplesPerSecond = 10453.7
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16625347 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0234s; samplesPerSecond = 10674.2
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16625347 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0234s; samplesPerSecond = 10674.2
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15001793 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0245s; samplesPerSecond = 10223.7
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15001793 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0245s; samplesPerSecond = 10223.7
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22343917 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0234s; samplesPerSecond = 10692.4
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22343917 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0234s; samplesPerSecond = 10692.4
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18006735 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0245s; samplesPerSecond = 10194.5
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18006735 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0245s; samplesPerSecond = 10194.5
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15361620 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0235s; samplesPerSecond = 10636.9
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15361620 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0235s; samplesPerSecond = 10636.9
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17039588 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0246s; samplesPerSecond = 10177.1
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17039588 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0246s; samplesPerSecond = 10177.1
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15516786 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0237s; samplesPerSecond = 10544.1
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15516786 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0237s; samplesPerSecond = 10544.1
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15969617 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0225s; samplesPerSecond = 11102.2
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15969617 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0225s; samplesPerSecond = 11102.2
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15939439 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10697.9
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15939439 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10697.9
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15300194 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0233s; samplesPerSecond = 10729.2
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15300194 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0233s; samplesPerSecond = 10729.2
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14902476 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0231s; samplesPerSecond = 10811.7
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14902476 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0231s; samplesPerSecond = 10811.7
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15043256 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0231s; samplesPerSecond = 10823.4
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15043256 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0231s; samplesPerSecond = 10823.4
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15531360 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0229s; samplesPerSecond = 10936.1
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15531360 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0229s; samplesPerSecond = 10936.1
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17990796 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0248s; samplesPerSecond = 10088.4
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17990796 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0248s; samplesPerSecond = 10088.4
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22925668 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0229s; samplesPerSecond = 10913.7
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22925668 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0229s; samplesPerSecond = 10913.7
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16843626 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0234s; samplesPerSecond = 10682.8
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16843626 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0234s; samplesPerSecond = 10682.8
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18045325 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0236s; samplesPerSecond = 10585.6
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18045325 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0236s; samplesPerSecond = 10585.6
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13337526 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0221s; samplesPerSecond = 11308.6
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13337526 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0221s; samplesPerSecond = 11308.6
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14332977 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0245s; samplesPerSecond = 10219.9
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14332977 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0245s; samplesPerSecond = 10219.9
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18749446 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0242s; samplesPerSecond = 10326.7
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18749446 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0242s; samplesPerSecond = 10326.7
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15505967 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0236s; samplesPerSecond = 10587.8
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15505967 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0236s; samplesPerSecond = 10587.8
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.19616616 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0228s; samplesPerSecond = 10980.3
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.19616616 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0228s; samplesPerSecond = 10980.3
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17305907 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10610.3
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17305907 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10610.3
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15197365 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0249s; samplesPerSecond = 10033.3
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15197365 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0249s; samplesPerSecond = 10033.3
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12102416 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0238s; samplesPerSecond = 10483.5
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12102416 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0238s; samplesPerSecond = 10483.5
-05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15278496 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0235s; samplesPerSecond = 10646.9
+05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15278496 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0235s; samplesPerSecond = 10646.9
-05/03/2016 15:29:50: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.17643784 * 10000; EvalClassificationError = 0.07560000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.957696s
+05/03/2016 15:29:50: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.17643784 * 10000; EvalErrorPrediction = 0.07560000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.957696s
05/03/2016 15:29:50: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn.2'
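Note: both runs train with DataParallelSGD and NumGradientBits = 1, i.e. 1-bit SGD: each gradient value is exchanged as a single bit, and the quantization error is remembered and added back into the next minibatch's gradient (error feedback). The sketch below illustrates the idea only; it is not CNTK's implementation:

    import numpy as np

    def one_bit_quantize(grad, residual):
        g = grad + residual                    # add back last step's error
        pos = g >= 0
        pos_mean = g[pos].mean() if pos.any() else 0.0
        neg_mean = g[~pos].mean() if (~pos).any() else 0.0
        q = np.where(pos, pos_mean, neg_mean)  # 1 bit/value plus two scalars
        return q, g - q                        # residual feeds the next step

    rng = np.random.default_rng(0)
    grad = rng.normal(size=8)
    q, residual = one_bit_quantize(grad, np.zeros_like(grad))
    print(q)
    print(residual)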
05/03/2016 15:29:50: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples 05/03/2016 15:29:50: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:29:50: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1). 05/03/2016 15:29:50: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
-05/03/2016 15:29:50: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10623312 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10637.4
-05/03/2016 15:29:50: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17519442 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0236s; samplesPerSecond = 10608.5
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14133983 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0240s; samplesPerSecond = 10404.5
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16278491 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0233s; samplesPerSecond = 10749.0
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11783558 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0232s; samplesPerSecond = 10780.0
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16342188 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0243s; samplesPerSecond = 10305.9
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16272195 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0239s; samplesPerSecond = 10476.9
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19401477 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0241s; samplesPerSecond = 10370.0
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20186661 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0229s; samplesPerSecond = 10903.2
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13672539 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10631.1
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20069212 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0234s; samplesPerSecond = 10681.5
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17729039 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9928.1
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15906107 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0251s; samplesPerSecond = 9941.5
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16281632 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0247s; samplesPerSecond = 10121.5
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19834981 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0248s; samplesPerSecond = 10067.7
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10217642 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0247s; samplesPerSecond = 10105.1
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17011383 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0258s; samplesPerSecond = 9692.2
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16599137 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9911.6
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12648996 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0254s; samplesPerSecond = 9848.7
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11920298 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0248s; samplesPerSecond = 10091.2
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12883164 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0272s; samplesPerSecond = 9205.1
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18222479 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0250s; samplesPerSecond = 9988.0
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13443351 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0246s; samplesPerSecond = 10149.4
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19720325 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0244s; samplesPerSecond = 10230.8
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15586137 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0254s; samplesPerSecond = 9860.4
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11854887 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0250s; samplesPerSecond = 9991.6
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13705285 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0249s; samplesPerSecond = 10050.7
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20009941 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0240s; samplesPerSecond = 10411.5
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.19078680 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0233s; samplesPerSecond = 10741.6
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16505705 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0238s; samplesPerSecond = 10507.7
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.12232722 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0239s; samplesPerSecond = 10472.1
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16342047 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0238s; samplesPerSecond = 10514.4
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.15875107 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10688.3
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12248772 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0232s; samplesPerSecond = 10793.5
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13457009 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0238s; samplesPerSecond = 10521.4
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20976565 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0238s; samplesPerSecond = 10494.9
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16519102 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0230s; samplesPerSecond = 10862.5
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14971420 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0247s; samplesPerSecond = 10106.3
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16456633 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0230s; samplesPerSecond = 10858.2
-05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16971407 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0239s; samplesPerSecond = 10473.0
-05/03/2016 15:29:51: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15787325 * 10000; EvalClassificationError = 0.07430000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.972052s
+05/03/2016 15:29:50: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10623312 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10637.4
+05/03/2016 15:29:50: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17519442 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0236s; samplesPerSecond = 10608.5
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14133983 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0240s; samplesPerSecond = 10404.5
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16278491 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0233s; samplesPerSecond = 10749.0
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11783558 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0232s; samplesPerSecond = 10780.0
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16342188 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0243s; samplesPerSecond = 10305.9
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16272195 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0239s; samplesPerSecond = 10476.9
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19401477 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0241s; samplesPerSecond = 10370.0
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20186661 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0229s; samplesPerSecond = 10903.2
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13672539 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10631.1
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20069212 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0234s; samplesPerSecond = 10681.5
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17729039 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9928.1
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15906107 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0251s; samplesPerSecond = 9941.5
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16281632 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0247s; samplesPerSecond = 10121.5
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19834981 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0248s; samplesPerSecond = 10067.7
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10217642 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0247s; samplesPerSecond = 10105.1
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17011383 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0258s; samplesPerSecond = 9692.2
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16599137 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9911.6
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12648996 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0254s; samplesPerSecond = 9848.7
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11920298 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0248s; samplesPerSecond = 10091.2
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12883164 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0272s; samplesPerSecond = 9205.1
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18222479 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0250s; samplesPerSecond = 9988.0
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13443351 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0246s; samplesPerSecond = 10149.4
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19720325 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0244s; samplesPerSecond = 10230.8
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15586137 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0254s; samplesPerSecond = 9860.4
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11854887 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0250s; samplesPerSecond = 9991.6
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13705285 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0249s; samplesPerSecond = 10050.7
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20009941 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0240s; samplesPerSecond = 10411.5
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.19078680 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0233s; samplesPerSecond = 10741.6
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16505705 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0238s; samplesPerSecond = 10507.7
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.12232722 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0239s; samplesPerSecond = 10472.1
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16342047 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0238s; samplesPerSecond = 10514.4
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.15875107 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10688.3
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12248772 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0232s; samplesPerSecond = 10793.5
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13457009 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0238s; samplesPerSecond = 10521.4
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20976565 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0238s; samplesPerSecond = 10494.9
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16519102 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0230s; samplesPerSecond = 10862.5
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14971420 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0247s; samplesPerSecond = 10106.3
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16456633 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0230s; samplesPerSecond = 10858.2
+05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16971407 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0239s; samplesPerSecond = 10473.0
+05/03/2016 15:29:51: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15787325 * 10000; EvalErrorPrediction = 0.07430000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.972052s
05/03/2016 15:29:51: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn'
05/03/2016 15:29:51: CNTKCommandTrainEnd: Multigpu_Demo_Train
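The per-minibatch lines above follow one fixed pattern: each criterion is logged as an average times the sample count it covers, and samplesPerSecond is that count divided by the reported time. The momentum line states a single setting in two equivalent forms; assuming the 25-sample minibatches implied by 250 samples per 10-minibatch window (an inference, the log never prints the minibatch size), the time-constant form is tau = -mbSize / ln(momentum). A quick sanity check in Python:

    import math

    print(250 / 0.0242)         # ~10331 samples/s, matching "samplesPerSecond = 10326.7" up to timer rounding
    print(-25 / math.log(0.9))  # ~237.3, matching "momentum as time constant = 237.3 samples"
    print(0.15787325 * 10000)   # summed epoch-3 CrossEntropyWithSoftmax behind "0.15787325 * 10000"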
@@ -621,7 +621,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
-EvalClassificationError = ClassificationError()
+EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
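The only change in this hunk is the node name: ErrorPrediction and ClassificationError are two names for the same evaluation criterion, the fraction of samples whose highest-scoring output class disagrees with the one-hot label. A minimal NumPy sketch of that computation (illustrative only, not CNTK's implementation):

    import numpy as np

    def classification_error(labels, scores):
        # labels, scores: [num_classes, num_samples], matching the [2 x *] shapes above
        pred = scores.reshape(scores.shape[0], -1).argmax(axis=0)
        return float(np.mean(pred != labels.argmax(axis=0)))

    labels = np.array([[1, 0, 1], [0, 1, 0]])               # one-hot, 3 samples
    scores = np.array([[2.0, 0.5, 0.1], [1.0, 1.5, 0.3]])
    print(classification_error(labels, scores))             # 0.333..., only the last sample is misclassified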
@@ -650,7 +650,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
-Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
+Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
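In these Validating lines the bracketed shapes are per-sample tensor dimensions, with * (or *1) standing for the dynamic minibatch/sequence axis: Times applies the [2 x 50] matrix to every column on that axis, and Plus broadcasts the [2 x 1] bias across it. A shape check under that reading (NumPy, with an arbitrary batch of 7):

    import numpy as np

    n = 7                                            # the dynamic axis *1
    W2, H2 = np.ones((2, 50)), np.ones((50, 1, n))
    B2 = np.ones((2, 1, 1))                          # the [2 x 1] bias, broadcast over the dynamic axis
    HLast = np.einsum('ij,jkn->ikn', W2, H2) + B2    # Times, then Plus
    print(HLast.shape)                               # (2, 1, 7), i.e. [2 x 1 x *1]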
@@ -674,7 +674,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
-0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
+0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
000000CDDFC7B490: {[W0 Value[50 x 2]] }
000000CDDFC7B530: {[features Value[2 x *1]] }
000000CDDFC7B710: {[W1 Value[50 x 50]] }
@@ -690,7 +690,7 @@ Memory Sharing Structure:
000000CDDFC8C2B0: {[W1*H1+B1 Value[50 x 1 x *1]] }
000000CDDFC8C490: {[CrossEntropyWithSoftmax Value[1]] }
000000CDDFC8C5D0: {[LogOfPrior Value[2]] }
-000000CDDFC8C670: {[EvalClassificationError Value[1]] }
+000000CDDFC8C670: {[EvalErrorPrediction Value[1]] }
000000CDDFC8C990: {[MVNormalizedFeatures Value[2 x *1]] }
000000CDDFC8CA30: {[H2 Value[50 x 1 x *1]] }
000000CDDFC8CC10: {[W1*H1 Value[50 x 1 x *1]] }
@@ -699,7 +699,7 @@ Memory Sharing Structure:
000000CDDFC8D610: {[HLast Value[2 x 1 x *1]] }
000000CDDFC8D750: {[W0*features+B0 Value[50 x 1 x *1]] }
-05/03/2016 15:29:52: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05306799 * 603; CrossEntropyWithSoftmax = 0.11782631 * 603; perplexity = 1.12504868
+05/03/2016 15:29:52: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05306799 * 603; CrossEntropyWithSoftmax = 0.11782631 * 603; perplexity = 1.12504868
05/03/2016 15:29:52: Action "test" complete.
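The final-results line is internally consistent: the reported perplexity is the exponential of the average cross entropy, and the error rate times the sample count recovers a whole number of misclassified samples:

    \mathrm{perplexity} = \exp(\overline{\mathrm{CE}}) = \exp(0.11782631) \approx 1.12504868,
    \qquad 0.05306799 \times 603 \approx 32 \ \text{errors out of 603 test samples.}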
@@ -66,7 +66,7 @@ Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
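Read layerSizes = 2:50*2:2 as a 2-dimensional input, two 50-unit hidden layers (the *2 is a repeat count), and a 2-class output; together with layerTypes = "Sigmoid" and applyMeanVarNorm = true this is the graph validated above (MVNormalizedFeatures, H1, H2, HLast). A rough NumPy rendering of that forward pass, as a sketch under those assumptions (random placeholder weights, and per-batch normalization statistics where CNTK precomputes them over the training set):

    import numpy as np

    rng = np.random.default_rng(0)
    sizes = [2, 50, 50, 2]                          # layerSizes = 2:50*2:2
    Ws = [rng.normal(0, 1, (m, n)) for n, m in zip(sizes, sizes[1:])]
    Bs = [np.zeros((m, 1)) for m in sizes[1:]]

    def forward(x):
        h = (x - x.mean(1, keepdims=True)) / x.std(1, keepdims=True)  # applyMeanVarNorm
        for W, b in zip(Ws[:-1], Bs[:-1]):
            h = 1 / (1 + np.exp(-(W @ h + b)))      # Sigmoid hidden layers -> H1, H2
        return Ws[-1] @ h + Bs[-1]                  # HLast; CrossEntropyWithSoftmax adds the softmax

    print(forward(rng.normal(size=(2, 5))).shape)   # (2, 5): 2 classes x 5 samples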
@@ -167,7 +167,7 @@ Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -300,7 +300,7 @@ configparameters: Multigpu.cntk:Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -369,7 +369,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
-EvalClassificationError = ClassificationError()
+EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -398,7 +398,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
-Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
+Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -422,14 +422,14 @@ Post-processing network complete.
05/03/2016 15:29:53: Evaluation criterion node(s):
-05/03/2016 15:29:53: 	EvalClassificationError = ClassificationError
+05/03/2016 15:29:53: 	EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
-0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
+0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
000000572B66ECA0: {[features Value[2 x *]] }
00000057420A1700: {[W1 Value[50 x 50]] }
00000057420A1980: {[MeanOfFeatures Value[2]] }
@@ -448,7 +448,7 @@ Memory Sharing Structure:
00000057439283E0: {[LogOfPrior Value[2]] }
00000057439285C0: {[W0 Gradient[50 x 2]] [W0*features+B0 Value[50 x 1 x *]] }
0000005743928660: {[B1 Gradient[50 x 1]] [H2 Gradient[50 x 1 x *]] [HLast Gradient[2 x 1 x *]] }
-00000057439287A0: {[EvalClassificationError Value[1]] }
+00000057439287A0: {[EvalErrorPrediction Value[1]] }
0000005743928980: {[CrossEntropyWithSoftmax Value[1]] }
0000005743928A20: {[B2 Gradient[2 x 1]] }
0000005743928E80: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
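In these memory-sharing dumps each hexadecimal address is one actual buffer, and every matrix listed behind it reuses that buffer; the all-zero address collects matrices that are never materialized at all, which is why in a "test" action (no backpropagation) most Gradient matrices land there. Matrices can share a real buffer when their lifetimes in the evaluation order do not overlap, e.g. [B1 Gradient] with [H2 Gradient] and [HLast Gradient] above. A hypothetical greedy version of that interval-based reuse, as a sketch (not CNTK's actual allocator):

    def assign_buffers(lifetimes):
        # lifetimes: {name: (first_use, last_use)} in evaluation order
        free, assignment, next_id = [], {}, 0      # free: (usable_from, buffer_id)
        for name, (start, end) in sorted(lifetimes.items(), key=lambda kv: kv[1][0]):
            for i, (usable_from, buf) in enumerate(free):
                if usable_from <= start:           # previous tenant already finished
                    free.pop(i)
                    break
            else:
                buf, next_id = next_id, next_id + 1  # nothing reusable: new buffer
            assignment[name] = buf
            free.append((end + 1, buf))
        return assignment

    print(assign_buffers({"H1 Value": (0, 2), "H2 Value": (3, 5), "HLast Value": (4, 6)}))
    # {'H1 Value': 0, 'H2 Value': 0, 'HLast Value': 1} -- H1 and H2 share one buffer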
@@ -472,139 +472,139 @@ Memory Sharing Structure:
05/03/2016 15:29:54: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:29:54: Starting minibatch loop.
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70650452 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0115s; samplesPerSecond = 21832.2
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69701831 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0095s; samplesPerSecond = 26326.9
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71089587 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0100s; samplesPerSecond = 25067.7
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72980273 * 250; EvalClassificationError = 0.56000000 * 250; time = 0.0096s; samplesPerSecond = 26079.7
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70902783 * 250; EvalClassificationError = 0.52800000 * 250; time = 0.0115s; samplesPerSecond = 21692.0
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72657300 * 250; EvalClassificationError = 0.54400000 * 250; time = 0.0124s; samplesPerSecond = 20127.2
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69319678 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0091s; samplesPerSecond = 27439.4
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73563477 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0112s; samplesPerSecond = 22246.0
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71463281 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0115s; samplesPerSecond = 21739.1
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75213428 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0105s; samplesPerSecond = 23814.1
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75931445 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0115s; samplesPerSecond = 21763.7
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73075293 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0120s; samplesPerSecond = 20835.1
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76701953 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0130s; samplesPerSecond = 19305.0
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70451270 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0108s; samplesPerSecond = 23184.6
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70539941 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0117s; samplesPerSecond = 21385.8
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72700293 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0120s; samplesPerSecond = 20917.0
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70096191 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0112s; samplesPerSecond = 22301.5
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69437305 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0113s; samplesPerSecond = 22079.0
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69161621 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0116s; samplesPerSecond = 21514.6
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73388281 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0107s; samplesPerSecond = 23406.0
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72255664 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0116s; samplesPerSecond = 21546.2
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70414551 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0115s; samplesPerSecond = 21756.2
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69976758 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0113s; samplesPerSecond = 22065.3
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72419141 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0114s; samplesPerSecond = 22018.7
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69943945 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0111s; samplesPerSecond = 22604.0
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69206445 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0111s; samplesPerSecond = 22504.3
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68771680 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0113s; samplesPerSecond = 22118.0
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69878516 * 250; EvalClassificationError = 0.44000000 * 250; time = 0.0130s; samplesPerSecond = 19278.2
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71889844 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0127s; samplesPerSecond = 19632.5
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.70086523 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0095s; samplesPerSecond = 26329.6
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70878320 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0112s; samplesPerSecond = 22361.4
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.70674414 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0130s; samplesPerSecond = 19168.8
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69707422 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0094s; samplesPerSecond = 26729.4
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68588281 * 250; EvalClassificationError = 0.40800000 * 250; time = 0.0112s; samplesPerSecond = 22365.4
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.67734766 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0128s; samplesPerSecond = 19583.3
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.67958008 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0092s; samplesPerSecond = 27144.4
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.66424805 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0114s; samplesPerSecond = 21864.6
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.62412500 * 250; EvalClassificationError = 0.20400000 * 250; time = 0.0116s; samplesPerSecond = 21475.8
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.58007422 * 250; EvalClassificationError = 0.16000000 * 250; time = 0.0094s; samplesPerSecond = 26567.5
-05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.52764648 * 250; EvalClassificationError = 0.19200000 * 250; time = 0.0132s; samplesPerSecond = 18988.3
-05/03/2016 15:29:54: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.69975483 * 10000; EvalClassificationError = 0.46850000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.453807s
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70650452 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0115s; samplesPerSecond = 21832.2
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69701831 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0095s; samplesPerSecond = 26326.9
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71089587 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0100s; samplesPerSecond = 25067.7
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72980273 * 250; EvalErrorPrediction = 0.56000000 * 250; time = 0.0096s; samplesPerSecond = 26079.7
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70902783 * 250; EvalErrorPrediction = 0.52800000 * 250; time = 0.0115s; samplesPerSecond = 21692.0
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72657300 * 250; EvalErrorPrediction = 0.54400000 * 250; time = 0.0124s; samplesPerSecond = 20127.2
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69319678 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0091s; samplesPerSecond = 27439.4
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73563477 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0112s; samplesPerSecond = 22246.0
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71463281 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0115s; samplesPerSecond = 21739.1
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75213428 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0105s; samplesPerSecond = 23814.1
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75931445 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0115s; samplesPerSecond = 21763.7
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73075293 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0120s; samplesPerSecond = 20835.1
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76701953 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0130s; samplesPerSecond = 19305.0
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70451270 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0108s; samplesPerSecond = 23184.6
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70539941 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0117s; samplesPerSecond = 21385.8
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72700293 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0120s; samplesPerSecond = 20917.0
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70096191 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0112s; samplesPerSecond = 22301.5
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69437305 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0113s; samplesPerSecond = 22079.0
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69161621 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0116s; samplesPerSecond = 21514.6
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73388281 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0107s; samplesPerSecond = 23406.0
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72255664 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0116s; samplesPerSecond = 21546.2
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70414551 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0115s; samplesPerSecond = 21756.2
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69976758 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0113s; samplesPerSecond = 22065.3
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72419141 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0114s; samplesPerSecond = 22018.7
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69943945 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0111s; samplesPerSecond = 22604.0
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69206445 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0111s; samplesPerSecond = 22504.3
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68771680 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0113s; samplesPerSecond = 22118.0
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69878516 * 250; EvalErrorPrediction = 0.44000000 * 250; time = 0.0130s; samplesPerSecond = 19278.2
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71889844 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0127s; samplesPerSecond = 19632.5
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.70086523 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0095s; samplesPerSecond = 26329.6
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70878320 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0112s; samplesPerSecond = 22361.4
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.70674414 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0130s; samplesPerSecond = 19168.8
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69707422 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0094s; samplesPerSecond = 26729.4
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68588281 * 250; EvalErrorPrediction = 0.40800000 * 250; time = 0.0112s; samplesPerSecond = 22365.4
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.67734766 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0128s; samplesPerSecond = 19583.3
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.67958008 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0092s; samplesPerSecond = 27144.4
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.66424805 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0114s; samplesPerSecond = 21864.6
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.62412500 * 250; EvalErrorPrediction = 0.20400000 * 250; time = 0.0116s; samplesPerSecond = 21475.8
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.58007422 * 250; EvalErrorPrediction = 0.16000000 * 250; time = 0.0094s; samplesPerSecond = 26567.5
+05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.52764648 * 250; EvalErrorPrediction = 0.19200000 * 250; time = 0.0132s; samplesPerSecond = 18988.3
+05/03/2016 15:29:54: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.69975483 * 10000; EvalErrorPrediction = 0.46850000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.453807s
05/03/2016 15:29:54: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn.1'
05/03/2016 15:29:54: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:29:54: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.45075654 * 250; EvalClassificationError = 0.15200000 * 250; time = 0.0250s; samplesPerSecond = 10002.4
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.40775497 * 250; EvalClassificationError = 0.14400000 * 250; time = 0.0219s; samplesPerSecond = 11420.2
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.34165228 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0230s; samplesPerSecond = 10859.6
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.29708900 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0198s; samplesPerSecond = 12604.0
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.26669365 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0211s; samplesPerSecond = 11860.7
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.25328680 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0212s; samplesPerSecond = 11817.0
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.21017820 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0237s; samplesPerSecond = 10540.1
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.21483054 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0214s; samplesPerSecond = 11699.7
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.16626513 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0213s; samplesPerSecond = 11757.5
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.17672434 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0239s; samplesPerSecond = 10454.6
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.22140190 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0208s; samplesPerSecond = 12033.1
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17048554 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0237s; samplesPerSecond = 10553.4
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16438517 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10662.3
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13782141 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0218s; samplesPerSecond = 11449.0
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16909663 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0244s; samplesPerSecond = 10228.7
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15419129 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0229s; samplesPerSecond = 10924.7
-05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22229924 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0242s; samplesPerSecond = 10340.4
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18134995 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0236s; samplesPerSecond = 10579.3
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15616904 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0236s; samplesPerSecond = 10594.6
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17162733 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0262s; samplesPerSecond = 9530.3
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15676289 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0262s; samplesPerSecond = 9554.4
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16159542 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0262s; samplesPerSecond = 9558.8
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.16102246 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0284s; samplesPerSecond = 8800.3
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15392923 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0248s; samplesPerSecond = 10089.6
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14898334 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0269s; samplesPerSecond = 9279.5
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15087969 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0285s; samplesPerSecond = 8785.2
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15494578 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0247s; samplesPerSecond = 10101.4
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17878713 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0250s; samplesPerSecond = 9986.0
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22845049 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0249s; samplesPerSecond = 10045.4
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16884430 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0241s; samplesPerSecond = 10376.5
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17970282 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0237s; samplesPerSecond = 10533.9
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13292468 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0257s; samplesPerSecond = 9721.6
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14167778 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0226s; samplesPerSecond = 11048.3
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18716852 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0237s; samplesPerSecond = 10534.7
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15480385 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0258s; samplesPerSecond = 9705.0
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.19482328 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0247s; samplesPerSecond = 10115.7
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17488171 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0249s; samplesPerSecond = 10048.2
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15164433 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0281s; samplesPerSecond = 8901.2
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12142463 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0222s; samplesPerSecond = 11279.0
-05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15287631 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0238s; samplesPerSecond = 10489.7
-05/03/2016 15:29:55: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.19475469 * 10000; EvalClassificationError = 0.07830000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.964496s
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.45075654 * 250; EvalErrorPrediction = 0.15200000 * 250; time = 0.0250s; samplesPerSecond = 10002.4
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.40775497 * 250; EvalErrorPrediction = 0.14400000 * 250; time = 0.0219s; samplesPerSecond = 11420.2
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.34165228 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0230s; samplesPerSecond = 10859.6
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.29708900 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0198s; samplesPerSecond = 12604.0
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.26669365 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0211s; samplesPerSecond = 11860.7
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.25328680 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0212s; samplesPerSecond = 11817.0
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.21017820 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0237s; samplesPerSecond = 10540.1
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.21483054 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0214s; samplesPerSecond = 11699.7
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.16626513 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0213s; samplesPerSecond = 11757.5
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.17672434 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0239s; samplesPerSecond = 10454.6
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.22140190 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0208s; samplesPerSecond = 12033.1
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17048554 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0237s; samplesPerSecond = 10553.4
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16438517 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10662.3
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13782141 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0218s; samplesPerSecond = 11449.0
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16909663 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0244s; samplesPerSecond = 10228.7
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15419129 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0229s; samplesPerSecond = 10924.7
+05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22229924 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0242s; samplesPerSecond = 10340.4
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18134995 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0236s; samplesPerSecond = 10579.3
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15616904 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0236s; samplesPerSecond = 10594.6
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17162733 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0262s; samplesPerSecond = 9530.3
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15676289 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0262s; samplesPerSecond = 9554.4
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16159542 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0262s; samplesPerSecond = 9558.8
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.16102246 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0284s; samplesPerSecond = 8800.3
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15392923 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0248s; samplesPerSecond = 10089.6
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14898334 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0269s; samplesPerSecond = 9279.5
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15087969 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0285s; samplesPerSecond = 8785.2
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15494578 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0247s; samplesPerSecond = 10101.4
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17878713 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0250s; samplesPerSecond = 9986.0
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22845049 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0249s; samplesPerSecond = 10045.4
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16884430 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0241s; samplesPerSecond = 10376.5
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17970282 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0237s; samplesPerSecond = 10533.9
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13292468 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0257s; samplesPerSecond = 9721.6
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14167778 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0226s; samplesPerSecond = 11048.3
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18716852 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0237s; samplesPerSecond = 10534.7
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15480385 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0258s; samplesPerSecond = 9705.0
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.19482328 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0247s; samplesPerSecond = 10115.7
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17488171 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0249s; samplesPerSecond = 10048.2
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15164433 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0281s; samplesPerSecond = 8901.2
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12142463 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0222s; samplesPerSecond = 11279.0
+05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15287631 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0238s; samplesPerSecond = 10489.7
+05/03/2016 15:29:55: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.19475469 * 10000; EvalErrorPrediction = 0.07830000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.964496s
05/03/2016 15:29:55: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn.2' 05/03/2016 15:29:55: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn.2'
05/03/2016 15:29:55: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples 05/03/2016 15:29:55: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:29:55: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1). 05/03/2016 15:29:55: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10717578 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0253s; samplesPerSecond = 9869.7 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10717578 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0253s; samplesPerSecond = 9869.7
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17521929 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0234s; samplesPerSecond = 10701.1 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17521929 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0234s; samplesPerSecond = 10701.1
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14088211 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0250s; samplesPerSecond = 9986.8 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14088211 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0250s; samplesPerSecond = 9986.8
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16281337 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10287.6 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16281337 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10287.6
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11778386 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0234s; samplesPerSecond = 10666.9 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11778386 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0234s; samplesPerSecond = 10666.9
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16295400 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0266s; samplesPerSecond = 9385.8 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16295400 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0266s; samplesPerSecond = 9385.8
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16287201 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0233s; samplesPerSecond = 10746.2 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16287201 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0233s; samplesPerSecond = 10746.2
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19482140 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0242s; samplesPerSecond = 10312.3 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19482140 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0242s; samplesPerSecond = 10312.3
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20113689 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0235s; samplesPerSecond = 10643.3 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20113689 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0235s; samplesPerSecond = 10643.3
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13748570 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0238s; samplesPerSecond = 10484.4 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13748570 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0238s; samplesPerSecond = 10484.4
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20080420 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0236s; samplesPerSecond = 10600.9 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20080420 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0236s; samplesPerSecond = 10600.9
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17730590 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0268s; samplesPerSecond = 9342.3 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17730590 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0268s; samplesPerSecond = 9342.3
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15851029 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0233s; samplesPerSecond = 10743.0 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15851029 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0233s; samplesPerSecond = 10743.0
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16257260 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0250s; samplesPerSecond = 10012.8 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16257260 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0250s; samplesPerSecond = 10012.8
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19772537 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0224s; samplesPerSecond = 11143.3 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19772537 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0224s; samplesPerSecond = 11143.3
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10259204 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0235s; samplesPerSecond = 10626.1 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10259204 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0235s; samplesPerSecond = 10626.1
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17093073 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0244s; samplesPerSecond = 10230.0 05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17093073 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0244s; samplesPerSecond = 10230.0
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16628544 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9936.8 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16628544 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9936.8
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12690716 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0246s; samplesPerSecond = 10171.7 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12690716 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0246s; samplesPerSecond = 10171.7
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11894288 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0233s; samplesPerSecond = 10718.1 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11894288 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0233s; samplesPerSecond = 10718.1
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12815907 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0246s; samplesPerSecond = 10151.0 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12815907 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0246s; samplesPerSecond = 10151.0
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18265773 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0225s; samplesPerSecond = 11131.9 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18265773 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0225s; samplesPerSecond = 11131.9
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13388730 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0231s; samplesPerSecond = 10807.5 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13388730 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0231s; samplesPerSecond = 10807.5
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19787903 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0251s; samplesPerSecond = 9951.4 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19787903 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0251s; samplesPerSecond = 9951.4
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15563315 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0241s; samplesPerSecond = 10373.0 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15563315 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0241s; samplesPerSecond = 10373.0
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11837055 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0240s; samplesPerSecond = 10429.3 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11837055 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0240s; samplesPerSecond = 10429.3
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13732942 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10689.7 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13732942 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10689.7
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20012115 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0253s; samplesPerSecond = 9872.4 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20012115 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0253s; samplesPerSecond = 9872.4
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.19086846 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0238s; samplesPerSecond = 10525.4 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.19086846 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0238s; samplesPerSecond = 10525.4
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16492589 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10272.8 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16492589 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10272.8
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.12141157 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0238s; samplesPerSecond = 10509.5 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.12141157 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0238s; samplesPerSecond = 10509.5
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16335481 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10579.3 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16335481 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10579.3
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.15923900 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0241s; samplesPerSecond = 10358.0 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.15923900 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0241s; samplesPerSecond = 10358.0
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12315803 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10617.1 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12315803 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10617.1
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13481532 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0260s; samplesPerSecond = 9612.4 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13481532 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0260s; samplesPerSecond = 9612.4
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20958008 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0223s; samplesPerSecond = 11232.4 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20958008 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0223s; samplesPerSecond = 11232.4
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16519713 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0255s; samplesPerSecond = 9814.3 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16519713 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0255s; samplesPerSecond = 9814.3
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14990733 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0239s; samplesPerSecond = 10481.3 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14990733 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0239s; samplesPerSecond = 10481.3
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16508552 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0255s; samplesPerSecond = 9789.3 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16508552 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0255s; samplesPerSecond = 9789.3
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16941540 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0240s; samplesPerSecond = 10435.4 05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16941540 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0240s; samplesPerSecond = 10435.4
05/03/2016 15:29:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15791792 * 10000; EvalClassificationError = 0.07460000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.970059s 05/03/2016 15:29:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15791792 * 10000; EvalErrorPrediction = 0.07460000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.970059s
05/03/2016 15:29:56: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn' 05/03/2016 15:29:56: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn'
05/03/2016 15:29:56: CNTKCommandTrainEnd: Multigpu_Demo_Train 05/03/2016 15:29:56: CNTKCommandTrainEnd: Multigpu_Demo_Train
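Editor's note: "NumGradientBits = 1" in the minibatch-loop line above refers to CNTK's 1-bit SGD for data-parallel training: each gradient value is quantized to a single bit before it is exchanged between nodes, and the quantization error is remembered and folded into the next minibatch's gradient. A minimal sketch of that quantize-with-error-feedback step (an illustrative NumPy stand-in with simplified reconstruction, not CNTK's actual implementation):

    import numpy as np

    def one_bit_quantize(grad, residual):
        # Fold in the quantization error carried over from the last minibatch.
        g = grad + residual
        bits = g >= 0                       # the single transmitted bit per value
        # Reconstruct each side with its mean so the quantizer stays unbiased.
        pos = g[bits].mean() if bits.any() else 0.0
        neg = g[~bits].mean() if (~bits).any() else 0.0
        recon = np.where(bits, pos, neg)
        return bits, (neg, pos), g - recon  # last item: residual for the next step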
@@ -622,7 +622,7 @@ Post-processing network...
 7 roots:
 CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
-EvalClassificationError = ClassificationError()
+EvalErrorPrediction = ErrorPrediction()
 InvStdOfFeatures = InvStdDev()
 MeanOfFeatures = Mean()
 PosteriorProb = Softmax()
@@ -651,7 +651,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
 Validating --> B2 = LearnableParameter() : -> [2 x 1]
 Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
 Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
-Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
+Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
 Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
 Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
 Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
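Editor's note on the validation lines above: shapes like [2 x 1 x *1] are static dimensions plus a dynamic axis (*1) whose length varies with the number of samples, and both criterion nodes reduce to a scalar [1]. The renamed EvalErrorPrediction = ErrorPrediction (labels, HLast) is simply the classification error rate over that dynamic axis; a sketch of the equivalent computation (hypothetical NumPy stand-in, assuming one-hot labels, not CNTK code):

    import numpy as np

    def error_prediction(labels, hlast):
        # labels, hlast: [num_classes, num_samples]; compare top-scoring classes.
        return float(np.mean(hlast.argmax(axis=0) != labels.argmax(axis=0)))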
@@ -675,7 +675,7 @@ Allocating matrices for forward and/or backward propagation.
 Memory Sharing Structure:
-0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
+0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
 0000005743925BB0: {[HLast Value[2 x 1 x *1]] }
 0000005743925D90: {[MVNormalizedFeatures Value[2 x *1]] }
 0000005743925E30: {[CrossEntropyWithSoftmax Value[1]] }
@@ -688,7 +688,7 @@ Memory Sharing Structure:
 00000057439265B0: {[W0*features+B0 Value[50 x 1 x *1]] }
 0000005743926650: {[W1*H1 Value[50 x 1 x *1]] }
 0000005743926970: {[H2 Value[50 x 1 x *1]] }
-0000005743926AB0: {[EvalClassificationError Value[1]] }
+0000005743926AB0: {[EvalErrorPrediction Value[1]] }
 000000574B7FAD10: {[W0 Value[50 x 2]] }
 000000574B7FB170: {[InvStdOfFeatures Value[2]] }
 000000574B7FB210: {[MeanOfFeatures Value[2]] }
@@ -700,7 +700,7 @@ Memory Sharing Structure:
 000000574D960E50: {[B2 Value[2 x 1]] }
 000000574D9610D0: {[B0 Value[50 x 1]] }
-05/03/2016 15:29:56: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12022919 * 603; perplexity = 1.12775529
+05/03/2016 15:29:56: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12022919 * 603; perplexity = 1.12775529
 05/03/2016 15:29:56: Action "test" complete.
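Editor's note: the final test line reports per-sample averages scaled by the 603-sample test set, and perplexity is the exponential of the per-sample cross-entropy. A quick check of the reported numbers (the exp relationship is the standard definition; it reproduces the log to the printed precision):

    import math

    print(math.exp(0.12022919))   # 1.127755..., the reported perplexity
    print(0.05638474 * 603)       # ~34.0: misclassified samples behind the error rate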

View file

@@ -58,7 +58,7 @@ Simple_Demo_Train = [
 SimpleNetworkBuilder = [
 layerSizes = 2:50*2:2
 trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
 layerTypes = "Sigmoid"
 initValueScale = 1.0
 applyMeanVarNorm = true
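Editor's note: the unchanged lines in this hunk pin down the model. layerSizes = 2:50*2:2 is SimpleNetworkBuilder shorthand for 2 inputs, two Sigmoid hidden layers of 50 units each, and 2 outputs, with mean/variance normalization applied to the input; only evalCriterion is renamed. A minimal forward-pass sketch under those assumptions (illustrative only; random weights stand in for the learned W0/B0, W1/B1, W2/B2 seen in the validation output):

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    dims = [2, 50, 50, 2]                       # layerSizes = 2:50*2:2
    rng = np.random.default_rng(0)
    Ws = [rng.standard_normal((o, i)) for i, o in zip(dims, dims[1:])]
    bs = [np.zeros((o, 1)) for o in dims[1:]]

    def forward(features, mean, inv_std):
        h = (features - mean) * inv_std         # applyMeanVarNorm = true
        h = sigmoid(Ws[0] @ h + bs[0])          # H1 = Sigmoid(W0*features + B0)
        h = sigmoid(Ws[1] @ h + bs[1])          # H2 = Sigmoid(W1*H1 + B1)
        return Ws[2] @ h + bs[2]                # HLast, fed to both criterion nodes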
@@ -157,7 +157,7 @@ Simple_Demo_Train = [
 SimpleNetworkBuilder = [
 layerSizes = 2:50*2:2
 trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
 layerTypes = "Sigmoid"
 initValueScale = 1.0
 applyMeanVarNorm = true
@@ -300,7 +300,7 @@ configparameters: Simple.cntk:Simple_Demo_Train=[
 SimpleNetworkBuilder = [
 layerSizes = 2:50*2:2
 trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
 layerTypes = "Sigmoid"
 initValueScale = 1.0
 applyMeanVarNorm = true
@@ -355,7 +355,7 @@ Post-processing network...
 7 roots:
 CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
-EvalClassificationError = ClassificationError()
+EvalErrorPrediction = ErrorPrediction()
 InvStdOfFeatures = InvStdDev()
 MeanOfFeatures = Mean()
 PosteriorProb = Softmax()
@@ -384,7 +384,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
 Validating --> B2 = LearnableParameter() : -> [2 x 1]
 Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
 Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
-Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
+Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
 Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
 Validating --> Prior = Mean (labels) : [2 x *] -> [2]
 Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -408,14 +408,14 @@ Post-processing network complete.
 05/03/2016 15:21:15: Evaluation criterion node(s):
-05/03/2016 15:21:15: EvalClassificationError = ClassificationError
+05/03/2016 15:21:15: EvalErrorPrediction = ErrorPrediction
 Allocating matrices for forward and/or backward propagation.
 Memory Sharing Structure:
-(nil): {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
+(nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
 0x2e7f338: {[features Value[2 x *]] }
 0x2e82908: {[MeanOfFeatures Value[2]] }
 0x2e84f08: {[InvStdOfFeatures Value[2]] }
@@ -427,7 +427,7 @@ Memory Sharing Structure:
 0x2e8b718: {[B2 Value[2 x 1]] }
 0x2e8c1e8: {[labels Value[2 x *]] }
 0x2e8cf38: {[Prior Value[2]] }
-0x2e926f8: {[EvalClassificationError Value[1]] }
+0x2e926f8: {[EvalErrorPrediction Value[1]] }
 0x2e92858: {[ScaledLogLikelihood Value[2 x 1 x *]] }
 0x2e929b8: {[CrossEntropyWithSoftmax Value[1]] }
 0x2e93218: {[LogOfPrior Value[2]] }
@@ -458,139 +458,139 @@ Memory Sharing Structure:
 05/03/2016 15:21:17: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
 05/03/2016 15:21:17: Starting minibatch loop.
-05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69966235 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0806s; samplesPerSecond = 3103.4
+05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[   1-  10]: CrossEntropyWithSoftmax = 0.69966235 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0806s; samplesPerSecond = 3103.4
-05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.70639648 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0489s; samplesPerSecond = 5107.5
+05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[  11-  20]: CrossEntropyWithSoftmax = 0.70639648 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0489s; samplesPerSecond = 5107.5
-05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.70470264 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0598s; samplesPerSecond = 4180.0
+05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[  21-  30]: CrossEntropyWithSoftmax = 0.70470264 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0598s; samplesPerSecond = 4180.0
-05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[  31-  40]: CrossEntropyWithSoftmax = 0.69813501 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0581s; samplesPerSecond = 4306.3
+05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[  31-  40]: CrossEntropyWithSoftmax = 0.69813501 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0581s; samplesPerSecond = 4306.3
-05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[  41-  50]: CrossEntropyWithSoftmax = 0.73551416 * 250; EvalClassificationError = 0.57600000 * 250; time = 0.0618s; samplesPerSecond = 4045.4
+05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[  41-  50]: CrossEntropyWithSoftmax = 0.73551416 * 250; EvalErrorPrediction = 0.57600000 * 250; time = 0.0618s; samplesPerSecond = 4045.4
-05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[  51-  60]: CrossEntropyWithSoftmax = 0.72432324 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0579s; samplesPerSecond = 4314.7
+05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[  51-  60]: CrossEntropyWithSoftmax = 0.72432324 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0579s; samplesPerSecond = 4314.7
-05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[  61-  70]: CrossEntropyWithSoftmax = 0.73327588 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.2699s; samplesPerSecond = 926.3
+05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[  61-  70]: CrossEntropyWithSoftmax = 0.73327588 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.2699s; samplesPerSecond = 926.3
-05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[  71-  80]: CrossEntropyWithSoftmax = 0.70092627 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0620s; samplesPerSecond = 4035.0
+05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[  71-  80]: CrossEntropyWithSoftmax = 0.70092627 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0620s; samplesPerSecond = 4035.0
-05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[  81-  90]: CrossEntropyWithSoftmax = 0.72354980 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0826s; samplesPerSecond = 3027.2
+05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[  81-  90]: CrossEntropyWithSoftmax = 0.72354980 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0826s; samplesPerSecond = 3027.2
-05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[  91- 100]: CrossEntropyWithSoftmax = 0.72148096 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0811s; samplesPerSecond = 3082.2
+05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[  91- 100]: CrossEntropyWithSoftmax = 0.72148096 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0811s; samplesPerSecond = 3082.2
-05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69814941 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0895s; samplesPerSecond = 2793.1
+05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69814941 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0895s; samplesPerSecond = 2793.1
-05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70699121 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0482s; samplesPerSecond = 5187.9
+05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70699121 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0482s; samplesPerSecond = 5187.9
-05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69898437 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0567s; samplesPerSecond = 4408.3
+05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69898437 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0567s; samplesPerSecond = 4408.3
-05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71712695 * 250; EvalClassificationError = 0.54000000 * 250; time = 0.0586s; samplesPerSecond = 4266.7
+05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71712695 * 250; EvalErrorPrediction = 0.54000000 * 250; time = 0.0586s; samplesPerSecond = 4266.7
-05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69470703 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0546s; samplesPerSecond = 4575.3
+05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69470703 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0546s; samplesPerSecond = 4575.3
-05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71375879 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0640s; samplesPerSecond = 3907.4
+05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71375879 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0640s; samplesPerSecond = 3907.4
-05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70381641 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0756s; samplesPerSecond = 3307.9
+05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70381641 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0756s; samplesPerSecond = 3307.9
-05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71748633 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0598s; samplesPerSecond = 4178.1
+05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71748633 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0598s; samplesPerSecond = 4178.1
-05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71863281 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0813s; samplesPerSecond = 3075.3
+05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71863281 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0813s; samplesPerSecond = 3075.3
-05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70715234 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0811s; samplesPerSecond = 3082.9
+05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70715234 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0811s; samplesPerSecond = 3082.9
-05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70401074 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0673s; samplesPerSecond = 3717.1
+05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70401074 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0673s; samplesPerSecond = 3717.1
-05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70599414 * 250; EvalClassificationError = 0.48400000 * 250; time = 0.0819s; samplesPerSecond = 3052.5
+05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70599414 * 250; EvalErrorPrediction = 0.48400000 * 250; time = 0.0819s; samplesPerSecond = 3052.5
-05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69628711 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0909s; samplesPerSecond = 2749.3
+05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69628711 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0909s; samplesPerSecond = 2749.3
-05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75920898 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0752s; samplesPerSecond = 3323.1
+05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75920898 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0752s; samplesPerSecond = 3323.1
-05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70542578 * 250; EvalClassificationError = 0.43600000 * 250; time = 0.0734s; samplesPerSecond = 3406.2
+05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70542578 * 250; EvalErrorPrediction = 0.43600000 * 250; time = 0.0734s; samplesPerSecond = 3406.2
-05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70643945 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0869s; samplesPerSecond = 2875.4
+05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70643945 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0869s; samplesPerSecond = 2875.4
-05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72481641 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0893s; samplesPerSecond = 2798.7
+05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72481641 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0893s; samplesPerSecond = 2798.7
-05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71133594 * 250; EvalClassificationError = 0.55600000 * 250; time = 0.0814s; samplesPerSecond = 3072.2
+05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71133594 * 250; EvalErrorPrediction = 0.55600000 * 250; time = 0.0814s; samplesPerSecond = 3072.2
-05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68605664 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0812s; samplesPerSecond = 3077.4
+05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68605664 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0812s; samplesPerSecond = 3077.4
-05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69535352 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0895s; samplesPerSecond = 2792.1
+05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69535352 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0895s; samplesPerSecond = 2792.1
-05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.68741797 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0831s; samplesPerSecond = 3008.7
+05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.68741797 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0831s; samplesPerSecond = 3008.7
-05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.67916406 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0818s; samplesPerSecond = 3056.5
+05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.67916406 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0818s; samplesPerSecond = 3056.5
-05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.67841992 * 250; EvalClassificationError = 0.44800000 * 250; time = 0.2681s; samplesPerSecond = 932.5
+05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.67841992 * 250; EvalErrorPrediction = 0.44800000 * 250; time = 0.2681s; samplesPerSecond = 932.5
-05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68038477 * 250; EvalClassificationError = 0.49200000 * 250; time = 0.0513s; samplesPerSecond = 4869.4
+05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68038477 * 250; EvalErrorPrediction = 0.49200000 * 250; time = 0.0513s; samplesPerSecond = 4869.4
-05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.61937109 * 250; EvalClassificationError = 0.30400000 * 250; time = 0.0680s; samplesPerSecond = 3678.3
+05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.61937109 * 250; EvalErrorPrediction = 0.30400000 * 250; time = 0.0680s; samplesPerSecond = 3678.3
-05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.57844141 * 250; EvalClassificationError = 0.27200000 * 250; time = 0.0758s; samplesPerSecond = 3296.3
+05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.57844141 * 250; EvalErrorPrediction = 0.27200000 * 250; time = 0.0758s; samplesPerSecond = 3296.3
-05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.49124023 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0664s; samplesPerSecond = 3763.4
+05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.49124023 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0664s; samplesPerSecond = 3763.4
-05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.39071289 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0505s; samplesPerSecond = 4955.3
+05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.39071289 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0505s; samplesPerSecond = 4955.3
-05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.27650586 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0515s; samplesPerSecond = 4855.7
+05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.27650586 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0515s; samplesPerSecond = 4855.7
-05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.26430078 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0517s; samplesPerSecond = 4834.4
+05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.26430078 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0517s; samplesPerSecond = 4834.4
-05/03/2016 15:21:20: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.66664150 * 10000; EvalClassificationError = 0.44430000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=3.21314s
+05/03/2016 15:21:20: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.66664150 * 10000; EvalErrorPrediction = 0.44430000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=3.21314s
 05/03/2016 15:21:20: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_cpu/Models/simple.dnn.1'
 05/03/2016 15:21:20: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
 05/03/2016 15:21:20: Starting minibatch loop.
-05/03/2016 15:21:20: Epoch[ 2 of 3]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.20732678 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0782s; samplesPerSecond = 3196.0
+05/03/2016 15:21:20: Epoch[ 2 of 3]-Minibatch[   1-  10, 2.50%]: CrossEntropyWithSoftmax = 0.20732678 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0782s; samplesPerSecond = 3196.0
-05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.19684015 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0812s; samplesPerSecond = 3079.4
+05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  11-  20, 5.00%]: CrossEntropyWithSoftmax = 0.19684015 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0812s; samplesPerSecond = 3079.4
-05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.16083588 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0796s; samplesPerSecond = 3141.3
+05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  21-  30, 7.50%]: CrossEntropyWithSoftmax = 0.16083588 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0796s; samplesPerSecond = 3141.3
-05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  31-  40, 10.00%]: CrossEntropyWithSoftmax = 0.13558752 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0811s; samplesPerSecond = 3083.5
+05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  31-  40, 10.00%]: CrossEntropyWithSoftmax = 0.13558752 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0811s; samplesPerSecond = 3083.5
-05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  41-  50, 12.50%]: CrossEntropyWithSoftmax = 0.17992950 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0814s; samplesPerSecond = 3070.9
+05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  41-  50, 12.50%]: CrossEntropyWithSoftmax = 0.17992950 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0814s; samplesPerSecond = 3070.9
-05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  51-  60, 15.00%]: CrossEntropyWithSoftmax = 0.17858063 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0812s; samplesPerSecond = 3079.3
+05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  51-  60, 15.00%]: CrossEntropyWithSoftmax = 0.17858063 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0812s; samplesPerSecond = 3079.3
-05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  61-  70, 17.50%]: CrossEntropyWithSoftmax = 0.16847546 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0688s; samplesPerSecond = 3631.6
+05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  61-  70, 17.50%]: CrossEntropyWithSoftmax = 0.16847546 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0688s; samplesPerSecond = 3631.6
-05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  71-  80, 20.00%]: CrossEntropyWithSoftmax = 0.16359399 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0547s; samplesPerSecond = 4572.7
+05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  71-  80, 20.00%]: CrossEntropyWithSoftmax = 0.16359399 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0547s; samplesPerSecond = 4572.7
-05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  81-  90, 22.50%]: CrossEntropyWithSoftmax = 0.19534705 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0521s; samplesPerSecond = 4796.2
+05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  81-  90, 22.50%]: CrossEntropyWithSoftmax = 0.19534705 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0521s; samplesPerSecond = 4796.2
-05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19363660 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0758s; samplesPerSecond = 3297.5
+05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[  91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19363660 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0758s; samplesPerSecond = 3297.5
-05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.12703638 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0682s; samplesPerSecond = 3667.7
+05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.12703638 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0682s; samplesPerSecond = 3667.7
-05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.18622827 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0576s; samplesPerSecond = 4344.0
+05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.18622827 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0576s; samplesPerSecond = 4344.0
-05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.11595044 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0599s; samplesPerSecond = 4171.2
+05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.11595044 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0599s; samplesPerSecond = 4171.2
-05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16689380 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0650s; samplesPerSecond = 3845.2
+05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16689380 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0650s; samplesPerSecond = 3845.2
-05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.15822559 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0631s; samplesPerSecond = 3964.2
+05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.15822559 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0631s; samplesPerSecond = 3964.2
-05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18381909 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0638s; samplesPerSecond = 3920.5
+05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18381909 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0638s; samplesPerSecond = 3920.5
-05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18274048 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0642s; samplesPerSecond = 3893.2
+05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18274048 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0642s; samplesPerSecond = 3893.2
-05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18638428 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0564s; samplesPerSecond = 4431.5
+05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18638428 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0564s; samplesPerSecond = 4431.5
-05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20111572 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0528s; samplesPerSecond = 4733.8
+05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20111572 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0528s; samplesPerSecond = 4733.8
-05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13185034 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0504s; samplesPerSecond = 4962.1
+05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13185034 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0504s; samplesPerSecond = 4962.1
-05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13692554 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0559s; samplesPerSecond = 4468.8
+05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13692554 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0559s; samplesPerSecond = 4468.8
-05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15396802 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0672s; samplesPerSecond = 3719.4
+05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15396802 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0672s; samplesPerSecond = 3719.4
-05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15347241 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0818s; samplesPerSecond = 3057.6
+05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15347241 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0818s; samplesPerSecond = 3057.6
-05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14583887 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.2662s; samplesPerSecond = 939.1
+05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14583887 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.2662s; samplesPerSecond = 939.1
-05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12333276 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0738s; samplesPerSecond = 3389.0
+05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12333276 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0738s; samplesPerSecond = 3389.0
-05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.13958154 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0778s; samplesPerSecond = 3211.3
+05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.13958154 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0778s; samplesPerSecond = 3211.3
-05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12539844 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0772s; samplesPerSecond = 3239.1
+05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12539844 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0772s; samplesPerSecond = 3239.1
-05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.19014404 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0475s; samplesPerSecond = 5259.1
+05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.19014404 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0475s; samplesPerSecond = 5259.1
-05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17959521 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0780s; samplesPerSecond = 3206.4
+05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17959521 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0780s; samplesPerSecond = 3206.4
-05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18899121 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0469s; samplesPerSecond = 5333.6
+05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18899121 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0469s; samplesPerSecond = 5333.6
-05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17525586 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0625s; samplesPerSecond = 4003.1
+05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17525586 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0625s; samplesPerSecond = 4003.1
-05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14735645 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0940s; samplesPerSecond = 2658.9
+05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14735645 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0940s; samplesPerSecond = 2658.9
-05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13705518 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0543s; samplesPerSecond = 4600.2
+05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13705518 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0543s; samplesPerSecond = 4600.2
-05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.13610693 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0752s; samplesPerSecond = 3324.2
+05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.13610693 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0752s; samplesPerSecond = 3324.2
-05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13555811 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0583s; samplesPerSecond = 4291.1
+05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13555811 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0583s; samplesPerSecond = 4291.1
-05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.14883594 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0598s; samplesPerSecond = 4180.7
+05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.14883594 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0598s; samplesPerSecond = 4180.7
-05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14724707 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0599s; samplesPerSecond = 4172.4
+05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14724707 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0599s; samplesPerSecond = 4172.4
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13130469 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0664s; samplesPerSecond = 3764.2 05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13130469 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0664s; samplesPerSecond = 3764.2
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19636084 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0644s; samplesPerSecond = 3884.1 05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19636084 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0644s; samplesPerSecond = 3884.1
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15681836 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0651s; samplesPerSecond = 3841.0 05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15681836 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0651s; samplesPerSecond = 3841.0
05/03/2016 15:21:23: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16173864 * 10000; EvalClassificationError = 0.07520000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=2.87283s 05/03/2016 15:21:23: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16173864 * 10000; EvalErrorPrediction = 0.07520000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=2.87283s
05/03/2016 15:21:23: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_cpu/Models/simple.dnn.2' 05/03/2016 15:21:23: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_cpu/Models/simple.dnn.2'
05/03/2016 15:21:23: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples 05/03/2016 15:21:23: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:21:23: Starting minibatch loop. 05/03/2016 15:21:23: Starting minibatch loop.
05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18214960 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0604s; samplesPerSecond = 4138.7 05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18214960 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0604s; samplesPerSecond = 4138.7
05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13526825 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0622s; samplesPerSecond = 4020.6 05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13526825 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0622s; samplesPerSecond = 4020.6
05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14344995 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0640s; samplesPerSecond = 3906.0 05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14344995 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0640s; samplesPerSecond = 3906.0
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.12557471 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0628s; samplesPerSecond = 3978.7 05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.12557471 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0628s; samplesPerSecond = 3978.7
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17627924 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0639s; samplesPerSecond = 3914.6 05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17627924 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0639s; samplesPerSecond = 3914.6
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17585291 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0644s; samplesPerSecond = 3884.2 05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17585291 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0644s; samplesPerSecond = 3884.2
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.14716791 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0628s; samplesPerSecond = 3979.1 05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.14716791 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0628s; samplesPerSecond = 3979.1
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16757751 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0643s; samplesPerSecond = 3885.5 05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16757751 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0643s; samplesPerSecond = 3885.5
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10314917 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0642s; samplesPerSecond = 3895.3 05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10314917 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0642s; samplesPerSecond = 3895.3
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20322217 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0650s; samplesPerSecond = 3848.0 05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20322217 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0650s; samplesPerSecond = 3848.0
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16604797 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0642s; samplesPerSecond = 3892.3 05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16604797 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0642s; samplesPerSecond = 3892.3
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.15105725 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0651s; samplesPerSecond = 3839.4 05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.15105725 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0651s; samplesPerSecond = 3839.4
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19206934 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0640s; samplesPerSecond = 3903.9 05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19206934 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0640s; samplesPerSecond = 3903.9
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13667065 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.2688s; samplesPerSecond = 930.0 05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13667065 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.2688s; samplesPerSecond = 930.0
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.20713037 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0472s; samplesPerSecond = 5299.3 05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.20713037 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0472s; samplesPerSecond = 5299.3
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.12862158 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0625s; samplesPerSecond = 3998.5 05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.12862158 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0625s; samplesPerSecond = 3998.5
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17174683 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0465s; samplesPerSecond = 5381.7 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17174683 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0465s; samplesPerSecond = 5381.7
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16493628 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0526s; samplesPerSecond = 4753.8 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16493628 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0526s; samplesPerSecond = 4753.8
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14843726 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0505s; samplesPerSecond = 4952.5 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14843726 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0505s; samplesPerSecond = 4952.5
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12574292 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0505s; samplesPerSecond = 4951.4 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12574292 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0505s; samplesPerSecond = 4951.4
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13455151 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0614s; samplesPerSecond = 4072.8 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13455151 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0614s; samplesPerSecond = 4072.8
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16762988 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0495s; samplesPerSecond = 5055.0 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16762988 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0495s; samplesPerSecond = 5055.0
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22347461 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0523s; samplesPerSecond = 4780.1 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22347461 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0523s; samplesPerSecond = 4780.1
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18213623 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0542s; samplesPerSecond = 4611.6 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18213623 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0542s; samplesPerSecond = 4611.6
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.19970923 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0539s; samplesPerSecond = 4638.8 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.19970923 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0539s; samplesPerSecond = 4638.8
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22695947 * 250; EvalClassificationError = 0.12800000 * 250; time = 0.0542s; samplesPerSecond = 4609.7 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22695947 * 250; EvalErrorPrediction = 0.12800000 * 250; time = 0.0542s; samplesPerSecond = 4609.7
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12664502 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0541s; samplesPerSecond = 4625.3 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12664502 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0541s; samplesPerSecond = 4625.3
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15838037 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0538s; samplesPerSecond = 4648.8 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15838037 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0538s; samplesPerSecond = 4648.8
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11555566 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0581s; samplesPerSecond = 4305.4 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11555566 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0581s; samplesPerSecond = 4305.4
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14157520 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0544s; samplesPerSecond = 4595.2 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14157520 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0544s; samplesPerSecond = 4595.2
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18558350 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0541s; samplesPerSecond = 4622.4 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18558350 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0541s; samplesPerSecond = 4622.4
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15083594 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0540s; samplesPerSecond = 4632.9 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15083594 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0540s; samplesPerSecond = 4632.9
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12831787 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0541s; samplesPerSecond = 4624.1 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12831787 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0541s; samplesPerSecond = 4624.1
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17656494 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0545s; samplesPerSecond = 4587.6 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17656494 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0545s; samplesPerSecond = 4587.6
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14956396 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0625s; samplesPerSecond = 4000.3 05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14956396 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0625s; samplesPerSecond = 4000.3
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11451660 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0496s; samplesPerSecond = 5040.3 05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11451660 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0496s; samplesPerSecond = 5040.3
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16392383 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0496s; samplesPerSecond = 5036.0 05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16392383 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0496s; samplesPerSecond = 5036.0
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14811230 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0505s; samplesPerSecond = 4955.0 05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14811230 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0505s; samplesPerSecond = 4955.0
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16003760 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0588s; samplesPerSecond = 4255.2 05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16003760 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0588s; samplesPerSecond = 4255.2
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17969775 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0482s; samplesPerSecond = 5185.4 05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17969775 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0482s; samplesPerSecond = 5185.4
05/03/2016 15:21:26: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15964808 * 10000; EvalClassificationError = 0.07750000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=2.49695s 05/03/2016 15:21:26: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15964808 * 10000; EvalErrorPrediction = 0.07750000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=2.49695s
05/03/2016 15:21:26: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_cpu/Models/simple.dnn' 05/03/2016 15:21:26: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_cpu/Models/simple.dnn'
05/03/2016 15:21:26: CNTKCommandTrainEnd: Simple_Demo_Train 05/03/2016 15:21:26: CNTKCommandTrainEnd: Simple_Demo_Train
@@ -608,7 +608,7 @@ Post-processing network...
7 roots: 7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError() EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev() InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean() MeanOfFeatures = Mean()
PosteriorProb = Softmax() PosteriorProb = Softmax()
@@ -637,7 +637,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
Validating --> B2 = LearnableParameter() : -> [2 x 1] Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1] Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1] Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1] Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1] Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
Validating --> Prior = Mean (labels) : [2 x *1] -> [2] Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2] Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -661,7 +661,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure: Memory Sharing Structure:
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] } (nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
0x2e83eb8: {[W2 Value[2 x 50]] } 0x2e83eb8: {[W2 Value[2 x 50]] }
0x2e87ac8: {[MVNormalizedFeatures Value[2 x *1]] } 0x2e87ac8: {[MVNormalizedFeatures Value[2 x *1]] }
0x2e87e78: {[W0*features Value[50 x *1]] } 0x2e87e78: {[W0*features Value[50 x *1]] }
@@ -676,7 +676,7 @@ Memory Sharing Structure:
0x2e8d298: {[B2 Value[2 x 1]] } 0x2e8d298: {[B2 Value[2 x 1]] }
0x2e8f2c8: {[labels Value[2 x *1]] } 0x2e8f2c8: {[labels Value[2 x *1]] }
0x2e8f8e8: {[MeanOfFeatures Value[2]] } 0x2e8f8e8: {[MeanOfFeatures Value[2]] }
0x2e91598: {[EvalClassificationError Value[1]] } 0x2e91598: {[EvalErrorPrediction Value[1]] }
0x2e916f8: {[CrossEntropyWithSoftmax Value[1]] } 0x2e916f8: {[CrossEntropyWithSoftmax Value[1]] }
0x2e91bb8: {[LogOfPrior Value[2]] } 0x2e91bb8: {[LogOfPrior Value[2]] }
0x2e93758: {[B0 Value[50 x 1]] } 0x2e93758: {[B0 Value[50 x 1]] }
@@ -686,7 +686,7 @@ Memory Sharing Structure:
0x2e985f8: {[W1 Value[50 x 50]] } 0x2e985f8: {[W1 Value[50 x 50]] }
0x2e99178: {[features Value[2 x *1]] } 0x2e99178: {[features Value[2 x *1]] }
05/03/2016 15:21:26: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05970149 * 603; CrossEntropyWithSoftmax = 0.13085309 * 603; perplexity = 1.13980032 05/03/2016 15:21:26: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05970149 * 603; CrossEntropyWithSoftmax = 0.13085309 * 603; perplexity = 1.13980032
05/03/2016 15:21:26: Action "test" complete. 05/03/2016 15:21:26: Action "test" complete.
@@ -702,7 +702,7 @@ Post-processing network...
8 roots: 8 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError() EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev() InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean() MeanOfFeatures = Mean()
PosteriorProb = Softmax() PosteriorProb = Softmax()
@@ -732,7 +732,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *2] -> [2 x 1 x *2]
Validating --> B2 = LearnableParameter() : -> [2 x 1] Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *2], [2 x 1] -> [2 x 1 x *2] Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *2], [2 x 1] -> [2 x 1 x *2]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1] Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1] Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *2] -> [2 x 1 x *2] Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *2] -> [2 x 1 x *2]
Validating --> Prior = Mean (labels) : [2 x *2] -> [2] Validating --> Prior = Mean (labels) : [2 x *2] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2] Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -755,7 +755,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure: Memory Sharing Structure:
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalClassificationError Gradient[1]] [EvalClassificationError Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] } (nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalErrorPrediction Gradient[1]] [EvalErrorPrediction Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
0x2e82858: {[PosteriorProb Value[2 x 1 x *2]] } 0x2e82858: {[PosteriorProb Value[2 x 1 x *2]] }
0x2e83b58: {[labels Value[2 x *2]] } 0x2e83b58: {[labels Value[2 x *2]] }
0x2e84318: {[MeanOfFeatures Value[2]] } 0x2e84318: {[MeanOfFeatures Value[2]] }

View file

@@ -58,7 +58,7 @@ Simple_Demo_Train = [
SimpleNetworkBuilder = [ SimpleNetworkBuilder = [
layerSizes = 2:50*2:2 layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax" trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError" evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid" layerTypes = "Sigmoid"
initValueScale = 1.0 initValueScale = 1.0
applyMeanVarNorm = true applyMeanVarNorm = true
@@ -157,7 +157,7 @@ Simple_Demo_Train = [
SimpleNetworkBuilder = [ SimpleNetworkBuilder = [
layerSizes = 2:50*2:2 layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax" trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError" evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid" layerTypes = "Sigmoid"
initValueScale = 1.0 initValueScale = 1.0
applyMeanVarNorm = true applyMeanVarNorm = true
@@ -300,7 +300,7 @@ configparameters: Simple.cntk:Simple_Demo_Train=[
SimpleNetworkBuilder = [ SimpleNetworkBuilder = [
layerSizes = 2:50*2:2 layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax" trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError" evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid" layerTypes = "Sigmoid"
initValueScale = 1.0 initValueScale = 1.0
applyMeanVarNorm = true applyMeanVarNorm = true
@@ -356,7 +356,7 @@ Post-processing network...
7 roots: 7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError() EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev() InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean() MeanOfFeatures = Mean()
PosteriorProb = Softmax() PosteriorProb = Softmax()
@@ -385,7 +385,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [2 x 1] Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *] Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1] Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1] Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *] Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
Validating --> Prior = Mean (labels) : [2 x *] -> [2] Validating --> Prior = Mean (labels) : [2 x *] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2] Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -409,14 +409,14 @@ Post-processing network complete.
05/03/2016 15:21:27: Evaluation criterion node(s): 05/03/2016 15:21:27: Evaluation criterion node(s):
05/03/2016 15:21:27: EvalClassificationError = ClassificationError 05/03/2016 15:21:27: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation. Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure: Memory Sharing Structure:
(nil): {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] } (nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
0x1ef9338: {[features Value[2 x *]] } 0x1ef9338: {[features Value[2 x *]] }
0x2b32ad8: {[MeanOfFeatures Value[2]] } 0x2b32ad8: {[MeanOfFeatures Value[2]] }
0x2b32fe8: {[InvStdOfFeatures Value[2]] } 0x2b32fe8: {[InvStdOfFeatures Value[2]] }
@@ -429,7 +429,7 @@ Memory Sharing Structure:
0x3185898: {[Prior Value[2]] } 0x3185898: {[Prior Value[2]] }
0x3186bd8: {[LogOfPrior Value[2]] } 0x3186bd8: {[LogOfPrior Value[2]] }
0x318b378: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] } 0x318b378: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
0x318b498: {[EvalClassificationError Value[1]] } 0x318b498: {[EvalErrorPrediction Value[1]] }
0x318b798: {[ScaledLogLikelihood Value[2 x 1 x *]] } 0x318b798: {[ScaledLogLikelihood Value[2 x 1 x *]] }
0x318b8f8: {[CrossEntropyWithSoftmax Value[1]] } 0x318b8f8: {[CrossEntropyWithSoftmax Value[1]] }
0x3191148: {[B0 Value[50 x 1]] } 0x3191148: {[B0 Value[50 x 1]] }
@@ -459,139 +459,139 @@ Memory Sharing Structure:
05/03/2016 15:21:28: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples 05/03/2016 15:21:28: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:21:28: Starting minibatch loop. 05/03/2016 15:21:28: Starting minibatch loop.
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70004456 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0055s; samplesPerSecond = 45495.9 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70004456 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0055s; samplesPerSecond = 45495.9
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70309900 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54347.8 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70309900 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54347.8
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70606104 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54241.7 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70606104 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54241.7
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69845532 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54549.4 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69845532 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54549.4
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73496533 * 250; EvalClassificationError = 0.57600000 * 250; time = 0.0046s; samplesPerSecond = 54136.0 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73496533 * 250; EvalErrorPrediction = 0.57600000 * 250; time = 0.0046s; samplesPerSecond = 54136.0
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72522827 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0046s; samplesPerSecond = 54359.6 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72522827 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0046s; samplesPerSecond = 54359.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73287500 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 54466.2 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73287500 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 54466.2
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70135547 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54872.7 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70135547 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54872.7
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72466504 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 54194.7 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72466504 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 54194.7
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72187500 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0046s; samplesPerSecond = 54501.9 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72187500 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0046s; samplesPerSecond = 54501.9
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69799023 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54788.5 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69799023 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54788.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70696387 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0046s; samplesPerSecond = 54371.5 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70696387 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0046s; samplesPerSecond = 54371.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69863965 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54300.6 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69863965 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54300.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71772461 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0046s; samplesPerSecond = 54644.8 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71772461 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0046s; samplesPerSecond = 54644.8
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69526270 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54525.6 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69526270 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54525.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71436426 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54561.3 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71436426 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70399316 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0046s; samplesPerSecond = 54573.2 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70399316 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0046s; samplesPerSecond = 54573.2
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71745508 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 54716.6 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71745508 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 54716.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71963184 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0046s; samplesPerSecond = 54537.5 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71963184 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0046s; samplesPerSecond = 54537.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70689941 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54336.0 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70689941 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54336.0
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70425098 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54692.6 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70425098 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54692.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70622754 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0046s; samplesPerSecond = 54561.3 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70622754 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69729492 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54537.5 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69729492 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54537.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75974219 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54680.7 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75974219 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54680.7
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70631250 * 250; EvalClassificationError = 0.43600000 * 250; time = 0.0046s; samplesPerSecond = 54288.8 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70631250 * 250; EvalErrorPrediction = 0.43600000 * 250; time = 0.0046s; samplesPerSecond = 54288.8
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70705664 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0046s; samplesPerSecond = 54561.3 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70705664 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72660352 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54824.6 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72660352 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71369727 * 250; EvalClassificationError = 0.55600000 * 250; time = 0.0046s; samplesPerSecond = 54537.5 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71369727 * 250; EvalErrorPrediction = 0.55600000 * 250; time = 0.0046s; samplesPerSecond = 54537.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68916602 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0046s; samplesPerSecond = 54371.5 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68916602 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0046s; samplesPerSecond = 54371.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69964844 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0046s; samplesPerSecond = 54218.2 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69964844 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0046s; samplesPerSecond = 54218.2
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.69387891 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0045s; samplesPerSecond = 54969.2 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.69387891 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.68885742 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0046s; samplesPerSecond = 54573.2 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.68885742 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0046s; samplesPerSecond = 54573.2
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69388867 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54454.4 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69388867 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54454.4
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.70363867 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54824.6 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.70363867 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65449219 * 250; EvalClassificationError = 0.44400000 * 250; time = 0.0046s; samplesPerSecond = 54561.3 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65449219 * 250; EvalErrorPrediction = 0.44400000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64607031 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 54347.8 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64607031 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 54347.8
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.59492969 * 250; EvalClassificationError = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 54764.5 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.59492969 * 250; EvalErrorPrediction = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 54764.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.53965820 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54609.0 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.53965820 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54609.0
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.43681445 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0046s; samplesPerSecond = 54525.6 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.43681445 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0046s; samplesPerSecond = 54525.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37407422 * 250; EvalClassificationError = 0.12000000 * 250; time = 0.0046s; samplesPerSecond = 54466.2 05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37407422 * 250; EvalErrorPrediction = 0.12000000 * 250; time = 0.0046s; samplesPerSecond = 54466.2
05/03/2016 15:21:28: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68409629 * 10000; EvalClassificationError = 0.45780000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.1879s 05/03/2016 15:21:28: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68409629 * 10000; EvalErrorPrediction = 0.45780000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.1879s
05/03/2016 15:21:28: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_gpu/Models/simple.dnn.1' 05/03/2016 15:21:28: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_gpu/Models/simple.dnn.1'
05/03/2016 15:21:28: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples 05/03/2016 15:21:28: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:21:28: Starting minibatch loop. 05/03/2016 15:21:28: Starting minibatch loop.
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27895840 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0046s; samplesPerSecond = 53902.5 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27895840 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0046s; samplesPerSecond = 53902.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24395615 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54933.0 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24395615 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54933.0
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.19587115 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54824.6 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.19587115 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16368213 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 55126.8 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16368213 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 55126.8
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.19700140 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54933.0 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.19700140 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54933.0
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.19580530 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54585.2 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.19580530 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54585.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.18257983 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.18257983 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17520911 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54752.5 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17520911 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54752.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20164514 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0046s; samplesPerSecond = 54752.5 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20164514 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0046s; samplesPerSecond = 54752.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19787024 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0046s; samplesPerSecond = 54466.2 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19787024 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0046s; samplesPerSecond = 54466.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.13437573 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0045s; samplesPerSecond = 55090.3 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.13437573 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0045s; samplesPerSecond = 55090.3
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.19004956 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54848.6 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.19004956 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54848.6
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.12287280 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 54957.1 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.12287280 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 54957.1
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16975903 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55175.5 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16975903 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55175.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16102686 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54513.7 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16102686 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54513.7
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18611646 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54800.5 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18611646 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54800.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18469507 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 55334.2 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18469507 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 55334.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18472339 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54908.9 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18472339 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54908.9
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20064648 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54597.1 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20064648 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54597.1
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13324683 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 54969.2 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13324683 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13878418 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0045s; samplesPerSecond = 55078.2 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13878418 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0045s; samplesPerSecond = 55078.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15587354 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54920.9 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15587354 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54920.9
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15337378 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54812.5 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15337378 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54812.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14797070 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 55199.8 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14797070 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 55199.8
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12512891 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0046s; samplesPerSecond = 54383.3 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12512891 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0046s; samplesPerSecond = 54383.3
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14058545 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 54993.4 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14058545 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 54993.4
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12611963 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 54945.1 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12611963 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18970605 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54884.7 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18970605 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54884.7
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17965479 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 54969.2 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17965479 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18866455 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0046s; samplesPerSecond = 54836.6 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18866455 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0046s; samplesPerSecond = 54836.6
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17539941 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 54945.1 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17539941 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14742432 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54848.6 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14742432 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54848.6
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13789502 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0046s; samplesPerSecond = 54788.5 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13789502 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0046s; samplesPerSecond = 54788.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.13652100 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0045s; samplesPerSecond = 55224.2 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.13652100 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0045s; samplesPerSecond = 55224.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13619336 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54920.9 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13619336 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54920.9
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.14909424 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54478.1 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.14909424 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54478.1
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14762256 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 55139.0 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14762256 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 55139.0
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13142578 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54860.7 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13142578 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54860.7
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19570459 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0046s; samplesPerSecond = 54764.5 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19570459 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0046s; samplesPerSecond = 54764.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15718604 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55005.5 05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15718604 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55005.5
05/03/2016 15:21:28: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16901047 * 10000; EvalClassificationError = 0.07510000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.184798s 05/03/2016 15:21:28: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16901047 * 10000; EvalErrorPrediction = 0.07510000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.184798s
05/03/2016 15:21:28: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_gpu/Models/simple.dnn.2' 05/03/2016 15:21:28: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_gpu/Models/simple.dnn.2'
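Each progress line above reports its criteria as "average * sampleCount", so per-epoch totals fall out by multiplying the pair. A minimal parsing sketch in Python (the regex and helper name are illustrative, not part of CNTK):

    import re

    # Pull every "name = average * count" pair out of a CNTK progress line.
    METRIC = re.compile(r"(\w+) = ([\d.]+) \* (\d+)")

    def parse_metrics(line):
        """Return {criterion: (average, sample_count)}."""
        return {name: (float(avg), int(count))
                for name, avg, count in METRIC.findall(line)}

    line = ("05/03/2016 15:21:28: Finished Epoch[ 2 of 3]: [Training] "
            "CrossEntropyWithSoftmax = 0.16901047 * 10000; "
            "EvalErrorPrediction = 0.07510000 * 10000")
    avg, n = parse_metrics(line)["EvalErrorPrediction"]
    print(round(avg * n))  # 751 samples misclassified in epoch 2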
05/03/2016 15:21:28: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples 05/03/2016 15:21:28: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:21:28: Starting minibatch loop. 05/03/2016 15:21:28: Starting minibatch loop.
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18133401 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54124.3 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18133401 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54124.3
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13605756 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0046s; samplesPerSecond = 54884.7 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13605756 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0046s; samplesPerSecond = 54884.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14345651 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54668.7 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14345651 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54668.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.12512610 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0045s; samplesPerSecond = 54969.2 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.12512610 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17690991 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54800.5 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17690991 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54800.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17504150 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54740.5 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17504150 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54740.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.14723834 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 55224.2 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.14723834 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 55224.2
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16752893 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0045s; samplesPerSecond = 54993.4 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16752893 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0045s; samplesPerSecond = 54993.4
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10317773 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0046s; samplesPerSecond = 54800.5 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10317773 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0046s; samplesPerSecond = 54800.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20306372 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20306372 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16637036 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 55066.1 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16637036 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 55066.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.15126868 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54824.6 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.15126868 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19167224 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54884.7 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19167224 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54884.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13687085 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 55420.1 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13687085 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 55420.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.20709912 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0046s; samplesPerSecond = 54740.5 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.20709912 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0046s; samplesPerSecond = 54740.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.12918774 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0045s; samplesPerSecond = 54981.3 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.12918774 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0045s; samplesPerSecond = 54981.3
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17185107 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 55322.0 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17185107 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 55322.0
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16523242 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54908.9 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16523242 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54908.9
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14880249 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54728.5 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14880249 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54728.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12590967 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 54957.1 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12590967 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 54957.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13443018 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54872.7 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13443018 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54872.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16726147 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54836.6 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16726147 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54836.6
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22407422 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0045s; samplesPerSecond = 55041.8 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22407422 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0045s; samplesPerSecond = 55041.8
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18191553 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0045s; samplesPerSecond = 55078.2 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18191553 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0045s; samplesPerSecond = 55078.2
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.19983057 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54680.7 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.19983057 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54680.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22728223 * 250; EvalClassificationError = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 54692.6 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22728223 * 250; EvalErrorPrediction = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 54692.6
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12720459 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0045s; samplesPerSecond = 55151.1 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12720459 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0045s; samplesPerSecond = 55151.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15842871 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 54945.1 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15842871 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11558691 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0045s; samplesPerSecond = 54945.1 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11558691 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14163428 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14163428 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18560596 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0045s; samplesPerSecond = 54993.4 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18560596 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0045s; samplesPerSecond = 54993.4
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15099561 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 55078.2 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15099561 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 55078.2
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12822461 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54395.1 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12822461 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54395.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17662500 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0045s; samplesPerSecond = 55309.7 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17662500 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0045s; samplesPerSecond = 55309.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14950781 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0046s; samplesPerSecond = 54945.1 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14950781 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0046s; samplesPerSecond = 54945.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11450977 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0046s; samplesPerSecond = 54908.9 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11450977 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0046s; samplesPerSecond = 54908.9
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16386768 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0045s; samplesPerSecond = 55260.8 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16386768 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0045s; samplesPerSecond = 55260.8
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14811523 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 54981.3 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14811523 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 54981.3
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16021143 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54764.5 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16021143 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54764.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17989551 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0045s; samplesPerSecond = 55151.1 05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17989551 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0045s; samplesPerSecond = 55151.1
05/03/2016 15:21:28: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15971016 * 10000; EvalClassificationError = 0.07740000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.184406s 05/03/2016 15:21:28: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15971016 * 10000; EvalErrorPrediction = 0.07740000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.184406s
05/03/2016 15:21:28: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_gpu/Models/simple.dnn' 05/03/2016 15:21:28: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_gpu/Models/simple.dnn'
05/03/2016 15:21:29: CNTKCommandTrainEnd: Simple_Demo_Train 05/03/2016 15:21:29: CNTKCommandTrainEnd: Simple_Demo_Train
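The epoch headers also report "momentum as time constant": the number of samples over which a gradient contribution decays by 1/e under the effective momentum, given the 25-sample minibatches used here (250 samples per block of 10 minibatches). A quick check of the 237.3 figure:

    import math

    minibatch_size = 25   # 250 samples per 10 minibatches in the log above
    momentum = 0.9        # effective momentum per minibatch
    print(round(-minibatch_size / math.log(momentum), 1))  # 237.3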
@ -609,7 +609,7 @@ Post-processing network...
7 roots: 7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError() EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev() InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean() MeanOfFeatures = Mean()
PosteriorProb = Softmax() PosteriorProb = Softmax()
@ -638,7 +638,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
Validating --> B2 = LearnableParameter() : -> [2 x 1] Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1] Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1] Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1] Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1] Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
Validating --> Prior = Mean (labels) : [2 x *1] -> [2] Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2] Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
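The validated graph is the Simple2d feed-forward net: mean/variance-normalized 2-d features through two 50-unit sigmoid layers to a 2-way output. A rough numpy rendering of the same forward pass (random weights standing in for the trained parameters; note that the node named W2*H1 actually multiplies W2 by H2):

    import numpy as np

    rng = np.random.default_rng(0)
    W0, B0 = rng.standard_normal((50, 2)), np.zeros((50, 1))
    W1, B1 = rng.standard_normal((50, 50)), np.zeros((50, 1))
    W2, B2 = rng.standard_normal((2, 50)), np.zeros((2, 1))

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    def forward(features, mean, inv_std):          # features: [2 x N]
        x = (features - mean) * inv_std            # MVNormalizedFeatures
        h1 = sigmoid(W0 @ x + B0)                  # H1: [50 x N]
        h2 = sigmoid(W1 @ h1 + B1)                 # H2: [50 x N]
        return W2 @ h2 + B2                        # HLast: [2 x N]

    def error_prediction(labels, hlast):
        # ErrorPrediction: fraction of samples where argmax(HLast) != argmax(labels)
        return np.mean(hlast.argmax(axis=0) != labels.argmax(axis=0))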
@ -662,11 +662,11 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure: Memory Sharing Structure:
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] } (nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
0x1efcc08: {[B2 Value[2 x 1]] } 0x1efcc08: {[B2 Value[2 x 1]] }
0x1efd8c8: {[W0 Value[50 x 2]] } 0x1efd8c8: {[W0 Value[50 x 2]] }
0x1efee68: {[InvStdOfFeatures Value[2]] } 0x1efee68: {[InvStdOfFeatures Value[2]] }
0x2b337e8: {[EvalClassificationError Value[1]] } 0x2b337e8: {[EvalErrorPrediction Value[1]] }
0x2b33948: {[CrossEntropyWithSoftmax Value[1]] } 0x2b33948: {[CrossEntropyWithSoftmax Value[1]] }
0x2b33f08: {[LogOfPrior Value[2]] } 0x2b33f08: {[LogOfPrior Value[2]] }
0x31808e8: {[W2 Value[2 x 50]] } 0x31808e8: {[W2 Value[2 x 50]] }
@ -687,7 +687,7 @@ Memory Sharing Structure:
0x7273058: {[W2*H1 Value[2 x 1 x *1]] } 0x7273058: {[W2*H1 Value[2 x 1 x *1]] }
0x7273218: {[HLast Value[2 x 1 x *1]] } 0x7273218: {[HLast Value[2 x 1 x *1]] }
05/03/2016 15:21:29: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05970149 * 603; CrossEntropyWithSoftmax = 0.13093129 * 603; perplexity = 1.13988946 05/03/2016 15:21:29: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05970149 * 603; CrossEntropyWithSoftmax = 0.13093129 * 603; perplexity = 1.13988946
05/03/2016 15:21:29: Action "test" complete. 05/03/2016 15:21:29: Action "test" complete.
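The perplexity in the final results line is simply exp of the average cross entropy:

    import math

    print(math.exp(0.13093129))  # 1.13988946..., the reported perplexity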
@ -703,7 +703,7 @@ Post-processing network...
8 roots: 8 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError() EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev() InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean() MeanOfFeatures = Mean()
PosteriorProb = Softmax() PosteriorProb = Softmax()
@ -733,7 +733,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *2] -> [2 x 1 x *2]
Validating --> B2 = LearnableParameter() : -> [2 x 1] Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *2], [2 x 1] -> [2 x 1 x *2] Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *2], [2 x 1] -> [2 x 1 x *2]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1] Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1] Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *2] -> [2 x 1 x *2] Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *2] -> [2 x 1 x *2]
Validating --> Prior = Mean (labels) : [2 x *2] -> [2] Validating --> Prior = Mean (labels) : [2 x *2] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2] Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@ -756,7 +756,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure: Memory Sharing Structure:
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalClassificationError Gradient[1]] [EvalClassificationError Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] } (nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalErrorPrediction Gradient[1]] [EvalErrorPrediction Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
0x1efcef8: {[features Value[2 x *2]] } 0x1efcef8: {[features Value[2 x *2]] }
0x1efe2c8: {[labels Value[2 x *2]] } 0x1efe2c8: {[labels Value[2 x *2]] }
0x1eff188: {[PosteriorProb Value[2 x 1 x *2]] } 0x1eff188: {[PosteriorProb Value[2 x 1 x *2]] }

View file

@ -56,7 +56,7 @@ Simple_Demo_Train = [
SimpleNetworkBuilder = [ SimpleNetworkBuilder = [
layerSizes = 2:50*2:2 layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax" trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError" evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid" layerTypes = "Sigmoid"
initValueScale = 1.0 initValueScale = 1.0
applyMeanVarNorm = true applyMeanVarNorm = true
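In the SimpleNetworkBuilder block, layerSizes = 2:50*2:2 is shorthand for a 2-input network with two 50-unit hidden layers and 2 outputs; the n*k form repeats dimension n k times. A small sketch of that expansion (the parser itself is hypothetical):

    def expand_layer_sizes(spec):
        """Expand layerSizes shorthand, e.g. "2:50*2:2" -> [2, 50, 50, 2]."""
        sizes = []
        for part in spec.split(":"):
            if "*" in part:
                dim, repeat = part.split("*")
                sizes.extend([int(dim)] * int(repeat))
            else:
                sizes.append(int(part))
        return sizes

    assert expand_layer_sizes("2:50*2:2") == [2, 50, 50, 2]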
@ -155,7 +155,7 @@ Simple_Demo_Train = [
SimpleNetworkBuilder = [ SimpleNetworkBuilder = [
layerSizes = 2:50*2:2 layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax" trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError" evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid" layerTypes = "Sigmoid"
initValueScale = 1.0 initValueScale = 1.0
applyMeanVarNorm = true applyMeanVarNorm = true
@ -298,7 +298,7 @@ configparameters: Simple.cntk:Simple_Demo_Train=[
SimpleNetworkBuilder = [ SimpleNetworkBuilder = [
layerSizes = 2:50*2:2 layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax" trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError" evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid" layerTypes = "Sigmoid"
initValueScale = 1.0 initValueScale = 1.0
applyMeanVarNorm = true applyMeanVarNorm = true
@ -353,7 +353,7 @@ Post-processing network...
7 roots: 7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax() CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError() EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev() InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean() MeanOfFeatures = Mean()
PosteriorProb = Softmax() PosteriorProb = Softmax()
@ -382,7 +382,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [2 x 1] Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *] Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1] Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1] Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *] Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
Validating --> Prior = Mean (labels) : [2 x *] -> [2] Validating --> Prior = Mean (labels) : [2 x *] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2] Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@ -406,14 +406,14 @@ Post-processing network complete.
05/03/2016 13:12:46: Evaluation criterion node(s): 05/03/2016 13:12:46: Evaluation criterion node(s):
05/03/2016 13:12:46: EvalClassificationError = ClassificationError 05/03/2016 13:12:46: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation. Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure: Memory Sharing Structure:
0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] } 0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
000000702B410E90: {[features Value[2 x *]] } 000000702B410E90: {[features Value[2 x *]] }
000000702B44E0C0: {[W0 Value[50 x 2]] } 000000702B44E0C0: {[W0 Value[50 x 2]] }
000000702B4D76F0: {[H2 Value[50 x 1 x *]] [W1*H1 Gradient[50 x 1 x *]] } 000000702B4D76F0: {[H2 Value[50 x 1 x *]] [W1*H1 Gradient[50 x 1 x *]] }
@ -428,7 +428,7 @@ Memory Sharing Structure:
000000702B4D8690: {[B0 Gradient[50 x 1]] [H1 Gradient[50 x 1 x *]] [W1*H1+B1 Gradient[50 x 1 x *]] [W2*H1 Value[2 x 1 x *]] } 000000702B4D8690: {[B0 Gradient[50 x 1]] [H1 Gradient[50 x 1 x *]] [W1*H1+B1 Gradient[50 x 1 x *]] [W2*H1 Value[2 x 1 x *]] }
000000702B4D8730: {[HLast Value[2 x 1 x *]] [W2 Gradient[2 x 50]] } 000000702B4D8730: {[HLast Value[2 x 1 x *]] [W2 Gradient[2 x 50]] }
000000702B4D89B0: {[CrossEntropyWithSoftmax Value[1]] } 000000702B4D89B0: {[CrossEntropyWithSoftmax Value[1]] }
000000702B4D8AF0: {[EvalClassificationError Value[1]] } 000000702B4D8AF0: {[EvalErrorPrediction Value[1]] }
000000702B4D8B90: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] } 000000702B4D8B90: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
000000702B4D8F50: {[B2 Gradient[2 x 1]] } 000000702B4D8F50: {[B2 Gradient[2 x 1]] }
000000702B4D91D0: {[ScaledLogLikelihood Value[2 x 1 x *]] } 000000702B4D91D0: {[ScaledLogLikelihood Value[2 x 1 x *]] }
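Matrices listed under one address in these dumps share a single allocation (e.g. H2 Value with W1*H1 Gradient above), presumably because their lifetimes never overlap; the all-zero address groups buffers that are never materialized at all. A toy sketch of that interval-sharing idea (the lifetimes are invented for illustration):

    def assign_buffers(lifetimes):
        """Greedy slot assignment: buffers with disjoint (first, last) use
        intervals may occupy the same slot. Returns {name: slot_id}."""
        slots, assignment = [], {}   # slots[i] = last use of current tenant
        for name, (start, end) in sorted(lifetimes.items(), key=lambda kv: kv[1][0]):
            for i, free_at in enumerate(slots):
                if free_at < start:          # previous tenant is dead
                    slots[i] = end
                    assignment[name] = i
                    break
            else:
                slots.append(end)
                assignment[name] = len(slots) - 1
        return assignment

    lifetimes = {"H2 Value": (3, 5), "HLast Value": (4, 8), "W1*H1 Gradient": (6, 7)}
    print(assign_buffers(lifetimes))  # H2 Value and W1*H1 Gradient share slot 0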
@ -456,139 +456,139 @@ Memory Sharing Structure:
05/03/2016 13:12:47: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples 05/03/2016 13:12:47: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 13:12:47: Starting minibatch loop. 05/03/2016 13:12:47: Starting minibatch loop.
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70511987 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0327s; samplesPerSecond = 7657.0 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70511987 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0327s; samplesPerSecond = 7657.0
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69754895 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9726.5 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69754895 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9726.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71056921 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0248s; samplesPerSecond = 10096.1 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71056921 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0248s; samplesPerSecond = 10096.1
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72951074 * 250; EvalClassificationError = 0.56000000 * 250; time = 0.0245s; samplesPerSecond = 10210.3 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72951074 * 250; EvalErrorPrediction = 0.56000000 * 250; time = 0.0245s; samplesPerSecond = 10210.3
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70946655 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0249s; samplesPerSecond = 10032.5 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70946655 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0249s; samplesPerSecond = 10032.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72656787 * 250; EvalClassificationError = 0.54400000 * 250; time = 0.0248s; samplesPerSecond = 10065.2 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72656787 * 250; EvalErrorPrediction = 0.54400000 * 250; time = 0.0248s; samplesPerSecond = 10065.2
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69337402 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0256s; samplesPerSecond = 9766.8 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69337402 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0256s; samplesPerSecond = 9766.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73605176 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0259s; samplesPerSecond = 9662.6 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73605176 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0259s; samplesPerSecond = 9662.6
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71453076 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0239s; samplesPerSecond = 10469.0 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71453076 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0239s; samplesPerSecond = 10469.0
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75191992 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0255s; samplesPerSecond = 9802.0 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75191992 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0255s; samplesPerSecond = 9802.0
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75975146 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0248s; samplesPerSecond = 10100.6 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75975146 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0248s; samplesPerSecond = 10100.6
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73172168 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0255s; samplesPerSecond = 9808.5 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73172168 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0255s; samplesPerSecond = 9808.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76840820 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0261s; samplesPerSecond = 9593.2 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76840820 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0261s; samplesPerSecond = 9593.2
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70464746 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0255s; samplesPerSecond = 9807.4 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70464746 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0255s; samplesPerSecond = 9807.4
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70557227 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0242s; samplesPerSecond = 10340.4 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70557227 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0242s; samplesPerSecond = 10340.4
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72711816 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0249s; samplesPerSecond = 10049.8 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72711816 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0249s; samplesPerSecond = 10049.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70076660 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0247s; samplesPerSecond = 10117.4 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70076660 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0247s; samplesPerSecond = 10117.4
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69409766 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0254s; samplesPerSecond = 9834.0 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69409766 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0254s; samplesPerSecond = 9834.0
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69139941 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0243s; samplesPerSecond = 10275.8 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69139941 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0243s; samplesPerSecond = 10275.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73361621 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0255s; samplesPerSecond = 9802.8 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73361621 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0255s; samplesPerSecond = 9802.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72225879 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0246s; samplesPerSecond = 10146.5 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72225879 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0246s; samplesPerSecond = 10146.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70356348 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0243s; samplesPerSecond = 10286.8 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70356348 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0243s; samplesPerSecond = 10286.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69928613 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0252s; samplesPerSecond = 9909.2 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69928613 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0252s; samplesPerSecond = 9909.2
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72360938 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0244s; samplesPerSecond = 10227.0 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72360938 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0244s; samplesPerSecond = 10227.0
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69871875 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0244s; samplesPerSecond = 10243.8 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69871875 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0244s; samplesPerSecond = 10243.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69114844 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0248s; samplesPerSecond = 10081.5 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69114844 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0248s; samplesPerSecond = 10081.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68648047 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0254s; samplesPerSecond = 9844.5 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68648047 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0254s; samplesPerSecond = 9844.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69657227 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0258s; samplesPerSecond = 9679.8 05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69657227 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0258s; samplesPerSecond = 9679.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71585547 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0255s; samplesPerSecond = 9798.2
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69730664 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0260s; samplesPerSecond = 9609.1
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70432422 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0265s; samplesPerSecond = 9431.1
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.69991797 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9722.7
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.68696875 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0259s; samplesPerSecond = 9647.3
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.67331445 * 250; EvalErrorPrediction = 0.37200000 * 250; time = 0.0267s; samplesPerSecond = 9364.7
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65711328 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0258s; samplesPerSecond = 9700.1
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64534375 * 250; EvalErrorPrediction = 0.44800000 * 250; time = 0.0260s; samplesPerSecond = 9608.0
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.61021875 * 250; EvalErrorPrediction = 0.36400000 * 250; time = 0.0263s; samplesPerSecond = 9515.5
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.54191016 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0229s; samplesPerSecond = 10907.5
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.45624414 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0239s; samplesPerSecond = 10479.5
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37636133 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0229s; samplesPerSecond = 10917.0
05/03/2016 13:12:48: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68695688 * 10000; EvalErrorPrediction = 0.45550000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=1.01718s
05/03/2016 13:12:48: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503141245.787579\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_cpu/Models/simple.dnn.1'
05/03/2016 13:12:48: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 13:12:48: Starting minibatch loop.
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.28579105 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0228s; samplesPerSecond = 10943.3
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.27768619 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0230s; samplesPerSecond = 10860.1
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.23309790 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0223s; samplesPerSecond = 11187.2
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.20937585 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0221s; samplesPerSecond = 11327.1
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.20192059 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0225s; samplesPerSecond = 11116.5
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.21303992 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0232s; samplesPerSecond = 10762.9
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.17823340 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0247s; samplesPerSecond = 10120.6
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.18892688 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0231s; samplesPerSecond = 10816.4
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.14161328 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0225s; samplesPerSecond = 11100.8
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.15813574 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0226s; samplesPerSecond = 11077.1
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.21082446 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0233s; samplesPerSecond = 10728.2
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.16117041 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0229s; samplesPerSecond = 10928.0
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15665234 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0223s; samplesPerSecond = 11195.2
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13067773 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0226s; samplesPerSecond = 11047.3
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16602710 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0212s; samplesPerSecond = 11796.9
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.14975708 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0215s; samplesPerSecond = 11641.4
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22351709 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0214s; samplesPerSecond = 11708.5
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18010474 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0207s; samplesPerSecond = 12085.5
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15341577 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0207s; samplesPerSecond = 12072.6
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17195337 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0209s; samplesPerSecond = 11976.6
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15546069 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0217s; samplesPerSecond = 11534.6
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16008325 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0214s; samplesPerSecond = 11689.3
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15944043 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0209s; samplesPerSecond = 11981.2
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15336865 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0207s; samplesPerSecond = 12102.4
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14822266 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0212s; samplesPerSecond = 11766.4
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14999512 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0211s; samplesPerSecond = 11833.2
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15481982 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0208s; samplesPerSecond = 11992.7
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17656738 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0204s; samplesPerSecond = 12229.1
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22373242 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0213s; samplesPerSecond = 11738.7
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16403760 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0211s; samplesPerSecond = 11856.8
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17322168 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0211s; samplesPerSecond = 11868.0
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13165430 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0205s; samplesPerSecond = 12202.3
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14016992 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0208s; samplesPerSecond = 11993.9
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18369678 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0214s; samplesPerSecond = 11657.7
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15161035 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0215s; samplesPerSecond = 11612.8
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.18919824 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0215s; samplesPerSecond = 11632.8
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17373975 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0212s; samplesPerSecond = 11818.1
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15033740 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0208s; samplesPerSecond = 12036.6
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12107568 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0207s; samplesPerSecond = 12075.5
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15386328 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0227s; samplesPerSecond = 10997.7
05/03/2016 13:12:48: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.17515541 * 10000; EvalErrorPrediction = 0.07440000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.87149s
05/03/2016 13:12:48: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503141245.787579\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_cpu/Models/simple.dnn.2'
05/03/2016 13:12:48: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 13:12:48: Starting minibatch loop.
05/03/2016 13:12:48: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10671188 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0217s; samplesPerSecond = 11511.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17609265 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0205s; samplesPerSecond = 12183.8
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14152701 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0208s; samplesPerSecond = 12001.9
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16348053 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0213s; samplesPerSecond = 11748.1
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11764551 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0219s; samplesPerSecond = 11435.4
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16246954 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0212s; samplesPerSecond = 11811.4
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16140149 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0207s; samplesPerSecond = 12078.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19747632 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0202s; samplesPerSecond = 12391.0
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20041309 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0214s; samplesPerSecond = 11659.9
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13657080 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0208s; samplesPerSecond = 12033.7
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20124377 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0203s; samplesPerSecond = 12293.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17898120 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0206s; samplesPerSecond = 12144.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16037830 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0232s; samplesPerSecond = 10779.1
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16276050 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0214s; samplesPerSecond = 11704.7
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19882275 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0218s; samplesPerSecond = 11454.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10263354 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0208s; samplesPerSecond = 12041.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17038770 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0213s; samplesPerSecond = 11725.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16624731 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0209s; samplesPerSecond = 11958.3
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12664160 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0213s; samplesPerSecond = 11723.3
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11944995 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0213s; samplesPerSecond = 11733.8
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12949756 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0208s; samplesPerSecond = 11996.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18147778 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0222s; samplesPerSecond = 11242.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13172412 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0233s; samplesPerSecond = 10719.0
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19600269 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0238s; samplesPerSecond = 10521.0
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15840479 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0226s; samplesPerSecond = 11084.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11888281 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0225s; samplesPerSecond = 11129.9
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13710742 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0222s; samplesPerSecond = 11251.1
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20026318 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0233s; samplesPerSecond = 10730.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.18824951 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0223s; samplesPerSecond = 11227.9
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16653223 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0225s; samplesPerSecond = 11096.3
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.11935254 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0229s; samplesPerSecond = 10918.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16085400 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0225s; samplesPerSecond = 11132.9
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.16112646 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0219s; samplesPerSecond = 11439.6
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12345313 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0229s; samplesPerSecond = 10904.6
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13502686 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0226s; samplesPerSecond = 11075.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20874756 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0224s; samplesPerSecond = 11185.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16650537 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0227s; samplesPerSecond = 11009.3
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14995752 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0206s; samplesPerSecond = 12134.7
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16497070 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0209s; samplesPerSecond = 11953.7
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16843018 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0210s; samplesPerSecond = 11912.1
05/03/2016 13:12:49: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15794755 * 10000; EvalErrorPrediction = 0.07480000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.871499s
05/03/2016 13:12:49: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503141245.787579\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_cpu/Models/simple.dnn'
05/03/2016 13:12:49: CNTKCommandTrainEnd: Simple_Demo_Train
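
For readers decoding these baselines: each criterion line reports mean * sampleCount, samplesPerSecond is just samples divided by the block's wall-clock time, and the "momentum as time constant" figure follows from the effective momentum. A minimal Python check, assuming the minibatch size of 25 implied by the 250-sample blocks of 10 minibatches (names here are illustrative):

    import math, struct

    mb_size, momentum = 25, 0.9   # mb_size is an assumption inferred from the log above

    # "momentum as time constant": the number of samples over which one gradient's
    # contribution decays by 1/e, i.e. momentum ** (samples / mb_size) == 1/e.
    tau = -mb_size / math.log(momentum)
    print(round(tau, 1))              # 237.3, as logged

    # Throughput is samples over wall-clock time for the 10-minibatch block:
    print(round(250 / 0.0229, 1))     # 10917.0, cf. the Minibatch[ 391- 400] line

    # Criterion lines are mean * count, so the epoch-1 summed cross entropy is:
    print(0.68695688 * 10000)         # 6869.5688

    # learningRatePerSample is held as float32, which is why 0.008 prints
    # as 0.0080000004 in the epoch summaries:
    print(struct.unpack('f', struct.pack('f', 0.008))[0])   # 0.00800000037997961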
@@ -606,7 +606,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
-EvalClassificationError = ClassificationError()
+EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -635,7 +635,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
-Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
+Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
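
These Validating lines are shape inference: `*1` marks the variable-length dynamic (batch/sequence) axis, Times contracts the shared 50-dimension, and Plus broadcasts the [2 x 1] bias across the `*` axis. A small numpy sketch of the same propagation (the length 7 chosen for the `*` axis is arbitrary):

    import numpy as np

    n = 7                                  # arbitrary stand-in for the * axis
    W2 = np.zeros((2, 50))                 # [2 x 50]
    H2 = np.zeros((50, 1, n))              # [50 x 1 x *]
    B2 = np.zeros((2, 1))                  # [2 x 1]

    W2_H1 = np.einsum('ij,jkn->ikn', W2, H2)   # Times: contract the 50-axis -> [2 x 1 x *]
    HLast = W2_H1 + B2[:, :, None]             # Plus: broadcast the bias over the * axis
    print(W2_H1.shape, HLast.shape)            # (2, 1, 7) (2, 1, 7)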
@@ -659,7 +659,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
-0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
+0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
00000070343C5200: {[InvStdOfFeatures Value[2]] }
00000070343C5340: {[Prior Value[2]] }
00000070343C53E0: {[W0 Value[50 x 2]] }
@@ -671,7 +671,7 @@ Memory Sharing Structure:
000000703442D030: {[HLast Value[2 x 1 x *1]] }
000000703442D0D0: {[W0*features Value[50 x *1]] }
000000703442D170: {[W1*H1+B1 Value[50 x 1 x *1]] }
-000000703442D2B0: {[EvalClassificationError Value[1]] }
+000000703442D2B0: {[EvalErrorPrediction Value[1]] }
000000703442D530: {[CrossEntropyWithSoftmax Value[1]] }
000000703442D5D0: {[W2 Value[2 x 50]] }
000000703442D670: {[LogOfPrior Value[2]] }
@@ -684,7 +684,7 @@ Memory Sharing Structure:
0000007034432340: {[B0 Value[50 x 1]] }
0000007034432480: {[B2 Value[2 x 1]] }
-05/03/2016 13:12:50: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12474995 * 603; perplexity = 1.13286515
+05/03/2016 13:12:50: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12474995 * 603; perplexity = 1.13286515
05/03/2016 13:12:50: Action "test" complete.
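
Two consistency checks one can run on that Final Results line: perplexity is the exponential of the mean cross entropy, and the error rate times the sample count recovers a whole number of misclassified samples (illustrative Python):

    import math
    print(math.exp(0.12474995))     # ~1.1328651, the reported perplexity
    print(round(0.05638474 * 603))  # 34, i.e. 34 of the 603 test samples misclassified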
@@ -700,7 +700,7 @@ Post-processing network...
8 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
-EvalClassificationError = ClassificationError()
+EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -730,7 +730,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *2] -> [2 x 1 x *2]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *2], [2 x 1] -> [2 x 1 x *2]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
-Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
+Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *2] -> [2 x 1 x *2]
Validating --> Prior = Mean (labels) : [2 x *2] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -753,7 +753,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
-0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalClassificationError Gradient[1]] [EvalClassificationError Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
+0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalErrorPrediction Gradient[1]] [EvalErrorPrediction Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
000000702E3275E0: {[H2 Value[50 x 1 x *2]] }
000000702E327680: {[W2*H1 Value[2 x 1 x *2]] }
000000702E3277C0: {[LogOfPrior Value[2]] }


@@ -56,7 +56,7 @@ Simple_Demo_Train = [
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
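
This SimpleNetworkBuilder block is the whole model definition: layerSizes = 2:50*2:2 expands to a 2-dimensional input, two Sigmoid hidden layers of width 50, and a 2-class output, with mean/variance normalization applied to the features first, and with the CrossEntropyWithSoftmax and ErrorPrediction (classification-error) nodes validated earlier as criteria. A rough numpy sketch of the resulting forward pass (the random initialization here is a placeholder, not CNTK's actual scheme):

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    # Shapes match the validation output: W0 [50 x 2], W1 [50 x 50], W2 [2 x 50].
    rng = np.random.default_rng(0)
    W0, B0 = rng.standard_normal((50, 2)), np.zeros((50, 1))
    W1, B1 = rng.standard_normal((50, 50)), np.zeros((50, 1))
    W2, B2 = rng.standard_normal((2, 50)), np.zeros((2, 1))

    def forward(features, mean, inv_std):
        x = (features - mean) * inv_std   # applyMeanVarNorm = true
        h1 = sigmoid(W0 @ x + B0)         # first 50-unit Sigmoid layer
        h2 = sigmoid(W1 @ h1 + B1)        # second 50-unit Sigmoid layer
        return W2 @ h2 + B2               # HLast: pre-softmax class scores

    # trainingCriterion "CrossEntropyWithSoftmax": -log softmax(HLast)[label];
    # evalCriterion "ErrorPrediction": mean of (argmax(HLast, axis=0) != label).
    scores = forward(rng.standard_normal((2, 5)), 0.0, 1.0)
    print(scores.shape)                   # (2, 5): 2 classes x 5 samples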
@@ -155,7 +155,7 @@ Simple_Demo_Train = [
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -298,7 +298,7 @@ configparameters: Simple.cntk:Simple_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -354,7 +354,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
-EvalClassificationError = ClassificationError()
+EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -383,7 +383,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
-Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
+Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -407,14 +407,14 @@ Post-processing network complete.
05/03/2016 13:01:59: Evaluation criterion node(s):
-05/03/2016 13:01:59: EvalClassificationError = ClassificationError
+05/03/2016 13:01:59: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
-0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
+0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
000000501A590FF0: {[W2 Value[2 x 50]] }
000000501A591090: {[W0 Value[50 x 2]] }
000000501A5919F0: {[B1 Value[50 x 1]] }
@@ -427,7 +427,7 @@ Memory Sharing Structure:
000000501A5A1180: {[ScaledLogLikelihood Value[2 x 1 x *]] }
000000501A5A1220: {[B0 Gradient[50 x 1]] [H1 Gradient[50 x 1 x *]] [W1*H1+B1 Gradient[50 x 1 x *]] [W2*H1 Value[2 x 1 x *]] }
000000501A5A17C0: {[W0 Gradient[50 x 2]] [W0*features+B0 Value[50 x 1 x *]] }
-000000501A5A1900: {[EvalClassificationError Value[1]] }
+000000501A5A1900: {[EvalErrorPrediction Value[1]] }
000000501A5A19A0: {[W0*features Value[50 x *]] }
000000501A5A1A40: {[W2*H1 Gradient[2 x 1 x *]] }
000000501A5A1F40: {[MVNormalizedFeatures Value[2 x *]] }
@@ -457,139 +457,139 @@ Memory Sharing Structure:
05/03/2016 13:01:59: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 13:01:59: Starting minibatch loop.
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70650452 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0123s; samplesPerSecond = 20247.8
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69701831 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0112s; samplesPerSecond = 22393.4
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71089587 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0126s; samplesPerSecond = 19907.6
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72980273 * 250; EvalErrorPrediction = 0.56000000 * 250; time = 0.0113s; samplesPerSecond = 22042.0
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70902783 * 250; EvalErrorPrediction = 0.52800000 * 250; time = 0.0131s; samplesPerSecond = 19088.3
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72657300 * 250; EvalErrorPrediction = 0.54400000 * 250; time = 0.0138s; samplesPerSecond = 18059.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69319678 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0148s; samplesPerSecond = 16917.0
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73563477 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0164s; samplesPerSecond = 15236.5
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71463281 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0123s; samplesPerSecond = 20321.9
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75213428 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0167s; samplesPerSecond = 14944.1
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75931445 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0131s; samplesPerSecond = 19105.8
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73075293 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0132s; samplesPerSecond = 18886.5
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76701953 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0128s; samplesPerSecond = 19574.1
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70451270 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0128s; samplesPerSecond = 19467.4
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70539941 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0143s; samplesPerSecond = 17444.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72700293 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0123s; samplesPerSecond = 20391.5
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70096191 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0143s; samplesPerSecond = 17465.4
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69437305 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0117s; samplesPerSecond = 21367.5
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69161621 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0137s; samplesPerSecond = 18200.3
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73388281 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0115s; samplesPerSecond = 21782.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72255664 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0127s; samplesPerSecond = 19745.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70414551 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0131s; samplesPerSecond = 19017.2
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69976758 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0137s; samplesPerSecond = 18191.1
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72419141 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0143s; samplesPerSecond = 17444.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69943945 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0109s; samplesPerSecond = 22891.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69206445 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0133s; samplesPerSecond = 18739.2
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68771680 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0130s; samplesPerSecond = 19291.6
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69878516 * 250; EvalErrorPrediction = 0.44000000 * 250; time = 0.0130s; samplesPerSecond = 19230.8
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71889844 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0118s; samplesPerSecond = 21168.5
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.70086523 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0128s; samplesPerSecond = 19577.1
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70878320 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0129s; samplesPerSecond = 19432.6 05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70878320 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0129s; samplesPerSecond = 19432.6
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.70674414 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0126s; samplesPerSecond = 19767.5 05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.70674414 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0126s; samplesPerSecond = 19767.5
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69707422 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0121s; samplesPerSecond = 20736.6 05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69707422 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0121s; samplesPerSecond = 20736.6
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68588281 * 250; EvalClassificationError = 0.40800000 * 250; time = 0.0124s; samplesPerSecond = 20109.4 05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68588281 * 250; EvalErrorPrediction = 0.40800000 * 250; time = 0.0124s; samplesPerSecond = 20109.4
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.67734766 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0127s; samplesPerSecond = 19727.0 05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.67734766 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0127s; samplesPerSecond = 19727.0
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.67958008 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0127s; samplesPerSecond = 19615.5 05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.67958008 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0127s; samplesPerSecond = 19615.5
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.66424805 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0117s; samplesPerSecond = 21292.9 05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.66424805 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0117s; samplesPerSecond = 21292.9
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.62412500 * 250; EvalClassificationError = 0.20400000 * 250; time = 0.0127s; samplesPerSecond = 19624.8 05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.62412500 * 250; EvalErrorPrediction = 0.20400000 * 250; time = 0.0127s; samplesPerSecond = 19624.8
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.58007422 * 250; EvalClassificationError = 0.16000000 * 250; time = 0.0130s; samplesPerSecond = 19157.1 05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.58007422 * 250; EvalErrorPrediction = 0.16000000 * 250; time = 0.0130s; samplesPerSecond = 19157.1
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.52764648 * 250; EvalClassificationError = 0.19200000 * 250; time = 0.0143s; samplesPerSecond = 17521.7 05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.52764648 * 250; EvalErrorPrediction = 0.19200000 * 250; time = 0.0143s; samplesPerSecond = 17521.7
05/03/2016 13:02:00: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.69975483 * 10000; EvalClassificationError = 0.46850000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.526194s 05/03/2016 13:02:00: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.69975483 * 10000; EvalErrorPrediction = 0.46850000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.526194s
05/03/2016 13:02:00: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503140157.802427\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_gpu/Models/simple.dnn.1' 05/03/2016 13:02:00: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503140157.802427\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_gpu/Models/simple.dnn.1'
05/03/2016 13:02:00: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples 05/03/2016 13:02:00: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 13:02:00: Starting minibatch loop. 05/03/2016 13:02:00: Starting minibatch loop.
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.44832977 * 250; EvalErrorPrediction = 0.15200000 * 250; time = 0.0124s; samplesPerSecond = 20205.3
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.40085291 * 250; EvalErrorPrediction = 0.12400000 * 250; time = 0.0142s; samplesPerSecond = 17631.7
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.33487201 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0129s; samplesPerSecond = 19405.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.29081885 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0125s; samplesPerSecond = 20016.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.26279236 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0118s; samplesPerSecond = 21188.2
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.25220630 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0138s; samplesPerSecond = 18158.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.20988293 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0129s; samplesPerSecond = 19447.7
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.21577441 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0148s; samplesPerSecond = 16846.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.16622900 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0157s; samplesPerSecond = 15967.3
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.17637866 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0144s; samplesPerSecond = 17315.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.22185278 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0123s; samplesPerSecond = 20366.6
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17055811 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0151s; samplesPerSecond = 16564.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16481055 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0140s; samplesPerSecond = 17910.9
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13871704 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0156s; samplesPerSecond = 16005.1
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16922363 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0143s; samplesPerSecond = 17454.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15403345 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0135s; samplesPerSecond = 18485.7
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22255859 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0108s; samplesPerSecond = 23079.8
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18146851 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0133s; samplesPerSecond = 18843.7
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15611523 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0131s; samplesPerSecond = 19081.1
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17320215 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0137s; samplesPerSecond = 18192.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15727930 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0117s; samplesPerSecond = 21404.1
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16195410 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0119s; samplesPerSecond = 21088.1
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.16121338 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0128s; samplesPerSecond = 19546.5
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15427100 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0125s; samplesPerSecond = 20011.2
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14844775 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0141s; samplesPerSecond = 17743.1
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15055713 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0108s; samplesPerSecond = 23067.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15467627 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0132s; samplesPerSecond = 18965.3
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17615869 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0140s; samplesPerSecond = 17872.5
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22356104 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0121s; samplesPerSecond = 20650.9
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16514209 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0109s; samplesPerSecond = 22946.3
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17355859 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0129s; samplesPerSecond = 19372.3
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13117578 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0138s; samplesPerSecond = 18151.5
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13956104 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0121s; samplesPerSecond = 20743.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18397363 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0105s; samplesPerSecond = 23741.7
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15222656 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0126s; samplesPerSecond = 19909.2
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.18856396 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0145s; samplesPerSecond = 17207.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17513330 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0130s; samplesPerSecond = 19199.8
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15008252 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0108s; samplesPerSecond = 23043.6
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12125342 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0127s; samplesPerSecond = 19668.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15408496 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0141s; samplesPerSecond = 17788.5
05/03/2016 13:02:00: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.19333879 * 10000; EvalErrorPrediction = 0.07700000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.525411s
05/03/2016 13:02:00: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503140157.802427\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_gpu/Models/simple.dnn.2'
05/03/2016 13:02:00: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 13:02:00: Starting minibatch loop.
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10746781 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0126s; samplesPerSecond = 19806.7
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17648278 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0122s; samplesPerSecond = 20429.8
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14106094 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0126s; samplesPerSecond = 19838.1
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16348077 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0127s; samplesPerSecond = 19745.7
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11767151 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0110s; samplesPerSecond = 22787.3
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16217944 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0137s; samplesPerSecond = 18292.2
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16171204 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0147s; samplesPerSecond = 16977.9
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19844067 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0130s; samplesPerSecond = 19285.7
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.19984509 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0116s; samplesPerSecond = 21585.2
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13727051 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0133s; samplesPerSecond = 18839.5
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20126648 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0150s; samplesPerSecond = 16709.0
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17913672 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0138s; samplesPerSecond = 18066.2
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15983582 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0138s; samplesPerSecond = 18131.7
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16260010 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0126s; samplesPerSecond = 19798.8
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19813428 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0122s; samplesPerSecond = 20453.2
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10295117 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0124s; samplesPerSecond = 20091.6
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17117065 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0127s; samplesPerSecond = 19762.8
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16661938 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0127s; samplesPerSecond = 19620.2
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12718042 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0108s; samplesPerSecond = 23156.7
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11923853 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0139s; samplesPerSecond = 17989.5
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12890332 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0129s; samplesPerSecond = 19340.9
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18205469 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0124s; samplesPerSecond = 20182.4
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13154199 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0111s; samplesPerSecond = 22599.9
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19668359 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0139s; samplesPerSecond = 17922.4
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15817578 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0126s; samplesPerSecond = 19915.6
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11871240 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0136s; samplesPerSecond = 18378.3
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13730908 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0107s; samplesPerSecond = 23384.2
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20024854 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0134s; samplesPerSecond = 18719.6
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.18850244 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0131s; samplesPerSecond = 19151.2
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16640479 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0108s; samplesPerSecond = 23086.2
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.11872168 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0107s; samplesPerSecond = 23347.0
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16090430 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0127s; samplesPerSecond = 19730.1
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.16162939 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0137s; samplesPerSecond = 18205.7
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12408594 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0109s; samplesPerSecond = 22839.4
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13544434 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0126s; samplesPerSecond = 19893.4
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20890771 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0129s; samplesPerSecond = 19366.3
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16674365 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0146s; samplesPerSecond = 17116.3
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15033398 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0131s; samplesPerSecond = 19152.7
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16547705 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0120s; samplesPerSecond = 20752.1
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16792480 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0129s; samplesPerSecond = 19450.7
05/03/2016 13:02:01: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15806136 * 10000; EvalErrorPrediction = 0.07470000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.511151s
05/03/2016 13:02:01: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503140157.802427\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_gpu/Models/simple.dnn'
05/03/2016 13:02:01: CNTKCommandTrainEnd: Simple_Demo_Train
@@ -607,7 +607,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -636,7 +636,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -660,7 +660,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
000000501A591090: {[W0*features+B0 Value[50 x 1 x *1]] }
000000501A591130: {[W1*H1 Value[50 x 1 x *1]] }
000000501A5916D0: {[W1*H1+B1 Value[50 x 1 x *1]] }
@@ -672,7 +672,7 @@ Memory Sharing Structure:
000000501A592850: {[LogOfPrior Value[2]] }
000000501A5928F0: {[H2 Value[50 x 1 x *1]] }
000000501A592B70: {[W2 Value[2 x 50]] }
000000501A592D50: {[EvalErrorPrediction Value[1]] }
000000501A592DF0: {[CrossEntropyWithSoftmax Value[1]] }
0000005024E60C70: {[W1 Value[50 x 50]] }
0000005024E613F0: {[W0 Value[50 x 2]] }
@@ -685,7 +685,7 @@ Memory Sharing Structure:
0000005024E62430: {[features Value[2 x *1]] }
0000005024E624D0: {[B1 Value[50 x 1]] }
05/03/2016 13:02:01: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12740351 * 603; perplexity = 1.13587526
05/03/2016 13:02:01: Action "test" complete.
@@ -701,7 +701,7 @@ Post-processing network...
8 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -731,7 +731,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *2] -> [2 x 1 x *2]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *2], [2 x 1] -> [2 x 1 x *2]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *2] -> [2 x 1 x *2]
Validating --> Prior = Mean (labels) : [2 x *2] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -754,7 +754,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalErrorPrediction Gradient[1]] [EvalErrorPrediction Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
000000501A5914F0: {[InvStdOfFeatures Value[2]] }
000000501A591590: {[MeanOfFeatures Value[2]] }
000000501A5916D0: {[labels Value[2 x *2]] }

View file

@@ -0,0 +1,434 @@
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 264172964 kB
-------------------------------------------------------------------
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config/FeedForward.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
-------------------------------------------------------------------
Build info:
Built time: Aug 16 2016 09:41:57
Last modified date: Mon Aug 15 23:39:17 2016
Build type: release
Build target: GPU
With 1bit-SGD: yes
Math lib: mkl
CUDA_PATH: /usr/local/cuda-7.5
CUB_PATH: /usr/local/cub-1.4.1
CUDNN_PATH: /usr/local/cudnn-4.0
Build Branch: HEAD
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by philly on 643085f7f8c2
Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
-------------------------------------------------------------------
Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
MPIWrapper: initializing MPI
ping [requestnodes (before change)]: 1 nodes pinging each other
ping [requestnodes (before change)]: all 1 nodes responded
requestnodes [MPIWrapper]: using 1 out of 1 MPI nodes (1 requested); we (0) are in (participating)
ping [requestnodes (after change)]: 1 nodes pinging each other
ping [requestnodes (after change)]: all 1 nodes responded
mpihelper: only one MPI process: MPI operation will be boring
ping [mpihelper]: 1 nodes pinging each other
ping [mpihelper]: all 1 nodes responded
08/16/2016 10:01:41: -------------------------------------------------------------------
08/16/2016 10:01:41: Build info:
08/16/2016 10:01:41: Built time: Aug 16 2016 09:41:57
08/16/2016 10:01:41: Last modified date: Mon Aug 15 23:39:17 2016
08/16/2016 10:01:41: Build type: release
08/16/2016 10:01:41: Build target: GPU
08/16/2016 10:01:41: With 1bit-SGD: yes
08/16/2016 10:01:41: Math lib: mkl
08/16/2016 10:01:41: CUDA_PATH: /usr/local/cuda-7.5
08/16/2016 10:01:41: CUB_PATH: /usr/local/cub-1.4.1
08/16/2016 10:01:41: CUDNN_PATH: /usr/local/cudnn-4.0
08/16/2016 10:01:41: Build Branch: HEAD
08/16/2016 10:01:41: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
08/16/2016 10:01:41: Built by philly on 643085f7f8c2
08/16/2016 10:01:41: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
08/16/2016 10:01:41: -------------------------------------------------------------------
08/16/2016 10:01:42: -------------------------------------------------------------------
08/16/2016 10:01:42: GPU info:
08/16/2016 10:01:42: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:42: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:42: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:42: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:42: -------------------------------------------------------------------
08/16/2016 10:01:42: Running on localhost at 2016/08/16 10:01:42
08/16/2016 10:01:42: Command line:
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config/FeedForward.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
08/16/2016 10:01:42: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:01:42: RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = "1"
modelPath = "$ModelDir$/cntkSpeechFF.dnn"
parallelTrain = true
speechTrain = [
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
]
SGD = [
epochSize = 20480
minibatchSize = 256:1024:2048
learningRatesPerMB = 1.0:0.5:0.1
numMBsToShowResult = 10
momentumPerMB = 0.9:0.656119
maxEpochs = 3
keepCheckPointFiles = true
parallelTrain = [
parallelizationMethod = "DataParallelSGD"
distributedMBReading = true
dataParallelSGD = [
gradientBits = 1
]
]
autoAdjust=[
autoAdjustMinibatch = true
minibatchSizeTuningFrequency = 1
minibatchSearchCriterionErrorMargin = 2
]
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "$DataDir$/glob_0000.scp"
]
labels = [
mlfFile = "$DataDir$/glob_0000.mlf"
labelMappingFile = "$DataDir$/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
DeviceId=-1
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=2048]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
08/16/2016 10:01:42: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:01:42: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:01:42: RootDir = ".."
ConfigDir = "../Config"
DataDir = "../Data"
OutputDir = "../Output"
ModelDir = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = "1"
modelPath = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn"
parallelTrain = true
speechTrain = [
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
]
SGD = [
epochSize = 20480
minibatchSize = 256:1024:2048
learningRatesPerMB = 1.0:0.5:0.1
numMBsToShowResult = 10
momentumPerMB = 0.9:0.656119
maxEpochs = 3
keepCheckPointFiles = true
parallelTrain = [
parallelizationMethod = "DataParallelSGD"
distributedMBReading = true
dataParallelSGD = [
gradientBits = 1
]
]
autoAdjust=[
autoAdjustMinibatch = true
minibatchSizeTuningFrequency = 1
minibatchSearchCriterionErrorMargin = 2
]
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
DeviceId=-1
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=2048]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
08/16/2016 10:01:42: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:01:42: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: FeedForward.cntk:command=speechTrain
configparameters: FeedForward.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
configparameters: FeedForward.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: FeedForward.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: FeedForward.cntk:deviceId=-1
configparameters: FeedForward.cntk:ModelDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models
configparameters: FeedForward.cntk:modelPath=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
configparameters: FeedForward.cntk:OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
configparameters: FeedForward.cntk:parallelTrain=true
configparameters: FeedForward.cntk:precision=float
configparameters: FeedForward.cntk:RootDir=..
configparameters: FeedForward.cntk:RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
configparameters: FeedForward.cntk:speechTrain=[
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
]
SGD = [
epochSize = 20480
minibatchSize = 256:1024:2048
learningRatesPerMB = 1.0:0.5:0.1
numMBsToShowResult = 10
momentumPerMB = 0.9:0.656119
maxEpochs = 3
keepCheckPointFiles = true
parallelTrain = [
parallelizationMethod = "DataParallelSGD"
distributedMBReading = true
dataParallelSGD = [
gradientBits = 1
]
]
autoAdjust=[
autoAdjustMinibatch = true
minibatchSizeTuningFrequency = 1
minibatchSearchCriterionErrorMargin = 2
]
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]] [reader=[useMersenneTwisterRand=true]]
configparameters: FeedForward.cntk:timestamping=true
configparameters: FeedForward.cntk:traceLevel=1
08/16/2016 10:01:42: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:01:42: Commands: speechTrain
08/16/2016 10:01:42: Precision = "float"
08/16/2016 10:01:42: CNTKModelPath: /tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
08/16/2016 10:01:42: CNTKCommandTrainInfo: speechTrain : 1
08/16/2016 10:01:42: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
08/16/2016 10:01:42: ##############################################################################
08/16/2016 10:01:42: # #
08/16/2016 10:01:42: # Action "train" #
08/16/2016 10:01:42: # #
08/16/2016 10:01:42: ##############################################################################
08/16/2016 10:01:42: CNTKCommandTrainBegin: speechTrain
SimpleNetworkBuilder Using CPU
reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp ... 948 entries
total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list
htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf ... total 948 entries
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
label set 0: 129 classes
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
08/16/2016 10:01:42: Creating virgin network.
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000.
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000.
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000.
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
Prior = Mean()
ScaledLogLikelihood = Minus()
Validating network. 25 nodes to process in pass 1.
Validating --> labels = InputValue() : -> [132 x *]
Validating --> W2 = LearnableParameter() : -> [132 x 512]
Validating --> W1 = LearnableParameter() : -> [512 x 512]
Validating --> W0 = LearnableParameter() : -> [512 x 363]
Validating --> features = InputValue() : -> [363 x *]
Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363]
Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363]
Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *]
Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *]
Validating --> B0 = LearnableParameter() : -> [512 x 1]
Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *]
Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *]
Validating --> B1 = LearnableParameter() : -> [512 x 1]
Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *]
Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [132 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *]
Validating --> Prior = Mean (labels) : [132 x *] -> [132]
Validating --> LogOfPrior = Log (Prior) : [132] -> [132]
Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
Validating network. 17 nodes to process in pass 2.
Validating network, final pass.
12 out of 25 nodes do not share the minibatch layout with the input data.
Post-processing network complete.
08/16/2016 10:01:42: Created model with 25 nodes on CPU.
08/16/2016 10:01:42: Training criterion node(s):
08/16/2016 10:01:42: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
08/16/2016 10:01:42: Evaluation criterion node(s):
08/16/2016 10:01:42: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared.
{ W1 : [512 x 512] (gradient)
W1*H1+B1 : [512 x 1 x *] }
{ H2 : [512 x 1 x *]
W1*H1 : [512 x 1 x *] (gradient) }
{ B0 : [512 x 1] (gradient)
H1 : [512 x 1 x *] (gradient)
W1*H1+B1 : [512 x 1 x *] (gradient)
W2*H1 : [132 x 1 x *] }
{ HLast : [132 x 1 x *]
W2 : [132 x 512] (gradient) }
{ B1 : [512 x 1] (gradient)
H2 : [512 x 1 x *] (gradient)
HLast : [132 x 1 x *] (gradient) }
{ W0 : [512 x 363] (gradient)
W0*features+B0 : [512 x 1 x *] }
{ H1 : [512 x 1 x *]
W0*features : [512 x *] (gradient) }
{ W0*features+B0 : [512 x 1 x *] (gradient)
W1*H1 : [512 x 1 x *] }
08/16/2016 10:01:42: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient:
08/16/2016 10:01:42: Node 'B0' (LearnableParameter operation) : [512 x 1]
08/16/2016 10:01:42: Node 'B1' (LearnableParameter operation) : [512 x 1]
08/16/2016 10:01:42: Node 'B2' (LearnableParameter operation) : [132 x 1]
08/16/2016 10:01:42: Node 'W0' (LearnableParameter operation) : [512 x 363]
08/16/2016 10:01:42: Node 'W1' (LearnableParameter operation) : [512 x 512]
08/16/2016 10:01:42: Node 'W2' (LearnableParameter operation) : [132 x 512]
08/16/2016 10:01:42: Precomputing --> 3 PreCompute nodes found.
08/16/2016 10:01:42: MeanOfFeatures = Mean()
08/16/2016 10:01:42: InvStdOfFeatures = InvStdDev()
08/16/2016 10:01:42: Prior = Mean()
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
08/16/2016 10:01:43: Precomputing --> Completed.
08/16/2016 10:01:43: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
08/16/2016 10:01:43: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
08/16/2016 10:01:44: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.45117986 * 2048; EvalErrorPrediction = 0.92187500 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.209966s
08/16/2016 10:01:44: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn'
08/16/2016 10:01:44: CNTKCommandTrainEnd: speechTrain
08/16/2016 10:01:44: Action "train" complete.
08/16/2016 10:01:44: __COMPLETED__
~MPIWrapper

View file

@@ -1 +0,0 @@
__COMPLETED__

View file

@@ -1 +0,0 @@
__COMPLETED__

View file

@@ -0,0 +1,435 @@
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 264172964 kB
-------------------------------------------------------------------
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config/FeedForward.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
-------------------------------------------------------------------
Build info:
Built time: Aug 16 2016 09:41:57
Last modified date: Mon Aug 15 23:39:17 2016
Build type: release
Build target: GPU
With 1bit-SGD: yes
Math lib: mkl
CUDA_PATH: /usr/local/cuda-7.5
CUB_PATH: /usr/local/cub-1.4.1
CUDNN_PATH: /usr/local/cudnn-4.0
Build Branch: HEAD
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by philly on 643085f7f8c2
Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
-------------------------------------------------------------------
Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
MPIWrapper: initializing MPI
ping [requestnodes (before change)]: 1 nodes pinging each other
ping [requestnodes (before change)]: all 1 nodes responded
requestnodes [MPIWrapper]: using 1 out of 1 MPI nodes (1 requested); we (0) are in (participating)
ping [requestnodes (after change)]: 1 nodes pinging each other
ping [requestnodes (after change)]: all 1 nodes responded
mpihelper: only one MPI process: MPI operation will be boring
ping [mpihelper]: 1 nodes pinging each other
ping [mpihelper]: all 1 nodes responded
08/16/2016 10:01:45: -------------------------------------------------------------------
08/16/2016 10:01:45: Build info:
08/16/2016 10:01:45: Built time: Aug 16 2016 09:41:57
08/16/2016 10:01:45: Last modified date: Mon Aug 15 23:39:17 2016
08/16/2016 10:01:45: Build type: release
08/16/2016 10:01:45: Build target: GPU
08/16/2016 10:01:45: With 1bit-SGD: yes
08/16/2016 10:01:45: Math lib: mkl
08/16/2016 10:01:45: CUDA_PATH: /usr/local/cuda-7.5
08/16/2016 10:01:45: CUB_PATH: /usr/local/cub-1.4.1
08/16/2016 10:01:45: CUDNN_PATH: /usr/local/cudnn-4.0
08/16/2016 10:01:45: Build Branch: HEAD
08/16/2016 10:01:45: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
08/16/2016 10:01:45: Built by philly on 643085f7f8c2
08/16/2016 10:01:45: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
08/16/2016 10:01:45: -------------------------------------------------------------------
08/16/2016 10:01:46: -------------------------------------------------------------------
08/16/2016 10:01:46: GPU info:
08/16/2016 10:01:46: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:46: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:46: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:46: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:46: -------------------------------------------------------------------
08/16/2016 10:01:46: Running on localhost at 2016/08/16 10:01:46
08/16/2016 10:01:46: Command line:
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config/FeedForward.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
08/16/2016 10:01:46: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:01:46: RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = "1"
modelPath = "$ModelDir$/cntkSpeechFF.dnn"
parallelTrain = true
speechTrain = [
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
]
SGD = [
epochSize = 20480
minibatchSize = 256:1024:2048
learningRatesPerMB = 1.0:0.5:0.1
numMBsToShowResult = 10
momentumPerMB = 0.9:0.656119
maxEpochs = 3
keepCheckPointFiles = true
parallelTrain = [
parallelizationMethod = "DataParallelSGD"
distributedMBReading = true
dataParallelSGD = [
gradientBits = 1
]
]
autoAdjust=[
autoAdjustMinibatch = true
minibatchSizeTuningFrequency = 1
minibatchSearchCriterionErrorMargin = 2
]
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "$DataDir$/glob_0000.scp"
]
labels = [
mlfFile = "$DataDir$/glob_0000.mlf"
labelMappingFile = "$DataDir$/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
DeviceId=0
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=2048]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
08/16/2016 10:01:46: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:01:46: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:01:46: RootDir = ".."
ConfigDir = "../Config"
DataDir = "../Data"
OutputDir = "../Output"
ModelDir = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = "1"
modelPath = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn"
parallelTrain = true
speechTrain = [
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
]
SGD = [
epochSize = 20480
minibatchSize = 256:1024:2048
learningRatesPerMB = 1.0:0.5:0.1
numMBsToShowResult = 10
momentumPerMB = 0.9:0.656119
maxEpochs = 3
keepCheckPointFiles = true
parallelTrain = [
parallelizationMethod = "DataParallelSGD"
distributedMBReading = true
dataParallelSGD = [
gradientBits = 1
]
]
autoAdjust=[
autoAdjustMinibatch = true
minibatchSizeTuningFrequency = 1
minibatchSearchCriterionErrorMargin = 2
]
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
DeviceId=0
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=2048]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
08/16/2016 10:01:46: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:01:46: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: FeedForward.cntk:command=speechTrain
configparameters: FeedForward.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
configparameters: FeedForward.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: FeedForward.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: FeedForward.cntk:deviceId=0
configparameters: FeedForward.cntk:ModelDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models
configparameters: FeedForward.cntk:modelPath=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
configparameters: FeedForward.cntk:OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
configparameters: FeedForward.cntk:parallelTrain=true
configparameters: FeedForward.cntk:precision=float
configparameters: FeedForward.cntk:RootDir=..
configparameters: FeedForward.cntk:RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
configparameters: FeedForward.cntk:speechTrain=[
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
]
SGD = [
epochSize = 20480
minibatchSize = 256:1024:2048
learningRatesPerMB = 1.0:0.5:0.1
numMBsToShowResult = 10
momentumPerMB = 0.9:0.656119
maxEpochs = 3
keepCheckPointFiles = true
parallelTrain = [
parallelizationMethod = "DataParallelSGD"
distributedMBReading = true
dataParallelSGD = [
gradientBits = 1
]
]
autoAdjust=[
autoAdjustMinibatch = true
minibatchSizeTuningFrequency = 1
minibatchSearchCriterionErrorMargin = 2
]
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]] [reader=[useMersenneTwisterRand=true]]
configparameters: FeedForward.cntk:timestamping=true
configparameters: FeedForward.cntk:traceLevel=1
08/16/2016 10:01:46: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:01:46: Commands: speechTrain
08/16/2016 10:01:46: Precision = "float"
08/16/2016 10:01:46: CNTKModelPath: /tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
08/16/2016 10:01:46: CNTKCommandTrainInfo: speechTrain : 1
08/16/2016 10:01:46: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
08/16/2016 10:01:46: ##############################################################################
08/16/2016 10:01:46: # #
08/16/2016 10:01:46: # Action "train" #
08/16/2016 10:01:46: # #
08/16/2016 10:01:46: ##############################################################################
08/16/2016 10:01:46: CNTKCommandTrainBegin: speechTrain
SimpleNetworkBuilder Using GPU 0
reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp ... 948 entries
total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list
htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf ... total 948 entries
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
label set 0: 129 classes
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
08/16/2016 10:01:46: Creating virgin network.
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000.
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000.
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000.
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
Prior = Mean()
ScaledLogLikelihood = Minus()
Validating network. 25 nodes to process in pass 1.
Validating --> labels = InputValue() : -> [132 x *]
Validating --> W2 = LearnableParameter() : -> [132 x 512]
Validating --> W1 = LearnableParameter() : -> [512 x 512]
Validating --> W0 = LearnableParameter() : -> [512 x 363]
Validating --> features = InputValue() : -> [363 x *]
Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363]
Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363]
Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *]
Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *]
Validating --> B0 = LearnableParameter() : -> [512 x 1]
Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *]
Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *]
Validating --> B1 = LearnableParameter() : -> [512 x 1]
Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *]
Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [132 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *]
Validating --> Prior = Mean (labels) : [132 x *] -> [132]
Validating --> LogOfPrior = Log (Prior) : [132] -> [132]
Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
Validating network. 17 nodes to process in pass 2.
Validating network, final pass.
12 out of 25 nodes do not share the minibatch layout with the input data.
Post-processing network complete.
08/16/2016 10:01:46: Created model with 25 nodes on GPU 0.
08/16/2016 10:01:46: Training criterion node(s):
08/16/2016 10:01:46: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
08/16/2016 10:01:46: Evaluation criterion node(s):
08/16/2016 10:01:46: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared.
{ W0 : [512 x 363] (gradient)
W0*features+B0 : [512 x 1 x *] }
{ H1 : [512 x 1 x *]
W0*features : [512 x *] (gradient) }
{ W0*features+B0 : [512 x 1 x *] (gradient)
W1*H1 : [512 x 1 x *] }
{ W1 : [512 x 512] (gradient)
W1*H1+B1 : [512 x 1 x *] }
{ H2 : [512 x 1 x *]
W1*H1 : [512 x 1 x *] (gradient) }
{ B0 : [512 x 1] (gradient)
H1 : [512 x 1 x *] (gradient)
W1*H1+B1 : [512 x 1 x *] (gradient)
W2*H1 : [132 x 1 x *] }
{ HLast : [132 x 1 x *]
W2 : [132 x 512] (gradient) }
{ B1 : [512 x 1] (gradient)
H2 : [512 x 1 x *] (gradient)
HLast : [132 x 1 x *] (gradient) }
08/16/2016 10:01:46: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient:
08/16/2016 10:01:46: Node 'B0' (LearnableParameter operation) : [512 x 1]
08/16/2016 10:01:46: Node 'B1' (LearnableParameter operation) : [512 x 1]
08/16/2016 10:01:46: Node 'B2' (LearnableParameter operation) : [132 x 1]
08/16/2016 10:01:46: Node 'W0' (LearnableParameter operation) : [512 x 363]
08/16/2016 10:01:46: Node 'W1' (LearnableParameter operation) : [512 x 512]
08/16/2016 10:01:46: Node 'W2' (LearnableParameter operation) : [132 x 512]
08/16/2016 10:01:46: Precomputing --> 3 PreCompute nodes found.
08/16/2016 10:01:46: MeanOfFeatures = Mean()
08/16/2016 10:01:46: InvStdOfFeatures = InvStdDev()
08/16/2016 10:01:46: Prior = Mean()
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
08/16/2016 10:01:46: Precomputing --> Completed.
08/16/2016 10:01:46: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
08/16/2016 10:01:46: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
08/16/2016 10:01:46: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.41144794 * 2048; EvalErrorPrediction = 0.92773438 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.023072s
08/16/2016 10:01:46: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn'
08/16/2016 10:01:46: CNTKCommandTrainEnd: speechTrain
08/16/2016 10:01:46: Action "train" complete.
08/16/2016 10:01:46: __COMPLETED__
~MPIWrapper

View file

@@ -1 +0,0 @@
__COMPLETED__

View file

@@ -1 +0,0 @@
__COMPLETED__

View file

@ -1,18 +1,24 @@
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 268381192 kB
-------------------------------------------------------------------
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
------------------------------------------------------------------- -------------------------------------------------------------------
Build info: Build info:
Built time: May 3 2016 13:15:46 Built time: Aug 16 2016 03:09:16
Last modified date: Tue Apr 26 23:35:31 2016 Last modified date: Fri Aug 12 05:28:23 2016
Build type: Release Build type: Release
Build target: GPU Build target: GPU
With 1bit-SGD: no With 1bit-SGD: yes
Math lib: mkl
CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
CUB_PATH: c:\src\cub-1.4.1 CUB_PATH: c:\src\cub-1.4.1
CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
Build Branch: HEAD Build Branch: HEAD
Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by svcphil on cntk-muc01 Built by svcphil on Philly-Pool1
Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
------------------------------------------------------------------- -------------------------------------------------------------------
Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
@ -25,31 +31,39 @@ ping [requestnodes (after change)]: all 1 nodes responded
mpihelper: only one MPI process: MPI operation will be boring mpihelper: only one MPI process: MPI operation will be boring
ping [mpihelper]: 1 nodes pinging each other ping [mpihelper]: 1 nodes pinging each other
ping [mpihelper]: all 1 nodes responded ping [mpihelper]: all 1 nodes responded
05/03/2016 13:22:22: ------------------------------------------------------------------- 08/16/2016 03:20:10: -------------------------------------------------------------------
05/03/2016 13:22:22: Build info: 08/16/2016 03:20:10: Build info:
05/03/2016 13:22:22: Built time: May 3 2016 13:15:46 08/16/2016 03:20:10: Built time: Aug 16 2016 03:09:16
05/03/2016 13:22:22: Last modified date: Tue Apr 26 23:35:31 2016 08/16/2016 03:20:10: Last modified date: Fri Aug 12 05:28:23 2016
05/03/2016 13:22:22: Build type: Release 08/16/2016 03:20:10: Build type: Release
05/03/2016 13:22:22: Build target: GPU 08/16/2016 03:20:10: Build target: GPU
05/03/2016 13:22:22: With 1bit-SGD: no 08/16/2016 03:20:10: With 1bit-SGD: yes
05/03/2016 13:22:22: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5 08/16/2016 03:20:10: Math lib: mkl
05/03/2016 13:22:22: CUB_PATH: c:\src\cub-1.4.1 08/16/2016 03:20:10: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
05/03/2016 13:22:22: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda 08/16/2016 03:20:10: CUB_PATH: c:\src\cub-1.4.1
05/03/2016 13:22:22: Build Branch: HEAD 08/16/2016 03:20:10: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
05/03/2016 13:22:22: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12 08/16/2016 03:20:10: Build Branch: HEAD
05/03/2016 13:22:22: Built by svcphil on cntk-muc01 08/16/2016 03:20:10: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
05/03/2016 13:22:22: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\ 08/16/2016 03:20:10: Built by svcphil on Philly-Pool1
05/03/2016 13:22:22: ------------------------------------------------------------------- 08/16/2016 03:20:10: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
08/16/2016 03:20:10: -------------------------------------------------------------------
08/16/2016 03:20:12: -------------------------------------------------------------------
08/16/2016 03:20:12: GPU info:
05/03/2016 13:22:22: Running on DPHAIM-22 at 2016/05/03 13:22:22 08/16/2016 03:20:12: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
05/03/2016 13:22:22: Command line: 08/16/2016 03:20:12: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] 08/16/2016 03:20:12: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:12: -------------------------------------------------------------------
08/16/2016 03:20:12: Running on DPHAIM-25 at 2016/08/16 03:20:12
08/16/2016 03:20:12: Command line:
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
05/03/2016 13:22:22: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>> 08/16/2016 03:20:12: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
05/03/2016 13:22:22: RootDir = ".." 08/16/2016 03:20:12: RootDir = ".."
ConfigDir = "$RootDir$/Config" ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data" DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output" OutputDir = "$RootDir$/Output"
@ -65,7 +79,7 @@ speechTrain = [
SimpleNetworkBuilder = [ SimpleNetworkBuilder = [
layerSizes = 363:512:512:132 layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax" trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError" evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid" layerTypes = "Sigmoid"
applyMeanVarNorm = true applyMeanVarNorm = true
needPrior = true needPrior = true
@ -111,35 +125,36 @@ speechTrain = [
] ]
] ]
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
DeviceId=-1 DeviceId=-1
timestamping=true timestamping=true
speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=2048]] speechTrain=[SGD=[epochSize=2048]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
05/03/2016 13:22:22: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<< 08/16/2016 03:20:12: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
05/03/2016 13:22:22: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> 08/16/2016 03:20:12: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
05/03/2016 13:22:22: RootDir = ".." 08/16/2016 03:20:12: RootDir = ".."
ConfigDir = "../Config" ConfigDir = "../Config"
DataDir = "../Data" DataDir = "../Data"
OutputDir = "../Output" OutputDir = "../Output"
ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models" ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models"
deviceId = -1 deviceId = -1
command = speechTrain command = speechTrain
precision = "float" precision = "float"
traceLevel = "1" traceLevel = "1"
modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn" modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn"
parallelTrain = true parallelTrain = true
speechTrain = [ speechTrain = [
action = "train" action = "train"
SimpleNetworkBuilder = [ SimpleNetworkBuilder = [
layerSizes = 363:512:512:132 layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax" trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError" evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid" layerTypes = "Sigmoid"
applyMeanVarNorm = true applyMeanVarNorm = true
needPrior = true needPrior = true
@ -185,36 +200,37 @@ speechTrain = [
] ]
] ]
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
DeviceId=-1 DeviceId=-1
timestamping=true timestamping=true
speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=2048]] speechTrain=[SGD=[epochSize=2048]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
05/03/2016 13:22:22: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< 08/16/2016 03:20:12: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
05/03/2016 13:22:22: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>> 08/16/2016 03:20:12: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: FeedForward.cntk:command=speechTrain configparameters: FeedForward.cntk:command=speechTrain
configparameters: FeedForward.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config configparameters: FeedForward.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
configparameters: FeedForward.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data configparameters: FeedForward.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
configparameters: FeedForward.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data configparameters: FeedForward.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
configparameters: FeedForward.cntk:deviceId=-1 configparameters: FeedForward.cntk:deviceId=-1
configparameters: FeedForward.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models configparameters: FeedForward.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models
configparameters: FeedForward.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn configparameters: FeedForward.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
configparameters: FeedForward.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu configparameters: FeedForward.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
configparameters: FeedForward.cntk:parallelTrain=true configparameters: FeedForward.cntk:parallelTrain=true
configparameters: FeedForward.cntk:precision=float configparameters: FeedForward.cntk:precision=float
configparameters: FeedForward.cntk:RootDir=.. configparameters: FeedForward.cntk:RootDir=..
configparameters: FeedForward.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu configparameters: FeedForward.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
configparameters: FeedForward.cntk:speechTrain=[ configparameters: FeedForward.cntk:speechTrain=[
action = "train" action = "train"
SimpleNetworkBuilder = [ SimpleNetworkBuilder = [
layerSizes = 363:512:512:132 layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax" trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError" evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid" layerTypes = "Sigmoid"
applyMeanVarNorm = true applyMeanVarNorm = true
needPrior = true needPrior = true
@ -258,24 +274,24 @@ configparameters: FeedForward.cntk:speechTrain=[
labelType = "category" labelType = "category"
] ]
] ]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]] ] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]] [reader=[useMersenneTwisterRand=true]]
configparameters: FeedForward.cntk:timestamping=true configparameters: FeedForward.cntk:timestamping=true
configparameters: FeedForward.cntk:traceLevel=1 configparameters: FeedForward.cntk:traceLevel=1
05/03/2016 13:22:22: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<< 08/16/2016 03:20:12: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
05/03/2016 13:22:22: Commands: speechTrain 08/16/2016 03:20:12: Commands: speechTrain
05/03/2016 13:22:22: Precision = "float" 08/16/2016 03:20:12: Precision = "float"
05/03/2016 13:22:22: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn 08/16/2016 03:20:12: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
05/03/2016 13:22:22: CNTKCommandTrainInfo: speechTrain : 1 08/16/2016 03:20:12: CNTKCommandTrainInfo: speechTrain : 1
05/03/2016 13:22:22: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1 08/16/2016 03:20:12: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
05/03/2016 13:22:22: ############################################################################## 08/16/2016 03:20:12: ##############################################################################
05/03/2016 13:22:22: # # 08/16/2016 03:20:12: # #
05/03/2016 13:22:22: # Action "train" # 08/16/2016 03:20:12: # Action "train" #
05/03/2016 13:22:22: # # 08/16/2016 03:20:12: # #
05/03/2016 13:22:22: ############################################################################## 08/16/2016 03:20:12: ##############################################################################
05/03/2016 13:22:22: CNTKCommandTrainBegin: speechTrain 08/16/2016 03:20:12: CNTKCommandTrainBegin: speechTrain
SimpleNetworkBuilder Using CPU SimpleNetworkBuilder Using CPU
reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp ... 948 entries reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp ... 948 entries
total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list
@ -284,13 +300,25 @@ htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Example
label set 0: 129 classes label set 0: 129 classes
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
05/03/2016 13:22:23: Creating virgin network. 08/16/2016 03:20:12: Creating virgin network.
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000.
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000.
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000.
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
 Post-processing network...
 7 roots:
 CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
-EvalClassificationError = ClassificationError()
+EvalErrorPrediction = ErrorPrediction()
 InvStdOfFeatures = InvStdDev()
 MeanOfFeatures = Mean()
 PosteriorProb = Softmax()
@@ -319,7 +347,7 @@ Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x
 Validating --> B2 = LearnableParameter() : -> [132 x 1]
 Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *]
 Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
-Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
+Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
 Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *]
 Validating --> Prior = Mean (labels) : [132 x *] -> [132]
 Validating --> LogOfPrior = Log (Prior) : [132] -> [132]
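Among the roots validated here, PosteriorProb, Prior, LogOfPrior and ScaledLogLikelihood implement the usual hybrid-ASR conversion: the network's softmax posterior is divided by the class prior (a subtraction in log space) before the scores go to the HMM decoder. A hedged sketch of that relationship (our code, not CNTK's):

    import numpy as np

    def scaled_log_likelihood(logits, log_prior):
        # log p(state | x) - log p(state): softmax in log space, minus prior.
        log_post = logits - np.logaddexp.reduce(logits, axis=0, keepdims=True)
        return log_post - log_prior[:, None]

    # logits: [132 x T] network outputs (HLast); log_prior: [132] (LogOfPrior)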
@@ -336,70 +364,70 @@ Validating network, final pass.
 Post-processing network complete.
-05/03/2016 13:22:23: Created model with 25 nodes on CPU.
-05/03/2016 13:22:23: Training criterion node(s):
-05/03/2016 13:22:23: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
-05/03/2016 13:22:23: Evaluation criterion node(s):
-05/03/2016 13:22:23: EvalClassificationError = ClassificationError
+08/16/2016 03:20:12: Created model with 25 nodes on CPU.
+08/16/2016 03:20:12: Training criterion node(s):
+08/16/2016 03:20:12: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
+08/16/2016 03:20:12: Evaluation criterion node(s):
+08/16/2016 03:20:12: EvalErrorPrediction = ErrorPrediction
 Allocating matrices for forward and/or backward propagation.
-Memory Sharing Structure:
-0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] }
-000000BDD334C430: {[features Value[363 x *]] }
-000000BDD334C4D0: {[W0 Value[512 x 363]] }
-000000BDD334C610: {[MeanOfFeatures Value[363]] }
-000000BDD334C890: {[B0 Value[512 x 1]] }
-000000BDD334CCF0: {[W1 Value[512 x 512]] }
-000000BDD334CE30: {[B1 Value[512 x 1]] }
-000000BDD334D1F0: {[InvStdOfFeatures Value[363]] }
-000000BDD5BCA080: {[Prior Value[132]] }
-000000BDD5BCA120: {[EvalClassificationError Value[1]] }
-000000BDD5BCA260: {[W2 Value[132 x 512]] }
-000000BDD5BCA440: {[labels Value[132 x *]] }
-000000BDD5BCA6C0: {[MVNormalizedFeatures Value[363 x *]] }
-000000BDD5BCAE40: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] }
-000000BDD5BCAEE0: {[CrossEntropyWithSoftmax Gradient[1]] }
-000000BDD5BCAF80: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] }
-000000BDD5BCB0C0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] }
-000000BDD5BCB160: {[ScaledLogLikelihood Value[132 x 1 x *]] }
-000000BDD5BCB340: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] }
-000000BDD5BCB520: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] }
-000000BDD5BCB5C0: {[B2 Gradient[132 x 1]] }
-000000BDD5BCB700: {[W0*features Value[512 x *]] }
-000000BDD5BCB7A0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] }
-000000BDD5BCB8E0: {[LogOfPrior Value[132]] }
-000000BDD5BCB980: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] }
-000000BDD5BCBAC0: {[B2 Value[132 x 1]] }
-000000BDD5BCBB60: {[CrossEntropyWithSoftmax Value[1]] }
-000000BDD5BCBC00: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] }
-000000BDD5BCBCA0: {[W2*H1 Gradient[132 x 1 x *]] }
+Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared.
+{ W0*features+B0 : [512 x 1 x *] (gradient)
+  W1*H1 : [512 x 1 x *] }
+{ W0 : [512 x 363] (gradient)
+  W0*features+B0 : [512 x 1 x *] }
+{ H1 : [512 x 1 x *]
+  W0*features : [512 x *] (gradient) }
+{ W1 : [512 x 512] (gradient)
+  W1*H1+B1 : [512 x 1 x *] }
+{ H2 : [512 x 1 x *]
+  W1*H1 : [512 x 1 x *] (gradient) }
+{ HLast : [132 x 1 x *]
+  W2 : [132 x 512] (gradient) }
+{ B0 : [512 x 1] (gradient)
+  H1 : [512 x 1 x *] (gradient)
+  W1*H1+B1 : [512 x 1 x *] (gradient)
+  W2*H1 : [132 x 1 x *] }
+{ B1 : [512 x 1] (gradient)
+  H2 : [512 x 1 x *] (gradient)
+  HLast : [132 x 1 x *] (gradient) }
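The old baseline above dumps raw buffer addresses; the new one summarizes the same allocation as "out of 40 matrices, 19 are shared as 8": value and gradient matrices whose lifetimes never overlap during forward/backward can reuse one buffer. A toy greedy version of the idea, not CNTK's actual allocator:

    def share_buffers(lifetimes):
        # lifetimes: name -> (first_use, last_use) in execution order.
        # Matrices whose intervals don't overlap may share one allocation.
        pools = []                                  # each: [last_end, [names]]
        for name, (start, end) in sorted(lifetimes.items(), key=lambda kv: kv[1]):
            for pool in pools:
                if pool[0] < start:                 # pool is free again
                    pool[0] = end
                    pool[1].append(name)
                    break
            else:
                pools.append([end, [name]])
        return [names for _, names in pools]

    # e.g. share_buffers({"H1.grad": (5, 6), "B0.grad": (7, 8)}) -> one pool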
-05/03/2016 13:22:23: Precomputing --> 3 PreCompute nodes found.
-05/03/2016 13:22:23: MeanOfFeatures = Mean()
-05/03/2016 13:22:23: InvStdOfFeatures = InvStdDev()
-05/03/2016 13:22:23: Prior = Mean()
+08/16/2016 03:20:12: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient:
+08/16/2016 03:20:12: Node 'B0' (LearnableParameter operation) : [512 x 1]
+08/16/2016 03:20:12: Node 'B1' (LearnableParameter operation) : [512 x 1]
+08/16/2016 03:20:12: Node 'B2' (LearnableParameter operation) : [132 x 1]
+08/16/2016 03:20:12: Node 'W0' (LearnableParameter operation) : [512 x 363]
+08/16/2016 03:20:12: Node 'W1' (LearnableParameter operation) : [512 x 512]
+08/16/2016 03:20:12: Node 'W2' (LearnableParameter operation) : [132 x 512]
+08/16/2016 03:20:12: Precomputing --> 3 PreCompute nodes found.
+08/16/2016 03:20:12: MeanOfFeatures = Mean()
+08/16/2016 03:20:12: InvStdOfFeatures = InvStdDev()
+08/16/2016 03:20:12: Prior = Mean()
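The 516740 figure is just the sum of the six tensors listed: 512*363 + 512*512 + 132*512 weights plus 512 + 512 + 132 biases.

    n_params = 512*363 + 512*512 + 132*512 + 512 + 512 + 132
    assert n_params == 516740  # matches "Training 516740 parameters" above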
 minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
 requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
-05/03/2016 13:22:24: Precomputing --> Completed.
-05/03/2016 13:22:24: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
+08/16/2016 03:20:15: Precomputing --> Completed.
+08/16/2016 03:20:15: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
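The "momentum as time constant" figure in the line above follows from the per-minibatch momentum: with momentumPerMB = 0.9 and (by our reading of the config) a 256-sample minibatch, the per-sample momentum is 0.9^(1/256) and the time constant is 256 / (-ln 0.9) ≈ 2429.8 samples.

    import math

    mb_size = 256                   # assumed minibatch size from the config
    momentum_per_mb = 0.9
    momentum_per_sample = momentum_per_mb ** (1.0 / mb_size)
    time_constant = -1.0 / math.log(momentum_per_sample)
    print(round(time_constant, 1))  # -> 2429.8, matching the log line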
 minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
-05/03/2016 13:22:24: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
-05/03/2016 13:22:25: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.48531419 * 2048; EvalClassificationError = 0.90722656 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.288909s
+08/16/2016 03:20:15: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
+08/16/2016 03:20:15: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.46427900 * 2048; EvalErrorPrediction = 0.91259766 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.28059s
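NumGradientBits = 1 in the minibatch-loop line refers to 1-bit SGD: each worker quantizes its gradient to one bit per element before exchange and carries the quantization error into the next minibatch. A hedged sketch of that quantize-with-error-feedback step (the published technique, not CNTK's actual kernel):

    import numpy as np

    def one_bit_quantize(grad, residual):
        # Add the carried error, send only signs scaled by the mean
        # magnitude, keep the new quantization error for the next minibatch.
        g = grad + residual
        scale = np.abs(g).mean() if g.size else 0.0
        q = np.where(g >= 0.0, scale, -scale)
        return q, g - q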
-05/03/2016 13:22:25: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn'
-05/03/2016 13:22:25: CNTKCommandTrainEnd: speechTrain
-05/03/2016 13:22:25: Action "train" complete.
-05/03/2016 13:22:25: __COMPLETED__
+08/16/2016 03:20:15: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn'
+08/16/2016 03:20:15: CNTKCommandTrainEnd: speechTrain
+08/16/2016 03:20:15: Action "train" complete.
+08/16/2016 03:20:15: __COMPLETED__
+~MPIWrapper
View file

@@ -1 +0,0 @@
-__COMPLETED__

View file

@@ -1 +0,0 @@
-__COMPLETED__
@@ -1,18 +1,24 @@
-=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]]
+CPU info:
+CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
+Hardware threads: 24
+Total Memory: 268381192 kB
+-------------------------------------------------------------------
+=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
 -------------------------------------------------------------------
 Build info:
-Built time: May 3 2016 13:15:46
-Last modified date: Tue Apr 26 23:35:31 2016
+Built time: Aug 16 2016 03:09:16
+Last modified date: Fri Aug 12 05:28:23 2016
 Build type: Release
 Build target: GPU
-With 1bit-SGD: no
+With 1bit-SGD: yes
+Math lib: mkl
 CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
 CUB_PATH: c:\src\cub-1.4.1
 CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
 Build Branch: HEAD
-Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12
-Built by svcphil on cntk-muc01
+Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
+Built by svcphil on Philly-Pool1
 Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
 -------------------------------------------------------------------
 Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
@@ -25,31 +31,39 @@ ping [requestnodes (after change)]: all 1 nodes responded
 mpihelper: only one MPI process: MPI operation will be boring
 ping [mpihelper]: 1 nodes pinging each other
 ping [mpihelper]: all 1 nodes responded
-05/03/2016 13:22:25: -------------------------------------------------------------------
-05/03/2016 13:22:25: Build info:
-05/03/2016 13:22:25: Built time: May 3 2016 13:15:46
-05/03/2016 13:22:25: Last modified date: Tue Apr 26 23:35:31 2016
-05/03/2016 13:22:25: Build type: Release
-05/03/2016 13:22:25: Build target: GPU
-05/03/2016 13:22:25: With 1bit-SGD: no
-05/03/2016 13:22:25: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
-05/03/2016 13:22:25: CUB_PATH: c:\src\cub-1.4.1
-05/03/2016 13:22:25: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
-05/03/2016 13:22:25: Build Branch: HEAD
-05/03/2016 13:22:25: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12
-05/03/2016 13:22:25: Built by svcphil on cntk-muc01
-05/03/2016 13:22:25: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
-05/03/2016 13:22:25: -------------------------------------------------------------------
-05/03/2016 13:22:25: Running on DPHAIM-22 at 2016/05/03 13:22:25
-05/03/2016 13:22:25: Command line:
-C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]]
+08/16/2016 03:20:17: -------------------------------------------------------------------
+08/16/2016 03:20:17: Build info:
+08/16/2016 03:20:17: Built time: Aug 16 2016 03:09:16
+08/16/2016 03:20:17: Last modified date: Fri Aug 12 05:28:23 2016
+08/16/2016 03:20:17: Build type: Release
+08/16/2016 03:20:17: Build target: GPU
+08/16/2016 03:20:17: With 1bit-SGD: yes
+08/16/2016 03:20:17: Math lib: mkl
+08/16/2016 03:20:17: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
+08/16/2016 03:20:17: CUB_PATH: c:\src\cub-1.4.1
+08/16/2016 03:20:17: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
+08/16/2016 03:20:17: Build Branch: HEAD
+08/16/2016 03:20:17: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
+08/16/2016 03:20:17: Built by svcphil on Philly-Pool1
+08/16/2016 03:20:17: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
+08/16/2016 03:20:17: -------------------------------------------------------------------
+08/16/2016 03:20:19: -------------------------------------------------------------------
+08/16/2016 03:20:19: GPU info:
+08/16/2016 03:20:19: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
+08/16/2016 03:20:19: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
+08/16/2016 03:20:19: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
+08/16/2016 03:20:19: -------------------------------------------------------------------
+08/16/2016 03:20:19: Running on DPHAIM-25 at 2016/08/16 03:20:19
+08/16/2016 03:20:19: Command line:
+C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
-05/03/2016 13:22:25: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
-05/03/2016 13:22:25: RootDir = ".."
+08/16/2016 03:20:19: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
+08/16/2016 03:20:19: RootDir = ".."
 ConfigDir = "$RootDir$/Config"
 DataDir = "$RootDir$/Data"
 OutputDir = "$RootDir$/Output"
@@ -65,7 +79,7 @@ speechTrain = [
 SimpleNetworkBuilder = [
 layerSizes = 363:512:512:132
 trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
 layerTypes = "Sigmoid"
 applyMeanVarNorm = true
 needPrior = true
@@ -111,35 +125,36 @@ speechTrain = [
 ]
 ]
 currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
-RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
+RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
 DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
 ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
-OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
+OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
 DeviceId=0
 timestamping=true
 speechTrain=[SGD=[maxEpochs=1]]
 speechTrain=[SGD=[epochSize=2048]]
+speechTrain=[reader=[useMersenneTwisterRand=true]]
-05/03/2016 13:22:25: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
+08/16/2016 03:20:19: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
-05/03/2016 13:22:25: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
-05/03/2016 13:22:25: RootDir = ".."
+08/16/2016 03:20:19: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+08/16/2016 03:20:19: RootDir = ".."
 ConfigDir = "../Config"
 DataDir = "../Data"
 OutputDir = "../Output"
-ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models"
+ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models"
 deviceId = -1
 command = speechTrain
 precision = "float"
 traceLevel = "1"
-modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn"
+modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn"
 parallelTrain = true
 speechTrain = [
 action = "train"
 SimpleNetworkBuilder = [
 layerSizes = 363:512:512:132
 trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
 layerTypes = "Sigmoid"
 applyMeanVarNorm = true
 needPrior = true
@@ -185,36 +200,37 @@ speechTrain = [
 ]
 ]
 currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
-RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
+RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
 DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
 ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
-OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
+OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
 DeviceId=0
 timestamping=true
 speechTrain=[SGD=[maxEpochs=1]]
 speechTrain=[SGD=[epochSize=2048]]
+speechTrain=[reader=[useMersenneTwisterRand=true]]
-05/03/2016 13:22:25: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+08/16/2016 03:20:19: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
-05/03/2016 13:22:25: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
+08/16/2016 03:20:19: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
 configparameters: FeedForward.cntk:command=speechTrain
 configparameters: FeedForward.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
 configparameters: FeedForward.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
 configparameters: FeedForward.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
 configparameters: FeedForward.cntk:deviceId=0
-configparameters: FeedForward.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models
-configparameters: FeedForward.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
-configparameters: FeedForward.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
+configparameters: FeedForward.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models
+configparameters: FeedForward.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
+configparameters: FeedForward.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
 configparameters: FeedForward.cntk:parallelTrain=true
 configparameters: FeedForward.cntk:precision=float
 configparameters: FeedForward.cntk:RootDir=..
-configparameters: FeedForward.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
+configparameters: FeedForward.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
 configparameters: FeedForward.cntk:speechTrain=[
 action = "train"
 SimpleNetworkBuilder = [
 layerSizes = 363:512:512:132
 trainingCriterion = "CrossEntropyWithSoftmax"
-evalCriterion = "ClassificationError"
+evalCriterion = "ErrorPrediction"
 layerTypes = "Sigmoid"
 applyMeanVarNorm = true
 needPrior = true
@@ -258,24 +274,24 @@ configparameters: FeedForward.cntk:speechTrain=[
 labelType = "category"
 ]
 ]
-] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]]
+] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]] [reader=[useMersenneTwisterRand=true]]
 configparameters: FeedForward.cntk:timestamping=true
 configparameters: FeedForward.cntk:traceLevel=1
-05/03/2016 13:22:25: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
-05/03/2016 13:22:25: Commands: speechTrain
-05/03/2016 13:22:25: Precision = "float"
-05/03/2016 13:22:25: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
-05/03/2016 13:22:25: CNTKCommandTrainInfo: speechTrain : 1
-05/03/2016 13:22:25: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
-05/03/2016 13:22:25: ##############################################################################
-05/03/2016 13:22:25: # #
-05/03/2016 13:22:25: # Action "train" #
-05/03/2016 13:22:25: # #
-05/03/2016 13:22:25: ##############################################################################
-05/03/2016 13:22:25: CNTKCommandTrainBegin: speechTrain
+08/16/2016 03:20:19: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
+08/16/2016 03:20:19: Commands: speechTrain
+08/16/2016 03:20:19: Precision = "float"
+08/16/2016 03:20:19: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
+08/16/2016 03:20:19: CNTKCommandTrainInfo: speechTrain : 1
+08/16/2016 03:20:19: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
+08/16/2016 03:20:19: ##############################################################################
+08/16/2016 03:20:19: # #
+08/16/2016 03:20:19: # Action "train" #
+08/16/2016 03:20:19: # #
+08/16/2016 03:20:19: ##############################################################################
+08/16/2016 03:20:19: CNTKCommandTrainBegin: speechTrain
 SimpleNetworkBuilder Using GPU 0
 reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp ... 948 entries
 total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list
@@ -284,14 +300,26 @@ htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Example
 label set 0: 129 classes
 minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
-05/03/2016 13:22:25: Creating virgin network.
+08/16/2016 03:20:19: Creating virgin network.
+Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000.
+Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
 Microsoft::MSR::CNTK::GPUMatrix<ElemType>::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4
+Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
+Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
+Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000.
+Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
+Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
+Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
+Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000.
+Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
+Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
+Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
 Post-processing network...
 7 roots:
 CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
-EvalClassificationError = ClassificationError()
+EvalErrorPrediction = ErrorPrediction()
 InvStdOfFeatures = InvStdDev()
 MeanOfFeatures = Mean()
 PosteriorProb = Softmax()
@@ -320,7 +348,7 @@ Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x
 Validating --> B2 = LearnableParameter() : -> [132 x 1]
 Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *]
 Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
-Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
+Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
 Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *]
 Validating --> Prior = Mean (labels) : [132 x *] -> [132]
 Validating --> LogOfPrior = Log (Prior) : [132] -> [132]
@@ -337,70 +365,70 @@ Validating network, final pass.
 Post-processing network complete.
-05/03/2016 13:22:26: Created model with 25 nodes on GPU 0.
-05/03/2016 13:22:26: Training criterion node(s):
-05/03/2016 13:22:26: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
-05/03/2016 13:22:26: Evaluation criterion node(s):
-05/03/2016 13:22:26: EvalClassificationError = ClassificationError
+08/16/2016 03:20:20: Created model with 25 nodes on GPU 0.
+08/16/2016 03:20:20: Training criterion node(s):
+08/16/2016 03:20:20: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
+08/16/2016 03:20:20: Evaluation criterion node(s):
+08/16/2016 03:20:20: EvalErrorPrediction = ErrorPrediction
 Allocating matrices for forward and/or backward propagation.
-Memory Sharing Structure:
-0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] }
-00000087D360C610: {[features Value[363 x *]] }
-00000087EB4FEEF0: {[W0 Value[512 x 363]] }
-00000087EB4FF530: {[B1 Value[512 x 1]] }
-00000087EB4FF850: {[W1 Value[512 x 512]] }
-00000087EB4FFC10: {[W2 Value[132 x 512]] }
-00000087EB500070: {[B2 Value[132 x 1]] }
-00000087EB5001B0: {[MeanOfFeatures Value[363]] }
-00000087EB500250: {[InvStdOfFeatures Value[363]] }
-00000087EB5004D0: {[B0 Value[512 x 1]] }
-00000087EDA2B150: {[labels Value[132 x *]] }
-00000087EDA2B330: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] }
-00000087EDA2B3D0: {[Prior Value[132]] }
-00000087EDA2B6F0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] }
-00000087EDA2B8D0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] }
-00000087EDA2BB50: {[CrossEntropyWithSoftmax Value[1]] }
-00000087EDA2BC90: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] }
-00000087EDA2C0F0: {[EvalClassificationError Value[1]] }
-00000087EDA2C190: {[W0*features Value[512 x *]] }
-00000087EDA2C2D0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] }
-00000087EDA2C370: {[W2*H1 Gradient[132 x 1 x *]] }
-00000087EDA2C410: {[B2 Gradient[132 x 1]] }
-00000087EDA2C730: {[ScaledLogLikelihood Value[132 x 1 x *]] }
-00000087EDA2C7D0: {[LogOfPrior Value[132]] }
-00000087EDA2CAF0: {[MVNormalizedFeatures Value[363 x *]] }
-00000087EDA2CB90: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] }
-00000087EDA2CCD0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] }
-00000087EDA2CEB0: {[CrossEntropyWithSoftmax Gradient[1]] }
-00000087EDA2CFF0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] }
+Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared.
+{ W0*features+B0 : [512 x 1 x *] (gradient)
+  W1*H1 : [512 x 1 x *] }
+{ H2 : [512 x 1 x *]
+  W1*H1 : [512 x 1 x *] (gradient) }
+{ HLast : [132 x 1 x *]
+  W2 : [132 x 512] (gradient) }
+{ W0 : [512 x 363] (gradient)
+  W0*features+B0 : [512 x 1 x *] }
+{ B0 : [512 x 1] (gradient)
+  H1 : [512 x 1 x *] (gradient)
+  W1*H1+B1 : [512 x 1 x *] (gradient)
+  W2*H1 : [132 x 1 x *] }
+{ H1 : [512 x 1 x *]
+  W0*features : [512 x *] (gradient) }
+{ W1 : [512 x 512] (gradient)
+  W1*H1+B1 : [512 x 1 x *] }
+{ B1 : [512 x 1] (gradient)
+  H2 : [512 x 1 x *] (gradient)
+  HLast : [132 x 1 x *] (gradient) }
-05/03/2016 13:22:26: Precomputing --> 3 PreCompute nodes found.
-05/03/2016 13:22:26: MeanOfFeatures = Mean()
-05/03/2016 13:22:26: InvStdOfFeatures = InvStdDev()
-05/03/2016 13:22:26: Prior = Mean()
+08/16/2016 03:20:20: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient:
+08/16/2016 03:20:20: Node 'B0' (LearnableParameter operation) : [512 x 1]
+08/16/2016 03:20:20: Node 'B1' (LearnableParameter operation) : [512 x 1]
+08/16/2016 03:20:20: Node 'B2' (LearnableParameter operation) : [132 x 1]
+08/16/2016 03:20:20: Node 'W0' (LearnableParameter operation) : [512 x 363]
+08/16/2016 03:20:20: Node 'W1' (LearnableParameter operation) : [512 x 512]
+08/16/2016 03:20:20: Node 'W2' (LearnableParameter operation) : [132 x 512]
+08/16/2016 03:20:20: Precomputing --> 3 PreCompute nodes found.
+08/16/2016 03:20:20: MeanOfFeatures = Mean()
+08/16/2016 03:20:20: InvStdOfFeatures = InvStdDev()
+08/16/2016 03:20:20: Prior = Mean()
 minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
 requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
-05/03/2016 13:22:27: Precomputing --> Completed.
-05/03/2016 13:22:27: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
+08/16/2016 03:20:21: Precomputing --> Completed.
+08/16/2016 03:20:21: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
 minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
-05/03/2016 13:22:27: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
-05/03/2016 13:22:27: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.42832291 * 2048; EvalClassificationError = 0.91357422 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.052947s
-05/03/2016 13:22:27: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn'
-05/03/2016 13:22:27: CNTKCommandTrainEnd: speechTrain
-05/03/2016 13:22:27: Action "train" complete.
-05/03/2016 13:22:27: __COMPLETED__
+08/16/2016 03:20:21: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
+08/16/2016 03:20:21: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.41144794 * 2048; EvalErrorPrediction = 0.92773438 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.05551s
+08/16/2016 03:20:21: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn'
+08/16/2016 03:20:21: CNTKCommandTrainEnd: speechTrain
+08/16/2016 03:20:21: Action "train" complete.
+08/16/2016 03:20:21: __COMPLETED__
+~MPIWrapper
View file

@@ -5,5 +5,5 @@
 ConfigDir=$TEST_DIR/../../../../../../Examples/Speech/AN4/Config
 # cntkrun <CNTK config file name> <additional CNTK args>
-cntkrun FeedForward.cntk "speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]]" || exit $?
+cntkrun FeedForward.cntk "speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]" || exit $?
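The only change to this test driver is the added useMersenneTwisterRand=true, which pins the reader to a platform-independent PRNG so the utterance shuffle, and hence the baseline numbers, agree across Windows and Linux builds; C's rand() gives no such guarantee. An illustration of the idea (Python's Random is MT19937):

    import random

    def shuffled_indices(n, seed):
        rng = random.Random(seed)  # Mersenne Twister: same sequence everywhere
        idx = list(range(n))
        rng.shuffle(idx)
        return idx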
View file

@@ -0,0 +1,682 @@
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 264172964 kB
-------------------------------------------------------------------
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/LSTM-NDL.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
-------------------------------------------------------------------
Build info:
Built time: Aug 16 2016 09:41:57
Last modified date: Mon Aug 15 23:39:17 2016
Build type: release
Build target: GPU
With 1bit-SGD: yes
Math lib: mkl
CUDA_PATH: /usr/local/cuda-7.5
CUB_PATH: /usr/local/cub-1.4.1
CUDNN_PATH: /usr/local/cudnn-4.0
Build Branch: HEAD
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by philly on 643085f7f8c2
Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
-------------------------------------------------------------------
Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
08/16/2016 10:01:47: -------------------------------------------------------------------
08/16/2016 10:01:47: Build info:
08/16/2016 10:01:47: Built time: Aug 16 2016 09:41:57
08/16/2016 10:01:47: Last modified date: Mon Aug 15 23:39:17 2016
08/16/2016 10:01:47: Build type: release
08/16/2016 10:01:47: Build target: GPU
08/16/2016 10:01:47: With 1bit-SGD: yes
08/16/2016 10:01:47: Math lib: mkl
08/16/2016 10:01:47: CUDA_PATH: /usr/local/cuda-7.5
08/16/2016 10:01:47: CUB_PATH: /usr/local/cub-1.4.1
08/16/2016 10:01:47: CUDNN_PATH: /usr/local/cudnn-4.0
08/16/2016 10:01:47: Build Branch: HEAD
08/16/2016 10:01:47: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
08/16/2016 10:01:47: Built by philly on 643085f7f8c2
08/16/2016 10:01:47: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
08/16/2016 10:01:47: -------------------------------------------------------------------
08/16/2016 10:01:47: -------------------------------------------------------------------
08/16/2016 10:01:47: GPU info:
08/16/2016 10:01:47: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:47: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:47: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:47: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:47: -------------------------------------------------------------------
08/16/2016 10:01:47: Running on localhost at 2016/08/16 10:01:47
08/16/2016 10:01:47: Command line:
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/LSTM-NDL.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
08/16/2016 10:01:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:01:47: RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "$ModelDir$/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "$ConfigDir$/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "$DataDir$/glob_0000.scp"
]
labels = [
mlfFile = "$DataDir$/glob_0000.mlf"
labelMappingFile = "$DataDir$/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
DeviceId=-1
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 10:01:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
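Values such as momentumPerMB = 0:0.9 in the config above are per-epoch schedules: by our reading of the syntax, epoch 1 runs with momentum 0 and every later epoch with 0.9, the last entry repeating. A sketch of a parser for that colon-separated form, including the 'value*count' repeat notation (an assumption, not CNTK's parser):

    def parse_schedule(spec):
        # "0:0.9" -> [0.0, 0.9]; "0.5*2:0.1" -> [0.5, 0.5, 0.1];
        # the last value holds for all remaining epochs.
        values = []
        for part in spec.split(":"):
            if "*" in part:
                v, n = part.split("*")
                values += [float(v)] * int(n)
            else:
                values.append(float(part))
        return lambda epoch: values[min(epoch, len(values) - 1)]  # 0-based

    momentum = parse_schedule("0:0.9")
    assert momentum(0) == 0.0 and momentum(3) == 0.9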
08/16/2016 10:01:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:01:47: RootDir = ".."
ConfigDir = "../Config"
DataDir = "../Data"
OutputDir = "../Output"
ModelDir = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
DeviceId=-1
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 10:01:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:01:47: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: LSTM-NDL.cntk:command=speechTrain
configparameters: LSTM-NDL.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
configparameters: LSTM-NDL.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: LSTM-NDL.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: LSTM-NDL.cntk:deviceId=-1
configparameters: LSTM-NDL.cntk:frameMode=false
configparameters: LSTM-NDL.cntk:ModelDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models
configparameters: LSTM-NDL.cntk:modelPath=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn
configparameters: LSTM-NDL.cntk:OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
configparameters: LSTM-NDL.cntk:parallelTrain=false
configparameters: LSTM-NDL.cntk:precision=float
configparameters: LSTM-NDL.cntk:RootDir=..
configparameters: LSTM-NDL.cntk:RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
configparameters: LSTM-NDL.cntk:speechTrain=[
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=64]] [reader=[useMersenneTwisterRand=true]]
configparameters: LSTM-NDL.cntk:timestamping=true
configparameters: LSTM-NDL.cntk:traceLevel=1
configparameters: LSTM-NDL.cntk:truncated=true
08/16/2016 10:01:47: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:01:47: Commands: speechTrain
08/16/2016 10:01:47: Precision = "float"
08/16/2016 10:01:47: CNTKModelPath: /tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn
08/16/2016 10:01:47: CNTKCommandTrainInfo: speechTrain : 1
08/16/2016 10:01:47: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
08/16/2016 10:01:47: ##############################################################################
08/16/2016 10:01:47: # #
08/16/2016 10:01:47: # Action "train" #
08/16/2016 10:01:47: # #
08/16/2016 10:01:47: ##############################################################################
08/16/2016 10:01:47: CNTKCommandTrainBegin: speechTrain
NDLBuilder Using CPU
reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp ... 948 entries
total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list
htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf ... total 948 entries
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
label set 0: 129 classes
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
useParallelTrain option is not enabled. ParallelTrain config will be ignored.
08/16/2016 10:01:48: Creating virgin network.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=4, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=5, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=6, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=9, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=10, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=11, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=12, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=15, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=16, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=17, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=18, range=0.050000*1.000000, onCPU=false).
Node 'W' (LearnableParameter operation): Initializating Parameter[132 x 0] as uniform later when dimensions are fully known.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Post-processing network...
6 roots:
ce = CrossEntropyWithSoftmax()
err = ErrorPrediction()
featNorm.xMean = Mean()
featNorm.xStdDev = InvStdDev()
logPrior.prior = Mean()
scaledLogLikelihood = Minus()
Loop[0] --> Loop_LSTMoutput1.output -> 24 nodes
LSTMoutput1.dh LSTMoutput1.whh LSTMoutput1.wxxpbpwhh
LSTMoutput1.G4 LSTMoutput1.G3 LSTMoutput1.dc
LSTMoutput1.Wcfdc LSTMoutput1.unnamed165 LSTMoutput1.ft
LSTMoutput1.bft LSTMoutput1.G1 LSTMoutput1.Wcidc
LSTMoutput1.unnamed163 LSTMoutput1.it LSTMoutput1.G2
LSTMoutput1.unnamed164 LSTMoutput1.bit LSTMoutput1.ct
LSTMoutput1.Wcoct LSTMoutput1.unnamed166 LSTMoutput1.ot
LSTMoutput1.unnamed167 LSTMoutput1.mt LSTMoutput1.output
Loop[1] --> Loop_LSTMoutput2.output -> 24 nodes
LSTMoutput2.dh LSTMoutput2.whh LSTMoutput2.wxxpbpwhh
LSTMoutput2.G4 LSTMoutput2.G3 LSTMoutput2.dc
LSTMoutput2.Wcfdc LSTMoutput2.unnamed175 LSTMoutput2.ft
LSTMoutput2.bft LSTMoutput2.G1 LSTMoutput2.Wcidc
LSTMoutput2.unnamed173 LSTMoutput2.it LSTMoutput2.G2
LSTMoutput2.unnamed174 LSTMoutput2.bit LSTMoutput2.ct
LSTMoutput2.Wcoct LSTMoutput2.unnamed176 LSTMoutput2.ot
LSTMoutput2.unnamed177 LSTMoutput2.mt LSTMoutput2.output
Loop[2] --> Loop_LSTMoutput3.output -> 24 nodes
LSTMoutput3.dh LSTMoutput3.whh LSTMoutput3.wxxpbpwhh
LSTMoutput3.G4 LSTMoutput3.G3 LSTMoutput3.dc
LSTMoutput3.Wcfdc LSTMoutput3.unnamed185 LSTMoutput3.ft
LSTMoutput3.bft LSTMoutput3.G1 LSTMoutput3.Wcidc
LSTMoutput3.unnamed183 LSTMoutput3.it LSTMoutput3.G2
LSTMoutput3.unnamed184 LSTMoutput3.bit LSTMoutput3.ct
LSTMoutput3.Wcoct LSTMoutput3.unnamed186 LSTMoutput3.ot
LSTMoutput3.unnamed187 LSTMoutput3.mt LSTMoutput3.output
Validating network. 113 nodes to process in pass 1.
Validating --> labels = InputValue() : -> [132 x *]
Validating --> W = LearnableParameter() : -> [132 x 0]
Validating --> LSTMoutput3.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput3.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput2.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput1.wx = LearnableParameter() : -> [4096 x 0]
Validating --> features = InputValue() : -> [363 x *]
Validating --> featNorm.xMean = Mean (features) : [363 x *] -> [363]
Validating --> featNorm.xStdDev = InvStdDev (features) : [363 x *] -> [363]
Validating --> featNorm.xNorm = PerDimMeanVarNormalization (features, featNorm.xMean, featNorm.xStdDev) : [363 x *], [363], [363] -> [363 x *]
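PerDimMeanVarNormalization standardizes each of the 363 feature dimensions using the precomputed Mean and InvStdDev nodes. Equivalent NumPy (our sketch):

    import numpy as np

    def per_dim_mean_var_norm(x, mean, inv_std):
        # x: [dim x frames]; mean, inv_std: [dim] from the precompute pass.
        return (x - mean[:, None]) * inv_std[:, None]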
Node 'LSTMoutput1.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 363].
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.wxx = Times (LSTMoutput1.wx, featNorm.xNorm) : [4096 x 363], [363 x *] -> [4096 x *]
Validating --> LSTMoutput1.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput1.wxxpb = Plus (LSTMoutput1.wxx, LSTMoutput1.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput1.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput1.wxxpbpwhh = Plus (LSTMoutput1.wxxpb, LSTMoutput1.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput1.G4 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G3 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed165 = Plus (LSTMoutput1.G3, LSTMoutput1.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ft = Sigmoid (LSTMoutput1.unnamed165) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bft = ElementTimes (LSTMoutput1.ft, LSTMoutput1.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G1 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed163 = Plus (LSTMoutput1.G1, LSTMoutput1.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.it = Sigmoid (LSTMoutput1.unnamed163) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G2 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed164 = Tanh (LSTMoutput1.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bit = ElementTimes (LSTMoutput1.it, LSTMoutput1.unnamed164) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ct = Plus (LSTMoutput1.bft, LSTMoutput1.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcoct = DiagTimes (LSTMoutput1.Wco, LSTMoutput1.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed166 = Plus (LSTMoutput1.G4, LSTMoutput1.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ot = Sigmoid (LSTMoutput1.unnamed166) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed167 = Tanh (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.mt = ElementTimes (LSTMoutput1.ot, LSTMoutput1.unnamed167) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.output = Times (LSTMoutput1.Wmr, LSTMoutput1.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'LSTMoutput2.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=7, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.wxx = Times (LSTMoutput2.wx, LSTMoutput1.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput2.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput2.wxxpb = Plus (LSTMoutput2.wxx, LSTMoutput2.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput2.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=8, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput2.wxxpbpwhh = Plus (LSTMoutput2.wxxpb, LSTMoutput2.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput2.G4 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G3 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed175 = Plus (LSTMoutput2.G3, LSTMoutput2.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ft = Sigmoid (LSTMoutput2.unnamed175) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bft = ElementTimes (LSTMoutput2.ft, LSTMoutput2.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G1 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed173 = Plus (LSTMoutput2.G1, LSTMoutput2.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.it = Sigmoid (LSTMoutput2.unnamed173) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G2 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed174 = Tanh (LSTMoutput2.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bit = ElementTimes (LSTMoutput2.it, LSTMoutput2.unnamed174) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ct = Plus (LSTMoutput2.bft, LSTMoutput2.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcoct = DiagTimes (LSTMoutput2.Wco, LSTMoutput2.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed176 = Plus (LSTMoutput2.G4, LSTMoutput2.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ot = Sigmoid (LSTMoutput2.unnamed176) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed177 = Tanh (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.mt = ElementTimes (LSTMoutput2.ot, LSTMoutput2.unnamed177) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.output = Times (LSTMoutput2.Wmr, LSTMoutput2.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'LSTMoutput3.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=13, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.wxx = Times (LSTMoutput3.wx, LSTMoutput2.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput3.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput3.wxxpb = Plus (LSTMoutput3.wxx, LSTMoutput3.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput3.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput3.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=14, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput3.wxxpbpwhh = Plus (LSTMoutput3.wxxpb, LSTMoutput3.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput3.G4 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G3 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed185 = Plus (LSTMoutput3.G3, LSTMoutput3.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ft = Sigmoid (LSTMoutput3.unnamed185) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bft = ElementTimes (LSTMoutput3.ft, LSTMoutput3.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G1 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed183 = Plus (LSTMoutput3.G1, LSTMoutput3.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.it = Sigmoid (LSTMoutput3.unnamed183) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G2 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed184 = Tanh (LSTMoutput3.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bit = ElementTimes (LSTMoutput3.it, LSTMoutput3.unnamed184) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ct = Plus (LSTMoutput3.bft, LSTMoutput3.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcoct = DiagTimes (LSTMoutput3.Wco, LSTMoutput3.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed186 = Plus (LSTMoutput3.G4, LSTMoutput3.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ot = Sigmoid (LSTMoutput3.unnamed186) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed187 = Tanh (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.mt = ElementTimes (LSTMoutput3.ot, LSTMoutput3.unnamed187) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.output = Times (LSTMoutput3.Wmr, LSTMoutput3.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'W' (LearnableParameter operation): Tensor shape was inferred as [132 x 512 x 1].
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 512 x 1] <- uniform(seed=19, range=0.050000*1.000000, onCPU=false).
Validating --> unnamed193 = Times (W, LSTMoutput3.output) : [132 x 512 x 1], [512 x 1 x *] -> [132 x *]
Validating --> b = LearnableParameter() : -> [132 x 1]
Validating --> LSTMoutputW = Plus (unnamed193, b) : [132 x *], [132 x 1] -> [132 x 1 x *]
Validating --> ce = CrossEntropyWithSoftmax (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> err = ErrorPrediction (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> logPrior.prior = Mean (labels) : [132 x *] -> [132]
Validating --> logPrior.logPrior = Log (logPrior.prior) : [132] -> [132]
Validating --> scaledLogLikelihood = Minus (LSTMoutputW, logPrior.logPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
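Several parameters above are declared as [4096 x 0] placeholders ("as uniform later when dimensions are fully known") and only receive their input dimension during this validation pass, e.g. LSTMoutput1.wx inferred as [4096 x 363] from featNorm.xNorm. A hedged sketch of that deferred-inference pattern for a Times node (infer_times_param is a hypothetical helper, not CNTK code):

def infer_times_param(w_shape, x_shape):
    # A parameter declared as [rows x 0] takes its column count from the
    # operand's leading dimension during validation, then gets initialized.
    rows, cols = w_shape
    if cols == 0:                      # dimension left open at declaration
        cols = x_shape[0]              # fill in from the input, e.g. 363
    assert cols == x_shape[0], "Times: inner dimensions must agree"
    return (rows, cols)

# e.g. infer_times_param((4096, 0), (363,)) -> (4096, 363)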
Validating network. 88 nodes to process in pass 2.
Validating --> LSTMoutput1.dh = PastValue (LSTMoutput1.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput1.dc = PastValue (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.dh = PastValue (LSTMoutput2.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput2.dc = PastValue (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.dh = PastValue (LSTMoutput3.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput3.dc = PastValue (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating network. 15 nodes to process in pass 3.
Validating network, final pass.
29 out of 113 nodes do not share the minibatch layout with the input data.
Post-processing network complete.
08/16/2016 10:01:48: Created model with 113 nodes on CPU.
08/16/2016 10:01:48: Training criterion node(s):
08/16/2016 10:01:48: ce = CrossEntropyWithSoftmax
08/16/2016 10:01:48: Evaluation criterion node(s):
08/16/2016 10:01:48: err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing: Out of 217 matrices, 125 are shared as 56 buffers, and 92 are not shared.
{ LSTMoutput1.dh : [512 x 1 x *]
LSTMoutput1.wxx : [4096 x *] (gradient) }
{ LSTMoutput2.Wco : [1024] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] }
{ LSTMoutput1.Wmr : [512 x 1024] (gradient)
LSTMoutput2.wxx : [4096 x *] }
{ LSTMoutput2.wx : [4096 x 512 x 1] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] }
{ LSTMoutput1.ot : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] }
{ LSTMoutput1.ct : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput1.G4 : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] }
{ LSTMoutput1.unnamed164 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput1.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] }
{ LSTMoutput1.G1 : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] }
{ LSTMoutput1.Wci : [1024] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] }
{ LSTMoutput1.G3 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] }
{ LSTMoutput1.Wcf : [1024] (gradient)
LSTMoutput2.it : [1024 x 1 x *] }
{ LSTMoutput1.whh : [4096 x 1 x *] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] }
{ LSTMoutput1.b : [4096 x 1] (gradient)
LSTMoutput1.dh : [512 x 1 x *] (gradient)
LSTMoutput2.unnamed174 : [1024 x 1 x *] }
{ LSTMoutput2.Wmr : [512 x 1024] (gradient)
LSTMoutput3.wxx : [4096 x *] }
{ LSTMoutput3.wx : [4096 x 512 x 1] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] }
{ LSTMoutput2.ot : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] }
{ LSTMoutput2.ct : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput1.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] }
{ LSTMoutput2.unnamed174 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput1.unnamed166 : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] }
{ LSTMoutput1.dc : [1024 x 1 x *] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] }
{ LSTMoutput1.unnamed165 : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] }
{ LSTMoutput2.Wci : [1024] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] }
{ LSTMoutput2.G3 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] }
{ LSTMoutput1.it : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] }
{ LSTMoutput2.Wcf : [1024] (gradient)
LSTMoutput3.it : [1024 x 1 x *] }
{ LSTMoutput1.unnamed167 : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] }
{ LSTMoutput2.b : [4096 x 1] (gradient)
LSTMoutput2.dh : [512 x 1 x *] (gradient)
LSTMoutput3.unnamed184 : [1024 x 1 x *] }
{ LSTMoutput3.Wmr : [512 x 1024] (gradient)
unnamed193 : [132 x *] }
{ LSTMoutputW : [132 x 1 x *]
W : [132 x 512 x 1] (gradient) }
{ LSTMoutput1.mt : [1024 x 1 x *] (gradient)
LSTMoutput2.dh : [512 x 1 x *]
LSTMoutput2.wxx : [4096 x *] (gradient) }
{ LSTMoutput1.wx : [4096 x 363] (gradient)
LSTMoutput1.wxxpb : [4096 x 1 x *] }
{ LSTMoutput2.mt : [1024 x 1 x *] (gradient)
LSTMoutput3.dh : [512 x 1 x *]
LSTMoutput3.wxx : [4096 x *] (gradient) }
{ LSTMoutput3.output : [512 x 1 x *] (gradient)
LSTMoutputW : [132 x 1 x *] (gradient) }
{ LSTMoutput3.mt : [1024 x 1 x *] (gradient)
unnamed193 : [132 x *] (gradient) }
{ LSTMoutput2.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.ft : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.output : [512 x 1 x *] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput3.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.Wh : [4096 x 512] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed176 : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] (gradient) }
{ LSTMoutput1.bit : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.bft : [1024 x 1 x *] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.G2 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.unnamed163 : [1024 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.bft : [1024 x 1 x *] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed173 : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wh : [4096 x 512] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput2.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.output : [512 x 1 x *] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed177 : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] (gradient) }
{ LSTMoutput3.b : [4096 x 1] (gradient)
LSTMoutput3.dh : [512 x 1 x *] (gradient) }
{ LSTMoutput1.Wco : [1024] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] }
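The sharing table above pairs matrices whose lifetimes never overlap in the forward/backward schedule, which is how 125 of the 217 matrices collapse onto 56 physical buffers. A hypothetical sketch of the interval-reuse idea (share_buffers is illustrative, not CNTK's actual allocator):

def share_buffers(lifetimes):
    """lifetimes: {name: (first_use, last_use)} in execution-step order."""
    buffers = []                           # each entry: [last_use, [tenants]]
    for name, (start, end) in sorted(lifetimes.items(), key=lambda kv: kv[1][0]):
        for buf in buffers:
            if buf[0] < start:             # previous tenant is dead
                buf[0] = end               # reuse this buffer
                buf[1].append(name)
                break
        else:
            buffers.append([end, [name]])  # no free buffer: allocate a new one
    return [names for _, names in buffers]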
08/16/2016 10:01:48: Training 13634692 parameters in 23 out of 23 parameter tensors and 104 nodes with gradient:
08/16/2016 10:01:48: Node 'LSTMoutput1.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput1.Wci' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput1.Wco' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput1.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 10:01:48: Node 'LSTMoutput1.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 10:01:48: Node 'LSTMoutput1.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 10:01:48: Node 'LSTMoutput1.wx' (LearnableParameter operation) : [4096 x 363]
08/16/2016 10:01:48: Node 'LSTMoutput2.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput2.Wci' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput2.Wco' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput2.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 10:01:48: Node 'LSTMoutput2.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 10:01:48: Node 'LSTMoutput2.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 10:01:48: Node 'LSTMoutput2.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 10:01:48: Node 'LSTMoutput3.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput3.Wci' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput3.Wco' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput3.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 10:01:48: Node 'LSTMoutput3.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 10:01:48: Node 'LSTMoutput3.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 10:01:48: Node 'LSTMoutput3.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 10:01:48: Node 'W' (LearnableParameter operation) : [132 x 512 x 1]
08/16/2016 10:01:48: Node 'b' (LearnableParameter operation) : [132 x 1]
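The 13,634,692 total follows from the tensor shapes just listed: each layer contributes the same peephole/recurrence/projection/bias count, and only the input weight wx differs between layer 1 and layers 2-3. A quick check:

# Sanity check of the parameter count from the shapes above.
per_layer = 3 * 1024 + 4096 * 512 + 512 * 1024 + 4096   # Wci+Wcf+Wco, Wh, Wmr, b
total = 3 * per_layer + 4096 * 363 + 2 * (4096 * 512) + 132 * 512 + 132
assert total == 13634692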
08/16/2016 10:01:48: Precomputing --> 3 PreCompute nodes found.
08/16/2016 10:01:48: featNorm.xMean = Mean()
08/16/2016 10:01:48: featNorm.xStdDev = InvStdDev()
08/16/2016 10:01:48: logPrior.prior = Mean()
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapass
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
08/16/2016 10:01:49: Precomputing --> Completed.
08/16/2016 10:01:50: Starting Epoch 1: learning rate per sample = 0.001953 effective momentum = 0.000000 momentum as time constant = 0.0 samples
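The logged per-sample rate equals learningRatesPerMB = 0.5 divided by 256; with minibatchSize = 16 and nbrUttsIneachRecurrentIter = 16 parallel utterances, 256 = 16 * 16 is the natural reading, inferred from the logged numbers rather than from a documented conversion rule:

# 0.5 per 256-sample minibatch -> per-sample rate (exact in binary floating point)
assert 0.5 / (16 * 16) == 0.001953125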
minibatchiterator: epoch 0: frames [0..64] (first utterance at frame 0), data subset 0 of 1, with 1 datapass
08/16/2016 10:01:50: Starting minibatch loop.
08/16/2016 10:01:53: Epoch[ 1 of 1]-Minibatch[ 1- 10, 250.00%]: ce = 4.87313957 * 160; err = 0.90625000 * 160; time = 3.3910s; samplesPerSecond = 47.2
08/16/2016 10:01:56: Epoch[ 1 of 1]-Minibatch[ 11- 20, 500.00%]: ce = 4.84521751 * 160; err = 0.69375000 * 160; time = 2.9626s; samplesPerSecond = 54.0
08/16/2016 10:01:58: Finished Epoch[ 1 of 1]: [Training] ce = 4.85644356 * 418; err = 0.80382775 * 418; totalSamplesSeen = 418; learningRatePerSample = 0.001953125; epochTime=8.39953s
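In these progress lines, "ce = a * N" reports the average criterion a over N frames, so the epoch line aggregates 418 frames: two logged blocks of 160 plus 98 frames from the remaining partial minibatches (a reading of the log format; the decomposition is plain arithmetic):

assert 160 + 160 + 98 == 418   # frames in minibatches 1-10, 11-20, and the rest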
08/16/2016 10:01:59: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn'
08/16/2016 10:01:59: CNTKCommandTrainEnd: speechTrain
08/16/2016 10:01:59: Action "train" complete.
08/16/2016 10:01:59: __COMPLETED__

View file

@ -1 +0,0 @@
__COMPLETED__

View file

@ -1 +0,0 @@
__COMPLETED__

View file

@ -0,0 +1,683 @@
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 264172964 kB
-------------------------------------------------------------------
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/LSTM-NDL.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
-------------------------------------------------------------------
Build info:
Built time: Aug 16 2016 09:41:57
Last modified date: Mon Aug 15 23:39:17 2016
Build type: release
Build target: GPU
With 1bit-SGD: yes
Math lib: mkl
CUDA_PATH: /usr/local/cuda-7.5
CUB_PATH: /usr/local/cub-1.4.1
CUDNN_PATH: /usr/local/cudnn-4.0
Build Branch: HEAD
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by philly on 643085f7f8c2
Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
-------------------------------------------------------------------
Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
08/16/2016 10:02:00: -------------------------------------------------------------------
08/16/2016 10:02:00: Build info:
08/16/2016 10:02:00: Built time: Aug 16 2016 09:41:57
08/16/2016 10:02:00: Last modified date: Mon Aug 15 23:39:17 2016
08/16/2016 10:02:00: Build type: release
08/16/2016 10:02:00: Build target: GPU
08/16/2016 10:02:00: With 1bit-SGD: yes
08/16/2016 10:02:00: Math lib: mkl
08/16/2016 10:02:00: CUDA_PATH: /usr/local/cuda-7.5
08/16/2016 10:02:00: CUB_PATH: /usr/local/cub-1.4.1
08/16/2016 10:02:00: CUDNN_PATH: /usr/local/cudnn-4.0
08/16/2016 10:02:00: Build Branch: HEAD
08/16/2016 10:02:00: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
08/16/2016 10:02:00: Built by philly on 643085f7f8c2
08/16/2016 10:02:00: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
08/16/2016 10:02:00: -------------------------------------------------------------------
08/16/2016 10:02:01: -------------------------------------------------------------------
08/16/2016 10:02:01: GPU info:
08/16/2016 10:02:01: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:02:01: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:02:01: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:02:01: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:02:01: -------------------------------------------------------------------
08/16/2016 10:02:01: Running on localhost at 2016/08/16 10:02:01
08/16/2016 10:02:01: Command line:
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/LSTM-NDL.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
08/16/2016 10:02:01: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:02:01: RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "$ModelDir$/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "$ConfigDir$/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "$DataDir$/glob_0000.scp"
]
labels = [
mlfFile = "$DataDir$/glob_0000.mlf"
labelMappingFile = "$DataDir$/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
DeviceId=0
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 10:02:01: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:02:01: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:02:01: RootDir = ".."
ConfigDir = "../Config"
DataDir = "../Data"
OutputDir = "../Output"
ModelDir = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
DeviceId=0
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 10:02:01: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:02:01: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: LSTM-NDL.cntk:command=speechTrain
configparameters: LSTM-NDL.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
configparameters: LSTM-NDL.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: LSTM-NDL.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: LSTM-NDL.cntk:deviceId=0
configparameters: LSTM-NDL.cntk:frameMode=false
configparameters: LSTM-NDL.cntk:ModelDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models
configparameters: LSTM-NDL.cntk:modelPath=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn
configparameters: LSTM-NDL.cntk:OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
configparameters: LSTM-NDL.cntk:parallelTrain=false
configparameters: LSTM-NDL.cntk:precision=float
configparameters: LSTM-NDL.cntk:RootDir=..
configparameters: LSTM-NDL.cntk:RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
configparameters: LSTM-NDL.cntk:speechTrain=[
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=64]] [reader=[useMersenneTwisterRand=true]]
configparameters: LSTM-NDL.cntk:timestamping=true
configparameters: LSTM-NDL.cntk:traceLevel=1
configparameters: LSTM-NDL.cntk:truncated=true
08/16/2016 10:02:01: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:02:01: Commands: speechTrain
08/16/2016 10:02:01: Precision = "float"
08/16/2016 10:02:01: CNTKModelPath: /tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn
08/16/2016 10:02:01: CNTKCommandTrainInfo: speechTrain : 1
08/16/2016 10:02:01: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
08/16/2016 10:02:01: ##############################################################################
08/16/2016 10:02:01: # #
08/16/2016 10:02:01: # Action "train" #
08/16/2016 10:02:01: # #
08/16/2016 10:02:01: ##############################################################################
08/16/2016 10:02:01: CNTKCommandTrainBegin: speechTrain
NDLBuilder Using GPU 0
reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp ... 948 entries
total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list
htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf ... total 948 entries
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
label set 0: 129 classes
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
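The chunk averages are just the utterance and frame totals divided over the three chunks:

assert 948 / 3 == 316.0                    # av. chunk size in utterances
assert abs(252734 / 3 - 84244.7) < 0.05    # av. chunk size in frames (84244.67)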
useParallelTrain option is not enabled. ParallelTrain config will be ignored.
08/16/2016 10:02:01: Creating virgin network.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
SetUniformRandomValue (GPU): creating curand object with seed 3, sizeof(ElemType)==4
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=4, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=5, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=6, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=9, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=10, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=11, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=12, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=15, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=16, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=17, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=18, range=0.050000*1.000000, onCPU=false).
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 0] as uniform later when dimensions are fully known.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Post-processing network...
6 roots:
ce = CrossEntropyWithSoftmax()
err = ErrorPrediction()
featNorm.xMean = Mean()
featNorm.xStdDev = InvStdDev()
logPrior.prior = Mean()
scaledLogLikelihood = Minus()
Loop[0] --> Loop_LSTMoutput1.output -> 24 nodes
LSTMoutput1.dh LSTMoutput1.whh LSTMoutput1.wxxpbpwhh
LSTMoutput1.G4 LSTMoutput1.G3 LSTMoutput1.dc
LSTMoutput1.Wcfdc LSTMoutput1.unnamed165 LSTMoutput1.ft
LSTMoutput1.bft LSTMoutput1.G1 LSTMoutput1.Wcidc
LSTMoutput1.unnamed163 LSTMoutput1.it LSTMoutput1.G2
LSTMoutput1.unnamed164 LSTMoutput1.bit LSTMoutput1.ct
LSTMoutput1.Wcoct LSTMoutput1.unnamed166 LSTMoutput1.ot
LSTMoutput1.unnamed167 LSTMoutput1.mt LSTMoutput1.output
Loop[1] --> Loop_LSTMoutput2.output -> 24 nodes
LSTMoutput2.dh LSTMoutput2.whh LSTMoutput2.wxxpbpwhh
LSTMoutput2.G4 LSTMoutput2.G3 LSTMoutput2.dc
LSTMoutput2.Wcfdc LSTMoutput2.unnamed175 LSTMoutput2.ft
LSTMoutput2.bft LSTMoutput2.G1 LSTMoutput2.Wcidc
LSTMoutput2.unnamed173 LSTMoutput2.it LSTMoutput2.G2
LSTMoutput2.unnamed174 LSTMoutput2.bit LSTMoutput2.ct
LSTMoutput2.Wcoct LSTMoutput2.unnamed176 LSTMoutput2.ot
LSTMoutput2.unnamed177 LSTMoutput2.mt LSTMoutput2.output
Loop[2] --> Loop_LSTMoutput3.output -> 24 nodes
LSTMoutput3.dh LSTMoutput3.whh LSTMoutput3.wxxpbpwhh
LSTMoutput3.G4 LSTMoutput3.G3 LSTMoutput3.dc
LSTMoutput3.Wcfdc LSTMoutput3.unnamed185 LSTMoutput3.ft
LSTMoutput3.bft LSTMoutput3.G1 LSTMoutput3.Wcidc
LSTMoutput3.unnamed183 LSTMoutput3.it LSTMoutput3.G2
LSTMoutput3.unnamed184 LSTMoutput3.bit LSTMoutput3.ct
LSTMoutput3.Wcoct LSTMoutput3.unnamed186 LSTMoutput3.ot
LSTMoutput3.unnamed187 LSTMoutput3.mt LSTMoutput3.output
Validating network. 113 nodes to process in pass 1.
Validating --> labels = InputValue() : -> [132 x *]
Validating --> W = LearnableParameter() : -> [132 x 0]
Validating --> LSTMoutput3.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput3.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput2.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput1.wx = LearnableParameter() : -> [4096 x 0]
Validating --> features = InputValue() : -> [363 x *]
Validating --> featNorm.xMean = Mean (features) : [363 x *] -> [363]
Validating --> featNorm.xStdDev = InvStdDev (features) : [363 x *] -> [363]
Validating --> featNorm.xNorm = PerDimMeanVarNormalization (features, featNorm.xMean, featNorm.xStdDev) : [363 x *], [363], [363] -> [363 x *]
Node 'LSTMoutput1.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 363].
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.wxx = Times (LSTMoutput1.wx, featNorm.xNorm) : [4096 x 363], [363 x *] -> [4096 x *]
Validating --> LSTMoutput1.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput1.wxxpb = Plus (LSTMoutput1.wxx, LSTMoutput1.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput1.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput1.wxxpbpwhh = Plus (LSTMoutput1.wxxpb, LSTMoutput1.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput1.G4 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G3 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed165 = Plus (LSTMoutput1.G3, LSTMoutput1.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ft = Sigmoid (LSTMoutput1.unnamed165) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bft = ElementTimes (LSTMoutput1.ft, LSTMoutput1.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G1 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed163 = Plus (LSTMoutput1.G1, LSTMoutput1.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.it = Sigmoid (LSTMoutput1.unnamed163) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G2 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed164 = Tanh (LSTMoutput1.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bit = ElementTimes (LSTMoutput1.it, LSTMoutput1.unnamed164) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ct = Plus (LSTMoutput1.bft, LSTMoutput1.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcoct = DiagTimes (LSTMoutput1.Wco, LSTMoutput1.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed166 = Plus (LSTMoutput1.G4, LSTMoutput1.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ot = Sigmoid (LSTMoutput1.unnamed166) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed167 = Tanh (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.mt = ElementTimes (LSTMoutput1.ot, LSTMoutput1.unnamed167) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.output = Times (LSTMoutput1.Wmr, LSTMoutput1.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'LSTMoutput2.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=7, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.wxx = Times (LSTMoutput2.wx, LSTMoutput1.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput2.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput2.wxxpb = Plus (LSTMoutput2.wxx, LSTMoutput2.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput2.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=8, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput2.wxxpbpwhh = Plus (LSTMoutput2.wxxpb, LSTMoutput2.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput2.G4 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G3 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed175 = Plus (LSTMoutput2.G3, LSTMoutput2.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ft = Sigmoid (LSTMoutput2.unnamed175) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bft = ElementTimes (LSTMoutput2.ft, LSTMoutput2.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G1 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed173 = Plus (LSTMoutput2.G1, LSTMoutput2.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.it = Sigmoid (LSTMoutput2.unnamed173) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G2 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed174 = Tanh (LSTMoutput2.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bit = ElementTimes (LSTMoutput2.it, LSTMoutput2.unnamed174) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ct = Plus (LSTMoutput2.bft, LSTMoutput2.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcoct = DiagTimes (LSTMoutput2.Wco, LSTMoutput2.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed176 = Plus (LSTMoutput2.G4, LSTMoutput2.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ot = Sigmoid (LSTMoutput2.unnamed176) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed177 = Tanh (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.mt = ElementTimes (LSTMoutput2.ot, LSTMoutput2.unnamed177) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.output = Times (LSTMoutput2.Wmr, LSTMoutput2.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'LSTMoutput3.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=13, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.wxx = Times (LSTMoutput3.wx, LSTMoutput2.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput3.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput3.wxxpb = Plus (LSTMoutput3.wxx, LSTMoutput3.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput3.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput3.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=14, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput3.wxxpbpwhh = Plus (LSTMoutput3.wxxpb, LSTMoutput3.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput3.G4 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G3 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed185 = Plus (LSTMoutput3.G3, LSTMoutput3.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ft = Sigmoid (LSTMoutput3.unnamed185) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bft = ElementTimes (LSTMoutput3.ft, LSTMoutput3.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G1 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed183 = Plus (LSTMoutput3.G1, LSTMoutput3.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.it = Sigmoid (LSTMoutput3.unnamed183) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G2 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed184 = Tanh (LSTMoutput3.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bit = ElementTimes (LSTMoutput3.it, LSTMoutput3.unnamed184) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ct = Plus (LSTMoutput3.bft, LSTMoutput3.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcoct = DiagTimes (LSTMoutput3.Wco, LSTMoutput3.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed186 = Plus (LSTMoutput3.G4, LSTMoutput3.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ot = Sigmoid (LSTMoutput3.unnamed186) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed187 = Tanh (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.mt = ElementTimes (LSTMoutput3.ot, LSTMoutput3.unnamed187) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.output = Times (LSTMoutput3.Wmr, LSTMoutput3.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'W' (LearnableParameter operation): Tensor shape was inferred as [132 x 512 x 1].
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 512 x 1] <- uniform(seed=19, range=0.050000*1.000000, onCPU=false).
Validating --> unnamed193 = Times (W, LSTMoutput3.output) : [132 x 512 x 1], [512 x 1 x *] -> [132 x *]
Validating --> b = LearnableParameter() : -> [132 x 1]
Validating --> LSTMoutputW = Plus (unnamed193, b) : [132 x *], [132 x 1] -> [132 x 1 x *]
Validating --> ce = CrossEntropyWithSoftmax (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> err = ErrorPrediction (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> logPrior.prior = Mean (labels) : [132 x *] -> [132]
Validating --> logPrior.logPrior = Log (logPrior.prior) : [132] -> [132]
Validating --> scaledLogLikelihood = Minus (LSTMoutputW, logPrior.logPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
Validating network. 88 nodes to process in pass 2.
Validating --> LSTMoutput1.dh = PastValue (LSTMoutput1.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput1.dc = PastValue (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.dh = PastValue (LSTMoutput2.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput2.dc = PastValue (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.dh = PastValue (LSTMoutput3.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput3.dc = PastValue (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating network. 15 nodes to process in pass 3.
Validating network, final pass.
29 out of 113 nodes do not share the minibatch layout with the input data.
Post-processing network complete.
08/16/2016 10:02:01: Created model with 113 nodes on GPU 0.
08/16/2016 10:02:01: Training criterion node(s):
08/16/2016 10:02:01: ce = CrossEntropyWithSoftmax
08/16/2016 10:02:01: Evaluation criterion node(s):
08/16/2016 10:02:01: err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing: Out of 217 matrices, 125 are shared as 56 buffers, and 92 are not shared.
{ LSTMoutput2.mt : [1024 x 1 x *] (gradient)
LSTMoutput3.dh : [512 x 1 x *]
LSTMoutput3.wxx : [4096 x *] (gradient) }
{ LSTMoutput2.Wco : [1024] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] }
{ LSTMoutput1.wx : [4096 x 363] (gradient)
LSTMoutput1.wxxpb : [4096 x 1 x *] }
{ LSTMoutput1.Wmr : [512 x 1024] (gradient)
LSTMoutput2.wxx : [4096 x *] }
{ LSTMoutput2.wx : [4096 x 512 x 1] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] }
{ LSTMoutput1.ot : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] }
{ LSTMoutput1.ct : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput1.G4 : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] }
{ LSTMoutput1.unnamed164 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput1.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] }
{ LSTMoutput1.G1 : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] }
{ LSTMoutput1.Wci : [1024] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] }
{ LSTMoutput1.G3 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] }
{ LSTMoutput1.Wcf : [1024] (gradient)
LSTMoutput2.it : [1024 x 1 x *] }
{ LSTMoutput1.whh : [4096 x 1 x *] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] }
{ LSTMoutput1.b : [4096 x 1] (gradient)
LSTMoutput1.dh : [512 x 1 x *] (gradient)
LSTMoutput2.unnamed174 : [1024 x 1 x *] }
{ LSTMoutput2.Wmr : [512 x 1024] (gradient)
LSTMoutput3.wxx : [4096 x *] }
{ LSTMoutput3.wx : [4096 x 512 x 1] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] }
{ LSTMoutput2.ot : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] }
{ LSTMoutput2.ct : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput1.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] }
{ LSTMoutput2.unnamed174 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput1.unnamed166 : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] }
{ LSTMoutput1.dc : [1024 x 1 x *] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] }
{ LSTMoutput1.unnamed165 : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] }
{ LSTMoutput2.Wci : [1024] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] }
{ LSTMoutput2.G3 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] }
{ LSTMoutput1.it : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] }
{ LSTMoutput2.Wcf : [1024] (gradient)
LSTMoutput3.it : [1024 x 1 x *] }
{ LSTMoutput1.unnamed167 : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] }
{ LSTMoutput2.b : [4096 x 1] (gradient)
LSTMoutput2.dh : [512 x 1 x *] (gradient)
LSTMoutput3.unnamed184 : [1024 x 1 x *] }
{ LSTMoutput3.Wmr : [512 x 1024] (gradient)
unnamed193 : [132 x *] }
{ LSTMoutputW : [132 x 1 x *]
W : [132 x 512 x 1] (gradient) }
{ LSTMoutput3.output : [512 x 1 x *] (gradient)
LSTMoutputW : [132 x 1 x *] (gradient) }
{ LSTMoutput3.mt : [1024 x 1 x *] (gradient)
unnamed193 : [132 x *] (gradient) }
{ LSTMoutput2.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.ft : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.output : [512 x 1 x *] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput3.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.Wh : [4096 x 512] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed176 : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] (gradient) }
{ LSTMoutput1.bit : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.bft : [1024 x 1 x *] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.G2 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.unnamed163 : [1024 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.bft : [1024 x 1 x *] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed173 : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wh : [4096 x 512] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput2.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.output : [512 x 1 x *] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed177 : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] (gradient) }
{ LSTMoutput3.b : [4096 x 1] (gradient)
LSTMoutput3.dh : [512 x 1 x *] (gradient) }
{ LSTMoutput1.dh : [512 x 1 x *]
LSTMoutput1.wxx : [4096 x *] (gradient) }
{ LSTMoutput1.mt : [1024 x 1 x *] (gradient)
LSTMoutput2.dh : [512 x 1 x *]
LSTMoutput2.wxx : [4096 x *] (gradient) }
{ LSTMoutput1.Wco : [1024] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] }
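(A quick consistency check on the memory-sharing summary above, worked from the log's own numbers: 125 of the 217 matrices are aliased onto 56 shared buffers and 92 keep private storage, so 125 + 92 = 217 logical matrices map onto 56 + 92 = 148 physical allocations.)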
08/16/2016 10:02:01: Training 13634692 parameters in 23 out of 23 parameter tensors and 104 nodes with gradient:
08/16/2016 10:02:01: Node 'LSTMoutput1.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput1.Wci' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput1.Wco' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput1.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 10:02:01: Node 'LSTMoutput1.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 10:02:01: Node 'LSTMoutput1.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 10:02:01: Node 'LSTMoutput1.wx' (LearnableParameter operation) : [4096 x 363]
08/16/2016 10:02:01: Node 'LSTMoutput2.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput2.Wci' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput2.Wco' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput2.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 10:02:01: Node 'LSTMoutput2.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 10:02:01: Node 'LSTMoutput2.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 10:02:01: Node 'LSTMoutput2.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 10:02:01: Node 'LSTMoutput3.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput3.Wci' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput3.Wco' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput3.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 10:02:01: Node 'LSTMoutput3.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 10:02:01: Node 'LSTMoutput3.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 10:02:01: Node 'LSTMoutput3.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 10:02:01: Node 'W' (LearnableParameter operation) : [132 x 512 x 1]
08/16/2016 10:02:01: Node 'b' (LearnableParameter operation) : [132 x 1]
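(The 13,634,692 total above can be reproduced from the tensor shapes just listed; a minimal Python sketch, with shapes copied from the log and the helper name ours:

def lstmp_params(input_dim):
    # one projected LSTM layer, shapes as printed above
    return (3 * 1024            # Wci, Wcf, Wco peephole vectors
            + 4096 * input_dim  # wx: stacked gate input weights
            + 4096 * 512        # Wh: stacked gate recurrent weights
            + 512 * 1024        # Wmr: output projection
            + 4096)             # b: gate bias
total = lstmp_params(363) + 2 * lstmp_params(512) + 132 * 512 + 132
print(total)  # -> 13634692

Layer 1 reads the 363-dim features; layers 2 and 3 read the 512-dim projection of the layer below; the final W and b map that projection to the 132 labels.)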
08/16/2016 10:02:01: Precomputing --> 3 PreCompute nodes found.
08/16/2016 10:02:01: featNorm.xMean = Mean()
08/16/2016 10:02:01: featNorm.xStdDev = InvStdDev()
08/16/2016 10:02:01: logPrior.prior = Mean()
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
08/16/2016 10:02:02: Precomputing --> Completed.
08/16/2016 10:02:02: Starting Epoch 1: learning rate per sample = 0.001953 effective momentum = 0.000000 momentum as time constant = 0.0 samples
minibatchiterator: epoch 0: frames [0..64] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
08/16/2016 10:02:03: Starting minibatch loop.
08/16/2016 10:02:03: Epoch[ 1 of 1]-Minibatch[ 1- 10, 250.00%]: ce = 4.87453079 * 160; err = 0.90625000 * 160; time = 0.5069s; samplesPerSecond = 315.6
08/16/2016 10:02:03: Epoch[ 1 of 1]-Minibatch[ 11- 20, 500.00%]: ce = 4.84628143 * 160; err = 0.69375000 * 160; time = 0.4852s; samplesPerSecond = 329.8
08/16/2016 10:02:04: Finished Epoch[ 1 of 1]: [Training] ce = 4.85708837 * 418; err = 0.80382775 * 418; totalSamplesSeen = 418; learningRatePerSample = 0.001953125; epochTime=1.33633s
08/16/2016 10:02:04: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn'
08/16/2016 10:02:05: CNTKCommandTrainEnd: speechTrain
08/16/2016 10:02:05: Action "train" complete.
08/16/2016 10:02:05: __COMPLETED__
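(Two numbers above are easy to misread. The learning rate per sample, 0.001953125, matches learningRatesPerMB = 0.5 divided by an effective minibatch of 256 samples, i.e. minibatchSize = 16 frames times nbrUttsIneachRecurrentIter = 16 parallel utterances: 0.5 / 256 = 0.001953125; the exact convention is inferred from the numbers, not stated in the log. The progress percentages exceed 100% because epochSize = 64 is smaller than a single report's span: each 10-minibatch report covers 160 samples, and 160 / 64 = 250%.)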

View file

@ -1 +0,0 @@
__COMPLETED__

View file

@ -1 +0,0 @@
__COMPLETED__

View file

@ -0,0 +1,681 @@
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 268381192 kB
-------------------------------------------------------------------
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/LSTM-NDL.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
-------------------------------------------------------------------
Build info:
Built time: Aug 16 2016 03:09:16
Last modified date: Fri Aug 12 05:28:23 2016
Build type: Release
Build target: GPU
With 1bit-SGD: yes
Math lib: mkl
CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
CUB_PATH: c:\src\cub-1.4.1
CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
Build Branch: HEAD
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by svcphil on Philly-Pool1
Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
-------------------------------------------------------------------
Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
08/16/2016 03:20:22: -------------------------------------------------------------------
08/16/2016 03:20:22: Build info:
08/16/2016 03:20:22: Built time: Aug 16 2016 03:09:16
08/16/2016 03:20:22: Last modified date: Fri Aug 12 05:28:23 2016
08/16/2016 03:20:22: Build type: Release
08/16/2016 03:20:22: Build target: GPU
08/16/2016 03:20:22: With 1bit-SGD: yes
08/16/2016 03:20:22: Math lib: mkl
08/16/2016 03:20:22: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
08/16/2016 03:20:22: CUB_PATH: c:\src\cub-1.4.1
08/16/2016 03:20:22: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
08/16/2016 03:20:22: Build Branch: HEAD
08/16/2016 03:20:22: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
08/16/2016 03:20:22: Built by svcphil on Philly-Pool1
08/16/2016 03:20:22: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
08/16/2016 03:20:22: -------------------------------------------------------------------
08/16/2016 03:20:23: -------------------------------------------------------------------
08/16/2016 03:20:23: GPU info:
08/16/2016 03:20:23: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:23: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:23: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:23: -------------------------------------------------------------------
08/16/2016 03:20:23: Running on DPHAIM-25 at 2016/08/16 03:20:23
08/16/2016 03:20:23: Command line:
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/LSTM-NDL.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
08/16/2016 03:20:23: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
08/16/2016 03:20:23: RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "$ModelDir$/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "$ConfigDir$/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "$DataDir$/glob_0000.scp"
]
labels = [
mlfFile = "$DataDir$/glob_0000.mlf"
labelMappingFile = "$DataDir$/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
DeviceId=-1
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 03:20:23: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:23: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 03:20:23: RootDir = ".."
ConfigDir = "../Config"
DataDir = "../Data"
OutputDir = "../Output"
ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp"
]
labels = [
mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf"
labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
DeviceId=-1
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 03:20:23: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:23: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: LSTM-NDL.cntk:command=speechTrain
configparameters: LSTM-NDL.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
configparameters: LSTM-NDL.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
configparameters: LSTM-NDL.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
configparameters: LSTM-NDL.cntk:deviceId=-1
configparameters: LSTM-NDL.cntk:frameMode=false
configparameters: LSTM-NDL.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models
configparameters: LSTM-NDL.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn
configparameters: LSTM-NDL.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
configparameters: LSTM-NDL.cntk:parallelTrain=false
configparameters: LSTM-NDL.cntk:precision=float
configparameters: LSTM-NDL.cntk:RootDir=..
configparameters: LSTM-NDL.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
configparameters: LSTM-NDL.cntk:speechTrain=[
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp"
]
labels = [
mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf"
labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list"
labelDim = 132
labelType = "category"
]
]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=64]] [reader=[useMersenneTwisterRand=true]]
configparameters: LSTM-NDL.cntk:timestamping=true
configparameters: LSTM-NDL.cntk:traceLevel=1
configparameters: LSTM-NDL.cntk:truncated=true
08/16/2016 03:20:23: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
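(Note how the bracketed command-line arguments survive at the tail of the speechTrain block above: [SGD=[maxEpochs=1]] [SGD=[epochSize=64]] [reader=[useMersenneTwisterRand=true]]. They layer on top of the values from LSTM-NDL.cntk, so although the file sets maxEpochs = 4, this run trains a single epoch, as "CNTKCommandTrainInfo: speechTrain : 1" below confirms. A rough, purely illustrative Python sketch of that layering:

base = {"SGD": {"maxEpochs": 4, "epochSize": 0}}
for override in ({"SGD": {"maxEpochs": 1}}, {"SGD": {"epochSize": 64}}):
    for key, block in override.items():
        base.setdefault(key, {}).update(block)
print(base["SGD"])  # {'maxEpochs': 1, 'epochSize': 64}
)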
08/16/2016 03:20:23: Commands: speechTrain
08/16/2016 03:20:23: Precision = "float"
08/16/2016 03:20:23: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn
08/16/2016 03:20:23: CNTKCommandTrainInfo: speechTrain : 1
08/16/2016 03:20:23: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
08/16/2016 03:20:23: ##############################################################################
08/16/2016 03:20:23: # #
08/16/2016 03:20:23: # Action "train" #
08/16/2016 03:20:23: # #
08/16/2016 03:20:23: ##############################################################################
08/16/2016 03:20:23: CNTKCommandTrainBegin: speechTrain
NDLBuilder Using CPU
reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp ... 948 entries
total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list
htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf ... total 948 entries
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
label set 0: 129 classes
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
useParallelTrain option is not enabled. ParallelTrain config will be ignored.
08/16/2016 03:20:24: Creating virgin network.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=4, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=5, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=6, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=9, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=10, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=11, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=12, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=15, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=16, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=17, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=18, range=0.050000*1.000000, onCPU=false).
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 0] as uniform later when dimensions are fully known.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Post-processing network...
6 roots:
ce = CrossEntropyWithSoftmax()
err = ErrorPrediction()
featNorm.xMean = Mean()
featNorm.xStdDev = InvStdDev()
logPrior.prior = Mean()
scaledLogLikelihood = Minus()
Loop[0] --> Loop_LSTMoutput1.output -> 24 nodes
LSTMoutput1.dh LSTMoutput1.whh LSTMoutput1.wxxpbpwhh
LSTMoutput1.G4 LSTMoutput1.G3 LSTMoutput1.dc
LSTMoutput1.Wcfdc LSTMoutput1.unnamed165 LSTMoutput1.ft
LSTMoutput1.bft LSTMoutput1.G1 LSTMoutput1.Wcidc
LSTMoutput1.unnamed163 LSTMoutput1.it LSTMoutput1.G2
LSTMoutput1.unnamed164 LSTMoutput1.bit LSTMoutput1.ct
LSTMoutput1.Wcoct LSTMoutput1.unnamed166 LSTMoutput1.ot
LSTMoutput1.unnamed167 LSTMoutput1.mt LSTMoutput1.output
Loop[1] --> Loop_LSTMoutput2.output -> 24 nodes
LSTMoutput2.dh LSTMoutput2.whh LSTMoutput2.wxxpbpwhh
LSTMoutput2.G4 LSTMoutput2.G3 LSTMoutput2.dc
LSTMoutput2.Wcfdc LSTMoutput2.unnamed175 LSTMoutput2.ft
LSTMoutput2.bft LSTMoutput2.G1 LSTMoutput2.Wcidc
LSTMoutput2.unnamed173 LSTMoutput2.it LSTMoutput2.G2
LSTMoutput2.unnamed174 LSTMoutput2.bit LSTMoutput2.ct
LSTMoutput2.Wcoct LSTMoutput2.unnamed176 LSTMoutput2.ot
LSTMoutput2.unnamed177 LSTMoutput2.mt LSTMoutput2.output
Loop[2] --> Loop_LSTMoutput3.output -> 24 nodes
LSTMoutput3.dh LSTMoutput3.whh LSTMoutput3.wxxpbpwhh
LSTMoutput3.G4 LSTMoutput3.G3 LSTMoutput3.dc
LSTMoutput3.Wcfdc LSTMoutput3.unnamed185 LSTMoutput3.ft
LSTMoutput3.bft LSTMoutput3.G1 LSTMoutput3.Wcidc
LSTMoutput3.unnamed183 LSTMoutput3.it LSTMoutput3.G2
LSTMoutput3.unnamed184 LSTMoutput3.bit LSTMoutput3.ct
LSTMoutput3.Wcoct LSTMoutput3.unnamed186 LSTMoutput3.ot
LSTMoutput3.unnamed187 LSTMoutput3.mt LSTMoutput3.output
Validating network. 113 nodes to process in pass 1.
Validating --> labels = InputValue() : -> [132 x *]
Validating --> W = LearnableParameter() : -> [132 x 0]
Validating --> LSTMoutput3.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput3.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput2.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput1.wx = LearnableParameter() : -> [4096 x 0]
Validating --> features = InputValue() : -> [363 x *]
Validating --> featNorm.xMean = Mean (features) : [363 x *] -> [363]
Validating --> featNorm.xStdDev = InvStdDev (features) : [363 x *] -> [363]
Validating --> featNorm.xNorm = PerDimMeanVarNormalization (features, featNorm.xMean, featNorm.xStdDev) : [363 x *], [363], [363] -> [363 x *]
Node 'LSTMoutput1.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 363].
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.wxx = Times (LSTMoutput1.wx, featNorm.xNorm) : [4096 x 363], [363 x *] -> [4096 x *]
Validating --> LSTMoutput1.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput1.wxxpb = Plus (LSTMoutput1.wxx, LSTMoutput1.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput1.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput1.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput1.wxxpbpwhh = Plus (LSTMoutput1.wxxpb, LSTMoutput1.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput1.G4 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G3 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed165 = Plus (LSTMoutput1.G3, LSTMoutput1.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ft = Sigmoid (LSTMoutput1.unnamed165) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bft = ElementTimes (LSTMoutput1.ft, LSTMoutput1.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G1 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed163 = Plus (LSTMoutput1.G1, LSTMoutput1.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.it = Sigmoid (LSTMoutput1.unnamed163) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G2 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed164 = Tanh (LSTMoutput1.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bit = ElementTimes (LSTMoutput1.it, LSTMoutput1.unnamed164) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ct = Plus (LSTMoutput1.bft, LSTMoutput1.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcoct = DiagTimes (LSTMoutput1.Wco, LSTMoutput1.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed166 = Plus (LSTMoutput1.G4, LSTMoutput1.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ot = Sigmoid (LSTMoutput1.unnamed166) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed167 = Tanh (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.mt = ElementTimes (LSTMoutput1.ot, LSTMoutput1.unnamed167) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.output = Times (LSTMoutput1.Wmr, LSTMoutput1.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'LSTMoutput2.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=7, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.wxx = Times (LSTMoutput2.wx, LSTMoutput1.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput2.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput2.wxxpb = Plus (LSTMoutput2.wxx, LSTMoutput2.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput2.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput2.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=8, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput2.wxxpbpwhh = Plus (LSTMoutput2.wxxpb, LSTMoutput2.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput2.G4 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G3 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed175 = Plus (LSTMoutput2.G3, LSTMoutput2.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ft = Sigmoid (LSTMoutput2.unnamed175) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bft = ElementTimes (LSTMoutput2.ft, LSTMoutput2.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G1 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed173 = Plus (LSTMoutput2.G1, LSTMoutput2.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.it = Sigmoid (LSTMoutput2.unnamed173) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G2 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed174 = Tanh (LSTMoutput2.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bit = ElementTimes (LSTMoutput2.it, LSTMoutput2.unnamed174) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ct = Plus (LSTMoutput2.bft, LSTMoutput2.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcoct = DiagTimes (LSTMoutput2.Wco, LSTMoutput2.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed176 = Plus (LSTMoutput2.G4, LSTMoutput2.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ot = Sigmoid (LSTMoutput2.unnamed176) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed177 = Tanh (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.mt = ElementTimes (LSTMoutput2.ot, LSTMoutput2.unnamed177) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.output = Times (LSTMoutput2.Wmr, LSTMoutput2.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'LSTMoutput3.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=13, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.wxx = Times (LSTMoutput3.wx, LSTMoutput2.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput3.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput3.wxxpb = Plus (LSTMoutput3.wxx, LSTMoutput3.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput3.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput3.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput3.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=14, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput3.wxxpbpwhh = Plus (LSTMoutput3.wxxpb, LSTMoutput3.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput3.G4 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G3 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed185 = Plus (LSTMoutput3.G3, LSTMoutput3.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ft = Sigmoid (LSTMoutput3.unnamed185) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bft = ElementTimes (LSTMoutput3.ft, LSTMoutput3.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G1 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed183 = Plus (LSTMoutput3.G1, LSTMoutput3.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.it = Sigmoid (LSTMoutput3.unnamed183) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G2 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed184 = Tanh (LSTMoutput3.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bit = ElementTimes (LSTMoutput3.it, LSTMoutput3.unnamed184) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ct = Plus (LSTMoutput3.bft, LSTMoutput3.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcoct = DiagTimes (LSTMoutput3.Wco, LSTMoutput3.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed186 = Plus (LSTMoutput3.G4, LSTMoutput3.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ot = Sigmoid (LSTMoutput3.unnamed186) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed187 = Tanh (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.mt = ElementTimes (LSTMoutput3.ot, LSTMoutput3.unnamed187) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.output = Times (LSTMoutput3.Wmr, LSTMoutput3.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'W' (LearnableParameter operation) operation: Tensor shape was inferred as [132 x 512 x 1].
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 512 x 1] <- uniform(seed=19, range=0.050000*1.000000, onCPU=false).
Validating --> unnamed193 = Times (W, LSTMoutput3.output) : [132 x 512 x 1], [512 x 1 x *] -> [132 x *]
Validating --> b = LearnableParameter() : -> [132 x 1]
Validating --> LSTMoutputW = Plus (unnamed193, b) : [132 x *], [132 x 1] -> [132 x 1 x *]
Validating --> ce = CrossEntropyWithSoftmax (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> err = ErrorPrediction (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> logPrior.prior = Mean (labels) : [132 x *] -> [132]
Validating --> logPrior.logPrior = Log (logPrior.prior) : [132] -> [132]
Validating --> scaledLogLikelihood = Minus (LSTMoutputW, logPrior.logPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
Validating network. 88 nodes to process in pass 2.
Validating --> LSTMoutput1.dh = PastValue (LSTMoutput1.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput1.dc = PastValue (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.dh = PastValue (LSTMoutput2.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput2.dc = PastValue (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.dh = PastValue (LSTMoutput3.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput3.dc = PastValue (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
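(Taken together, the validation lines above spell out the recurrence of a projected LSTM with peephole connections: the stacked gate pre-activation wxxpbpwhh is sliced into G1..G4, the input and forget gates peek at the previous cell state dc, the output gate peeks at the fresh cell state ct, and Wmr projects the 1024-dim cell output down to 512. A minimal numpy sketch of one step under that reading; names mirror the log, vectors are flattened, and this is an illustration rather than CNTK's implementation:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstmp_step(x, dh, dc, wx, b, Wh, Wci, Wcf, Wco, Wmr):
    g = wx @ x + b + Wh @ dh            # wxxpbpwhh: [4096]
    G1, G2, G3, G4 = np.split(g, 4)     # four 1024-dim gate slices
    it = sigmoid(G1 + Wci * dc)         # input gate (unnamed163)
    ft = sigmoid(G3 + Wcf * dc)         # forget gate (unnamed165)
    ct = ft * dc + it * np.tanh(G2)     # bft + bit
    ot = sigmoid(G4 + Wco * ct)         # output gate (unnamed166)
    mt = ot * np.tanh(ct)               # cell output
    return Wmr @ mt, ct                 # projected output [512], new cell [1024]

dh and dc are the previous step's output and ct, i.e. the PastValue nodes resolved in pass 2 above.)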
Validating network. 15 nodes to process in pass 3.
Validating network, final pass.
29 out of 113 nodes do not share the minibatch layout with the input data.
Post-processing network complete.
08/16/2016 03:20:24: Created model with 113 nodes on CPU.
08/16/2016 03:20:24: Training criterion node(s):
08/16/2016 03:20:24: ce = CrossEntropyWithSoftmax
08/16/2016 03:20:24: Evaluation criterion node(s):
08/16/2016 03:20:24: err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing: Out of 217 matrices, 125 are shared as 56, and 92 are not shared.
{ LSTMoutput1.dh : [512 x 1 x *]
LSTMoutput1.wxx : [4096 x *] (gradient) }
{ LSTMoutput2.mt : [1024 x 1 x *] (gradient)
LSTMoutput3.dh : [512 x 1 x *]
LSTMoutput3.wxx : [4096 x *] (gradient) }
{ LSTMoutput2.Wco : [1024] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] }
{ LSTMoutput1.mt : [1024 x 1 x *] (gradient)
LSTMoutput2.dh : [512 x 1 x *]
LSTMoutput2.wxx : [4096 x *] (gradient) }
{ LSTMoutput1.Wco : [1024] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] }
{ LSTMoutput1.G3 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] }
{ LSTMoutput1.unnamed164 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput1.Wci : [1024] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] }
{ LSTMoutput1.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] }
{ LSTMoutput2.Wcf : [1024] (gradient)
LSTMoutput3.it : [1024 x 1 x *] }
{ LSTMoutput1.ct : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput3.wx : [4096 x 512 x 1] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] }
{ LSTMoutput1.Wmr : [512 x 1024] (gradient)
LSTMoutput2.wxx : [4096 x *] }
{ LSTMoutput1.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] }
{ LSTMoutput1.Wcf : [1024] (gradient)
LSTMoutput2.it : [1024 x 1 x *] }
{ LSTMoutput2.unnamed174 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput1.G1 : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] }
{ LSTMoutput1.dc : [1024 x 1 x *] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] }
{ LSTMoutput1.unnamed165 : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] }
{ LSTMoutput2.G3 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] }
{ LSTMoutput1.ot : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] }
{ LSTMoutput2.ot : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] }
{ LSTMoutput2.ct : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput1.whh : [4096 x 1 x *] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] }
{ LSTMoutput2.wx : [4096 x 512 x 1] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] }
{ LSTMoutput1.b : [4096 x 1] (gradient)
LSTMoutput1.dh : [512 x 1 x *] (gradient)
LSTMoutput2.unnamed174 : [1024 x 1 x *] }
{ LSTMoutput1.unnamed166 : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] }
{ LSTMoutput2.Wci : [1024] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] }
{ LSTMoutput1.it : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] }
{ LSTMoutput1.unnamed167 : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] }
{ LSTMoutput2.Wmr : [512 x 1024] (gradient)
LSTMoutput3.wxx : [4096 x *] }
{ LSTMoutput2.b : [4096 x 1] (gradient)
LSTMoutput2.dh : [512 x 1 x *] (gradient)
LSTMoutput3.unnamed184 : [1024 x 1 x *] }
{ LSTMoutput1.G4 : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] }
{ LSTMoutput2.unnamed176 : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] (gradient) }
{ LSTMoutput1.bit : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wh : [4096 x 512] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput2.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed177 : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] (gradient) }
{ LSTMoutput3.output : [512 x 1 x *] (gradient)
LSTMoutputW : [132 x 1 x *] (gradient) }
{ LSTMoutput2.bft : [1024 x 1 x *] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.Wh : [4096 x 512] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.bft : [1024 x 1 x *] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.unnamed163 : [1024 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed173 : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.output : [512 x 1 x *] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] (gradient) }
{ LSTMoutput3.b : [4096 x 1] (gradient)
LSTMoutput3.dh : [512 x 1 x *] (gradient) }
{ LSTMoutput2.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.G2 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] (gradient) }
{ LSTMoutput3.Wmr : [512 x 1024] (gradient)
unnamed193 : [132 x *] }
{ LSTMoutput1.output : [512 x 1 x *] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput3.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput3.mt : [1024 x 1 x *] (gradient)
unnamed193 : [132 x *] (gradient) }
{ LSTMoutput1.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutputW : [132 x 1 x *]
W : [132 x 512 x 1] (gradient) }
{ LSTMoutput1.ft : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.wx : [4096 x 363] (gradient)
LSTMoutput1.wxxpb : [4096 x 1 x *] }
08/16/2016 03:20:24: Training 13634692 parameters in 23 out of 23 parameter tensors and 104 nodes with gradient:
08/16/2016 03:20:24: Node 'LSTMoutput1.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput1.Wci' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput1.Wco' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput1.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 03:20:24: Node 'LSTMoutput1.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 03:20:24: Node 'LSTMoutput1.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 03:20:24: Node 'LSTMoutput1.wx' (LearnableParameter operation) : [4096 x 363]
08/16/2016 03:20:24: Node 'LSTMoutput2.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput2.Wci' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput2.Wco' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput2.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 03:20:24: Node 'LSTMoutput2.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 03:20:24: Node 'LSTMoutput2.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 03:20:24: Node 'LSTMoutput2.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 03:20:24: Node 'LSTMoutput3.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput3.Wci' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput3.Wco' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput3.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 03:20:24: Node 'LSTMoutput3.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 03:20:24: Node 'LSTMoutput3.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 03:20:24: Node 'LSTMoutput3.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 03:20:24: Node 'W' (LearnableParameter operation) : [132 x 512 x 1]
08/16/2016 03:20:24: Node 'b' (LearnableParameter operation) : [132 x 1]
08/16/2016 03:20:24: Precomputing --> 3 PreCompute nodes found.
08/16/2016 03:20:24: featNorm.xMean = Mean()
08/16/2016 03:20:24: featNorm.xStdDev = InvStdDev()
08/16/2016 03:20:24: logPrior.prior = Mean()
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
08/16/2016 03:20:27: Precomputing --> Completed.
08/16/2016 03:20:28: Starting Epoch 1: learning rate per sample = 0.001953 effective momentum = 0.000000 momentum as time constant = 0.0 samples
minibatchiterator: epoch 0: frames [0..64] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
08/16/2016 03:20:28: Starting minibatch loop.
08/16/2016 03:20:31: Epoch[ 1 of 1]-Minibatch[ 1- 10, 250.00%]: ce = 4.87950134 * 160; err = 0.90625000 * 160; time = 3.6415s; samplesPerSecond = 43.9
08/16/2016 03:20:35: Epoch[ 1 of 1]-Minibatch[ 11- 20, 500.00%]: ce = 4.84555817 * 160; err = 0.69375000 * 160; time = 3.6742s; samplesPerSecond = 43.5
08/16/2016 03:20:38: Finished Epoch[ 1 of 1]: [Training] ce = 4.85900003 * 418; err = 0.80382775 * 418; totalSamplesSeen = 418; learningRatePerSample = 0.001953125; epochTime=9.76851s
08/16/2016 03:20:38: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn'
08/16/2016 03:20:39: CNTKCommandTrainEnd: speechTrain
08/16/2016 03:20:39: Action "train" complete.
08/16/2016 03:20:39: __COMPLETED__

View file

@ -0,0 +1,682 @@
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 268381192 kB
-------------------------------------------------------------------
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/LSTM-NDL.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
-------------------------------------------------------------------
Build info:
Built time: Aug 16 2016 03:09:16
Last modified date: Fri Aug 12 05:28:23 2016
Build type: Release
Build target: GPU
With 1bit-SGD: yes
Math lib: mkl
CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
CUB_PATH: c:\src\cub-1.4.1
CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
Build Branch: HEAD
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by svcphil on Philly-Pool1
Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
-------------------------------------------------------------------
Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
08/16/2016 03:20:41: -------------------------------------------------------------------
08/16/2016 03:20:41: Build info:
08/16/2016 03:20:41: Built time: Aug 16 2016 03:09:16
08/16/2016 03:20:41: Last modified date: Fri Aug 12 05:28:23 2016
08/16/2016 03:20:41: Build type: Release
08/16/2016 03:20:41: Build target: GPU
08/16/2016 03:20:41: With 1bit-SGD: yes
08/16/2016 03:20:41: Math lib: mkl
08/16/2016 03:20:41: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
08/16/2016 03:20:41: CUB_PATH: c:\src\cub-1.4.1
08/16/2016 03:20:41: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
08/16/2016 03:20:41: Build Branch: HEAD
08/16/2016 03:20:41: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
08/16/2016 03:20:41: Built by svcphil on Philly-Pool1
08/16/2016 03:20:41: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
08/16/2016 03:20:41: -------------------------------------------------------------------
08/16/2016 03:20:43: -------------------------------------------------------------------
08/16/2016 03:20:43: GPU info:
08/16/2016 03:20:43: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:43: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:43: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:43: -------------------------------------------------------------------
08/16/2016 03:20:43: Running on DPHAIM-25 at 2016/08/16 03:20:43
08/16/2016 03:20:43: Command line:
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/LSTM-NDL.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
08/16/2016 03:20:43: RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "$ModelDir$/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "$ConfigDir$/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "$DataDir$/glob_0000.scp"
]
labels = [
mlfFile = "$DataDir$/glob_0000.mlf"
labelMappingFile = "$DataDir$/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
DeviceId=0
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 03:20:43: RootDir = ".."
ConfigDir = "../Config"
DataDir = "../Data"
OutputDir = "../Output"
ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp"
]
labels = [
mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf"
labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
DeviceId=0
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: LSTM-NDL.cntk:command=speechTrain
configparameters: LSTM-NDL.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
configparameters: LSTM-NDL.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
configparameters: LSTM-NDL.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
configparameters: LSTM-NDL.cntk:deviceId=0
configparameters: LSTM-NDL.cntk:frameMode=false
configparameters: LSTM-NDL.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models
configparameters: LSTM-NDL.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn
configparameters: LSTM-NDL.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
configparameters: LSTM-NDL.cntk:parallelTrain=false
configparameters: LSTM-NDL.cntk:precision=float
configparameters: LSTM-NDL.cntk:RootDir=..
configparameters: LSTM-NDL.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
configparameters: LSTM-NDL.cntk:speechTrain=[
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp"
]
labels = [
mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf"
labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list"
labelDim = 132
labelType = "category"
]
]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=64]] [reader=[useMersenneTwisterRand=true]]
configparameters: LSTM-NDL.cntk:timestamping=true
configparameters: LSTM-NDL.cntk:traceLevel=1
configparameters: LSTM-NDL.cntk:truncated=true
08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
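The three dumps above illustrate how CNTK layers its configuration: $Name$ macros are resolved textually (note how ModelDir ends up under the RunDir passed on the command line, because command-line definitions take precedence over those in the file), and command-line overrides such as speechTrain=[SGD=[maxEpochs=1]] are appended to the block they name, as visible at the end of the processed speechTrain section. A minimal Python sketch of the macro-resolution step — an illustration of the behavior observed in this log, not CNTK's actual parser:

    import re

    def resolve(text, variables):
        # Repeatedly substitute $Name$ references; a macro may expand to a
        # string that itself contains macros (OutputDir -> $RootDir$/Output).
        pattern = re.compile(r"\$(\w+)\$")
        while True:
            expanded = pattern.sub(lambda m: variables[m.group(1)], text)
            if expanded == text:
                return expanded
            text = expanded

    variables = {"RootDir": "..", "OutputDir": "$RootDir$/Output"}
    print(resolve("$OutputDir$/Models", variables))   # -> ../Output/Models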
08/16/2016 03:20:43: Commands: speechTrain
08/16/2016 03:20:43: Precision = "float"
08/16/2016 03:20:43: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn
08/16/2016 03:20:43: CNTKCommandTrainInfo: speechTrain : 1
08/16/2016 03:20:43: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
08/16/2016 03:20:43: ##############################################################################
08/16/2016 03:20:43: # #
08/16/2016 03:20:43: # Action "train" #
08/16/2016 03:20:43: # #
08/16/2016 03:20:43: ##############################################################################
08/16/2016 03:20:43: CNTKCommandTrainBegin: speechTrain
NDLBuilder Using GPU 0
reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp ... 948 entries
total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list
htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf ... total 948 entries
feature set 0: 252734 frames in 948 out of 948 utterances
label set 0: 129 classes
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
useParallelTrain option is not enabled. ParallelTrain config will be ignored.
08/16/2016 03:20:43: Creating virgin network.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
Microsoft::MSR::CNTK::GPUMatrix<ElemType>::SetUniformRandomValue (GPU): creating curand object with seed 3, sizeof(ElemType)==4
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=4, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=5, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=6, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=9, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=10, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=11, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=12, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=15, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=16, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=17, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=18, range=0.050000*1.000000, onCPU=false).
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 0] as uniform later when dimensions are fully known.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Post-processing network...
6 roots:
ce = CrossEntropyWithSoftmax()
err = ErrorPrediction()
featNorm.xMean = Mean()
featNorm.xStdDev = InvStdDev()
logPrior.prior = Mean()
scaledLogLikelihood = Minus()
Loop[0] --> Loop_LSTMoutput1.output -> 24 nodes
LSTMoutput1.dh LSTMoutput1.whh LSTMoutput1.wxxpbpwhh
LSTMoutput1.G4 LSTMoutput1.G3 LSTMoutput1.dc
LSTMoutput1.Wcfdc LSTMoutput1.unnamed165 LSTMoutput1.ft
LSTMoutput1.bft LSTMoutput1.G1 LSTMoutput1.Wcidc
LSTMoutput1.unnamed163 LSTMoutput1.it LSTMoutput1.G2
LSTMoutput1.unnamed164 LSTMoutput1.bit LSTMoutput1.ct
LSTMoutput1.Wcoct LSTMoutput1.unnamed166 LSTMoutput1.ot
LSTMoutput1.unnamed167 LSTMoutput1.mt LSTMoutput1.output
Loop[1] --> Loop_LSTMoutput2.output -> 24 nodes
LSTMoutput2.dh LSTMoutput2.whh LSTMoutput2.wxxpbpwhh
LSTMoutput2.G4 LSTMoutput2.G3 LSTMoutput2.dc
LSTMoutput2.Wcfdc LSTMoutput2.unnamed175 LSTMoutput2.ft
LSTMoutput2.bft LSTMoutput2.G1 LSTMoutput2.Wcidc
LSTMoutput2.unnamed173 LSTMoutput2.it LSTMoutput2.G2
LSTMoutput2.unnamed174 LSTMoutput2.bit LSTMoutput2.ct
LSTMoutput2.Wcoct LSTMoutput2.unnamed176 LSTMoutput2.ot
LSTMoutput2.unnamed177 LSTMoutput2.mt LSTMoutput2.output
Loop[2] --> Loop_LSTMoutput3.output -> 24 nodes
LSTMoutput3.dh LSTMoutput3.whh LSTMoutput3.wxxpbpwhh
LSTMoutput3.G4 LSTMoutput3.G3 LSTMoutput3.dc
LSTMoutput3.Wcfdc LSTMoutput3.unnamed185 LSTMoutput3.ft
LSTMoutput3.bft LSTMoutput3.G1 LSTMoutput3.Wcidc
LSTMoutput3.unnamed183 LSTMoutput3.it LSTMoutput3.G2
LSTMoutput3.unnamed184 LSTMoutput3.bit LSTMoutput3.ct
LSTMoutput3.Wcoct LSTMoutput3.unnamed186 LSTMoutput3.ot
LSTMoutput3.unnamed187 LSTMoutput3.mt LSTMoutput3.output
Validating network. 113 nodes to process in pass 1.
Validating --> labels = InputValue() : -> [132 x *]
Validating --> W = LearnableParameter() : -> [132 x 0]
Validating --> LSTMoutput3.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput3.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput2.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput1.wx = LearnableParameter() : -> [4096 x 0]
Validating --> features = InputValue() : -> [363 x *]
Validating --> featNorm.xMean = Mean (features) : [363 x *] -> [363]
Validating --> featNorm.xStdDev = InvStdDev (features) : [363 x *] -> [363]
Validating --> featNorm.xNorm = PerDimMeanVarNormalization (features, featNorm.xMean, featNorm.xStdDev) : [363 x *], [363], [363] -> [363 x *]
Node 'LSTMoutput1.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 363].
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.wxx = Times (LSTMoutput1.wx, featNorm.xNorm) : [4096 x 363], [363 x *] -> [4096 x *]
Validating --> LSTMoutput1.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput1.wxxpb = Plus (LSTMoutput1.wxx, LSTMoutput1.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput1.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput1.wxxpbpwhh = Plus (LSTMoutput1.wxxpb, LSTMoutput1.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput1.G4 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G3 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed165 = Plus (LSTMoutput1.G3, LSTMoutput1.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ft = Sigmoid (LSTMoutput1.unnamed165) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bft = ElementTimes (LSTMoutput1.ft, LSTMoutput1.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G1 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed163 = Plus (LSTMoutput1.G1, LSTMoutput1.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.it = Sigmoid (LSTMoutput1.unnamed163) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G2 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed164 = Tanh (LSTMoutput1.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bit = ElementTimes (LSTMoutput1.it, LSTMoutput1.unnamed164) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ct = Plus (LSTMoutput1.bft, LSTMoutput1.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcoct = DiagTimes (LSTMoutput1.Wco, LSTMoutput1.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed166 = Plus (LSTMoutput1.G4, LSTMoutput1.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ot = Sigmoid (LSTMoutput1.unnamed166) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed167 = Tanh (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.mt = ElementTimes (LSTMoutput1.ot, LSTMoutput1.unnamed167) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.output = Times (LSTMoutput1.Wmr, LSTMoutput1.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'LSTMoutput2.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=7, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.wxx = Times (LSTMoutput2.wx, LSTMoutput1.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput2.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput2.wxxpb = Plus (LSTMoutput2.wxx, LSTMoutput2.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput2.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=8, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput2.wxxpbpwhh = Plus (LSTMoutput2.wxxpb, LSTMoutput2.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput2.G4 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G3 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed175 = Plus (LSTMoutput2.G3, LSTMoutput2.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ft = Sigmoid (LSTMoutput2.unnamed175) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bft = ElementTimes (LSTMoutput2.ft, LSTMoutput2.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G1 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed173 = Plus (LSTMoutput2.G1, LSTMoutput2.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.it = Sigmoid (LSTMoutput2.unnamed173) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G2 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed174 = Tanh (LSTMoutput2.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bit = ElementTimes (LSTMoutput2.it, LSTMoutput2.unnamed174) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ct = Plus (LSTMoutput2.bft, LSTMoutput2.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcoct = DiagTimes (LSTMoutput2.Wco, LSTMoutput2.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed176 = Plus (LSTMoutput2.G4, LSTMoutput2.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ot = Sigmoid (LSTMoutput2.unnamed176) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed177 = Tanh (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.mt = ElementTimes (LSTMoutput2.ot, LSTMoutput2.unnamed177) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.output = Times (LSTMoutput2.Wmr, LSTMoutput2.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'LSTMoutput3.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=13, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.wxx = Times (LSTMoutput3.wx, LSTMoutput2.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput3.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput3.wxxpb = Plus (LSTMoutput3.wxx, LSTMoutput3.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput3.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput3.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=14, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput3.wxxpbpwhh = Plus (LSTMoutput3.wxxpb, LSTMoutput3.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput3.G4 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G3 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed185 = Plus (LSTMoutput3.G3, LSTMoutput3.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ft = Sigmoid (LSTMoutput3.unnamed185) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bft = ElementTimes (LSTMoutput3.ft, LSTMoutput3.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G1 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed183 = Plus (LSTMoutput3.G1, LSTMoutput3.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.it = Sigmoid (LSTMoutput3.unnamed183) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G2 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed184 = Tanh (LSTMoutput3.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bit = ElementTimes (LSTMoutput3.it, LSTMoutput3.unnamed184) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ct = Plus (LSTMoutput3.bft, LSTMoutput3.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcoct = DiagTimes (LSTMoutput3.Wco, LSTMoutput3.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed186 = Plus (LSTMoutput3.G4, LSTMoutput3.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ot = Sigmoid (LSTMoutput3.unnamed186) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed187 = Tanh (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.mt = ElementTimes (LSTMoutput3.ot, LSTMoutput3.unnamed187) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.output = Times (LSTMoutput3.Wmr, LSTMoutput3.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'W' (LearnableParameter operation): Tensor shape was inferred as [132 x 512 x 1].
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 512 x 1] <- uniform(seed=19, range=0.050000*1.000000, onCPU=false).
Validating --> unnamed193 = Times (W, LSTMoutput3.output) : [132 x 512 x 1], [512 x 1 x *] -> [132 x *]
Validating --> b = LearnableParameter() : -> [132 x 1]
Validating --> LSTMoutputW = Plus (unnamed193, b) : [132 x *], [132 x 1] -> [132 x 1 x *]
Validating --> ce = CrossEntropyWithSoftmax (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> err = ErrorPrediction (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> logPrior.prior = Mean (labels) : [132 x *] -> [132]
Validating --> logPrior.logPrior = Log (logPrior.prior) : [132] -> [132]
Validating --> scaledLogLikelihood = Minus (LSTMoutputW, logPrior.logPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
Validating network. 88 nodes to process in pass 2.
Validating --> LSTMoutput1.dh = PastValue (LSTMoutput1.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput1.dc = PastValue (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.dh = PastValue (LSTMoutput2.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput2.dc = PastValue (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.dh = PastValue (LSTMoutput3.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput3.dc = PastValue (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating network. 15 nodes to process in pass 3.
Validating network, final pass.
29 out of 113 nodes do not share the minibatch layout with the input data.
Post-processing network complete.
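For readers unfamiliar with this NDL network, the validation lines above spell out a peephole LSTM with a recurrent projection layer (LSTMP). Per layer, wxxpbpwhh is the stacked 4096-dimensional gate pre-activation (cut by the Slice nodes into the four 1024-dimensional blocks G1..G4), the Wci/Wcf/Wco vectors are diagonal peephole weights applied through DiagTimes, dh and dc are the PastValue (t-1) copies of the projected output and the cell state, and Wmr projects the 1024-dimensional cell output down to 512. Reconstructed from the node list (with \odot for ElementTimes):

\[
\begin{aligned}
z_t &= W_x x_t + b + W_h y_{t-1} && \text{(4096-dim; sliced into } z^{(1)}_t,\dots,z^{(4)}_t\text{, 1024 each)} \\
i_t &= \sigma\bigl(z^{(1)}_t + W_{ci} \odot c_{t-1}\bigr), \qquad
f_t = \sigma\bigl(z^{(3)}_t + W_{cf} \odot c_{t-1}\bigr) \\
c_t &= f_t \odot c_{t-1} + i_t \odot \tanh\bigl(z^{(2)}_t\bigr) \\
o_t &= \sigma\bigl(z^{(4)}_t + W_{co} \odot c_t\bigr) \\
m_t &= o_t \odot \tanh(c_t), \qquad y_t = W_{mr}\, m_t
\end{aligned}
\]

This matches the inferred shapes: W_x is [4096 x 363] for the first layer (the input is the 363-dimensional normalized feature) and [4096 x 512 x 1] afterwards, since each subsequent layer consumes the 512-dimensional projected output y_t.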
08/16/2016 03:20:44: Created model with 113 nodes on GPU 0.
08/16/2016 03:20:44: Training criterion node(s):
08/16/2016 03:20:44: ce = CrossEntropyWithSoftmax
08/16/2016 03:20:44: Evaluation criterion node(s):
08/16/2016 03:20:44: err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing: Out of 217 matrices, 125 are shared as 56, and 92 are not shared.
{ LSTMoutput2.mt : [1024 x 1 x *] (gradient)
LSTMoutput3.dh : [512 x 1 x *]
LSTMoutput3.wxx : [4096 x *] (gradient) }
{ LSTMoutput2.Wco : [1024] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] }
{ LSTMoutput1.dh : [512 x 1 x *]
LSTMoutput1.wxx : [4096 x *] (gradient) }
{ LSTMoutput1.mt : [1024 x 1 x *] (gradient)
LSTMoutput2.dh : [512 x 1 x *]
LSTMoutput2.wxx : [4096 x *] (gradient) }
{ LSTMoutput1.Wco : [1024] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] }
{ LSTMoutput3.b : [4096 x 1] (gradient)
LSTMoutput3.dh : [512 x 1 x *] (gradient) }
{ LSTMoutput1.bft : [1024 x 1 x *] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.G2 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.unnamed163 : [1024 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wh : [4096 x 512] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.bft : [1024 x 1 x *] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed173 : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed177 : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] (gradient) }
{ LSTMoutput1.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput2.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.output : [512 x 1 x *] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] (gradient) }
{ LSTMoutput2.wx : [4096 x 512 x 1] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] }
{ LSTMoutput1.ct : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput1.unnamed164 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput1.G1 : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] }
{ LSTMoutput1.Wci : [1024] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] }
{ LSTMoutput1.Wcf : [1024] (gradient)
LSTMoutput2.it : [1024 x 1 x *] }
{ LSTMoutput1.ot : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] }
{ LSTMoutput1.G4 : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] }
{ LSTMoutput1.Wmr : [512 x 1024] (gradient)
LSTMoutput2.wxx : [4096 x *] }
{ LSTMoutput1.G3 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] }
{ LSTMoutput1.whh : [4096 x 1 x *] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] }
{ LSTMoutput1.b : [4096 x 1] (gradient)
LSTMoutput1.dh : [512 x 1 x *] (gradient)
LSTMoutput2.unnamed174 : [1024 x 1 x *] }
{ LSTMoutput2.Wmr : [512 x 1024] (gradient)
LSTMoutput3.wxx : [4096 x *] }
{ LSTMoutput1.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] }
{ LSTMoutput1.wx : [4096 x 363] (gradient)
LSTMoutput1.wxxpb : [4096 x 1 x *] }
{ LSTMoutput2.unnamed174 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput2.G3 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] }
{ LSTMoutput2.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.b : [4096 x 1] (gradient)
LSTMoutput2.dh : [512 x 1 x *] (gradient)
LSTMoutput3.unnamed184 : [1024 x 1 x *] }
{ LSTMoutput3.output : [512 x 1 x *] (gradient)
LSTMoutputW : [132 x 1 x *] (gradient) }
{ LSTMoutput1.ft : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.output : [512 x 1 x *] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput3.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.unnamed167 : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] }
{ LSTMoutput1.unnamed166 : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] }
{ LSTMoutput2.unnamed176 : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] (gradient) }
{ LSTMoutput3.wx : [4096 x 512 x 1] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] }
{ LSTMoutput2.ct : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput2.ot : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] }
{ LSTMoutput3.mt : [1024 x 1 x *] (gradient)
unnamed193 : [132 x *] (gradient) }
{ LSTMoutput2.Wh : [4096 x 512] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.bit : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] (gradient) }
{ LSTMoutput3.Wmr : [512 x 1024] (gradient)
unnamed193 : [132 x *] }
{ LSTMoutput1.unnamed165 : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] }
{ LSTMoutputW : [132 x 1 x *]
W : [132 x 512 x 1] (gradient) }
{ LSTMoutput2.Wci : [1024] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] }
{ LSTMoutput1.dc : [1024 x 1 x *] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] }
{ LSTMoutput2.Wcf : [1024] (gradient)
LSTMoutput3.it : [1024 x 1 x *] }
{ LSTMoutput1.it : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] }
{ LSTMoutput1.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] }
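Each brace-delimited group above is one physical buffer reused by every matrix listed inside it, which is how the header's tally arises: 125 matrices shared as 56 buffers, plus 92 unshared, for 217 total. The tally can be cross-checked mechanically; a small sketch, assuming the group listing has been captured to a file (the filename is hypothetical):

    import re

    # Each "{ ... }" block is one shared buffer; every member appears as
    # "name : [shape]", so counting ": [" occurrences counts members.
    text = open("memshare_groups.txt").read()    # hypothetical capture of the listing
    groups = re.findall(r"\{(.*?)\}", text, flags=re.S)
    members = sum(len(re.findall(r":\s*\[", g)) for g in groups)
    print(len(groups), members)                  # expected: 56 125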
08/16/2016 03:20:44: Training 13634692 parameters in 23 out of 23 parameter tensors and 104 nodes with gradient:
08/16/2016 03:20:44: Node 'LSTMoutput1.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput1.Wci' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput1.Wco' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput1.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 03:20:44: Node 'LSTMoutput1.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 03:20:44: Node 'LSTMoutput1.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 03:20:44: Node 'LSTMoutput1.wx' (LearnableParameter operation) : [4096 x 363]
08/16/2016 03:20:44: Node 'LSTMoutput2.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput2.Wci' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput2.Wco' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput2.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 03:20:44: Node 'LSTMoutput2.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 03:20:44: Node 'LSTMoutput2.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 03:20:44: Node 'LSTMoutput2.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 03:20:44: Node 'LSTMoutput3.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput3.Wci' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput3.Wco' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput3.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 03:20:44: Node 'LSTMoutput3.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 03:20:44: Node 'LSTMoutput3.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 03:20:44: Node 'LSTMoutput3.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 03:20:44: Node 'W' (LearnableParameter operation) : [132 x 512 x 1]
08/16/2016 03:20:44: Node 'b' (LearnableParameter operation) : [132 x 1]
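The 13634692 figure is simply the element count summed over the 23 tensors just listed; a quick arithmetic check in Python (shapes copied from the log):

    from functools import reduce
    from operator import mul

    shapes = (
        # per LSTM layer (x3): Wcf, Wci, Wco, Wh, Wmr, b
        [[1024], [1024], [1024], [4096, 512], [512, 1024], [4096, 1]] * 3
        # wx of layers 1..3, then the output W and b
        + [[4096, 363], [4096, 512, 1], [4096, 512, 1], [132, 512, 1], [132, 1]]
    )
    assert len(shapes) == 23
    print(sum(reduce(mul, s) for s in shapes))   # 13634692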
08/16/2016 03:20:44: Precomputing --> 3 PreCompute nodes found.
08/16/2016 03:20:44: featNorm.xMean = Mean()
08/16/2016 03:20:44: featNorm.xStdDev = InvStdDev()
08/16/2016 03:20:44: logPrior.prior = Mean()
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
08/16/2016 03:20:45: Precomputing --> Completed.
08/16/2016 03:20:46: Starting Epoch 1: learning rate per sample = 0.001953 effective momentum = 0.000000 momentum as time constant = 0.0 samples
minibatchiterator: epoch 0: frames [0..64] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
08/16/2016 03:20:46: Starting minibatch loop.
08/16/2016 03:20:47: Epoch[ 1 of 1]-Minibatch[ 1- 10, 250.00%]: ce = 4.87453079 * 160; err = 0.90625000 * 160; time = 1.1338s; samplesPerSecond = 141.1
08/16/2016 03:20:48: Epoch[ 1 of 1]-Minibatch[ 11- 20, 500.00%]: ce = 4.84628143 * 160; err = 0.69375000 * 160; time = 1.0409s; samplesPerSecond = 153.7
08/16/2016 03:20:49: Finished Epoch[ 1 of 1]: [Training] ce = 4.85708837 * 418; err = 0.80382775 * 418; totalSamplesSeen = 418; learningRatePerSample = 0.001953125; epochTime=2.90303s
08/16/2016 03:20:50: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn'
08/16/2016 03:20:51: CNTKCommandTrainEnd: speechTrain
08/16/2016 03:20:51: Action "train" complete.
08/16/2016 03:20:51: __COMPLETED__
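Two numbers in the summary lines above can be reproduced from the configuration. The per-sample learning rate 0.001953125 equals learningRatesPerMB / (16 x 16) = 0.5 / 256, consistent with a truncated-BPTT minibatch of 16 parallel utterances times 16 frames; reading the divisor this way is an inference from the numbers in this log, not a statement about CNTK internals. The criterion figures are sample-weighted averages, so the epoch line can be checked against the per-minibatch lines:

    # Per-sample learning rate (0.5 per MB over 16 utterances x 16 frames,
    # inferred from minibatchSize and nbrUttsIneachRecurrentIter above).
    print(0.5 / (16 * 16))                    # 0.001953125

    # "ce = 4.87453079 * 160" means an average of 4.87453079 over 160 frames;
    # the epoch summary aggregates all 418 frames the same way.
    reported = [(4.87453079, 160), (4.84628143, 160)]
    total = 4.85708837 * 418
    rest = total - sum(ce * n for ce, n in reported)
    print(rest / (418 - 320))                 # ~4.846: implied ce of the last 98 frames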


@@ -5,5 +5,5 @@
 ConfigDir=$TEST_DIR/../../../../../../Examples/Speech/AN4/Config
 # cntkrun <CNTK config file name> <additional CNTK args>
-cntkrun LSTM-NDL.cntk "speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] parallelTrain=false" || exit $?
+cntkrun LSTM-NDL.cntk "speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false" || exit $?

[Diff not shown because of its large size.]


@@ -6,4 +6,4 @@
 . $TEST_DIR/../run-timit-test-common
 # cntkrun <CNTK config file name> <additional CNTK arg>
-cntkrun TIMIT_AdaptLearnRate.cntk "$CntkArguments" || exit $?
+cntkrun TIMIT_AdaptLearnRate.cntk "$CntkArguments TIMIT_TrainAdaptLR=[reader=[useMersenneTwisterRand=true]] TIMIT_TrainAdaptLR=[cvReader=[useMersenneTwisterRand=true]]" || exit $?

[Diff not shown because of its large size.]


@@ -6,7 +6,7 @@
 . $TEST_DIR/../run-timit-test-common
 # Train:
-cntkrun TIMIT_TrainSimpleNetwork.cntk "$CntkArguments" || exit $?
+cntkrun TIMIT_TrainSimpleNetwork.cntk "$CntkArguments TIMIT_TrainSimple=[reader=[useMersenneTwisterRand=true]]" || exit $?
 # Validate:
 cntkrun TIMIT_CrossValidateSimpleNetwork.cntk "$CntkArguments" || exit $?

[Some files were not shown because too many files changed in this diff.]