merged from master. Undid the ClassificationError baseline updates due to merge conflicts

This commit is contained in:
Frank Seide 2016-08-22 14:36:28 -07:00
Parent 1e68b3c289 8493f118da
Commit 5b969bac70
380 changed files with 292772 additions and 1787443 deletions

View file

@@ -34,48 +34,29 @@
<UseZip Condition="Exists('$(ZLIB_PATH)')">true</UseZip>
</PropertyGroup>
<Choose>
<When Condition="Exists('$(ACML_PATH)')">
<PropertyGroup>
<MathLibrary>ACML</MathLibrary>
<MathLibraryName>ACML</MathLibraryName>
<MathIncludePath>$(ACML_PATH)\include</MathIncludePath>
<MathLibraryPath>$(ACML_PATH)\lib</MathLibraryPath>
<MathLinkLibrary>libacml_mp_dll.lib</MathLinkLibrary>
<MathDelayLoad>libacml_mp_dll.dll</MathDelayLoad>
<MathPostBuildCopyPattern>$(ACML_PATH)\lib\*.dll</MathPostBuildCopyPattern>
<UnitTestDlls>$(OutDir)libacml_mp_dll.dll;$(OutDir)libifcoremd.dll;$(OutDir)libifportmd.dll;$(OutDir)libiomp*.dll;$(OutDir)libmmd.dll;$(OutDir)svml_dispmd.dll;</UnitTestDlls>
<MathDefine>USE_ACML</MathDefine>
</PropertyGroup>
</When>
<!-- See https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#optional-mkl on how to configure to build CNTK with MKL -->
<When Condition="'$(CNTK_MKL)' == '1'">
<PropertyGroup>
<MathLibrary>MKL</MathLibrary>
<CNTKCustomMKLVersion>1</CNTKCustomMKLVersion>
<CNTKCustomMKLPath>$(CNTK_MKL_PATH)\$(CNTKCustomMKLVersion)</CNTKCustomMKLPath>
<MathIncludePath>$(CNTKCustomMKLPath)\include</MathIncludePath>
<MathDefine>USE_MKL</MathDefine>
</PropertyGroup>
<PropertyGroup Condition="'$(CNTK_MKL_SEQUENTIAL)' != '1'">
<MathLibraryName>CNTK custom MKL Parallel (Version: $(CNTKCustomMKLVersion))</MathLibraryName>
<MathLibraryPath>$(CNTKCustomMKLPath)\x64\parallel</MathLibraryPath>
<MathLinkLibrary>mkl_cntk_p.lib</MathLinkLibrary>
<MathDelayLoad>mkl_cntk_p.dll</MathDelayLoad>
<MathPostBuildCopyPattern>$(MathLibraryPath)\*.dll</MathPostBuildCopyPattern>
<UnitTestDlls>$(OutDir)mkl_cntk_p.dll;$(OutDir)libiomp5md.dll;</UnitTestDlls>
</PropertyGroup>
<PropertyGroup Condition="'$(CNTK_MKL_SEQUENTIAL)' == '1'">
<MathLibraryName>CNTK custom MKL Sequential (Version: $(CNTKCustomMKLVersion))</MathLibraryName>
<MathLibraryPath>$(CNTKCustomMKLPath)\x64\sequential</MathLibraryPath>
<MathLinkLibrary>mkl_cntk_s.lib</MathLinkLibrary>
<MathDelayLoad>mkl_cntk_s.dll</MathDelayLoad>
<MathPostBuildCopyPattern>$(MathLibraryPath)\*.dll</MathPostBuildCopyPattern>
<UnitTestDlls>$(OutDir)mkl_cntk_s.dll;</UnitTestDlls>
</PropertyGroup>
</When>
</Choose>
<PropertyGroup>
<MathLibrary>MKL</MathLibrary>
<CNTKCustomMKLVersion>1</CNTKCustomMKLVersion>
<CNTKCustomMKLPath>$(CNTK_MKL_PATH)\$(CNTKCustomMKLVersion)</CNTKCustomMKLPath>
<MathIncludePath>$(CNTKCustomMKLPath)\include</MathIncludePath>
<MathDefine>USE_MKL</MathDefine>
</PropertyGroup>
<PropertyGroup Condition="'$(CNTK_MKL_SEQUENTIAL)' != '1'">
<MathLibraryName>CNTK custom MKL Parallel (Version: $(CNTKCustomMKLVersion))</MathLibraryName>
<MathLibraryPath>$(CNTKCustomMKLPath)\x64\parallel</MathLibraryPath>
<MathLinkLibrary>mkl_cntk_p.lib</MathLinkLibrary>
<MathDelayLoad>mkl_cntk_p.dll</MathDelayLoad>
<MathPostBuildCopyPattern>$(MathLibraryPath)\*.dll</MathPostBuildCopyPattern>
<UnitTestDlls>$(OutDir)mkl_cntk_p.dll;$(OutDir)libiomp5md.dll;</UnitTestDlls>
</PropertyGroup>
<PropertyGroup Condition="'$(CNTK_MKL_SEQUENTIAL)' == '1'">
<MathLibraryName>CNTK custom MKL Sequential (Version: $(CNTKCustomMKLVersion))</MathLibraryName>
<MathLibraryPath>$(CNTKCustomMKLPath)\x64\sequential</MathLibraryPath>
<MathLinkLibrary>mkl_cntk_s.lib</MathLinkLibrary>
<MathDelayLoad>mkl_cntk_s.dll</MathDelayLoad>
<MathPostBuildCopyPattern>$(MathLibraryPath)\*.dll</MathPostBuildCopyPattern>
<UnitTestDlls>$(OutDir)mkl_cntk_s.dll;</UnitTestDlls>
</PropertyGroup>
<PropertyGroup Condition="$(UseZip)">
<ZipInclude>$(ZLIB_PATH)\include;$(ZLIB_PATH)\lib\libzip\include;</ZipInclude>

View file

@@ -1150,6 +1150,9 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CPPEvalClient", "Examples\E
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BrainScriptTests", "Tests\UnitTests\BrainScriptTests\BrainScriptTests.vcxproj", "{9F999212-AFC5-4EAC-AA78-F7247D46C456}"
ProjectSection(ProjectDependencies) = postProject
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {928ABD1B-4D3B-4017-AEF1-0FA1B4467513}
{EAD17188-072C-4726-B840-A769C36DAD1B} = {EAD17188-072C-4726-B840-A769C36DAD1B}
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
EndProjectSection
EndProject

View file

@@ -9,8 +9,6 @@
# that provides
# BUILDTYPE= One of release or debug
# defaults to release
# ACML_PATH= path to ACML library installation
# only needed if MATHLIB=acml
# MKL_PATH= path to CNTK custom MKL installation
# only needed if MATHLIB=mkl
# CNTK_CUSTOM_MKL_VERSION=2
@@ -21,8 +19,8 @@
# defaults to /usr/include/nvidia/gdk
# GDK_NVML_LIB_PATH= path to CUDA GDK (stub) library path, so $(GDK_NVML_LIB_PATH)/libnvidia-ml.so exists
# defaults to /usr/src/gdk/nvml/lib
# MATHLIB= One of acml or mkl
# defaults to acml
# MATHLIB= mkl
# defaults to mkl
# CUDA_PATH= Path to CUDA
# If not specified, GPU will not be enabled
# CUB_PATH= path to NVIDIA CUB installation, so $(CUB_PATH)/cub/cub.cuh exists
@@ -60,8 +58,8 @@ BUILDTYPE=release
endif
ifndef MATHLIB
$(info DEFAULTING MATHLIB=acml)
MATHLIB = acml
$(info DEFAULTING MATHLIB=mkl)
MATHLIB = mkl
endif
#### Configure based on options above
@@ -137,13 +135,6 @@ else
COMMON_FLAGS +=-DCPUONLY
endif
ifeq ("$(MATHLIB)","acml")
INCLUDEPATH += $(ACML_PATH)/include
LIBPATH += $(ACML_PATH)/lib
LIBS += -lacml_mp -liomp5 -lm -lpthread
COMMON_FLAGS += -DUSE_ACML
endif
ifeq ("$(MATHLIB)","mkl")
INCLUDEPATH += $(MKL_PATH)/$(CNTK_CUSTOM_MKL_VERSION)/include
LIBS += -lm
@@ -418,6 +409,10 @@ CNTKLIBRARY_TESTS_SRC =\
Tests/UnitTests/V2LibraryTests/TensorTests.cpp \
Tests/UnitTests/V2LibraryTests/TrainerTests.cpp \
Tests/UnitTests/V2LibraryTests/CifarResNet.cpp \
Tests/UnitTests/V2LibraryTests/SerializationTests.cpp \
Tests/UnitTests/V2LibraryTests/LearnerTests.cpp \
Tests/UnitTests/V2LibraryTests/FunctionTests.cpp \
Tests/UnitTests/V2LibraryTests/SequenceClassification.cpp \
CNTKLIBRARY_TESTS:=$(BINDIR)/v2librarytests
CNTKLIBRARY_TESTS_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(CNTKLIBRARY_TESTS_SRC)))
@@ -933,22 +928,24 @@ UNITTEST_BRAINSCRIPT_SRC = \
$(SOURCEDIR)/CNTK/BrainScript/BrainScriptEvaluator.cpp \
$(SOURCEDIR)/CNTK/BrainScript/BrainScriptParser.cpp \
$(SOURCEDIR)/../Tests/UnitTests/BrainScriptTests/ParserTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/BrainScriptTests/ComputationNetworkTests.cpp \
$(SOURCEDIR)/../Tests/UnitTests/BrainScriptTests/stdafx.cpp
UNITTEST_BRAINSCRIPT_SRC+=$(COMMON_SRC)
UNITTEST_BRAINSCRIPT_SRC += $(COMPUTATION_NETWORK_LIB_SRC)
UNITTEST_BRAINSCRIPT_SRC += $(SEQUENCE_TRAINING_LIB_SRC)
UNITTEST_BRAINSCRIPT_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_BRAINSCRIPT_SRC))
UNITTEST_BRAINSCRIPT_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_BRAINSCRIPT_SRC)))
UNITTEST_BRAINSCRIPT := $(BINDIR)/brainscripttests
ALL += $(UNITTEST_BRAINSCRIPT)
SRC += $(UNITTEST_BRAINSCRIPT_SRC)
$(UNITTEST_BRAINSCRIPT): $(UNITTEST_BRAINSCRIPT_OBJ)
$(UNITTEST_BRAINSCRIPT): $(UNITTEST_BRAINSCRIPT_OBJ) | $(CNTKMATH_LIB)
@echo $(SEPARATOR)
@mkdir -p $(dir $@)
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) -ldl
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) -ldl -l$(CNTKMATH)
unittests: $(UNITTEST_EVAL) $(UNITTEST_READER) $(UNITTEST_NETWORK) $(UNITTEST_MATH) $(UNITTEST_BRAINSCRIPT)

View file

@@ -72,18 +72,6 @@ void DoTrain(const ConfigRecordType& config)
bool makeMode = config(L"makeMode", true);
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
// determine the network-creation function
// We have several ways to create that network.
function<ComputationNetworkPtr(DEVICEID_TYPE)> createNetworkFn;
createNetworkFn = GetNetworkFactory<ConfigRecordType, ElemType>(config);
auto dataReader = CreateObject<DataReader>(config, L"reader");
shared_ptr<DataReader> cvDataReader;
if (config.Exists(L"cvReader"))
cvDataReader = CreateObject<DataReader>(config, L"cvReader");
shared_ptr<SGD<ElemType>> optimizer;
if (config.Exists(L"optimizer"))
{
@@ -95,8 +83,39 @@ void DoTrain(const ConfigRecordType& config)
optimizer = make_shared<SGD<ElemType>>(configSGD);
}
// determine which epoch to start with, including recovering a checkpoint if any and 'makeMode' enabled
int startEpoch = optimizer->DetermineStartEpoch(makeMode);
if (startEpoch == optimizer->GetMaxEpochs())
{
LOGPRINTF(stderr, "No further training is necessary.\n");
return;
}
wstring modelFileName = optimizer->GetModelNameForEpoch(int(startEpoch) - 1);
bool loadNetworkFromCheckpoint = startEpoch >= 0;
fprintf(stderr, "\n");
if (loadNetworkFromCheckpoint)
LOGPRINTF(stderr, "Starting from checkpoint. Loading network from '%ls'.\n", modelFileName.c_str());
else
LOGPRINTF(stderr, "Creating virgin network.\n");
// determine the network-creation function
// We have several ways to create that network.
function<ComputationNetworkPtr(DEVICEID_TYPE)> createNetworkFn;
createNetworkFn = GetNetworkFactory<ConfigRecordType, ElemType>(config);
// create or load from checkpoint
shared_ptr<ComputationNetwork> net = !loadNetworkFromCheckpoint ? createNetworkFn(deviceId) : ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName);
auto dataReader = CreateObject<DataReader>(config, L"reader");
shared_ptr<DataReader> cvDataReader;
if (config.Exists(L"cvReader"))
cvDataReader = CreateObject<DataReader>(config, L"cvReader");
optimizer->InitMPI(MPIWrapper::GetInstance());
optimizer->Train(createNetworkFn, deviceId, dataReader.get(), cvDataReader.get(), makeMode);
optimizer->Train(net, deviceId, dataReader.get(), cvDataReader.get(), startEpoch, loadNetworkFromCheckpoint);
}
namespace Microsoft { namespace MSR { namespace ScriptableObjects {
@@ -189,9 +208,8 @@ void DoDumpNodes(const ConfigParameters& config)
if (!printValues && !printMetadata)
InvalidArgument("printValues and printMetadata: Since both are set to false, there will be nothing to dump");
ComputationNetwork net(CPUDEVICE); // always use CPU
net.Load<ElemType>(modelPath); // TODO: we have a function now to combine this and the previous line
net.DumpNodeInfoToFile(nodeName, printValues, printMetadata, outputFile, nodeNameRegexStr);
ComputationNetworkPtr net = ComputationNetwork::CreateFromFile<ElemType>(CPUDEVICE, modelPath);
net->DumpNodeInfoToFile(nodeName, printValues, printMetadata, outputFile, nodeNameRegexStr);
}
template void DoDumpNodes<float>(const ConfigParameters& config);

View file

@@ -10,7 +10,7 @@
#include "stdafx.h"
#ifdef _WIN32
#include <crtdbg.h>
#endif
#endif
#include "Basics.h"
#include "Actions.h"

View file

@@ -53,8 +53,6 @@ if "%p_CNTK_MKL%" == "1" (
) else (
echo #define _MATHLIB_ "mkl">> buildinfo.h$$
)
) else (
echo #define _MATHLIB_ "acml">> buildinfo.h$$
)
echo #define _BUILDER_ "%USERNAME%" >> buildinfo.h$$

View file

@@ -7,6 +7,12 @@
#pragma once
#ifdef SWIG
#define final
#define explicit
#define static_assert(condition, message)
#endif
#include "CNTKLibraryInternals.h"
#include <memory>
@@ -14,10 +20,12 @@
#include <array>
#include <stdarg.h>
#include <assert.h>
#include <map>
#include <unordered_map>
#include <unordered_set>
#include <string>
#include <sstream>
#include <iosfwd>
#include <algorithm>
namespace CNTK
@@ -236,7 +244,7 @@
}
///
/// Creates and returns a new shape contructed by appending the dimensions of the specified 'shape' to 'this' shape's dimensions.
/// Creates and returns a new shape constructed by appending the dimensions of the specified 'shape' to 'this' shape's dimensions.
///
NDShape AppendShape(const NDShape& shape) const
{
@@ -665,35 +673,52 @@
///
/// Denotes an Axis of a Variable and is used for specifying the axes parameters of certain Functions such as reductions.
/// Besides the static axes corresponding to each of the axes of the Variable's shape, Input and Output Variables
/// also have one or more dynamic axes (corresponding to the sequence dimensions) and one implicit batch axis denoting the axes
/// along which multiple sequences are batched in the Values corresponding to the variable when performing computations.
/// Besides the static axes corresponding to each of the axes of the Variable's shape, Variables of kind 'Input' and any
/// 'Output' Variables dependent on an 'Input' Variable also have 2 additional dynamic axes whose dimensions are known only
/// when the Variable is bound to actual data during compute (viz. sequence axis and batch axis denoting the axis along which
/// multiple sequences are batched)
///
class Axis final
{
CNTK_API static const std::wstring s_staticAxisNamePrefix;
public:
///
/// Construct an Axis object denoting a static axis with the specified index.
///
Axis(size_t staticAxisIdx)
explicit Axis(size_t staticAxisIdx)
: m_staticAxisIdx(staticAxisIdx)
{
const wchar_t* staticAxisNamePrefix = L"staticAxis_";
m_name = staticAxisNamePrefix + std::to_wstring(staticAxisIdx);
m_name = s_staticAxisNamePrefix + std::to_wstring(staticAxisIdx);
}
///
/// Construct a dynamic axis with the specified name.
///
Axis(const std::wstring& name)
explicit Axis(const std::wstring& name)
: m_staticAxisIdx(SIZE_MAX), m_name(name)
{
if (m_name.length() > s_staticAxisNamePrefix.length())
{
auto prefix = m_name.substr(0, s_staticAxisNamePrefix.length());
auto suffix = m_name.substr(s_staticAxisNamePrefix.length(), m_name.length() - s_staticAxisNamePrefix.length());
if (prefix == s_staticAxisNamePrefix)
{
if (suffix == L"0")
*this = Axis(0);
else
{
auto suffixVal = std::stoul(suffix);
if (suffixVal != 0)
*this = Axis(suffixVal);
}
}
}
}
///
/// Returns a boolean indicating if 'this' Axis corresponds to a static axis
///
bool IsStaticAxis() const { return m_staticAxisIdx == SIZE_MAX; }
bool IsStaticAxis() const { return m_staticAxisIdx != SIZE_MAX; }
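To make the static/dynamic distinction concrete, a small illustrative sketch against this API (not part of the commit; names invented):

// Sketch: static vs. dynamic Axis construction, per the constructors above.
#include "CNTKLibrary.h"
#include <cassert>

void AxisExample()
{
    CNTK::Axis static0(0);                   // static axis with index 0
    CNTK::Axis sequence(L"mySequenceAxis");  // dynamic axis, identified by name
    assert(static0.IsStaticAxis() && !sequence.IsStaticAxis());

    // A name using the reserved "staticAxis_<n>" prefix round-trips to a static axis.
    CNTK::Axis roundTripped(L"staticAxis_3");
    assert(roundTripped.IsStaticAxis() && (roundTripped.StaticAxisIndex() == 3));
}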
///
/// Returns the axis index if 'this' Axis is a static axis. Throws an exception otherwise.
@@ -714,12 +739,7 @@
///
/// Static Axis object representing the batch axis.
///
CNTK_API static const Axis& BatchAxis();
///
/// Special Axis object denoting all the axes of the Value object in whose context it is used.
///
CNTK_API static const Axis& AllAxes();
CNTK_API static const Axis& DefaultBatchAxis();
///
/// Name of 'this' axis
@@ -753,7 +773,20 @@
{
return !(first == second);
}
}
namespace std {
template <> struct hash<CNTK::Axis>
{
size_t operator()(const CNTK::Axis& x) const
{
return std::hash<std::wstring>()(x.Name());
}
};
}
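Since Axis now hashes by its name (specialization above), it can serve directly as an unordered-container key; a minimal invented sketch:

// Sketch: Axis as an unordered_set key via the std::hash<CNTK::Axis> above.
#include <unordered_set>

void AxisHashExample()
{
    std::unordered_set<CNTK::Axis> seen;
    seen.insert(CNTK::Axis::DefaultDynamicAxis());
    bool isDuplicate = !seen.insert(CNTK::Axis::DefaultDynamicAxis()).second; // true
}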
namespace CNTK
{
///
/// Enumeration type denoting the kind of a symbolic Variable object
///
@@ -780,47 +813,76 @@
template <typename T>
friend struct std::hash;
CNTK_API static const std::vector<Axis> s_defaultInputVariableDynamicAxes;
public:
///
/// Create an 'Input' Variable.
///
Variable(const NDShape& shape, CNTK::DataType dataType)
: Variable(shape, dataType, L"")
Variable(const NDShape& shape, CNTK::DataType dataType, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
: Variable(shape, dataType, L"", dynamicAxes)
{}
///
/// Create an 'Input' Variable.
///
Variable(const NDShape& shape, CNTK::DataType dataType, const wchar_t* name)
: Variable(shape, dataType, std::wstring(name))
Variable(const NDShape& shape, CNTK::DataType dataType, const wchar_t* name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
: Variable(shape, dataType, std::wstring(name), dynamicAxes)
{}
///
/// Create an 'Input' Variable.
///
Variable(const NDShape& shape, CNTK::DataType dataType, const std::wstring& name)
: Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, false, { Axis::DefaultDynamicAxis() }, false, name)
Variable(const NDShape& shape, CNTK::DataType dataType, const std::wstring& name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
: Variable(shape, false, dataType, name, dynamicAxes)
{}
///
/// Create an 'Input' Variable denoting sparse data.
///
Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, const std::wstring& name = L"")
: Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, false, { Axis::DefaultDynamicAxis() }, isSparse, name)
Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
: Variable(shape, isSparse, dataType, false, L"", dynamicAxes)
{}
///
/// Create an 'Input' Variable denoting sparse data.
///
Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, const wchar_t* name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
: Variable(shape, isSparse, dataType, std::wstring(name), dynamicAxes)
{}
///
/// Create an 'Input' Variable denoting sparse data.
///
Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, const std::wstring& name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
: Variable(shape, isSparse, dataType, false, name, dynamicAxes)
{}
///
/// Create an 'Input' Variable and specify if gradients are to be computed for this input
///
Variable(const NDShape& shape, CNTK::DataType dataType, bool needsGradient, const std::wstring& name = L"")
: Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, needsGradient, { Axis::DefaultDynamicAxis() }, false, name)
Variable(const NDShape& shape, CNTK::DataType dataType, bool needsGradient, const wchar_t* name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
: Variable(shape, dataType, needsGradient, std::wstring(name), dynamicAxes)
{}
///
/// Create an 'Input' Variable and specify if gradients are to be computed for this input
///
Variable(const NDShape& shape, CNTK::DataType dataType, bool needsGradient, const std::wstring& name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
: Variable(shape, false, dataType, needsGradient, name, dynamicAxes)
{}
///
/// Create an 'Input' Variable denoting sparse data and specify if gradients are to be computed for this input
///
Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, bool needsGradient, const std::wstring& name = L"")
: Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, needsGradient, { Axis::DefaultDynamicAxis() }, isSparse, name)
Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, bool needsGradient, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
: Variable(shape, isSparse, dataType, needsGradient, L"", dynamicAxes)
{}
///
/// Create an 'Input' Variable denoting sparse data and specify if gradients are to be computed for this input
///
Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, bool needsGradient, const std::wstring& name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
: Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, needsGradient, dynamicAxes, isSparse, name)
{}
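As a usage illustration of the overloads above (a sketch; shapes, names, and axes are invented):

// Sketch: constructing 'Input' Variables with and without explicit dynamic axes.
using namespace CNTK;

void InputVariableExample()
{
    // Default dynamic axes (sequence axis plus batch axis).
    Variable features(NDShape({ 28, 28 }), DataType::Float, L"features");

    // Explicit dynamic axes: a named sequence axis plus the default batch axis.
    std::vector<Axis> axes = { Axis(L"labelSequence"), Axis::DefaultBatchAxis() };
    Variable labels(NDShape({ 10 }), /*isSparse =*/ true, DataType::Float, L"labels", axes);

    // Listing the same dynamic axis twice throws InvalidArgument, per the
    // uniqueness check added to VariableFields further below.
}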
///
@@ -860,7 +922,7 @@
///
/// Returns a boolean value indicating if 'this' variable denotes sparse data
///
bool IsSparse() const { return (m_dataFields->m_isSparse); }
bool IsSparse() const { return m_dataFields->m_isSparse; }
///
/// Returns a boolean value indicating if 'this' variable is an Input
@@ -941,6 +1003,14 @@
VariableFields(const NDShape& shape, VariableKind varType, CNTK::DataType type, Function* ownerFunction, const NDArrayViewPtr& value, bool needsGradient, const std::vector<Axis>& dynamicAxes, bool isSparse, const std::wstring& name)
: m_shape(shape), m_varKind(varType), m_dataType(type), m_ownerFunction(ownerFunction), m_value(value), m_needsGradient(needsGradient), m_dynamicAxes(dynamicAxes), m_isSparse(isSparse), m_name(name)
{
// Validate that the dynamic axes are unique
std::unordered_set<Axis> uniqueDynamicAxis;
for (auto& currentDynamicAxis : dynamicAxes)
{
auto retVal = uniqueDynamicAxis.insert(currentDynamicAxis);
if (!retVal.second)
InvalidArgument("Dynamic axis named %S is specified more than once for Variable object", currentDynamicAxis.Name().c_str());
}
}
private:
@@ -1079,7 +1149,7 @@
/// Construct a Placeholder with the specified NDShape
///
explicit Placeholder(const NDShape& shape, const std::wstring& name = L"")
: Variable(shape, VariableKind::Placeholder, DataType::Unknown, nullptr, false, {Axis::DefaultDynamicAxis()}, name)
: Variable(shape, VariableKind::Placeholder, DataType::Unknown, nullptr, false, { Axis::DefaultDynamicAxis(), Axis::DefaultBatchAxis() }, name)
{}
///
@@ -1097,13 +1167,15 @@
}
namespace std {
template <> struct hash<CNTK::Axis>
template <> struct hash<CNTK::NDShape>
{
size_t operator()(const CNTK::Axis& x) const
size_t operator()(const CNTK::NDShape& x) const
{
return std::hash<std::wstring>()(x.Name());
return std::hash<std::wstring>()(x.AsString());
}
};
template <> struct hash<CNTK::Variable>
{
@@ -1441,6 +1513,21 @@
///
CNTK_API FunctionPtr Softmax(const Variable& operand, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in hardmax operation on specified tensor input operand
///
CNTK_API FunctionPtr Hardmax(const Variable& operand, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in transpose dimensions operation on specified tensor input operand
///
CNTK_API FunctionPtr TransposeAxes(const Variable& operand, const Axis& axis1, const Axis& axis2, const std::wstring& name = L"");
///
/// Create an instance of the slice operation on specified tensor input operand
///
CNTK_API FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name = L"");
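A short sketch composing the three new operations (operand assumed 2D, and the usual half-open [begin, end) slice convention assumed; invented for illustration):

// Sketch: Hardmax, TransposeAxes and Slice, as declared above.
using namespace CNTK;

FunctionPtr NewOpsExample(const Variable& x) // x assumed to have 2 static axes
{
    auto transposed = TransposeAxes(x, Axis(0), Axis(1)); // swap the two static axes
    auto firstTwo   = Slice(transposed, Axis(0), 0, 2);   // elements [0, 2) along axis 0
    return Hardmax(firstTwo);                             // one-hot of the maximum entry
}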
///
/// Create an instance of the CNTK built-in elementwise tensor addition operation with the specified input operands.
///
@@ -1497,6 +1584,13 @@
///
CNTK_API FunctionPtr Times(const Variable& leftOperand, const Variable& rightOperand, size_t numOutputAxes = 1, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in matrix multiplication operation with the transpose of the left input operand
/// and the specified right operand. Only accepts left operands of ranks 1 or 2.
/// TODO: Specify the constraints on the shapes of the operands.
///
CNTK_API FunctionPtr TransposeTimes(const Variable& leftOperand, const Variable& rightOperand, size_t numOutputAxes = 1, const std::wstring& name = L"");
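For context, a minimal sketch (assuming a rank-2 left operand, per the constraint above):

// Sketch: TransposeTimes evaluates transpose(W) * x without an explicit transpose.
using namespace CNTK;

FunctionPtr ProjectionExample(const Variable& W, const Variable& x)
{
    return TransposeTimes(W, x); // numOutputAxes defaults to 1
}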
///
/// Create an instance of the CNTK built-in operation to compute squared-error for specified input operands.
///
@@ -1518,7 +1612,6 @@
///
CNTK_API FunctionPtr PastValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name = L"");
//CNTK_API FunctionPtr PastValue(const Variable& initialState, const Variable& operand, Axis axis, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in operation for getting the future value along the lone dynamic axis of the specified operand.
@@ -1532,6 +1625,16 @@
///
CNTK_API FunctionPtr ReduceSum(const Variable& operand, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in sum reduction operation on specified tensor input operand along the specified axis
///
CNTK_API FunctionPtr ReduceSum(const Variable& operand, const Axis& axis, const std::wstring& name = L"");
///
/// Create an instance of the CNTK built-in LogSum reduction operation on specified tensor input operand along the specified axis
///
CNTK_API FunctionPtr ReduceLogSum(const Variable& operand, const Axis& axis, const std::wstring& name = L"");
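A brief sketch of the axis-wise reductions (operand invented):

// Sketch: ReduceSum/ReduceLogSum along a chosen static axis, per the declarations above.
using namespace CNTK;

FunctionPtr ReductionExample(const Variable& z)
{
    auto sums    = ReduceSum(z, Axis(0));    // sum out the first static axis
    auto logSums = ReduceLogSum(z, Axis(0)); // log of the sum of exponentials along axis 0
    return Minus(logSums, sums);
}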
///
/// Per dimension mean-variance normalization of the specified input operand.
///
@@ -1630,6 +1733,7 @@
NDShape,
Vector,
Dictionary,
NDArrayView,
};
static const char* TypeName(Type type)
@@ -1654,6 +1758,8 @@
return "Vector";
case Type::Dictionary:
return "Dictionary";
case Type::NDArrayView:
return "NDArrayView";
default:
LogicError("Unknown DictionaryValue::Type");
}
@@ -1687,13 +1793,21 @@
DictionaryValue(const wchar_t* value)
: DictionaryValue(std::wstring(value))
{}
// Due to SWIG we had to flatten this template for vector<DictionaryValue>
DictionaryValue(const std::vector<CNTK::DictionaryValue>& value) : m_valueType(GetValueType<std::vector<CNTK::DictionaryValue>>())
{
AllocateDataPtr(value);
}
template <typename T>
DictionaryValue(const T& value) : m_valueType(GetValueType<T>())
{
static_assert(std::is_same<T, NDShape>::value ||
std::is_same<T, std::wstring>::value ||
std::is_same<T, std::vector<DictionaryValue>>::value ||
std::is_same<T, Dictionary>::value,
static_assert((std::is_same<T, NDShape>::value ||
std::is_same<T, std::wstring>::value ||
std::is_same<T, std::vector<DictionaryValue>>::value ||
std::is_same<T, Dictionary>::value ||
std::is_same<T, NDArrayView>::value),
"Unsupported ValueType");
AllocateDataPtr(value);
@@ -1706,6 +1820,12 @@
*this = other;
}
DictionaryValue(DictionaryValue&& other) : m_valueType(Type::Bool)
{
// The m_valueType must have been set to a non-ptr type to prevent an attempt to interpret
// the underlying uninitialized value as a ptr and free it.
*this = std::move(other);
}
DictionaryValue& operator=(const DictionaryValue& other)
{
if (this != &other)
@@ -1723,11 +1843,33 @@
AllocateDataPtr(other.GetValue<std::vector<DictionaryValue>>());
else if (other.m_valueType == Type::Dictionary)
AllocateDataPtr(other.GetValue<Dictionary>());
else if (other.m_valueType == Type::NDArrayView)
AllocateDataPtr(other.GetValue<NDArrayView>());
}
return *this;
}
DictionaryValue& operator=(DictionaryValue&& other)
{
FreeDataPtr();
m_valueType = other.m_valueType;
m_data = other.m_data;
if (other.m_valueType == Type::String ||
other.m_valueType == Type::NDShape ||
other.m_valueType == Type::Vector ||
other.m_valueType == Type::Dictionary ||
other.m_valueType == Type::NDArrayView)
{
other.m_data.m_ptr = nullptr;
}
other.m_valueType = Type::None;
return *this;
}
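A small sketch of these move semantics (values invented):

// Sketch: moving a DictionaryValue transfers the pointer payload and resets the
// source to Type::None, so the source's destructor will not free the stolen data.
using namespace CNTK;

void MoveExample()
{
    DictionaryValue shapeValue(NDShape({ 1024, 1024 }));   // ptr-backed payload
    DictionaryValue stolen(std::move(shapeValue));         // shapeValue is now Type::None
    const NDShape& s = stolen.GetValue<NDShape>();         // still { 1024, 1024 }
}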
~DictionaryValue()
{
FreeDataPtr();
@@ -1764,7 +1906,8 @@
template <typename T, typename std::enable_if<std::is_same<T, NDShape>::value ||
std::is_same<T, std::wstring>::value ||
std::is_same<T, std::vector<DictionaryValue>>::value ||
std::is_same<T, Dictionary>::value>::type* = nullptr>
std::is_same<T, Dictionary>::value ||
std::is_same<T, NDArrayView>::value>::type* = nullptr>
const T& GetValue() const
{
VerifyType<T>();
@@ -1781,21 +1924,25 @@
return m_valueType;
}
friend CNTK_API Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, DictionaryValue& us);
friend CNTK_API Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const DictionaryValue& us);
CNTK_API bool operator==(const DictionaryValue& other) const;
CNTK_API bool operator!=(const DictionaryValue& other) const;
friend CNTK_API std::istream& operator>>(std::istream& stream, DictionaryValue& us);
friend CNTK_API std::ostream& operator<<(std::ostream& stream, const DictionaryValue& us);
private:
template <typename T>
static Type GetValueType()
{
static_assert(std::is_same<T, bool>::value ||
static_assert((std::is_same<T, bool>::value ||
std::is_same<T, size_t>::value ||
std::is_same<T, float>::value ||
std::is_same<T, double>::value ||
std::is_same<T, std::wstring>::value ||
std::is_same<T, NDShape>::value ||
std::is_same<T, std::vector<DictionaryValue>>::value ||
std::is_same<T, Dictionary>::value,
std::is_same<T, std::vector<DictionaryValue>>::value ||
std::is_same<T, Dictionary>::value ||
std::is_same<T, NDArrayView>::value),
"Unsupported ValueType");
if (std::is_same<T, bool>::value) return Type::Bool;
@@ -1806,6 +1953,7 @@
if (std::is_same<T, NDShape>::value) return Type::NDShape;
if (std::is_same<T, std::vector<DictionaryValue>>::value) return Type::Vector;
if (std::is_same<T, Dictionary>::value) return Type::Dictionary;
if (std::is_same<T, NDArrayView>::value) return Type::NDArrayView;
}
template <typename T>
@@ -1831,6 +1979,8 @@
FreePtrAsType<std::vector<DictionaryValue>>();
else if (m_valueType == Type::Dictionary)
FreePtrAsType<Dictionary>();
else if (m_valueType == Type::NDArrayView)
FreePtrAsType<NDArrayView>();
}
Type m_valueType;
@@ -1884,9 +2034,11 @@
return Contains(key.c_str());
}
CNTK_API bool operator==(const Dictionary& other) const;
CNTK_API bool operator!=(const Dictionary& other) const;
friend CNTK_API Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, Dictionary& us);
friend CNTK_API Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const Dictionary& us);
friend CNTK_API std::istream& operator>>(std::istream& stream, Dictionary& us);
friend CNTK_API std::ostream& operator<<(std::ostream& stream, const Dictionary& us);
private:
std::shared_ptr<std::unordered_map<std::wstring, DictionaryValue>> m_dictionaryData;
@@ -1924,6 +2076,9 @@
///
CNTK_API virtual void RestoreFromCheckpoint(const Dictionary& /*checkpoint*/) {}
///
/// Destruct this Learner.
///
virtual ~Learner() {}
protected:
@@ -1935,37 +2090,127 @@
};
///
/// A collection of key-value pairs that represents a training parameter schedule in
/// terms of the number of processed samples.
/// This class provides a number of convenience constructors to allow easy conversion
/// from a single value, a vector of values and a list of pairs to the training schedule.
///
template <typename T>
class TrainingParameterSchedule
{
public:
///
/// Create a schedule with a constant parameter value.
///
TrainingParameterSchedule(T value)
: m_schedule({ std::make_pair(0, value) }), m_unit(1)
{}
///
/// Create a schedule where the parameter changes its value every 'unit' samples:
/// schedule[0] is used for the first 'unit' samples, schedule[1] -- for the second,
/// and so on. The last value is then used repeatedly until the end of training.
///
TrainingParameterSchedule(const std::vector<T>& schedule, size_t unit = 1)
: m_unit(unit)
{
// TODO: 0 will be used to mean "the entire sweep"
if (unit == 0)
RuntimeError("TrainingParameterSchedule::constructor : 'unit' cannot be 0.");
if (schedule.size() == 0)
RuntimeError("TrainingParameterSchedule::constructor : schedule is empty.");
size_t i = 1;
for (const auto& value : schedule)
{
m_schedule[m_unit * i++] = value;
}
}
///
/// Create a schedule using the list of key-value pairs, where the key specifies
/// the number of 'units' the parameter should maintain the corresponding value.
/// The value from the last pair is used repeatedly until the end of training.
/// For example, {{1, 0.05}, {2, 0.1}, {1, 0.005}} with unit = 100 corresponds to
/// a schedule where the value of '0.05' is used for the first 100 samples, then
/// '0.1' is used for the next 200 samples, after which the value is switched
/// to '0.005'.
///
TrainingParameterSchedule(const std::initializer_list<std::pair<const size_t, T>>& schedule, size_t unit = 1)
: m_unit(unit)
{
// TODO: 0 will be used to mean "the entire sweep"
if (unit == 0)
RuntimeError("TrainingParameterSchedule::constructor : 'unit' cannot be 0.");
if (schedule.size() == 0)
RuntimeError("TrainingParameterSchedule::constructor : schedule is empty.");
size_t i = 0;
for (const auto& it : schedule)
{
if (it.first == 0)
RuntimeError("TrainingParameterSchedule::constructor : unit count cannot be 0.");
i += it.first;
m_schedule[m_unit * i] = it.second;
}
}
///
/// Returns a value corresponding to the absolute sample count from the beginning of training.
///
CNTK_API const T& operator[](size_t sampleCount) const;
private:
std::map<size_t, T> m_schedule;
size_t m_unit;
};
typedef TrainingParameterSchedule<double> LearningRatesPerSample;
typedef TrainingParameterSchedule<double> MomentumsPerSample;
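To make the schedule semantics concrete, a sketch built on the typedefs above (values invented; the lookup result follows the documented semantics):

// Sketch: the three TrainingParameterSchedule construction forms described above.
using namespace CNTK;

void ScheduleExample(const std::unordered_set<Parameter>& parameters)
{
    LearningRatesPerSample constantRate(0.005); // one value for all of training

    // 0.05 for the first 100 samples, 0.025 for the next 100, then 0.0125 onwards.
    LearningRatesPerSample vectorForm({ 0.05, 0.025, 0.0125 }, /*unit =*/ 100);

    // Pair form: 0.05 for 100 samples, 0.1 for the next 200, then 0.005 onwards.
    LearningRatesPerSample pairForm({ { 1, 0.05 }, { 2, 0.1 }, { 1, 0.005 } }, 100);
    double rateAtSample250 = pairForm[250]; // 0.1

    auto learner = SGDLearner(parameters, pairForm); // declared just below
}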
///
/// Create an instance of the CNTK built-in SGD learner.
///
/// TODO: add additional SGD parameters here (a collection of learning rate values)
CNTK_API LearnerPtr SGDLearner(const std::unordered_set<Parameter>& parameters, double learningRatePerSample);
CNTK_API LearnerPtr SGDLearner(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates);
///
/// Create an instance of the CNTK built-in Momentum SGD learner.
///
/// TODO: add additional Momentum parameters here (a collection of momentum rate values)
CNTK_API LearnerPtr MomentumSGDLearner(const std::unordered_set<Parameter>& parameters);
CNTK_API LearnerPtr MomentumSGDLearner(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums);
///
/// Create an instance of the CNTK built-in Nesterov's accelerated SGD learner.
///
CNTK_API LearnerPtr NesterovLearner(const std::unordered_set<Parameter>& parameters);
CNTK_API LearnerPtr NesterovLearner(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums);
///
/// Create an instance of the CNTK built-in AdaGrad learner.
///
CNTK_API LearnerPtr AdaGradLearner(const std::unordered_set<Parameter>& parameters, bool needAveMultiplier = true);
CNTK_API LearnerPtr AdaGradLearner(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool needAveMultiplier = true);
///
/// Create an instance of the CNTK built-in FSAdaGrad (improved AdaGrad) learner.
///
CNTK_API LearnerPtr FSAdaGradLearner(const std::unordered_set<Parameter>& parameters);
CNTK_API LearnerPtr FSAdaGradLearner(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums);
///
/// Create an instance of the CNTK built-in RMSProp learner.
///
CNTK_API LearnerPtr RMSPropLearner(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
double gamma,
double inc,
double dec,
@@ -1975,7 +2220,7 @@
///
/// Trainer is the top-level abstraction responsible for the orchestration of the training of a model
/// using the specified learners and training data either explicilty supplied as Value objects or from
/// using the specified learners and training data either explicitly supplied as Value objects or from
/// a MinibatchSource object.
///
class Trainer
@@ -2063,7 +2308,7 @@
};
///
/// Abstraction for generating minbatches of samples for training/evaluation.
/// Abstraction for generating minibatches of samples for training/evaluation.
///
class MinibatchSource : public std::enable_shared_from_this<MinibatchSource>
{
@@ -2079,10 +2324,14 @@
/// #samples or both. In case the size is specified in terms of both #sequences and #samples, the smaller of the 2 is taken. The actual
/// returned size of the minibatch is the min across all streams. Also the requested MB size fields in the maps are updated by the
/// MinibatchSource to contain the actual #sequences and #samples in the returned minibatch for the corresponding stream.
/// The return value indciates if the MinibatchSource will return any further data in subsequent calls of this function.
/// The return value indicates if the MinibatchSource will return any further data in subsequent calls of this function.
///
virtual std::unordered_map<StreamInfo, MinibatchData> GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
virtual const std::unordered_map<StreamInfo, MinibatchData>& GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()) = 0;
///
/// Destruct this MinibatchSource.
///
virtual ~MinibatchSource() {}
// TODO: Methods to save and restore from checkpoints
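And a usage sketch of the loop this contract implies (the empty-result end-of-data convention and the zero-meaning-unlimited sequence count are assumptions for illustration):

// Sketch: draining a MinibatchSource per the GetNextMinibatch contract above.
using namespace CNTK;

void DrainExample(MinibatchSource& source, const StreamInfo& features)
{
    // Request up to 256 samples per minibatch; 0 assumed to mean no sequence cap.
    std::unordered_map<StreamInfo, std::pair<size_t, size_t>> limits = { { features, { 0, 256 } } };
    for (;;)
    {
        const auto& minibatch = source.GetNextMinibatch(limits);
        if (minibatch.empty()) // assumed end-of-data signal
            break;
        // ... hand minibatch.at(features) to e.g. Trainer::TrainMinibatch ...
    }
}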

View file

@@ -7,6 +7,12 @@
#pragma once
#ifdef SWIG
#define final
#define explicit
#define static_assert(condition, message)
#endif
#ifdef _WIN32
#ifdef CNTKV2LIBRARYDLL
#define CNTK_API __declspec(dllexport)
@@ -47,8 +53,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
template <typename ElementType>
class ComputationNode;
class File;
}}}
// TODO: The following should be reconciled with the equivalent code in the CNTK implementation
@@ -133,7 +137,7 @@
#define NOT_IMPLEMENTED \
{ \
fprintf(stderr, "Inside File: %s Line: %d Function: %s -> Feature Not Implemented.\n", __FILE__, __LINE__, __FUNCTION__); \
LogicError("Inside File: %s Line: %d Function: %s -> Feature Not Implemented.\n", __FILE__, __LINE__, __FUNCTION__); \
CNTK::LogicError("Inside File: %s Line: %d Function: %s -> Feature Not Implemented.\n", __FILE__, __LINE__, __FUNCTION__); \
}
#endif
}
@@ -144,6 +148,7 @@
class CompositeFunction;
class Function;
class Variable;
class Axis;
// Similar to make_shared except that it associates a custom deleter with the shared_ptr to ensure
// that objects are deleted on the same side of the library DLL where they are allocated
@@ -174,4 +179,15 @@
class MinibatchSource;
typedef std::shared_ptr<MinibatchSource> MinibatchSourcePtr;
namespace Internal
{
CNTK_API FunctionPtr PackedIndex(const Variable& operand, const Variable& index, const std::wstring& name = L"");
CNTK_API FunctionPtr GatherPacked(const Variable& operand, const Variable& packedIndex, const std::wstring& name = L"");
CNTK_API FunctionPtr IsWithin(const Variable& operand, int offset, const std::wstring& name = L"");
CNTK_API FunctionPtr Where(const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name = L"");
CNTK_API FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name = L"");
CNTK_API FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name = L"");
CNTK_API FunctionPtr ReduceElements(const Variable& operand, const std::wstring& reductionOpName, const Axis& axis, const std::wstring& name = L"");
}
}

View file

@@ -15,6 +15,7 @@
#include "RecurrentNodes.h"
#include "EvaluationNodes.h"
#include "TrainingNodes.h"
#include "ReshapingNodes.h"
using namespace Microsoft::MSR::CNTK;
@@ -32,6 +33,7 @@ namespace CNTK
Variable var;
NDShape varShape = AsNDShape(node->GetSampleLayout());
// The CNTK sample layouts may have trailing axes with dimension size of 1 which are automatically
// added when converting from NDShape to CNTK internal TensorShapes and are not present in the original
// shapes specified by the user. These should be truncated.
@@ -57,11 +59,10 @@
if (node->HasMBLayout())
{
// TODO: Currently only default dynamic axis is supported
const std::wstring defaultCNTKDynamicAxisName = L"";
if (inputNode->GetRequestedDynamicAxis() != defaultCNTKDynamicAxisName)
LogicError("Found dynamic axis named '%S' while currently only default dynamic axis named '%S' is supported!", node->GetMBLayout()->GetAxisName(), defaultCNTKDynamicAxisName.c_str());
auto inputNodeInternalDynamicAxisName = inputNode->GetRequestedDynamicAxis();
std::vector<Axis> inputVarDynamicAxes = DynamicAxesFromInternalDynamicAxisName(inputNodeInternalDynamicAxisName);
var = Variable(varShape, isSparse, AsDataType<ElementType>(), node->GetLearningRateMultiplier() != 0, node->GetName());
var = Variable(varShape, isSparse, AsDataType<ElementType>(), node->GetLearningRateMultiplier() != 0, node->GetName(), inputVarDynamicAxes);
}
else
{
@@ -121,6 +122,40 @@
opType = PrimitiveOpType::Reciprocal;
else if (node->OperationName() == OperationNameOf(SoftmaxNode))
opType = PrimitiveOpType::Softmax;
else if (node->OperationName() == OperationNameOf(HardmaxNode))
opType = PrimitiveOpType::Hardmax;
else if (node->OperationName() == OperationNameOf(TransposeDimensionsNode))
{
auto transposeDimensionsNode = node->As<TransposeDimensionsNode<ElementType>>();
primitiveFunctionConfigParameters[L"axis1"] = (size_t)transposeDimensionsNode->Axis1();
primitiveFunctionConfigParameters[L"axis2"] = (size_t)transposeDimensionsNode->Axis2();
opType = PrimitiveOpType::TransposeAxes;
}
else if (node->OperationName() == OperationNameOf(WhereNode))
{
auto whereNode = node->As<WhereNode<ElementType>>();
auto internalDynamicAxisName = whereNode->DynamicAxisName();
std::vector<Axis> dynamicAxes = DynamicAxesFromInternalDynamicAxisName(internalDynamicAxisName);
std::vector<std::wstring> dynamicAxesNames;
for (auto axis : dynamicAxes)
dynamicAxesNames.push_back(axis.Name());
primitiveFunctionConfigParameters[L"newDynamicAxes"] = AsDictionaryValueVector(dynamicAxesNames);
opType = PrimitiveOpType::Where;
}
else if (node->OperationName() == OperationNameOf(SliceNode))
{
auto sliceNode = node->As<SliceNode<ElementType>>();
primitiveFunctionConfigParameters[L"axis"] = Axis(sliceNode->Axis() - 1).Name();
primitiveFunctionConfigParameters[L"beginIndex"] = sliceNode->BeginIndex();
primitiveFunctionConfigParameters[L"endIndex"] = sliceNode->EndIndex();
opType = PrimitiveOpType::Slice;
}
else if (node->OperationName() == OperationNameOf(SumElementsNode))
opType = PrimitiveOpType::SumAll;
else if (node->OperationName() == OperationNameOf(PlusNode))
opType = PrimitiveOpType::Plus;
else if (node->OperationName() == OperationNameOf(MinusNode))
@@ -139,11 +174,23 @@
opType = PrimitiveOpType::Greater;
else if (node->OperationName() == OperationNameOf(GreaterEqualNode))
opType = PrimitiveOpType::GreaterEqual;
else if (node->OperationName() == OperationNameOf(PackedIndexNode))
opType = PrimitiveOpType::PackedIndex;
else if (node->OperationName() == OperationNameOf(GatherPackedNode))
{
std::swap(inputVars[0], inputVars[1]);
opType = PrimitiveOpType::GatherPacked;
}
else if (node->OperationName() == OperationNameOf(TimesNode))
{
primitiveFunctionConfigParameters[L"numOutputAxes"] = DictionaryValue((size_t)node->As<TimesNode<ElementType>>()->OutputRank());
primitiveFunctionConfigParameters[L"numOutputAxes"] = (size_t)node->As<TimesNode<ElementType>>()->OutputRank();
opType = PrimitiveOpType::Times;
}
else if (node->OperationName() == OperationNameOf(TransposeTimesNode))
{
primitiveFunctionConfigParameters[L"numOutputAxes"] = (size_t)node->As<TransposeTimesNode<ElementType>>()->OutputRank();
opType = PrimitiveOpType::TransposeTimes;
}
else if (node->OperationName() == OperationNameOf(PastValueNode))
{
if (inputVars.size() == 1)
@@ -151,7 +198,7 @@
auto initialStateVar = Constant({}, node->As<PastValueNode<ElementType>>()->InitialActivationValue(), AsDeviceDescriptor(node->GetDeviceId()));
inputVars.insert(inputVars.begin(), initialStateVar);
}
primitiveFunctionConfigParameters[L"stepSize"] = DictionaryValue((size_t)node->As<PastValueNode<ElementType>>()->TimeStep());
primitiveFunctionConfigParameters[L"stepSize"] = (size_t)node->As<PastValueNode<ElementType>>()->TimeStep();
opType = PrimitiveOpType::PastValue;
}
else if (node->OperationName() == OperationNameOf(FutureValueNode))
@@ -161,7 +208,7 @@
auto initialStateVar = Constant({}, node->As<FutureValueNode<ElementType>>()->InitialActivationValue(), AsDeviceDescriptor(node->GetDeviceId()));
inputVars.insert(inputVars.begin(), initialStateVar);
}
primitiveFunctionConfigParameters[L"stepSize"] = DictionaryValue((size_t)node->As<FutureValueNode<ElementType>>()->TimeStep());
primitiveFunctionConfigParameters[L"stepSize"] = (size_t)node->As<FutureValueNode<ElementType>>()->TimeStep();
opType = PrimitiveOpType::FutureValue;
}
else if (node->OperationName() == OperationNameOf(SquareErrorNode))
@@ -176,8 +223,14 @@
std::swap(inputVars[0], inputVars[1]);
opType = PrimitiveOpType::ClassificationError;
}
else if (node->OperationName() == OperationNameOf(SumElementsNode))
opType = PrimitiveOpType::ReduceSum;
else if (node->OperationName() == OperationNameOf(ReduceElementsNode))
{
auto reduceElementsNode = node->As<ReduceElementsNode<ElementType>>();
primitiveFunctionConfigParameters[L"CNTKInternalReductionAxisIndex"] = (size_t)reduceElementsNode->ReductionAxis();
primitiveFunctionConfigParameters[L"ReductionOpName"] = reduceElementsNode->ReductionOpName();
opType = PrimitiveOpType::ReduceElements;
}
else if (node->OperationName() == OperationNameOf(ConvolutionNode))
{
auto convolutionNode = node->As<ConvolutionNode<ElementType>>();

View file

@@ -14,21 +14,17 @@ namespace CNTK
return GPUDevice(0);
}
/*static*/ const std::wstring Axis::s_staticAxisNamePrefix = L"staticAxis_";
/*static*/ const Axis& Axis::DefaultDynamicAxis()
{
static Axis s_defaultDynamicAxis(L"defaultDynamicAxis");
return s_defaultDynamicAxis;
}
/*static*/ const Axis& Axis::BatchAxis()
/*static*/ const Axis& Axis::DefaultBatchAxis()
{
static Axis s_batchAxis(L"batchAxis");
static Axis s_batchAxis(L"defaultBatchAxis");
return s_batchAxis;
}
/*static*/ const Axis& Axis::AllAxes()
{
static Axis s_allAxes(L"allAxes");
return s_allAxes;
}
}

View file

@@ -10,6 +10,10 @@
#include "Utils.h"
#include "ComputationNode.h"
#include "ReshapingNodes.h"
#include "EvaluationNodes.h"
#include "TrainingNodes.h"
#include "LinearAlgebraNodes.h"
#include "InputAndParamNodes.h"
using namespace Microsoft::MSR::CNTK;
@@ -72,6 +76,17 @@
}
}
/*static*/ const std::wstring PrimitiveFunction::InternalSumReductionOpName = L"Sum";
/*static*/ const std::wstring PrimitiveFunction::InternalLogSumReductionOpName = L"LogSum";
/*static*/ const std::wstring PrimitiveFunction::InternalMeanReductionOpName = L"Mean";
/*static*/ const std::wstring PrimitiveFunction::InternalMaxReductionOpName = L"Max";
/*static*/ const std::wstring PrimitiveFunction::InternalMinReductionOpName = L"Min";
/*static*/ const std::wstring PrimitiveFunction::InternalAllReductionOpName = L"All";
/*static*/ const std::wstring PrimitiveFunction::InternalAnyReductionOpName = L"Any";
/*static*/ std::wstring CompositeFunction::s_internalDefaultDynamicAxisName = L"";
/*static*/ std::wstring CompositeFunction::s_internalNoSequenceAxisName = L"noSequenceAxis";
// Replace any PlaceHolder Variables in the graph of Functions underlying 'this' CompositeFunction. All PlaceHolder variables
// should have been replaced before performing any Forward compute of 'this' Function.
/*virtual*/ void CompositeFunction::ReplacePlaceholders(const std::unordered_map<Placeholder, Variable>& placeholderReplacements,
@@ -122,22 +137,46 @@
computationNodePtr->SetLearningRateMultiplier(0.0);
NDArrayViewPtr value = variable.IsConstant() ? Constant(variable).Value() : Parameter(variable).Value();
auto matrix = variable.IsConstant() ? value->GetMatrix<ElementType>()->AsReference() : value->GetWritableMatrix<ElementType>()->AsReference();
computationNodePtr->Value() = std::move(matrix);
std::shared_ptr<const Matrix<ElementType>> valueMatrix = variable.IsConstant() ? value->GetMatrix<ElementType>() : value->GetWritableMatrix<ElementType>();
if (variable.IsParameter() || (valueMatrix->GetDeviceId() == network->GetDeviceId()))
computationNodePtr->Value() = valueMatrix->AsReference();
else
{
Matrix<ElementType> clonedMatrix(valueMatrix->GetNumRows(), valueMatrix->GetNumCols(), network->GetDeviceId(), valueMatrix->GetMatrixType(), valueMatrix->GetFormat());
clonedMatrix.AssignValuesOf(*valueMatrix);
computationNodePtr->Value() = std::move(clonedMatrix);
}
}
else if (variable.IsInput())
{
// TODO: Support inputs with > 1 dynamic axes
if (variable.DynamicAxes().size() != 1)
LogicError("Currently only Input variables with one dynamic axis are supported");
// TODO: Input variables currently are required to have the default batch axis
auto dynamicAxes = variable.DynamicAxes();
auto foundDefaultBatchAxis = std::find(dynamicAxes.begin(), dynamicAxes.end(), Axis::DefaultBatchAxis());
if (foundDefaultBatchAxis == dynamicAxes.end())
LogicError("Currently Input Variables are required to have the DefaultBatchAxis as one of their dynamic axes");
auto dynamicAxis = variable.DynamicAxes()[0];
if (dynamicAxis != Axis::DefaultDynamicAxis())
LogicError("Currently only Input variables with DefaultDynamicAxis are supported");
if (IsSparseInput(variable))
computationNodePtr = builder.CreateSparseInputNode(variable.Name(), AsTensorShape(variable.Shape()));
if (dynamicAxes.back() != Axis::DefaultBatchAxis())
LogicError("Currently Input Variables are required to have the DefaultBatchAxis as their last dynamic axes");
// TODO: Support inputs with > 1 dynamic axes
if ((dynamicAxes.size() < 1) || (dynamicAxes.size() > 2))
LogicError("Currently only Input variables with 1 or 2 dynamic axis are supported");
std::wstring internalDynamicAxisName;
if (dynamicAxes.size() == 1)
internalDynamicAxisName = s_internalNoSequenceAxisName;
else if (dynamicAxes[0] == Axis::DefaultDynamicAxis())
internalDynamicAxisName = s_internalDefaultDynamicAxisName;
else
computationNodePtr = builder.CreateInputNode(variable.Name(), AsTensorShape(variable.Shape()));
internalDynamicAxisName = dynamicAxes[0].Name();
if (!internalDynamicAxisName.empty())
network->AddNodeToNetAndAttachInputs(New<DynamicAxisNode<ElementType>>(network->GetDeviceId(), internalDynamicAxisName), {});
if (IsSparseInput(variable))
computationNodePtr = builder.CreateSparseInputNode(variable.Name(), AsTensorShape(variable.Shape()), internalDynamicAxisName);
else
computationNodePtr = builder.CreateInputNode(variable.Name(), AsTensorShape(variable.Shape()), internalDynamicAxisName);
if (variable.NeedsGradient())
{
@@ -219,11 +258,29 @@
computationNodePtr = builder.Reciprocal(input0Node, function->Name());
break;
case PrimitiveOpType::Softmax:
if (functionInputs[0].Shape().NumAxes() > 1)
InvalidArgument("Softmax operation can only be applied to a 1D input");
computationNodePtr = builder.Softmax(input0Node, function->Name());
break;
case PrimitiveOpType::Hardmax:
computationNodePtr = builder.Hardmax(input0Node, function->Name());
break;
case PrimitiveOpType::TransposeAxes:
{
auto axis1 = Axis(functionConfig[L"axis1"].GetValue<std::wstring>());
auto axis2 = Axis(functionConfig[L"axis2"].GetValue<std::wstring>());
// The axis ids passed to the internal CNTK TransposeDimensionsNode are 1 based instead of 0 based
computationNodePtr = New<TransposeDimensionsNode<ElementType>>(network->GetDeviceId(), function->Name(), (int)(axis1.StaticAxisIndex() + 1), (int)(axis2.StaticAxisIndex() + 1));
network->AddNodeToNetAndAttachInputs(computationNodePtr, { input0Node });
break;
}
case PrimitiveOpType::Where:
{
auto dynamicAxes = variable.DynamicAxes();
auto internalCNTKWhereNodeDynamicAxisName = (dynamicAxes == std::vector<Axis>({ Axis::DefaultBatchAxis() })) ? CompositeFunction::s_internalNoSequenceAxisName : dynamicAxes[0].Name();
computationNodePtr = New<WhereNode<ElementType>>(network->GetDeviceId(), function->Name(), internalCNTKWhereNodeDynamicAxisName);
network->AddNodeToNetAndAttachInputs(computationNodePtr, { input0Node });
break;
}
case PrimitiveOpType::Pooling:
{
PoolingType poolingType = (PoolingType)(functionConfig[L"poolingType"].GetValue<size_t>());
@@ -235,6 +292,9 @@
computationNodePtr = builder.Pooling(input0Node, AsCNTKPoolKind(poolingType), AsTensorShape(poolingWindowsShape, true), AsTensorShape(strides, true), autoPadding, AsTensorShape(lowerPad, true), AsTensorShape(upperPad, true), ImageLayoutKind::CHW, function->Name());
break;
}
case PrimitiveOpType::SumAll:
computationNodePtr = builder.Sum(input0Node, function->Name());
break;
case PrimitiveOpType::Plus:
computationNodePtr = builder.Plus(input0Node, input1Node, function->Name());
break;
@@ -268,6 +328,12 @@
computationNodePtr = builder.Times(input0Node, input1Node, numOutputAxes, function->Name());
break;
}
case PrimitiveOpType::TransposeTimes:
{
size_t numOutputAxes = functionConfig[L"numOutputAxes"].GetValue<size_t>();
computationNodePtr = network->AddNodeToNetAndAttachInputs(New<TransposeTimesNode<ElementType>>(network->GetDeviceId(), function->Name(), numOutputAxes), { input0Node, input1Node });
break;
}
case PrimitiveOpType::Convolution:
{
NDShape outputMapCount, kernelShape;
@@ -296,35 +362,25 @@
{
Variable initialStateVar = functionInputs[0];
Variable inputOperandVar = functionInputs[1];
// TODO: Current we only support a scalar initial state
if (!initialStateVar.IsConstant() || (initialStateVar.Shape().NumAxes() > 0))
LogicError("Currently PastValue/FutureValue Function only supports scalar initial state");
// TODO: We currently only support input operand with 1 static axis for PastValue/FutureValue
if (inputOperandVar.Shape().NumAxes() != 1)
LogicError("Currently PastValue/FutureValue Function only supports input operand with 1 static axis");
// TODO: We currently only support input operand with 1 dynamic axis for PastValue/FutureValue
if (inputOperandVar.DynamicAxes().size() != 1)
LogicError("Currently PastValue/FutureValue Function only supports input operand with 1 dynamic axis");
// Get the initial state of the PastValue/FutureValue operation
ElementType initStateValue;
NDArrayView tempView({}, &initStateValue, 1, DeviceDescriptor::CPUDevice());
tempView.CopyFrom(*Constant(initialStateVar).Value());
size_t stepSize = primitiveFunction->FunctionConfig()[L"stepSize"].GetValue<size_t>();
if (op == PrimitiveOpType::PastValue)
computationNodePtr = builder.PastValue(input1Node, (float)initStateValue, inputOperandVar.Shape()[0], primitiveFunction->FunctionConfig()[L"stepSize"].GetValue<size_t>(), function->Name());
computationNodePtr = builder.PastValue(input1Node, (float)initStateValue, inputOperandVar.Shape().TotalSize(), stepSize, function->Name());
else
computationNodePtr = builder.FutureValue(input1Node, (float)initStateValue, inputOperandVar.Shape()[0], primitiveFunction->FunctionConfig()[L"stepSize"].GetValue<size_t>(), function->Name());
computationNodePtr = builder.FutureValue(input1Node, (float)initStateValue, inputOperandVar.Shape().TotalSize(), stepSize, function->Name());
break;
}
case PrimitiveOpType::ReduceSum:
case PrimitiveOpType::ReduceElements:
{
// TODO: Use the new ReduceElements node instead of the legacy SumElements node for reduction. Currently ReduceElements has incorrect MBLayout inference.
//computationNodePtr = network->AddNodeToNetAndAttachInputs(New<ReduceElementsNode<ElementType>>(network->GetDeviceId(), function->Name(), L"Sum", 0), { input0Node });
computationNodePtr = builder.Sum(input0Node, function->Name());
auto CNTKInternalReductionAxisIndex = (int)functionConfig[L"CNTKInternalReductionAxisIndex"].GetValue<size_t>();
auto reductionOpName = functionConfig[L"ReductionOpName"].GetValue<std::wstring>();
computationNodePtr = network->AddNodeToNetAndAttachInputs(New<ReduceElementsNode<ElementType>>(network->GetDeviceId(), function->Name(), reductionOpName, CNTKInternalReductionAxisIndex), { input0Node });
break;
}
case PrimitiveOpType::BatchNormalization:
@@ -353,6 +409,25 @@
computationNodePtr = variableToNodeMap[variable];
break;
case PrimitiveOpType::PackedIndex:
computationNodePtr = New<PackedIndexNode<ElementType>>(network->GetDeviceId(), function->Name());
network->AddNodeToNetAndAttachInputs(computationNodePtr, { input0Node, input1Node });
break;
case PrimitiveOpType::GatherPacked:
computationNodePtr = New<GatherPackedNode<ElementType>>(network->GetDeviceId(), function->Name());
network->AddNodeToNetAndAttachInputs(computationNodePtr, { input1Node, input0Node });
break;
case PrimitiveOpType::Slice:
{
auto axis = Axis(functionConfig[L"axis"].GetValue<std::wstring>());
int beginIndex = functionConfig[L"beginIndex"].GetValue<size_t>();
int endIndex = functionConfig[L"endIndex"].GetValue<size_t>();
// Internal CNTK SliceNode takes 1 based axis indices instead of 0 based
computationNodePtr = New<SliceNode<ElementType>>(network->GetDeviceId(), function->Name(), beginIndex, endIndex, (int)(axis.StaticAxisIndex() + 1));
network->AddNodeToNetAndAttachInputs(computationNodePtr, { input0Node });
break;
}
default:
LogicError("Specified op %s not yet supported", PrimitiveOpTypeName(op));
break;
@@ -486,11 +561,11 @@
if (value->Data()->Shape().NumAxes() == var.Shape().NumAxes())
return{ value->Data()->GetMatrix<ElementType>(), nullptr };
if (value->Data()->Shape().NumAxes() != (var.Shape().NumAxes() + var.DynamicAxes().size() + 1))
InvalidArgument("Value's number of axes should be larger than the Variable's number of axes by 1 + number of dynamic axes");
if (value->Data()->Shape().NumAxes() < (var.Shape().NumAxes() + var.DynamicAxes().size()))
InvalidArgument("Value's number of axes should be larger than the Variable's number of axes by number of dynamic axes");
if (var.DynamicAxes().size() > 1)
LogicError("More than one dynamic axis for a variable is currently unsupported");
if (var.DynamicAxes().size() > 2)
LogicError("More than 2 dynamic axis for a variable is currently unsupported");
size_t maxNumTimeSteps = value->Data()->Shape()[var.Shape().NumAxes()];
size_t numSequences = value->Data()->Shape()[var.Shape().NumAxes() + 1];
@ -618,9 +693,9 @@ namespace CNTK
sequenceLengths.push_back(sequenceInfo.GetNumTimeSteps());
}
// Reshuffle to data to unpack and uninterleave the CNTK form data
// Now generate the gather indices
auto shuffledMatrixData = std::make_shared<Matrix<ElementType>>(matrix.GetNumRows(), maxNumTimeSteps * numSequences, matrix.GetDeviceId());
// Reshuffle the data to unpack and uninterleave the CNTK-format packed data
// Now generate the scatter indices
auto shuffledMatrixData = std::make_shared<Matrix<ElementType>>(matrix.GetNumRows(), maxNumTimeSteps * numSequences, matrix.GetDeviceId(), matrix.GetMatrixType(), matrix.GetFormat());
std::vector<size_t> sequencesShorterThanLongestSequence;
for (size_t i = 0; i < numSequences; ++i)
@ -659,15 +734,15 @@ namespace CNTK
}
auto tensorView = new TensorView<ElementType>(shuffledMatrixData, AsTensorShape(valueDataShape));
auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), StorageFormat::Dense, valueDataShape, readOnly, tensorView);
auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(shuffledMatrixData->GetFormat()), valueDataShape, readOnly, tensorView);
return MakeSharedObject<Value>(data, mask);
}
template <typename ElementType>
/*static*/ ValuePtr CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout(Variable var, const Matrix<ElementType>& matrix, const MBLayoutPtr& layout, bool readOnly /*= true*/)
{
if (var.DynamicAxes().size() > 1)
LogicError("More than one dynamic axis for a variable is currently unsupported");
if (var.DynamicAxes().size() > 2)
LogicError("More than 2 dynamic axis for a variable is currently unsupported");
if (AsDataType<ElementType>() != var.GetDataType())
LogicError("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(var.GetDataType()));
@ -732,7 +807,7 @@ namespace CNTK
MBLayoutPtr layout = CNTKMatrixAndMBLayout.second;
auto nodeLayout = computationNode->GetMBLayout();
if (((layout == nullptr) != (nodeLayout == nullptr)) || ((layout != nullptr) && (*layout != *nodeLayout)))
InvalidArgument("The layout of the specified gradient Value in incompatible with the layout of the corresponding Variable computed during Forward call");
InvalidArgument("The layout of the specified gradient Value is incompatible with the layout of the corresponding Variable computed during Forward call");
computationNode->As<ComputationNode<ElementType>>()->AssignGradient(*CNTKMatrixAndMBLayout.first);
}
@ -814,12 +889,9 @@ namespace CNTK
}
if (varValue == nullptr)
{
auto data = MakeSharedObject<NDArrayView>(var.GetDataType(), valueShape, AsDeviceDescriptor(computationNode->ValuePtr()->GetDeviceId()));
auto mask = (nodeValue->Mask() != nullptr) ? MakeSharedObject<NDMask>(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr;
varValue = MakeSharedObject<Value>(data, mask);
}
varValue->CopyFrom(*nodeValue);
varValue = nodeValue->DeepClone();
else
varValue->CopyFrom(*nodeValue);
}
void CompositeFunction::GetNetworkOutputs(std::unordered_map<Variable, ValuePtr>& outputs)
@ -984,7 +1056,7 @@ namespace CNTK
FunctionPtr Round(const Variable& operand, const std::wstring& name/* = L""*/)
{
return Floor(Plus(operand, Constant(NDShape({}), 0.5f)), name);
return Floor(Plus(operand, ScalarConstant(operand.GetDataType(), 0.5f)), name);
}
FunctionPtr Floor(const Variable& operand, const std::wstring& name/* = L""*/)
@ -1012,6 +1084,71 @@ namespace CNTK
return UnaryOp(PrimitiveOpType::Softmax, operand, Dictionary(), name);
}
FunctionPtr Hardmax(const Variable& operand, const std::wstring& name/* = L""*/)
{
return UnaryOp(PrimitiveOpType::Hardmax, operand, Dictionary(), name);
}
FunctionPtr TransposeAxes(const Variable& operand, const Axis& axis1, const Axis& axis2, const std::wstring& name /*= L""*/)
{
if (!axis1.IsStaticAxis() || !axis2.IsStaticAxis())
LogicError("TransposeAxes currently does not support transposing dynamic axes");
auto additionalProperties = Dictionary();
additionalProperties[L"axis1"] = axis1.Name();
additionalProperties[L"axis2"] = axis2.Name();
return UnaryOp(PrimitiveOpType::TransposeAxes, operand, std::move(additionalProperties), name);
}
FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name /*= L""*/)
{
if ((endIndex - beginIndex) <= 0)
InvalidArgument("CNTK::Slice: endIndex (%d) - beginIndex (%d) must be a positive number", endIndex, beginIndex);
if (axis == Axis::DefaultBatchAxis())
LogicError("Slice is currently unsupported along the batch axis");
if (axis.IsStaticAxis())
return Internal::Slice(operand, axis, beginIndex, endIndex, name);
auto operandAxes = operand.DynamicAxes();
auto findAxis = std::find(operandAxes.begin(), operandAxes.end(), axis);
if (findAxis == operandAxes.end())
InvalidArgument("The specified dynamic axis named %S does not match any of the dynamic axes of the operand", axis.Name().c_str());
auto beginFlagsLambda = [beginIndex, operand]() {
return (beginIndex > 0) ? Minus(ScalarConstant(operand.GetDataType(), 1.0f), Internal::IsWithin(operand, beginIndex)) : Internal::IsWithin(operand, beginIndex);
};
auto endFlagsLambda = [endIndex, operand]() {
return (endIndex > 0) ? Internal::IsWithin(operand, endIndex) : Minus(ScalarConstant(operand.GetDataType(), 1.0f), Internal::IsWithin(operand, endIndex));
};
FunctionPtr flags;
if (beginIndex == 0)
flags = endFlagsLambda();
else if (endIndex == 0)
flags = beginFlagsLambda();
else
flags = ElementTimes(beginFlagsLambda(), endFlagsLambda());
// Since we are slicing along a dynamic axis, the output variable's dynamic axes will be different from the operand's
std::vector<Axis> newDynamicAxes;
for (auto operandAxis : operandAxes)
{
if (operandAxis == axis)
{
// If we are selecting just one frame from the dynamic axis, we can remove that axis
if ((endIndex - beginIndex) > 1)
newDynamicAxes.push_back(CompositeFunction::NextAutoGeneratedDynamicAxis());
}
else
newDynamicAxes.push_back(operandAxis);
}
return Internal::Gather(operand, flags, newDynamicAxes);
}
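// Editor's sketch (not part of this commit): a worked trace of the flag
// construction above. For beginIndex = 1, endIndex = 3 on a sequence of
// length 5, IsWithin(operand, 1) = [1 0 0 0 0] and IsWithin(operand, 3) =
// [1 1 1 0 0], so flags = (1 - [1 0 0 0 0]) .* [1 1 1 0 0] = [0 1 1 0 0];
// Internal::Gather then keeps exactly frames 1 and 2, i.e. the range [1, 3),
// and the output gets a fresh auto-generated dynamic axis since its length
// differs from the operand's.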
FunctionPtr BinaryOp(PrimitiveOpType op, const Variable& leftOperand, const Variable& rightOperand, Dictionary&& opConfig, const std::wstring& name)
{
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(op, std::vector<Variable>({ leftOperand, rightOperand }), std::move(opConfig), name), name);
@ -1074,6 +1211,13 @@ namespace CNTK
return BinaryOp(PrimitiveOpType::Times, leftOperand, rightOperand, std::move(additionalProperties), name);
}
FunctionPtr TransposeTimes(const Variable& leftOperand, const Variable& rightOperand, size_t numOutputAxes /*= 1*/, const std::wstring& name/* = L""*/)
{
auto additionalProperties = Dictionary();
additionalProperties[L"numOutputAxes"] = numOutputAxes;
return BinaryOp(PrimitiveOpType::TransposeTimes, leftOperand, rightOperand, std::move(additionalProperties), name);
}
FunctionPtr SquaredError(const Variable& prediction, const Variable& targets, const std::wstring& name/* = L""*/)
{
return BinaryOp(PrimitiveOpType::SquaredError, prediction, targets, Dictionary(), name);
@ -1081,18 +1225,20 @@ namespace CNTK
FunctionPtr CrossEntropyWithSoftmax(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/)
{
return BinaryOp(PrimitiveOpType::CrossEntropyWithSoftmax, prediction, labels, Dictionary(), name);
return ReduceSum(Minus(ReduceLogSum(prediction, Axis(0)), TransposeTimes(labels, prediction)), name);
//return BinaryOp(PrimitiveOpType::CrossEntropyWithSoftmax, prediction, labels, Dictionary(), name);
}
FunctionPtr ClassificationError(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/)
{
return BinaryOp(PrimitiveOpType::ClassificationError, prediction, labels, Dictionary(), name);
return ReduceSum(Minus(ScalarConstant(prediction.GetDataType(), 1.0f), TransposeTimes(labels, Hardmax(prediction))), name);
//return BinaryOp(PrimitiveOpType::ClassificationError, prediction, labels, Dictionary(), name);
}
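// Editor's note (not part of this commit): the rewritten criteria follow from
// the definitions for a 1D prediction z and one-hot labels y:
//   CrossEntropyWithSoftmax(z, y) = -log(softmax(z)[label])
//                                 = log(sum_k exp(z_k)) - y^T z
//                                 = ReduceLogSum(z, Axis(0)) - TransposeTimes(y, z),
//   ClassificationError(z, y)     = 1 - y^T Hardmax(z),
// where Hardmax(z) is one-hot at argmax(z), so the inner product is 1 on a
// correct prediction and 0 otherwise; the outer ReduceSum in both cases sums
// over the dynamic (sequence and batch) axes.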
FunctionPtr PastValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name/* = L""*/)
{
if (operand.DynamicAxes().size() != 1)
InvalidArgument("PastValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic axis");
if (operand.DynamicAxes().size() != 2)
InvalidArgument("PastValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic sequence-axis");
auto additionalProperties = Dictionary();
additionalProperties[L"stepSize"] = DictionaryValue(stepSize);
@ -1101,8 +1247,8 @@ namespace CNTK
FunctionPtr FutureValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name/* = L""*/)
{
if (operand.DynamicAxes().size() != 1)
InvalidArgument("FutureValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic axis");
if (operand.DynamicAxes().size() != 2)
InvalidArgument("FutureValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic sequence-axis");
auto additionalProperties = Dictionary();
additionalProperties[L"stepSize"] = DictionaryValue(stepSize);
@ -1111,7 +1257,17 @@ namespace CNTK
FunctionPtr ReduceSum(const Variable& operand, const std::wstring& name/* = L""*/)
{
return UnaryOp(PrimitiveOpType::ReduceSum, operand, Dictionary(), name);
return UnaryOp(PrimitiveOpType::SumAll, operand, Dictionary(), name);
}
FunctionPtr ReduceSum(const Variable& operand, const Axis& axis, const std::wstring& name/* = L""*/)
{
return Internal::ReduceElements(operand, PrimitiveFunction::InternalSumReductionOpName, axis, name);
}
FunctionPtr ReduceLogSum(const Variable& operand, const Axis& axis, const std::wstring& name/* = L""*/)
{
return Internal::ReduceElements(operand, PrimitiveFunction::InternalLogSumReductionOpName, axis, name);
}
FunctionPtr PerDimMeanVarianceNormalize(const Variable& operand, const NDArrayViewPtr& mean, const NDArrayViewPtr& invStdDev, const std::wstring& name /*= L""*/)
@ -1207,4 +1363,94 @@ namespace CNTK
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Combine, inputs, Dictionary(), name), name);
}
namespace Internal
{
FunctionPtr PackedIndex(const Variable& operand, const Variable& index, const std::wstring& name /*= L""*/)
{
return BinaryOp(PrimitiveOpType::PackedIndex, operand, index, Dictionary(), name);
}
FunctionPtr GatherPacked(const Variable& operand, const Variable& packedIndex, const std::wstring& name /*= L""*/)
{
return BinaryOp(PrimitiveOpType::GatherPacked, operand, packedIndex, Dictionary(), name);
}
FunctionPtr ZeroesLike(const Variable& operand)
{
if (operand.Shape().NumAxes() > 1)
LogicError("ZerosLike currently does not support operands with more than 1 static axes");
auto rowSliceFunc = Internal::Slice(operand, Axis(0), 0, 1);
return Minus(rowSliceFunc, rowSliceFunc);
}
FunctionPtr IsWithin(const Variable& operand, int offset, const std::wstring& name /*= L""*/)
{
if (offset == 0)
InvalidArgument("Internal::CNTK::IsWithin: The offset must be positive");
if (offset > 0)
return PastValue(ScalarConstant(operand.GetDataType(), 1.0f), ZeroesLike(operand), offset, name);
else
return FutureValue(ScalarConstant(operand.GetDataType(), 1.0f), ZeroesLike(operand), -offset, name);
}
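// Editor's sketch (not part of this commit): for a sequence of length 5 and
// offset = 2, ZeroesLike(operand) = [0 0 0 0 0]; PastValue with a scalar
// initial state of 1 and stepSize 2 reads two frames back, so frames 0 and 1
// see the initial state and the result is [1 1 0 0 0]. IsWithin(operand, 2)
// is thus 1 exactly on frames t < 2; for offset = -2, FutureValue reads two
// frames ahead and marks the last two frames instead.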
FunctionPtr Where(const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name /*= L""*/)
{
auto additionalProperties = Dictionary();
std::vector<std::wstring> newDynamicAxesNames;
for (auto axis : newDynamicAxes)
newDynamicAxesNames.push_back(axis.Name());
additionalProperties[L"newDynamicAxes"] = AsDictionaryValueVector(newDynamicAxesNames);
return UnaryOp(PrimitiveOpType::Where, condition, std::move(additionalProperties), name);
}
FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name /*= L""*/)
{
return Internal::GatherPacked(operand, Internal::PackedIndex(operand, Where(condition, newDynamicAxes)));
}
FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name /*= L""*/)
{
auto additionalProperties = Dictionary();
additionalProperties[L"axis"] = axis.Name();
additionalProperties[L"beginIndex"] = (size_t)beginIndex;
additionalProperties[L"endIndex"] = (size_t)endIndex;
return UnaryOp(PrimitiveOpType::Slice, operand, std::move(additionalProperties), name);
}
FunctionPtr ReduceElements(const Variable& operand, const std::wstring& reductionOpName, const Axis& axis, const std::wstring& name /*= L""*/)
{
using namespace std::placeholders;
if (axis.IsStaticAxis())
{
auto additionalProperties = Dictionary();
additionalProperties[L"CNTKInternalReductionAxisIndex"] = (size_t)(axis.StaticAxisIndex() + 1);
additionalProperties[L"ReductionOpName"] = reductionOpName;
return UnaryOp(PrimitiveOpType::ReduceElements, operand, std::move(additionalProperties), name);
}
if (axis == Axis::DefaultBatchAxis())
LogicError("Reduction is currently unsupported along the batch axis");
if (reductionOpName != PrimitiveFunction::InternalSumReductionOpName)
LogicError("%S reduction along dynamic axis is currently unsupported", reductionOpName.c_str());
std::function<FunctionPtr(const Variable& leftOperand, const Variable& rightOperand)> reductionFunctor;
if (reductionOpName == PrimitiveFunction::InternalSumReductionOpName)
reductionFunctor = std::bind(Plus, _1, _2, L"");
// We are reducing over a dynamic axis which is currently implemented using recurrence
auto cumulativeSumFunctionPlaceholder = Placeholder(operand.Shape());
auto prevAccumulatedValuesFunction = PastValue(ScalarConstant(operand.GetDataType(), 0.0f), cumulativeSumFunctionPlaceholder, 1);
auto cumulativeSumFunction = reductionFunctor(prevAccumulatedValuesFunction, operand);
cumulativeSumFunction->ReplacePlaceholders({ { cumulativeSumFunctionPlaceholder, cumulativeSumFunction } });
return CNTK::Slice(cumulativeSumFunction, axis, -1, 0);
}
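// Editor's sketch (not part of this commit): for x = [3 1 4] along its
// sequence axis, the recurrence s_t = PastValue(s)_t + x_t with initial
// state 0 produces the running sums [3 4 8]; CNTK::Slice(..., axis, -1, 0)
// then keeps only the final frame, 8, i.e. the sum over the dynamic axis.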
}
}

View file

@ -27,7 +27,12 @@ namespace CNTK
Abs,
Reciprocal,
Softmax,
Hardmax,
TransposeAxes,
Where,
Slice,
Pooling,
SumAll,
Plus,
Minus,
ElementTimes,
@ -37,14 +42,17 @@ namespace CNTK
LessEqual,
Greater,
GreaterEqual,
PackedIndex,
GatherPacked,
Times,
TransposeTimes,
Convolution,
SquaredError,
CrossEntropyWithSoftmax,
ClassificationError,
PastValue,
FutureValue,
ReduceSum,
ReduceElements,
BatchNormalization,
Combine,
};
@ -77,7 +85,12 @@ namespace CNTK
{ PrimitiveOpType::Abs, "Abs" },
{ PrimitiveOpType::Reciprocal, "Reciprocal" },
{ PrimitiveOpType::Softmax, "Softmax" },
{ PrimitiveOpType::Hardmax, "Hardmax" },
{ PrimitiveOpType::TransposeAxes, "TransposeAxes" },
{ PrimitiveOpType::Where, "Where" },
{ PrimitiveOpType::Slice, "Slice" },
{ PrimitiveOpType::Pooling, "Pooling" },
{ PrimitiveOpType::SumAll, "SumAll" },
{ PrimitiveOpType::Plus, "Plus" },
{ PrimitiveOpType::Minus, "Minus" },
{ PrimitiveOpType::ElementTimes, "ElementTimes" },
@ -87,14 +100,17 @@ namespace CNTK
{ PrimitiveOpType::LessEqual, "LessEqual" },
{ PrimitiveOpType::Greater, "Greater" },
{ PrimitiveOpType::GreaterEqual, "GreaterEqual" },
{ PrimitiveOpType::PackedIndex, "PackedIndex" },
{ PrimitiveOpType::GatherPacked, "GatherPacked" },
{ PrimitiveOpType::Times, "Times" },
{ PrimitiveOpType::TransposeTimes, "TransposeTimes" },
{ PrimitiveOpType::Convolution, "Convolution" },
{ PrimitiveOpType::SquaredError, "SquaredError" },
{ PrimitiveOpType::CrossEntropyWithSoftmax, "CrossEntropyWithSoftmax" },
{ PrimitiveOpType::ClassificationError, "ClassificationError" },
{ PrimitiveOpType::PastValue, "PastValue" },
{ PrimitiveOpType::FutureValue, "FutureValue" },
{ PrimitiveOpType::ReduceSum, "ReduceSum" },
{ PrimitiveOpType::ReduceElements, "ReduceElements" },
{ PrimitiveOpType::BatchNormalization, "BatchNormalization" },
{ PrimitiveOpType::Combine, "Combine" }
};
@ -107,6 +123,15 @@ namespace CNTK
class PrimitiveFunction final : public Function
{
public:
static const std::wstring InternalSumReductionOpName;
static const std::wstring InternalLogSumReductionOpName;
static const std::wstring InternalMeanReductionOpName;
static const std::wstring InternalMaxReductionOpName;
static const std::wstring InternalMinReductionOpName;
static const std::wstring InternalAllReductionOpName;
static const std::wstring InternalAnyReductionOpName;
public:
PrimitiveFunction(PrimitiveOpType op, const std::vector<Variable>& inputs, Dictionary&& functionConfig, const std::wstring& functionName = L"")
: Function(inputs, GetOutputVariables(op, inputs, this, functionConfig), nullptr, functionName), m_op(op), m_functionConfig(std::move(functionConfig))
@ -242,16 +267,26 @@ namespace CNTK
DataType outputDataType = inputs[0].GetDataType();
// We currently require that the inputs' dynamic axes if any match
std::vector<Axis> outputDynamicAxes = inputs[0].DynamicAxes();
for (auto inputVar : inputs)
std::vector<Axis> outputDynamicAxes;
if (op == PrimitiveOpType::Where)
; // Where determines its own new dynamic axes (read from the function config below)
else if ((op == PrimitiveOpType::PackedIndex) || (op == PrimitiveOpType::GatherPacked))
{
auto currentInputDynamicAxes = inputVar.DynamicAxes();
if (outputDynamicAxes.empty())
outputDynamicAxes = currentInputDynamicAxes;
else
outputDynamicAxes = inputs[1].DynamicAxes();
}
else
{
outputDynamicAxes = inputs[0].DynamicAxes();
for (auto inputVar : inputs)
{
if (!currentInputDynamicAxes.empty() && (currentInputDynamicAxes != outputDynamicAxes))
LogicError("Currently if an operand of a binary elementwise operation has any dynamic axes, those must match the dynamic axes of the other operand");
auto currentInputDynamicAxes = inputVar.DynamicAxes();
if (outputDynamicAxes.empty())
outputDynamicAxes = currentInputDynamicAxes;
else
{
if (!currentInputDynamicAxes.empty() && (currentInputDynamicAxes != outputDynamicAxes))
LogicError("Currently if an operand of a binary elementwise operation has any dynamic axes, those must match the dynamic axes of the other operand");
}
}
}
@ -268,9 +303,38 @@ namespace CNTK
case PrimitiveOpType::Abs:
case PrimitiveOpType::Reciprocal:
case PrimitiveOpType::Softmax:
case PrimitiveOpType::Hardmax:
assert(inputs.size() == 1);
if (((op == PrimitiveOpType::Softmax) || (op == PrimitiveOpType::Hardmax)) && (inputs[0].Shape().NumAxes() > 1))
InvalidArgument("Softmax/Hardmax operation can only be applied to a 1D input");
outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[0].Shape()), outputDataType, owner, outputDynamicAxes));
break;
case PrimitiveOpType::TransposeAxes:
{
assert(inputs.size() == 1);
auto axis1 = Axis(functionConfig[L"axis1"].GetValue<std::wstring>());
auto axis2 = Axis(functionConfig[L"axis2"].GetValue<std::wstring>());
if (!axis1.IsStaticAxis() || !axis2.IsStaticAxis())
LogicError("TransposeAxes operation currently does not support transposing dynamic axes");
auto transposedTensorShape = AsTensorShape(inputs[0].Shape(), true);
transposedTensorShape.SwapDimsInPlace(axis1.StaticAxisIndex(), axis2.StaticAxisIndex());
outputs.push_back(Variable(AsNDShape(transposedTensorShape), outputDataType, owner, outputDynamicAxes));
break;
}
case PrimitiveOpType::Where:
{
assert(inputs.size() == 1);
std::vector<Axis> newDynamicAxes;
auto newDynamicAxesNames = AsBasicElementTypeVector<std::wstring>(functionConfig[L"newDynamicAxes"].GetValue<std::vector<DictionaryValue>>());
for (auto axisName : newDynamicAxesNames)
newDynamicAxes.push_back(Axis(axisName));
outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[0].Shape()), outputDataType, owner, newDynamicAxes));
break;
}
case PrimitiveOpType::Pooling:
{
assert(inputs.size() == 1);
@ -282,6 +346,10 @@ namespace CNTK
outputs.push_back(Variable(ConvolutionOpOutputShape(inputs[0].Shape(), poolingWindowsShape, { 1 }, strides, { true }, autoPadding, lowerPad, upperPad, false), outputDataType, owner, outputDynamicAxes));
break;
}
case PrimitiveOpType::SumAll:
assert(inputs.size() == 1);
outputs.push_back(Variable({}, outputDataType, owner, std::vector<Axis>({})));
break;
case PrimitiveOpType::Plus:
case PrimitiveOpType::Minus:
case PrimitiveOpType::ElementTimes:
@ -297,15 +365,26 @@ namespace CNTK
case PrimitiveOpType::Times:
{
assert(inputs.size() == 2);
// TODO: Support dynamic axes on the left operand
if (!inputs[0].DynamicAxes().empty())
LogicError("Dynamic axes are currently unsupported for left operand of a Times operation");
size_t numOutputAxes = functionConfig[L"numOutputAxes"].GetValue<size_t>();
outputs.push_back(Variable(TimesOpOutputShape(inputs[0].Shape(), inputs[1].Shape(), numOutputAxes), outputDataType, owner, outputDynamicAxes));
break;
}
case PrimitiveOpType::TransposeTimes:
{
assert(inputs.size() == 2);
auto numLeftOperandAxes = inputs[0].Shape().NumAxes();
if (numLeftOperandAxes > 2)
InvalidArgument("TransposeTimes operation only supports left operands of rank 1 or 2");
NDShape transposedLeftOperandShape(2, 1);
for (size_t i = 0; i < numLeftOperandAxes; ++i)
transposedLeftOperandShape[transposedLeftOperandShape.NumAxes() - i - 1] = inputs[0].Shape()[i];
size_t numOutputAxes = functionConfig[L"numOutputAxes"].GetValue<size_t>();
outputs.push_back(Variable(TimesOpOutputShape(transposedLeftOperandShape, inputs[1].Shape(), numOutputAxes), outputDataType, owner, outputDynamicAxes));
break;
}
case PrimitiveOpType::Convolution:
{
assert(inputs.size() == 2);
@ -341,26 +420,45 @@ namespace CNTK
for (size_t i = 0; i < inputs[0].Shape().NumAxes(); ++i)
reductionAxes.push_back(i);
outputs.push_back(Variable(ReductionOpOutputShape(op, predictionShape, reductionAxes), outputDataType, owner, {}));
outputs.push_back(Variable(ReductionOpOutputShape(op, predictionShape, reductionAxes), outputDataType, owner, std::vector<Axis>({})));
break;
}
case PrimitiveOpType::PastValue:
case PrimitiveOpType::FutureValue:
{
assert(inputs.size() == 2);
Variable initialStateVar = inputs[0];
Variable inputOperandVar = inputs[1];
// TODO: Currently we only support a scalar initial state
if (!initialStateVar.IsConstant() || (initialStateVar.Shape().NumAxes() > 0))
LogicError("Currently PastValue/FutureValue Function only supports a scalar initial state");
// TODO: We currently only support input operand with 1 static axis for PastValue/FutureValue
if (inputOperandVar.Shape().NumAxes() > 1)
LogicError("Currently PastValue/FutureValue Function only supports input operand with <= 1 static axis");
// TODO: We currently only support input operands with 2 dynamic axes for PastValue/FutureValue
if (inputOperandVar.DynamicAxes().size() != 2)
LogicError("Currently PastValue/FutureValue Function only supports input operands with 2 dynamic axes (1 sequence-axis and 1 batch-axis)");
outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[1].Shape()), outputDataType, owner, outputDynamicAxes));
break;
case PrimitiveOpType::ReduceSum:
}
case PrimitiveOpType::ReduceElements:
{
assert(inputs.size() == 1);
// TODO: For reductions, we should remove any of the dynamic axes from 'outputDynamicAxes' that are being reduced over.
// Currently we only support reductions that reduce over all axes
std::vector<Axis> reductionOutputDynamicAxes = {};
auto CNTKInternalReductionAxisIndex = functionConfig[L"CNTKInternalReductionAxisIndex"].GetValue<size_t>();
std::vector<size_t> reductionAxes;
for (size_t i = 0; i < inputs[0].Shape().NumAxes(); ++i)
reductionAxes.push_back(i);
// TODO: Do not use an integer literal for the special value of axis id that indicates all static axes
if (CNTKInternalReductionAxisIndex == 0)
{
for (size_t i = 0; i < inputs[0].Shape().NumAxes(); ++i)
reductionAxes.push_back(i);
}
else
reductionAxes.push_back(CNTKInternalReductionAxisIndex - 1);
outputs.push_back(Variable(ReductionOpOutputShape(op, inputs[0].Shape(), reductionAxes), outputDataType, owner, reductionOutputDynamicAxes));
outputs.push_back(Variable(ReductionOpOutputShape(op, inputs[0].Shape(), reductionAxes), outputDataType, owner, inputs[0].DynamicAxes()));
break;
}
case PrimitiveOpType::BatchNormalization:
@ -369,6 +467,60 @@ namespace CNTK
case PrimitiveOpType::Combine:
outputs = inputs;
break;
case PrimitiveOpType::PackedIndex:
outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[1].Shape()), outputDataType, owner, outputDynamicAxes));
break;
case PrimitiveOpType::GatherPacked:
{
bool sourceHasDynamicAxis = !inputs[0].DynamicAxes().empty();
NDShape outputShape;
// inherit tensor dimension from sourceData, minus the last (column or time) dimension. TODO this needs to become simpler...
if (sourceHasDynamicAxis)
outputShape = inputs[0].Shape();
else
{
if (inputs[0].Shape().NumAxes() > 1)
outputShape = inputs[0].Shape().SubShape(0, inputs[0].Shape().NumAxes() - 1);
else
outputShape = {};
}
outputs.push_back(Variable(outputShape, outputDataType, owner, outputDynamicAxes));
break;
}
case PrimitiveOpType::Slice:
{
auto axis = Axis(functionConfig[L"axis"].GetValue<std::wstring>());
int beginIndex = functionConfig[L"beginIndex"].GetValue<size_t>();
int endIndex = functionConfig[L"endIndex"].GetValue<size_t>();
if (!axis.IsStaticAxis())
LogicError("Built-in Slice operation currently does not support slicing along dynamic axis");
if (axis.StaticAxisIndex() >= inputs[0].Shape().NumAxes())
InvalidArgument("The specified axis index (%d) for the Slice operation is outside the bounds of the available axes of the input", (int)axis.StaticAxisIndex());
size_t sliceAxisDim = inputs[0].Shape()[axis.StaticAxisIndex()];
int realBeginIndex = (beginIndex >= 0) ? beginIndex : beginIndex + sliceAxisDim;
int realEndIndex = (endIndex > 0) ? endIndex : endIndex + sliceAxisDim;
if ((sliceAxisDim < realEndIndex) || (realEndIndex < realBeginIndex) || (realBeginIndex < 0))
RuntimeError("Slice operation: Index range [%d,%d), interpreted as [%d,%d), is invalid for input ([%S]).",
beginIndex,
endIndex,
realBeginIndex,
realEndIndex,
inputs[0].Shape().AsString().c_str());
auto outputTensorShape = AsTensorShape(inputs[0].Shape(), true);
// propagate as much as we can
if ((axis.StaticAxisIndex() < outputTensorShape.GetRank()) && (0 <= realBeginIndex) && (realBeginIndex <= realEndIndex) && (realEndIndex <= sliceAxisDim))
outputTensorShape.NarrowTo(axis.StaticAxisIndex(), realBeginIndex, realEndIndex);
outputs.push_back(Variable(AsNDShape(outputTensorShape), outputDataType, owner, outputDynamicAxes));
break;
}
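// Editor's note (not part of this commit): a worked example of the index
// normalization above. For an input of dimension 10 on the sliced axis,
// beginIndex = -1, endIndex = 0 normalizes to [9, 10), i.e. the last element;
// beginIndex = 0, endIndex = -1 normalizes to [0, 9), everything but the last.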
default:
LogicError("Specified op %s not yet supported", PrimitiveOpTypeName(op));
break;
@ -417,6 +569,17 @@ namespace CNTK
std::unordered_map<StreamInfo, std::pair<NDArrayViewPtr, NDArrayViewPtr>>& computedMeanAndInvStdDevs,
const DeviceDescriptor& device /*= DeviceDescriptor::CPUDevice()*/);
public:
static std::wstring s_internalDefaultDynamicAxisName;
static std::wstring s_internalNoSequenceAxisName;
static Axis NextAutoGeneratedDynamicAxis()
{
static std::atomic<unsigned int> nextAutoGeneratedDynamicAxis(0);
static const std::wstring autoGeneratedDynamicAxisNamePrefix = L"autoGeneratedDynamicAxis_";
return Axis(autoGeneratedDynamicAxisNamePrefix + std::to_wstring(nextAutoGeneratedDynamicAxis++));
}
public:
static CompositeFunctionPtr Create(const FunctionPtr& rootFunction, const std::wstring& name = L"")
{
@ -524,4 +687,17 @@ namespace CNTK
// the next 'Backward' call.
std::unordered_set<Variable> m_currentBackpropRoots;
};
inline std::vector<CNTK::Axis> DynamicAxesFromInternalDynamicAxisName(const std::wstring& internalDynamicAxisName)
{
std::vector<CNTK::Axis> inputVarDynamicAxes;
if (internalDynamicAxisName == CNTK::CompositeFunction::s_internalDefaultDynamicAxisName)
inputVarDynamicAxes = { CNTK::Axis::DefaultDynamicAxis(), CNTK::Axis::DefaultBatchAxis() };
else if (internalDynamicAxisName == CNTK::CompositeFunction::s_internalNoSequenceAxisName)
inputVarDynamicAxes = { CNTK::Axis::DefaultBatchAxis() };
else
inputVarDynamicAxes = { CNTK::Axis(internalDynamicAxisName), CNTK::Axis::DefaultBatchAxis() };
return inputVarDynamicAxes;
}
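// Editor's note (not part of this commit): this inverts the axis-naming
// convention used when lowering to the internal ComputationNetwork. The
// internal default dynamic axis maps to { DefaultDynamicAxis, DefaultBatchAxis },
// the internal "no sequence" axis to a batch-only input, and any other name
// to { Axis(name), DefaultBatchAxis }.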
}

View file

@ -8,19 +8,18 @@
#include "Utils.h"
#define UPDATE_FUNCTION \
switch (smoothedGradientValue->GetDataType()) \
{ \
case DataType::Float: \
Update<float>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
break; \
case DataType::Double: \
Update<double>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
break; \
default: \
NOT_IMPLEMENTED; \
}
using namespace Microsoft::MSR::CNTK;
using namespace std;
@ -141,7 +140,7 @@ namespace CNTK
// L1 regularizer with proximal gradient descent method
if (m_additionalOptions.l1RegularizationWeight > 0)
{
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
// multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample
auto weight = ElementType(learningRate * m_additionalOptions.l1RegularizationWeight * actualMBSize);
parameterValue->GetWritableMatrix<ElementType>()->InplaceSoftThreshold(weight);
@ -154,48 +153,49 @@ namespace CNTK
return arrayView->GetWritableTensorView<ElementType>();
}
LearnerBase::LearnerBase(const unordered_set<Parameter>& parameters)
LearnerBase::LearnerBase(const unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool allocateSmoothGradients /* = true */)
: Learner(parameters),
m_learningRatePerSample(0.0),
m_sampleCount(0)
m_learningRates(learningRates),
m_sampleCount(0),
m_minibatchCount(0)
{
const unordered_set<Parameter>& parameterSet = parameters;
for (const auto& parameter : parameterSet)
for (const auto& parameter : parameters)
{
// TODO: using the same device to allocate data for all smoothed gradients. Is this correct?
// Should the device be specified on a per-parameter basis?
NDArrayViewPtr view;
if (parameter.GetDataType() == DataType::Float)
if (!allocateSmoothGradients)
{
view = MakeSharedObject<NDArrayView>(0.0f, parameter.Shape(), parameter.Value()->Device());
continue;
}
else
{
view = MakeSharedObject<NDArrayView>(0.0, parameter.Shape(), parameter.Value()->Device());
}
NDArrayViewPtr view = AllocateNDArrayView(parameter, parameter.Shape());
m_smoothedGradientValues.insert(make_pair(parameter, view));
m_additionalOptions.learningRateMultipliers.insert(make_pair(parameter, 1.0));
}
}
void LearnerBase::ResetSmoothedGradients()
/*static*/ NDArrayViewPtr LearnerBase::AllocateNDArrayView(const Parameter& parameter, const NDShape& shape)
{
for (const auto& parameter : Parameters())
if (parameter.GetDataType() == DataType::Float)
{
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
const auto& data = smoothedGradientValue;
switch (data->GetDataType())
{
case DataType::Float:
data->SetValue(0.0f);
break;
case DataType::Double:
data->SetValue(0.0);
break;
default:
LogicError("Unsupported DataType %s", ::CNTK::DataTypeName(data->GetDataType()));
}
return MakeSharedObject<NDArrayView>(float(0.0), shape, parameter.Value()->Device());
}
else
{
return MakeSharedObject<NDArrayView>(0.0, shape, parameter.Value()->Device());
}
}
/*static*/ NDShape LearnerBase::GetMatrixShape(const Parameter& parameter)
{
if (parameter.GetDataType() == DataType::Float)
{
auto matrix = GetMatrix<float>(parameter.Value());
return { matrix->GetNumRows(), matrix->GetNumCols() };
}
else
{
auto matrix = GetMatrix<double>(parameter.Value());
return { matrix->GetNumRows(), matrix->GetNumCols() };
}
}
@ -219,17 +219,19 @@ namespace CNTK
#endif
#if DUMPOUTPUT
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
LOGPRINTF(stderr, "learnRatePerSample=%0.8f, momentum=%0.8f, actualMBSize=%ld\n",
m_learningRatePerSample, m_momentumPerSample, trainingSampleCount);
learningRate, momentum, trainingSampleCount);
LOGPRINTF(stderr, "GradUpdateType()=%s, GradientUpdateNoiseStd()=%0.8f\n",
LearnerType().c_str(), m_GaussianNoiseInjectStd);
LearnerType().c_str(), m_additionalOptions.gaussianNoiseInjectionStdDev);
Print(gradientValue, "Gradient Update");
Print(smoothedGradientValue, "Smoothed Gradient Input");
#endif
UPDATE_FUNCTION;
#if DUMPOUTPUT
Print(parameterValue, "Parameter Update");
Print(parameter.Value(), "Parameter Update");
#endif
#ifdef _DEBUG
@ -239,6 +241,7 @@ namespace CNTK
#endif
}
m_sampleCount += trainingSampleCount;
m_minibatchCount++;
return false;
}
@ -265,9 +268,16 @@ namespace CNTK
/*virtual*/ Dictionary LearnerBase::GetCheckpointState() const /*override*/
{
NOT_IMPLEMENTED; // Until the new checkpointing is fully fleshed out, nobody should be calling this.
Dictionary checkpoint;
checkpoint[L"checkpointVersion"] = checkpointVersion;
checkpoint[L"sampleCount"] = m_sampleCount;
checkpoint[L"minibatchCount"] = m_minibatchCount;
// TODO: should we also save the learning rate schedule into the checkpoint?
// If so, subclasses need to be able to override this method
// and save the momentum schedule as well.
for (const auto& parameter : Parameters())
{
// TODO: parameter name is not guaranteed to be unique. Instead, all serializable objects
@ -277,31 +287,48 @@ namespace CNTK
{
LogicError("Parameter names must be unique");
}
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
// Potentially, could store things like dimensions, element size, format, etc., but
// that seems to be redundant, since all of that is passed in the constructor.
checkpoint[parameter.Name()] = SerializeToVector(smoothedGradientValue);
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
checkpoint[parameter.Name()] = *smoothedGradientValue;
}
return checkpoint;
}
/*virtual*/ void LearnerBase::RestoreFromCheckpoint(const Dictionary& checkpoint) /*override*/
{
NOT_IMPLEMENTED; // Until the new checkpointing is fully fleshed out, nobody should be calling this.
m_sampleCount = checkpoint[L"sampleCount"].GetValue<size_t>();
m_minibatchCount = checkpoint[L"minibatchCount"].GetValue<size_t>();
size_t version = checkpoint[L"minibatchCount"].GetValue<size_t>();
if (checkpointVersion != version)
{
// At the moment, we only support one version, so this should never happen.
LogicError("Unsupported checkpoint version.");
}
for (const auto& parameter : Parameters())
{
if (!checkpoint.Contains(parameter.Name()))
{
LogicError("Checkpoint does not contain state for parameter %ls", parameter.Name().c_str());
}
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
const NDArrayView& checkpointedValue = checkpoint[parameter.Name()].GetValue<NDArrayView>();
if (smoothedGradientValue->GetDataType() != checkpointedValue.GetDataType())
{
LogicError("A value restored from a checkpoint for the smoothed gradient data type for parameter %ls does not match the expected value",
parameter.Name().c_str());
}
const DictionaryValue& state = checkpoint[parameter.Name()];
if (smoothedGradientValue->Shape() != checkpointedValue.Shape())
{
LogicError("A value restored from a checkpoint for the smoothed gradient shape for parameter %ls does not match the expected value",
parameter.Name().c_str());
}
const auto& data = smoothedGradientValue;
DeserializeFromVector(data, state.GetValue<vector<DictionaryValue>>());
smoothedGradientValue->CopyFrom(checkpointedValue);
}
}
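// Editor's usage sketch (not part of this commit; learner construction shown
// only for context, see the factory functions below):
//   LearnerPtr learner = SGDLearner(parameters, learningRates);
//   Dictionary state = learner->GetCheckpointState(); // version, counters, and
//                                                     // one NDArrayView per parameter
//   ... persist 'state', restart, recreate 'learner' identically ...
//   learner->RestoreFromCheckpoint(state);            // validates the version, then the
//                                                     // data type and shape per parameter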
@ -313,23 +340,25 @@ namespace CNTK
template <typename ElementType>
void LearnerSGD::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
{
UNUSED(trainingSampleCount);
const auto& parameterValue = parameter.Value();
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
const auto& learningRate = ElementType(ParameterDependentLearningRate(parameter));
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
// TODO: break up the NormalGrad into 3 different functions, each with its own set of parameters
// (one for vanilla SGD, the other for momentum SGD, and the third one for NAG).
smoothedGradientMatrix->NormalGrad(*gradientMatrix, *parameterMatrix,
learningRate, ElementType(m_momentumPerSample), m_useNesterovAcceleration);
learningRate, momentum, m_useNesterovAcceleration);
}
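// Editor's note (assumption; MomentumPerMB's definition is outside this diff):
// a per-sample momentum m is presumably converted to an effective per-minibatch
// value m^N for a minibatch of N samples, keeping the smoothing horizon
// invariant to the minibatch size; e.g. m = 0.99, N = 256 gives 0.99^256 ~ 0.076.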
LearnerAdaGrad::LearnerAdaGrad(const unordered_set<Parameter>& parameters, bool needAveMultiplier)
: LearnerBase(parameters), m_needAveMultiplier(needAveMultiplier)
LearnerAdaGrad::LearnerAdaGrad(const unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool needAveMultiplier)
: LearnerBase(parameters, learningRates),
m_needAveMultiplier(needAveMultiplier)
{
}
@ -348,15 +377,23 @@ namespace CNTK
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
auto aveMultiplier = smoothedGradientMatrix->Adagrad(*gradientMatrix, m_needAveMultiplier);
Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
}
LearnerFSAdaGrad::LearnerFSAdaGrad(const unordered_set<Parameter>& parameters)
: LearnerMomentumSGD(parameters)
LearnerFSAdaGrad::LearnerFSAdaGrad(const unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums)
: LearnerMomentumSGD(parameters, learningRates, momentums, /*allocateSmoothGradients*/ false)
{
for (const auto& parameter : parameters)
{
auto shape = GetMatrixShape(parameter);
NDArrayViewPtr view = AllocateNDArrayView(parameter, {shape[0], 2 * shape[1]});
m_smoothedGradientValues.insert(make_pair(parameter, view));
}
}
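// Editor's note (assumption, not confirmed by this diff): the doubled column
// count presumably packs two per-element state tensors, e.g. the smoothed
// gradient and a smoothed squared gradient, side by side in the single
// matrix that Matrix::FSAdagrad operates on.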
/*virtual*/ void LearnerFSAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
@ -373,21 +410,33 @@ namespace CNTK
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
//const double momentum = MomentumPerMB(m_momentumPerSample, trainingSampleCount);
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
smoothedGradientMatrix->FSAdagrad(trainingSampleCount, *gradientMatrix, *parameterMatrix,
learningRate, ElementType(m_momentumPerSample));
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
smoothedGradientMatrix->FSAdagrad(trainingSampleCount, *gradientMatrix, *parameterMatrix, learningRate, momentum);
}
LearnerRMSProp::LearnerRMSProp(const unordered_set<Parameter>& parameters,
double gamma, double inc, double dec, double max, double min, bool needAveMultiplier)
: LearnerBase(parameters),
m_gamma(gamma), m_inc(inc), m_dec(dec), m_max(max), m_min(min),
m_needAveMultiplier(needAveMultiplier)
LearnerRMSProp::LearnerRMSProp(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates,
double gamma, double inc, double dec, double max, double min, bool needAveMultiplier)
: LearnerBase(parameters, learningRates, /*allocateSmoothGradients*/ false),
m_gamma(gamma), m_inc(inc), m_dec(dec), m_max(max), m_min(min),
m_needAveMultiplier(needAveMultiplier)
{
for (const auto& parameter : parameters)
{
// When needAveMultiplier == true, CPU and GPU implementations of RMSProp require different numbers of columns.
// TODO: verify that this is correct.
size_t factor = 3;
if (needAveMultiplier && parameter.Value()->Device().Type() == DeviceKind::GPU)
{
factor = 4;
}
auto shape = GetMatrixShape(parameter);
NDArrayViewPtr view = AllocateNDArrayView(parameter, {shape[0], factor * shape[1]});
m_smoothedGradientValues.insert(make_pair(parameter, view));
}
}
/*virtual*/ void LearnerRMSProp::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
@ -405,12 +454,12 @@ namespace CNTK
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
auto aveMultiplier = smoothedGradientMatrix->RmsProp(*gradientMatrix,
ElementType(m_gamma), ElementType(m_inc),
ElementType(m_max), ElementType(m_dec),
ElementType(m_min), m_needAveMultiplier);
Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
}
@ -418,34 +467,35 @@ namespace CNTK
template shared_ptr<Matrix<float>> LearnerBase::GetWritableMatrix<float>(const NDArrayViewPtr& arrayView);
template shared_ptr<Matrix<double>> LearnerBase::GetWritableMatrix<double>(const NDArrayViewPtr& arrayView);
LearnerPtr SGDLearner(const unordered_set<Parameter>& parameters, double learningRatePerSample)
LearnerPtr SGDLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates)
{
return MakeSharedObject<LearnerSGD>(parameters, learningRatePerSample);
return MakeSharedObject<LearnerSGD>(parameters, learningRates);
}
LearnerPtr MomentumSGDLearner(const unordered_set<Parameter>& parameters)
LearnerPtr MomentumSGDLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, const MomentumsPerSample& momentums)
{
return MakeSharedObject<LearnerMomentumSGD>(parameters);
return MakeSharedObject<LearnerMomentumSGD>(parameters, learningRates, momentums);
}
LearnerPtr NesterovLearner(const unordered_set<Parameter>& parameters)
LearnerPtr NesterovLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, const MomentumsPerSample& momentums)
{
return MakeSharedObject<LearnerNesterov>(parameters);
return MakeSharedObject<LearnerNesterov>(parameters, learningRates, momentums);
}
LearnerPtr AdaGradLearner(const unordered_set<Parameter>& parameters, bool needAveMultiplier)
LearnerPtr AdaGradLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, bool needAveMultiplier)
{
return MakeSharedObject<LearnerAdaGrad>(parameters, needAveMultiplier);
return MakeSharedObject<LearnerAdaGrad>(parameters, learningRates, needAveMultiplier);
}
LearnerPtr FSAdaGradLearner(const unordered_set<Parameter>& parameters)
LearnerPtr FSAdaGradLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, const MomentumsPerSample& momentums)
{
return MakeSharedObject<LearnerFSAdaGrad>(parameters);
return MakeSharedObject<LearnerFSAdaGrad>(parameters, learningRates, momentums);
}
LearnerPtr RMSPropLearner(const unordered_set<Parameter>& parameters,
double gamma, double inc, double dec, double max, double min, bool needAveMultiplier)
LearnerPtr RMSPropLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates,
double gamma, double inc, double dec, double max, double min,
bool needAveMultiplier)
{
return MakeSharedObject<LearnerRMSProp>(parameters, gamma, inc, dec, max, min, needAveMultiplier);
return MakeSharedObject<LearnerRMSProp>(parameters, learningRates, gamma, inc, dec, max, min, needAveMultiplier);
}
}

View file

@ -9,6 +9,7 @@
namespace CNTK
{
// TODO: Move this to Trainer along with Pre-, PostProcess and ClipGradient.
// A collection of additional options that are applicable for all standard learners
// (after these options are set, they retain their value for the entire lifespan of a learner).
struct AdditionalLearningOptions
@ -18,7 +19,6 @@ namespace CNTK
double gaussianNoiseInjectionStdDev = 0.0;
bool gradientClippingWithTruncation = true;
double gradientClippingThresholdPerSample = std::numeric_limits<double>::infinity();
std::unordered_map<Parameter, double> learningRateMultipliers;
};
// An abstract base class at the root of the standard learners hierarchy
@ -33,32 +33,16 @@ namespace CNTK
virtual void RestoreFromCheckpoint(const Dictionary& checkpoint) override final;
void SetAdditionalOptions(const AdditionalLearningOptions& additionalOptions)
{
m_additionalOptions = additionalOptions;
}
// TODO: should this be called ResetMomentum?
// needed for BlockMomentumSGD to reset SGD momentum after aggregation.
void ResetSmoothedGradients();
// TODO: move learning rate and momentum scheduling and adjustment functionality
// inside the learner and drop these setters.
void SetLearningRate(double value) { m_learningRatePerSample = value; }
protected:
LearnerBase(const std::unordered_set<Parameter>& parameters);
LearnerBase(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool allocateSmoothGradients = true);
virtual void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const = 0;
double ParameterDependentLearningRate(const Parameter& parameter) const
{
return m_learningRatePerSample * m_additionalOptions.learningRateMultipliers.at(parameter);
}
std::string LearnerType() const;
double m_learningRatePerSample;
LearningRatesPerSample m_learningRates;
AdditionalLearningOptions m_additionalOptions;
@ -91,6 +75,16 @@ namespace CNTK
template <typename ElementType>
void PostProcess(const Parameter& parameter, const NDArrayViewPtr& gradientValue, size_t actualMBSize) const;
// Returns an NDArrayView with the required shape, with the same data type as parameter value
// and allocated on the same device.
static NDArrayViewPtr AllocateNDArrayView(const Parameter& parameter, const NDShape& shape);
// Retrieves the shape of the matrix corresponding to the parameter value.
static NDShape GetMatrixShape(const Parameter& parameter);
size_t m_sampleCount;
size_t m_minibatchCount;
private:
// Templatized update function, it invokes preprocess and postprocess using the provided
// template parameter and also invokes virtual Update method implemented in one of the subclasses.
@ -101,18 +95,20 @@ namespace CNTK
static bool HasNan(const NDArrayViewPtr& value, const char* name);
static void Print(const NDArrayViewPtr& value, const char* msg);
size_t m_sampleCount;
static const size_t checkpointVersion = 1;
};
// Vanilla gradient descent optimization algorithm.
class LearnerSGD : public LearnerBase
{
public:
LearnerSGD(const std::unordered_set<Parameter>& parameters, double learningRatePerSample = 0)
: LearnerBase(parameters), m_momentumPerSample(0.0), m_useNesterovAcceleration(false)
{
SetLearningRate(learningRatePerSample);
}
LearnerSGD(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool allocateSmoothGradients = true)
: LearnerBase(parameters, learningRates, allocateSmoothGradients),
m_momentums(0.0),
m_useNesterovAcceleration(false)
{ }
protected:
@ -121,7 +117,8 @@ namespace CNTK
template <typename ElementType>
void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const;
double m_momentumPerSample;
// TODO: Move m_momentums to LearnerMomentumSGD as soon as NormalGrad is refactored.
MomentumsPerSample m_momentums;
bool m_useNesterovAcceleration;
};
@ -129,20 +126,25 @@ namespace CNTK
class LearnerMomentumSGD : public LearnerSGD
{
public:
LearnerMomentumSGD(const std::unordered_set<Parameter>& parameters)
: LearnerSGD(parameters)
{}
void SetMomentum(double value) { m_momentumPerSample = value; }
LearnerMomentumSGD(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums,
bool allocateSmoothGradients = true)
: LearnerSGD(parameters, learningRates, allocateSmoothGradients)
{
m_momentums = momentums;
}
};
// Nesterov's accelerated gradient descent.
class LearnerNesterov : public LearnerSGD
class LearnerNesterov : public LearnerMomentumSGD
{
public:
LearnerNesterov(const std::unordered_set<Parameter>& parameters)
: LearnerSGD(parameters)
LearnerNesterov(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums)
: LearnerMomentumSGD(parameters, learningRates, momentums)
{
m_useNesterovAcceleration = true;
}
@ -152,7 +154,9 @@ namespace CNTK
{
public:
LearnerAdaGrad(const std::unordered_set<Parameter>& parameters, bool needAveMultiplier);
LearnerAdaGrad(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
bool needAveMultiplier);
protected:
bool m_needAveMultiplier;
@ -167,7 +171,9 @@ namespace CNTK
{
public:
LearnerFSAdaGrad(const std::unordered_set<Parameter>& parameters);
LearnerFSAdaGrad(const std::unordered_set<Parameter>& parameters,
const LearningRatesPerSample& learningRates,
const MomentumsPerSample& momentums);
protected:
@ -182,7 +188,9 @@ namespace CNTK
public:
LearnerRMSProp(const std::unordered_set<Parameter>& parameters,
double gamma, double inc, double dec, double max, double min, bool needAveMultiplier);
const LearningRatesPerSample& learningRates,
double gamma, double inc, double dec, double max, double min,
bool needAveMultiplier);
protected:

View file

@ -49,10 +49,12 @@ namespace CNTK
m_streamInfos.insert({ streamDesc->m_name, streamDesc->m_id, AsStorageFormat(streamDesc->m_storageType), AsDataType(streamDesc->m_elementType), AsNDShape(*(streamDesc->m_sampleLayout)) });
}
/*virtual*/ std::unordered_map<StreamInfo, MinibatchData> CompositeMinibatchSource::GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
const DeviceDescriptor& device /*= DeviceDescriptor::DefaultDevice()*/) /*override*/
/*virtual*/ const std::unordered_map<StreamInfo, MinibatchData>&
CompositeMinibatchSource::GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
const DeviceDescriptor& device /*= DeviceDescriptor::DefaultDevice()*/) /*override*/
{
std::unordered_map<StreamInfo, MinibatchData> minibatchData;
m_minibatchData.clear();
if (!m_epochEndReached)
{
// TODO: Support different minibatch sizes for different streams
@ -117,7 +119,9 @@ namespace CNTK
auto currentStreamMinibatchData = compositeReaderMinibatchData.m_data[i];
if (currentStreamDesc->m_elementType == ElementType::tfloat)
{
auto dataMatrix = std::make_shared<Matrix<float>>(CPUDEVICE);
auto CNTKMatrixType = (currentStreamDesc->m_storageType == StorageType::dense) ? DENSE : SPARSE;
auto CNTKMatrixFormat = (currentStreamDesc->m_storageType == StorageType::dense) ? matrixFormatDense : matrixFormatSparseCSC;
auto dataMatrix = std::make_shared<Matrix<float>>(0, 0, CPUDEVICE, CNTKMatrixType, CNTKMatrixFormat);
size_t sampleSize = currentStreamDesc->m_sampleLayout->GetNumElements();
// TODO: Eliminate the unnecessary CPU to CPU copy
@ -127,14 +131,14 @@ namespace CNTK
size_t numSamples = currentStreamMinibatchData->m_layout->GetActualNumSamples();
size_t numSequences = currentStreamMinibatchData->m_layout->GetNumSequences();
minibatchData[currentStreamInfo] = { numSequences, numSamples, minibatchValuePtr };
m_minibatchData[currentStreamInfo] = { numSequences, numSamples, minibatchValuePtr };
}
else
LogicError("Input data of type other than DataType::Float is currently unsupported by the CNTK built-in composite MinibatchSource!");
}
}
return minibatchData;
return m_minibatchData;
}
void ComputeInputPerDimMeansAndInvStdDevs(const MinibatchSourcePtr& minibatchSource,

View file

@ -19,8 +19,8 @@ namespace CNTK
virtual const std::unordered_set<StreamInfo>& StreamInfos() override { return m_streamInfos; }
virtual std::unordered_map<StreamInfo, MinibatchData> GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()) override;
virtual const std::unordered_map<StreamInfo, MinibatchData>& GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()) override;
private:
std::unordered_set<StreamInfo> m_streamInfos;
@ -28,5 +28,6 @@ namespace CNTK
bool m_epochEndReached;
size_t m_prevMinibatchSize;
size_t m_epochSize;
std::unordered_map<StreamInfo, MinibatchData> m_minibatchData;
};
}

View file

@ -61,11 +61,12 @@ namespace CNTK
LogicError("The gradient value for a Parameter cannot have an associated mask!");
}
auto trainingLossArguments = m_trainingLossVar.Owner()->Arguments();
auto labelsVar = *(std::find_if(trainingLossArguments.begin(), trainingLossArguments.end(), [](const Variable& var) {
return var.IsInput();
}));
auto argumentValue = arguments.at(labelsVar);
auto trainingLossArgument = *(m_trainingLossVar.Owner()->Arguments().begin());
// Find the argument whose dynamic axes match the criterion operation's dynamic axes (i.e. label dynamic axes)
auto argumentValue = std::find_if(arguments.begin(), arguments.end(), [trainingLossArgument](const std::pair<Variable, ValuePtr>& currentPair) {
return (currentPair.first.DynamicAxes() == trainingLossArgument.DynamicAxes());
})->second;
auto argumentData = argumentValue->Data();
auto argumentDataShape = argumentData->Shape();
auto mask = argumentValue->Mask();

View file

@ -6,20 +6,100 @@
#include "stdafx.h"
#include "CNTKLibrary.h"
#include "Utils.h"
#include "File.h"
#include <istream>
#include <ostream>
using namespace std;
namespace CNTK
{
// This wrapper redefines operator<< in terms of unformatted (binary) write operation.
struct BinaryOStreamWrapper
{
BinaryOStreamWrapper(ostream& s) : m_stream(s) {}
template<typename T>
typename std::enable_if<std::is_pod<T>::value, BinaryOStreamWrapper&>::type
operator<<(const T& value)
{
m_stream.write(reinterpret_cast<const char*>(&value), sizeof(T));
return *this;
}
BinaryOStreamWrapper& operator<<(const wstring& str)
{
*this << str.length();
m_stream.write(reinterpret_cast<const char*>(str.c_str()), str.length() * sizeof(wchar_t));
return *this;
}
operator ostream& () { return m_stream; }
ostream& m_stream;
BinaryOStreamWrapper(const BinaryOStreamWrapper&) = delete; BinaryOStreamWrapper(BinaryOStreamWrapper&&) = delete; BinaryOStreamWrapper& operator=(const BinaryOStreamWrapper&) = delete; BinaryOStreamWrapper& operator=(BinaryOStreamWrapper&&) = delete;
};
// This wrapper redefines operator>> in terms of unformatted (binary) read operation.
struct BinaryIStreamWrapper
{
BinaryIStreamWrapper(istream& s) : m_stream(s) {}
template<typename T>
typename std::enable_if<std::is_pod<T>::value, BinaryIStreamWrapper&>::type
operator>>(T& value)
{
static_assert(sizeof(T) <= sizeof(size_t), "size_t is the largest supported type.");
m_stream.read(buf, sizeof(T));
value = *(reinterpret_cast<T*>(buf));
return *this;
}
BinaryIStreamWrapper& operator>>(wstring& str)
{
size_t length;
*this >> length;
str.resize(length);
for (size_t i = 0; i < length; ++i)
{
m_stream.read(buf, sizeof(wchar_t));
str[i] = *(reinterpret_cast<wchar_t*>(buf));
}
return *this;
}
operator istream& () const { return m_stream; }
istream& m_stream;
char buf[sizeof(size_t)];
BinaryIStreamWrapper(const BinaryIStreamWrapper&) = delete; BinaryIStreamWrapper(BinaryIStreamWrapper&&) = delete; BinaryIStreamWrapper& operator=(const BinaryIStreamWrapper&) = delete; BinaryIStreamWrapper& operator=(BinaryIStreamWrapper&&) = delete;
};
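// Editor's usage sketch (not part of this commit; assumes <sstream>):
//   std::stringstream buffer;
//   BinaryOStreamWrapper out(buffer);
//   out << size_t(42) << std::wstring(L"sampleCount"); // raw bytes: an 8-byte value,
//                                                      // then a length-prefixed wchar_t payload
//   BinaryIStreamWrapper in(buffer);
//   size_t n; std::wstring key;
//   in >> n >> key;                                    // n == 42, key == L"sampleCount"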
template <typename T>
T* CreateDataPtr(const T& value)
{
return new T(value);
}
template <>
NDArrayView* CreateDataPtr<NDArrayView>(const NDArrayView& value)
{
// TODO: replace this copy with an alias to value.
NDArrayView* viewPtr = new NDArrayView(value.GetDataType(), value.Shape(), DeviceDescriptor::CPUDevice());
viewPtr->CopyFrom(value);
return viewPtr;
}
template <typename T>
void DictionaryValue::AllocateDataPtr(const T& value)
{
static_assert(is_same<T, NDShape>::value ||
is_same<T, wstring>::value ||
is_same<T, vector<DictionaryValue>>::value ||
is_same<T, Dictionary>::value, "AllocateDataPtr called with invalid type");
m_data.m_ptr = new T(value);
is_same<T, Dictionary>::value ||
is_same<T, NDArrayView>::value,
"AllocateDataPtr called with invalid type");
m_data.m_ptr = CreateDataPtr<T>(value);
}
template <typename T>
@ -31,12 +111,163 @@ namespace CNTK
m_data.m_ptr = nullptr;
}
Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, DictionaryValue& us)
template <typename ElementType>
bool AreEqual(NDArrayView& view1, NDArrayView& view2)
{
if (view1.GetDataType() != view2.GetDataType() ||
view1.Shape() != view2.Shape())
{
return false;
}
ElementType* data1 = nullptr;
ElementType* data2 = nullptr;
if (view1.Device().Type() == DeviceKind::CPU)
{
data1 = view1.WritableDataBuffer<ElementType>();
data2 = view2.WritableDataBuffer<ElementType>();
}
else
{
NDArrayViewPtr temp1CpuDataView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), view1.Shape(), DeviceDescriptor::CPUDevice());
temp1CpuDataView->CopyFrom(view1);
data1 = temp1CpuDataView->WritableDataBuffer<ElementType>();
NDArrayViewPtr temp2CpuDataView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), view2.Shape(), DeviceDescriptor::CPUDevice());
temp2CpuDataView->CopyFrom(view2);
data2 = temp2CpuDataView->WritableDataBuffer<ElementType>();
}
size_t numElements = view1.Shape().TotalSize();
for (size_t i = 0; i < numElements; ++i)
{
if (data1[i] != data2[i])
{
return false;
}
}
return true;
}
bool DictionaryValue::operator==(const DictionaryValue& other) const
{
if (this == &other)
{
return true;
}
if (m_valueType != other.m_valueType)
{
return false;
}
switch (m_valueType)
{
case DictionaryValue::Type::Bool:
return (m_data.m_boolean == other.m_data.m_boolean);
case DictionaryValue::Type::SizeT:
return (m_data.m_sizeT == other.m_data.m_sizeT);
case DictionaryValue::Type::Float:
return (m_data.m_float == other.m_data.m_float);
case DictionaryValue::Type::Double:
return (m_data.m_double == other.m_data.m_double);
case DictionaryValue::Type::String:
{
wstring* strPtr1 = reinterpret_cast<wstring*>(m_data.m_ptr);
wstring* strPtr2 = reinterpret_cast<wstring*>(other.m_data.m_ptr);
return (*strPtr1 == *strPtr2);
}
case DictionaryValue::Type::NDShape:
{
NDShape* shapePtr1 = reinterpret_cast<NDShape*>(m_data.m_ptr);
NDShape* shapePtr2 = reinterpret_cast<NDShape*>(other.m_data.m_ptr);
return (*shapePtr1 == *shapePtr2);
}
case DictionaryValue::Type::Vector:
{
vector<DictionaryValue>* vectorPtr1 = reinterpret_cast<vector<DictionaryValue>*>(m_data.m_ptr);
vector<DictionaryValue>* vectorPtr2 = reinterpret_cast<vector<DictionaryValue>*>(other.m_data.m_ptr);
return (*vectorPtr1 == *vectorPtr2);
}
case DictionaryValue::Type::Dictionary:
{
Dictionary* dictPtr1 = reinterpret_cast<Dictionary*>(m_data.m_ptr);
Dictionary* dictPtr2 = reinterpret_cast<Dictionary*>(other.m_data.m_ptr);
return (*dictPtr1 == *dictPtr2);
}
case DictionaryValue::Type::NDArrayView:
{
NDArrayView* viewPtr1 = reinterpret_cast<NDArrayView*>(m_data.m_ptr);
NDArrayView* viewPtr2 = reinterpret_cast<NDArrayView*>(other.m_data.m_ptr);
switch (viewPtr1->GetDataType())
{
case DataType::Float:
return AreEqual<float>(*viewPtr1, *viewPtr2);
case DataType::Double:
return AreEqual<double>(*viewPtr1, *viewPtr2);
default:
NOT_IMPLEMENTED;
}
}
default:
NOT_IMPLEMENTED;
}
}
bool DictionaryValue::operator!=(const DictionaryValue& other) const
{
return !(*this == other);
}
BinaryOStreamWrapper& operator<<(BinaryOStreamWrapper& stream, const NDShape& us)
{
auto size = us.NumAxes();
stream << size;
for (auto i = 0; i < size; i++)
{
stream << us[i];
}
return stream;
}
template <typename T>
void Write(BinaryOStreamWrapper& stream, const NDArrayView& view)
{
assert(view.Device().Type() == DeviceKind::CPU);
auto numElements = view.Shape().TotalSize();
const T* buffer = view.DataBuffer<T>();
for (auto i = 0; i < numElements; ++i)
{
stream << buffer[i];
}
}
template <typename T>
void Read(BinaryIStreamWrapper& stream, NDArrayView& view)
{
assert(view.Device().Type() == DeviceKind::CPU);
auto numElements = view.Shape().TotalSize();
T* buffer = view.WritableDataBuffer<T>();
for (auto i = 0; i < numElements; ++i)
{
stream >> buffer[i];
}
}
istream& operator>>(istream& stdStream, DictionaryValue& us)
{
BinaryIStreamWrapper stream(stdStream);
size_t version;
stream >> version;
stream >> us.m_valueType;
unsigned int type;
stream >> type;
us.m_valueType = static_cast<DictionaryValue::Type>(type);
switch (us.ValueType())
{
@ -52,28 +283,72 @@ namespace CNTK
case DictionaryValue::Type::Double:
stream >> us.m_data.m_double;
break;
case DictionaryValue::Type::String:
{
wstring* strPtr = new wstring();
stream >> *strPtr;
us.m_data.m_ptr = strPtr;
break;
}
case DictionaryValue::Type::NDShape:
{
size_t size;
stream >> size;
vector<size_t> dims(size);
NDShape* shapePtr = new NDShape(size);
for (auto i = 0; i < size; i++)
{
stream >> dims[i];
stream >> shapePtr->operator[](i);
}
us.AllocateDataPtr(NDShape(dims));
us.m_data.m_ptr = shapePtr;
break;
}
case DictionaryValue::Type::Vector:
{
{
size_t size;
stream >> size;
vector<DictionaryValue> values(size);
vector<DictionaryValue>* vectorPtr = new vector<DictionaryValue>(size);
for (auto i = 0; i < size; i++)
{
stream >> values[i];
stream >> vectorPtr->at(i);
}
us.AllocateDataPtr(values);
us.m_data.m_ptr = vectorPtr;
break;
}
case DictionaryValue::Type::Dictionary:
{
Dictionary* dictPtr = new Dictionary();
stream >> *dictPtr;
us.m_data.m_ptr = dictPtr;
break;
}
case DictionaryValue::Type::NDArrayView:
{
unsigned int type;
stream >> type;
DataType dtype = static_cast<DataType>(type);
size_t size;
stream >> size;
NDShape shape(size);
for (auto i = 0; i < size; i++)
{
stream >> shape[i];
}
NDArrayView* viewPtr = new NDArrayView(dtype, shape, DeviceDescriptor::CPUDevice());
switch (dtype)
{
case DataType::Float:
Read<float>(stream, *viewPtr);
break;
case DataType::Double:
Read<double>(stream, *viewPtr);
break;
default:
LogicError("Unsupported DataType %s", DataTypeName(dtype));
}
us.m_data.m_ptr = viewPtr;
break;
}
default:
@ -82,11 +357,13 @@ namespace CNTK
return stream;
}
Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const DictionaryValue& us)
ostream& operator<<(ostream& stdStream, const DictionaryValue& us)
{
BinaryOStreamWrapper stream(stdStream);
stream << us.version;
stream << us.ValueType();
stream << static_cast<unsigned int>(us.ValueType());
switch (us.ValueType())
{
@ -102,15 +379,16 @@ namespace CNTK
case DictionaryValue::Type::Double:
stream << us.m_data.m_double;
break;
case DictionaryValue::Type::String:
{
wstring* stringPtr = reinterpret_cast<wstring*>(us.m_data.m_ptr);
stream << *stringPtr;
break;
}
case DictionaryValue::Type::NDShape:
{
NDShape* shapePtr = reinterpret_cast<NDShape*>(us.m_data.m_ptr);
auto size = shapePtr->NumAxes();
stream << size;
for (auto i = 0; i < size; i++)
{
stream << shapePtr->operator[](i);
}
stream << *shapePtr;
break;
}
case DictionaryValue::Type::Vector:
@ -121,7 +399,31 @@ namespace CNTK
stream << size;
for (auto i = 0; i < size; i++)
{
stream << vectorPtr->operator[](i);
stream << vectorPtr->at(i);
}
break;
}
case DictionaryValue::Type::Dictionary:
{
Dictionary* dictPtr = reinterpret_cast<Dictionary*>(us.m_data.m_ptr);
stream << *dictPtr;
break;
}
case DictionaryValue::Type::NDArrayView:
{
NDArrayView* viewPtr = reinterpret_cast<NDArrayView*>(us.m_data.m_ptr);
stream << static_cast<unsigned int>(viewPtr->GetDataType());
stream << viewPtr->Shape();
switch (viewPtr->GetDataType())
{
case DataType::Float:
Write<float>(stream, *viewPtr);
break;
case DataType::Double:
Write<double>(stream, *viewPtr);
break;
default:
LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
}
break;
}
@ -148,7 +450,7 @@ namespace CNTK
Dictionary& Dictionary::operator=(const Dictionary& other)
{
assert(this != &other);
m_dictionaryData.reset(new std::unordered_map<std::wstring, DictionaryValue>(*(other.m_dictionaryData)));
m_dictionaryData.reset(new unordered_map<wstring, DictionaryValue>(*(other.m_dictionaryData)));
return *this;
}
@ -183,20 +485,51 @@ namespace CNTK
return (m_dictionaryData->find(key) != m_dictionaryData->end());
}
Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const Dictionary& us)
bool Dictionary::operator==(const Dictionary& other) const
{
if (this == &other)
{
return true;
}
if (m_dictionaryData->size() != other.m_dictionaryData->size())
{
return false;
}
for (auto& kv : *m_dictionaryData)
{
auto result = other.m_dictionaryData->find(kv.first);
if (result == other.m_dictionaryData->end() || kv.second != result->second)
{
return false;
}
}
return true;
}
bool Dictionary::operator!=(const Dictionary& other) const
{
return !(*this == other);
}
ostream& operator<<(ostream& stdStream, const Dictionary& us)
{
BinaryOStreamWrapper stream(stdStream);
stream << us.version;
stream << us.m_dictionaryData->size();
for (auto it = us.m_dictionaryData->begin(); it != us.m_dictionaryData->end(); ++it)
for (auto& kv : *(us.m_dictionaryData))
{
stream << it->first;
stream << it->second;
stream << kv.first;
stream << kv.second;
}
return stream;
}
Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, Dictionary& us)
istream& operator>>(istream& stdStream, Dictionary& us)
{
BinaryIStreamWrapper stream(stdStream);
size_t version;
stream >> version;
size_t size;
@ -206,113 +539,36 @@ namespace CNTK
{
wstring key;
stream >> key;
DictionaryValue value;
stream >> value;
us.m_dictionaryData->insert(make_pair(key, value));
stream >> us[key];
}
return stream;
}
// Returns the schedule entry whose key is greater than the given sample count,
// or the last entry if no such key exists.
template <typename T>
vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr)
const T& TrainingParameterSchedule<T>::operator[](size_t sampleCount) const
{
if (viewPtr->IsSparse())
assert(m_schedule.size() > 0);
auto it = m_schedule.upper_bound(sampleCount);
if (it == m_schedule.end())
{
LogicError("Sparse NDArrayView cannot be serialized into a vector.");
--it;
}
auto numElements = viewPtr->Shape().TotalSize();
vector<DictionaryValue> values(numElements);
NDArrayViewPtr cpuDataViewPtr = viewPtr;
if ((viewPtr->Device().Type() != DeviceKind::CPU))
{
cpuDataViewPtr = MakeSharedObject<NDArrayView>(viewPtr->GetDataType(), viewPtr->Shape(), DeviceDescriptor::CPUDevice());
cpuDataViewPtr->CopyFrom(*viewPtr);
}
const T* buffer = cpuDataViewPtr->DataBuffer<T>();
for (auto i = 0; i < numElements; ++i)
{
T v = buffer[i];
values[i] = DictionaryValue(v);
}
return values;
return it->second;
}
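// Worked example (hypothetical schedule): with m_schedule = { {100, 0.05}, {200, 0.01} },
// operator[](0) finds upper_bound key 100 and returns 0.05; operator[](150) finds key 200
// and returns 0.01; operator[](500) hits end(), steps back, and returns the last value 0.01.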
template <typename T>
void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const vector<DictionaryValue>& values)
{
if (viewPtr->IsSparse())
{
LogicError("Sparse NDArrayView cannot be deserialized from a vector.");
}
auto numElements = viewPtr->Shape().TotalSize();
if (values.size() != numElements)
{
LogicError("Number of elements (%lu) in the deserialized representation does not match the expected value (%lu)",
values.size(), numElements);
}
NDArrayViewPtr cpuDataViewPtr = viewPtr;
if ((viewPtr->Device().Type() != DeviceKind::CPU))
{
cpuDataViewPtr = MakeSharedObject<NDArrayView>(viewPtr->GetDataType(), viewPtr->Shape(), DeviceDescriptor::CPUDevice());
}
T* buffer = cpuDataViewPtr->WritableDataBuffer<T>();
for (auto i = 0; i < numElements; ++i)
{
buffer[i] = values[i].GetValue<T>();
}
if ((viewPtr->Device().Type() != DeviceKind::CPU))
{
viewPtr->CopyFrom(*cpuDataViewPtr);
}
}
// TODO: we store the type info for every element in the vector, which is extremely redundant.
// Instead, it'd be nice to introduce some sort of DictionaryValueVector.
vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr)
{
switch (viewPtr->GetDataType())
{
case DataType::Float:
return SerializeToVector<float>(viewPtr);
case DataType::Double:
return SerializeToVector<double>(viewPtr);
default:
LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
}
}
void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const vector<DictionaryValue>& values)
{
switch (viewPtr->GetDataType())
{
case DataType::Float:
DeserializeFromVector<float>(viewPtr, values);
break;
case DataType::Double:
DeserializeFromVector<double>(viewPtr, values);
break;
default:
LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
}
}
template void DictionaryValue::AllocateDataPtr<NDShape>(const NDShape& value);
template void DictionaryValue::AllocateDataPtr<vector<DictionaryValue>>(const vector<DictionaryValue>& value);
template void DictionaryValue::AllocateDataPtr<wstring>(const wstring& value);
template void DictionaryValue::AllocateDataPtr<Dictionary>(const Dictionary& value);
template void DictionaryValue::AllocateDataPtr<NDArrayView>(const NDArrayView& value);
template void DictionaryValue::FreePtrAsType<NDShape>();
template void DictionaryValue::FreePtrAsType<vector<DictionaryValue>>();
template void DictionaryValue::FreePtrAsType<wstring>();
template void DictionaryValue::FreePtrAsType<Dictionary>();
template void DictionaryValue::FreePtrAsType<NDArrayView>();
template const double& TrainingParameterSchedule<double>::operator[](size_t key) const;
}

View file

@ -167,9 +167,6 @@ namespace CNTK
return var.IsInput() && var.IsSparse();
}
std::vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr);
void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const std::vector<DictionaryValue>& values);
inline void AddIndentation(std::wstringstream& s, size_t numIndentationSpaces)
{
@ -250,7 +247,8 @@ namespace CNTK
static_assert(std::is_same<T, bool>::value ||
std::is_same<T, size_t>::value ||
std::is_same<T, float>::value ||
std::is_same<T, double>::value, "Unsupported ValueType");
std::is_same<T, double>::value ||
std::is_same<T, std::wstring>::value, "Unsupported ValueType");
std::vector<DictionaryValue> dictionaryValueVector;
for (auto value : basicElementTypeVector)
@ -265,7 +263,8 @@ namespace CNTK
static_assert(std::is_same<T, bool>::value ||
std::is_same<T, size_t>::value ||
std::is_same<T, float>::value ||
std::is_same<T, double>::value, "Unsupported ValueType");
std::is_same<T, double>::value ||
std::is_same<T, std::wstring>::value, "Unsupported ValueType");
std::vector<T> basicElementTypeVector;
for (auto value : dictionaryValueVector)
@ -313,4 +312,19 @@ namespace CNTK
return{ paddedOutputMapCount, kernelShape };
}
inline CNTK::Constant ScalarConstant(CNTK::DataType dataType, float value, const CNTK::DeviceDescriptor& device = CNTK::DeviceDescriptor::CPUDevice())
{
if (dataType == CNTK::DataType::Float)
return CNTK::Constant({}, value, device);
else if (dataType == CNTK::DataType::Double)
return CNTK::Constant({}, (double)value, device);
else
LogicError("CNTK::ScalarConstant: Unsupported DataType %s", DataTypeName(dataType));
}
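// Hypothetical usage sketch: a float scalar constant with value 0.5 on the default CPU
// device, e.g. for scaling another operand:
//   auto half = CNTK::ScalarConstant(CNTK::DataType::Float, 0.5f);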
inline double MomentumPerMB(double momentumPerSample, size_t minibatchSize)
{
return std::pow(momentumPerSample, minibatchSize);
}
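// Worked example: a per-sample momentum of 0.9999 with a minibatch of 256 samples
// aggregates to 0.9999^256 ~= 0.9747 per minibatch.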
}

View file

@ -7,6 +7,8 @@
namespace CNTK
{
/*static*/ const std::vector<Axis> Variable::s_defaultInputVariableDynamicAxes = { Axis::DefaultDynamicAxis(), Axis::DefaultBatchAxis() };
Variable::Variable(const FunctionPtr& function)
: Variable(function->Output())
{

View file

@ -11,6 +11,13 @@
#define __UNIX__
#endif
#ifdef _MSC_VER
// TODO: thread_local is supported in VS2015. Remove this macro when we upgrade to VS2015
#define THREAD_LOCAL __declspec(thread)
#else
#define THREAD_LOCAL thread_local
#endif
// ===========================================================================
// compiler differences
// ===========================================================================

View file

@ -11,6 +11,7 @@
#include <stdio.h>
#include <vector>
#include <algorithm>
#include <random>
namespace Microsoft { namespace MSR { namespace CNTK {
@ -24,6 +25,31 @@ static inline size_t rand(const size_t begin, const size_t end)
return begin + randno % (end - begin);
}
// Rand based on Mersenne Twister.
// We use our own distribution in order to match baselines between different operating systems,
// because uniform_distribution is not guaranteed to provide the same numbers on different platforms.
// TODO: Switching to Boost would eliminate this problem.
static inline size_t RandMT(const size_t begin, const size_t end, std::mt19937_64& rng)
{
const size_t randomNumber = rng();
return begin + randomNumber % (end - begin);
}
// Rand based on Mersenne Twister.
// We use our own distribution in order to match baselines between different operating systems,
// instead of using std::shuffle, which uses uniform_int_distribution internally.
// TODO: Switching to Boost would eliminate this problem.
template <typename TVector>
inline void RandomShuffleMT(TVector& v, std::mt19937_64& rng)
{
foreach_index(currentLocation, v)
{
// Pick a random location and swap it with the current one
const size_t randomLocation = RandMT(0, v.size(), rng);
std::swap(v[currentLocation], v[randomLocation]);
}
}
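// Hedged usage sketch (illustrative values; assumes <numeric> for std::iota): seeding the
// generator explicitly makes the shuffle reproducible across platforms.
//
//   std::mt19937_64 rng(42);
//   std::vector<size_t> order(128);
//   std::iota(order.begin(), order.end(), 0);
//   RandomShuffleMT(order, rng); // same permutation on every platform for seed 42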
class RandomOrdering // note: NOT thread-safe at all
{
// constants for randomization

View file

@ -258,13 +258,20 @@ public:
m_evalOrders[rootNode] = nodes;
}
bool EvalOrderExists(const ComputationNodeBasePtr& rootNode) const
{
return m_evalOrders.find(rootNode) != m_evalOrders.end();
}
// get depth-first traversal order
// TODO: This is currently not immutable because it gets patched w.r.t. recurrent loops. Ideally we don't patch. Need to review and verify that it is sufficient.
const std::list<ComputationNodeBasePtr>& GetEvalOrder(const ComputationNodeBasePtr& rootNode) const
{
auto iter = m_evalOrders.find(rootNode);
if (iter == m_evalOrders.end())
{
LogicError("GetEvalOrder: Called without prior call to FormEvalOrder() for %ls %ls operation", rootNode->NodeName().c_str(), rootNode->OperationName().c_str());
}
return iter->second;
}

View file

@ -76,6 +76,9 @@ void ComputationNetwork::CopySubTree(const ComputationNetwork& fromNet,
ComputationNodeBasePtr fromRoot = fromNet.GetNodeFromName(fromName);
if (!fromNet.EvalOrderExists(fromRoot))
const_cast<ComputationNetwork&>(fromNet).FormEvalOrder(fromRoot);
for (const auto& fromNode : fromNet.GetEvalOrder(fromRoot)) // BUGBUG: This probably will fail because the precomputed eval orders are invalid at this point.
{
wstring fromNodeName = fromNode->NodeName();
@ -353,6 +356,9 @@ void ComputationNetwork::SetLearnableNodesBelowLearningRateMultiplier(const floa
else
{
// for calculating a specific node
if (!EvalOrderExists(rootNode))
const_cast<ComputationNetwork&>(*this).FormEvalOrder(rootNode);
for (const auto& node : GetAllNodesForRoot(rootNode))
{
if (node->OperationName() == OperationNameOf(LearnableParameter))

View file

@ -32,15 +32,16 @@
#define CNTK_MODEL_VERSION_1 1
#define CNTK_MODEL_VERSION_2 2
#define CNTK_MODEL_VERSION_3 3
#define CNTK_MODEL_VERSION_4 4 // PastValue
#define CNTK_MODEL_VERSION_5 5 // ND convolution and pooling
#define CNTK_MODEL_VERSION_6 6 // batch-norm blending
#define CNTK_MODEL_VERSION_7 7 // ElemType tag in model file
#define CNTK_MODEL_VERSION_8 8 // DynamicAxis for inputs
#define CNTK_MODEL_VERSION_9 9 // transpose flag in ConvolutionNode to support deconvolution
#define CNTK_MODEL_VERSION_4 4 // PastValue
#define CNTK_MODEL_VERSION_5 5 // ND convolution and pooling
#define CNTK_MODEL_VERSION_6 6 // batch-norm blending
#define CNTK_MODEL_VERSION_7 7 // ElemType tag in model file
#define CNTK_MODEL_VERSION_8 8 // DynamicAxis for inputs
#define CNTK_MODEL_VERSION_9 9 // transpose flag in ConvolutionNode to support deconvolution
#define CNTK_MODEL_VERSION_10 10 // learning-rate multiplier for input nodes
#define CNTK_MODEL_VERSION_11 11 // Times() m_inputRank to support parameter-rank inference
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_11
#define CNTK_MODEL_VERSION_11 11 // dynamic axis name for where nodes
#define CNTK_MODEL_VERSION_12 12 // Times() m_inputRank to support parameter-rank inference
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_12
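// Usage pattern (see WhereNode::Load further below): nodes gate newly added fields on the
// version a model was saved with, e.g.
//   if (modelVersion >= CNTK_MODEL_VERSION_11)
//       fstream >> m_dynamicAxisName;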
extern bool g_shareNodeValueMatrices;

View file

@ -365,6 +365,7 @@ public:
TensorShape outputShape;
// If 2D convolution syntax is used then some of the tensor dimensions need to be inferred.
if (m_convolution2D)
// NOTE: when m_convolution2D is true, this is a legacy branch; code should no longer reach it.
{
// Need to update some tensors with correct input dims.
auto inDims = ImageDimensions(GetInputSampleLayout(inputIdx), m_imageLayout);
@ -396,6 +397,8 @@ public:
outputShape = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
// ConvolveGeometry always uses CHW.
SetDims(ImageDimensions(outputShape, ImageLayoutKind::CHW).AsTensorShape(m_imageLayout), HasMBLayout());
}
else
{
@ -414,9 +417,12 @@ public:
outputShape = ConvolveGeometry::ComputeInputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
}
if (m_imageLayout == ImageLayoutKind::CHW)
SetDims(outputShape, HasMBLayout());
else // legacy format
SetDims(ImageDimensions(outputShape, ImageLayoutKind::CHW).AsTensorShape(m_imageLayout), HasMBLayout());
}
// ConvolveGeometry always uses CHW.
SetDims(ImageDimensions(outputShape, ImageLayoutKind::CHW).AsTensorShape(m_imageLayout), HasMBLayout());
// update LearnableParameter if it has 0 dimensions (to be inferred)
// Typically this would be the #inputChannels (C).

View file

@ -450,9 +450,9 @@ public:
assert(dimsA.size() == m_outputRank + numReductionDims);
while (numReductionDims < inputRank)
{
dimsA.push_back(0);
numReductionDims++;
}
dimsA.push_back(0);
numReductionDims++;
}
}
// fill in the missing ones
@ -561,8 +561,8 @@ class TransposeTimesNode : public TimesNodeBase<ElemType, true>
public:
DeclareConstructorFromConfigWithNumInputs(TransposeTimesNode);
TransposeTimesNode(DEVICEID_TYPE deviceId, const wstring& name)
: Base(deviceId, name, /*outputRank=*/1, /*inputRank=*/1)
TransposeTimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1)
: Base(deviceId, name, outputRank, /*inputRank=*/1)
{
}
};
@ -665,6 +665,9 @@ public:
m_axis1 = 1, m_axis2 = 2; // default
}
int Axis1() const { return m_axis1; }
int Axis2() const { return m_axis2; }
private:
// compute the transposed tensor shape (in-place)
void TransposeShape(TensorShape& shape) const

View file

@ -300,7 +300,7 @@ template <class ElemType>
if (!m_pMBLayout)
{
m_pMBLayout = make_shared<MBLayout>(); // this generates a new layout
m_pMBLayout->SetUniqueAxisName(L"WhereNodeAxis");
m_pMBLayout->SetUniqueAxisName(m_dynamicAxisName);
}
// we map scalars to scalars
if (isFinalValidationPass && Input(0)->GetSampleLayout().GetNumElements() != 1)

View file

@ -217,6 +217,9 @@ public:
virtual bool /*ComputationNodeBase::*/ InputUsedInComputingInputNodesGradients(size_t childIndex) const override;
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override;
std::wstring ReductionOpName() const { return m_operation; }
int ReductionAxis() const { return m_axis; }
private:
// operation attributes
int m_axis;
@ -341,11 +344,12 @@ public:
fstream << m_axis;
}
private:
// these implement numpy-style negative bound values to index from the end
size_t BeginIndex() const { return m_beginIndex >= 0 ? (size_t)m_beginIndex : (size_t)(m_beginIndex + Input(0)->GetSampleLayout()[m_axis - 1]); }
size_t EndIndex() const { return m_endIndex > 0 ? (size_t)m_endIndex : (size_t)(m_endIndex + Input(0)->GetSampleLayout()[m_axis - 1]); }
size_t EndIndex() const { return m_endIndex > 0 ? (size_t)m_endIndex : (size_t)(m_endIndex + Input(0)->GetSampleLayout()[m_axis - 1]); }
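// Worked example (hypothetical dims): with an input dimension of 10 along m_axis,
// m_beginIndex = -3 and m_endIndex = 0 resolve to the slice [7, 10);
// m_beginIndex = 2 and m_endIndex = -1 resolve to [2, 9).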
int Axis() const { return m_axis; }
private:
// determine the tensor shape that represents slice of the input that we are taking
TensorShape GetInputSlice(size_t rank, const FrameRange & fr) const
@ -655,10 +659,11 @@ class WhereNode : public ComputationNodeNonLooping<ElemType>, public NumInputs<1
typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembersBoilerplate;
static const std::wstring TypeName() { return L"Where"; }
static const std::wstring DefaultWhereNodeDynamicAxisName() { return L"WhereNodeAxis"; }
public:
DeclareConstructorFromConfigWithNumInputs(WhereNode);
WhereNode(DEVICEID_TYPE deviceId, const wstring& name) :
Base(deviceId, name)
WhereNode(DEVICEID_TYPE deviceId, const wstring& name, const wstring& dynamicAxisName = DefaultWhereNodeDynamicAxisName()) :
Base(deviceId, name), m_dynamicAxisName(dynamicAxisName)
{
MarkValueNonSharable();
}
@ -669,11 +674,29 @@ public:
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }
virtual void Validate(bool isFinalValidationPass) override;
virtual void Load(File& fstream, size_t modelVersion) override
{
Base::Load(fstream, modelVersion);
if (modelVersion >= CNTK_MODEL_VERSION_11)
fstream >> m_dynamicAxisName;
else
m_dynamicAxisName = DefaultWhereNodeDynamicAxisName();
}
virtual void Save(File& fstream) const override
{
Base::Save(fstream);
fstream << m_dynamicAxisName;
}
std::wstring DynamicAxisName() const { return m_dynamicAxisName; }
private:
// buffers for creating the result sequences (kept as object state to avoid memory allocations)
std::vector<std::vector<size_t>> m_indexSequenceBuffer; // [sequenceIndex][t] for creating the result sequences
std::vector<size_t> m_rowAllocationsBuffer; // [row] for determining new MBLayout packing
std::vector<std::pair<size_t, size_t>> m_placementBuffer; // [sequenceIndex] assigned location for a sequence
std::wstring m_dynamicAxisName;
};
// -----------------------------------------------------------------------

View file

@ -37,14 +37,8 @@
#pragma warning(disable : 4244) // unreachable code; triggered for unknown reasons
#pragma warning(disable : 4702) // conversion from 'double' to 'float'
#ifdef USE_ACML
// Download ACML 5.3.1 (e.g., acml5.3.1-ifort64.exe) or above
// from http://developer.amd.com/tools/cpu-development/amd-core-math-library-acml/acml-downloads-resources/
// Install the ifort64_mp variant (compiled with intel compiler) of the library
// Set Environment variable ACML_PATH to C:\AMD\acml5.3.1\ifort64_mp or the folder you installed acml
// to point to your folder for the include file and link library
#include <acml.h> // requires ACML 5.3.1 and above
#elif defined(USE_MKL)
#ifdef USE_MKL
// requires MKL 10.0 and above
#include <mkl.h>
#else
@ -57,12 +51,6 @@
#include <lapacke.h>
#endif
#ifdef USE_ACML // MKL has one additional parameter for different matrix order
#define BLAS_COLMAJOR
#else
#define BLAS_COLMAJOR (int) MatrixOrder::ColMajor,
#endif
#define SWAP(a, b) \
{ \
(a) ^= (b); \
@ -912,11 +900,7 @@ void CPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, E
#pragma omp parallel for
foreach_column (j, us)
{
#ifdef USE_ACML
dcopy((int) numRows, reinterpret_cast<double*>(pArray + j), (int) numCols, reinterpret_cast<double*>(bufPtr + LocateColumn(j)), 1);
#else
cblas_dcopy((int) numRows, reinterpret_cast<double*>(pArray + j), (int) numCols, reinterpret_cast<double*>(bufPtr + LocateColumn(j)), 1);
#endif
}
}
else
@ -926,11 +910,7 @@ void CPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, E
{
{
#pragma warning(suppress : 4244)
#ifdef USE_ACML
scopy((int) numRows, reinterpret_cast<float*>(pArray + j), (int) numCols, reinterpret_cast<float*>(bufPtr + LocateColumn(j)), 1);
#else
cblas_scopy((int) numRows, reinterpret_cast<float*>(pArray + j), (int) numCols, reinterpret_cast<float*>(bufPtr + LocateColumn(j)), 1);
#endif
}
}
}
@ -2844,20 +2824,12 @@ ElemType CPUMatrix<ElemType>::SumOfAbsElements() const
if (sizeof(ElemType) == sizeof(double))
{
#ifdef USE_ACML
return (ElemType) dasum((int) GetNumElements(), reinterpret_cast<double*>(Data()), 1);
#else
return (ElemType) cblas_dasum((int) GetNumElements(), reinterpret_cast<double*>(Data()), 1);
#endif
}
else
{
#pragma warning(suppress : 4244)
#ifdef USE_ACML
return sasum((int) GetNumElements(), reinterpret_cast<float*>(Data()), 1);
#else
return cblas_sasum((int) GetNumElements(), reinterpret_cast<float*>(Data()), 1);
#endif
}
}
@ -3028,11 +3000,7 @@ void CPUMatrix<ElemType>::VectorNorm2(CPUMatrix<ElemType>& c, const bool isColWi
#pragma omp parallel for
foreach_column (j, c)
{
#ifdef USE_ACML
c(0, j) = (ElemType) dnrm2(m, reinterpret_cast<double*>(bufPtr + us.LocateColumn(j)), 1);
#else
c(0, j) = (ElemType) cblas_dnrm2(m, reinterpret_cast<double*>(bufPtr + us.LocateColumn(j)), 1);
#endif
}
}
else
@ -3041,11 +3009,7 @@ void CPUMatrix<ElemType>::VectorNorm2(CPUMatrix<ElemType>& c, const bool isColWi
foreach_column (j, c)
{
#pragma warning(suppress : 4244)
#ifdef USE_ACML
c(0, j) = snrm2(m, reinterpret_cast<float*>(bufPtr + us.LocateColumn(j)), 1);
#else
c(0, j) = cblas_snrm2(m, reinterpret_cast<float*>(bufPtr + us.LocateColumn(j)), 1);
#endif
}
}
}
@ -3058,11 +3022,7 @@ void CPUMatrix<ElemType>::VectorNorm2(CPUMatrix<ElemType>& c, const bool isColWi
#pragma omp parallel for
foreach_row (i, c)
{
#ifdef USE_ACML
c(i, 0) = dnrm2(n, reinterpret_cast<double*>(bufPtr + i), m);
#else
c(i, 0) = cblas_dnrm2(n, reinterpret_cast<double*>(bufPtr + i), m);
#endif
}
}
else
@ -3071,11 +3031,7 @@ void CPUMatrix<ElemType>::VectorNorm2(CPUMatrix<ElemType>& c, const bool isColWi
foreach_row (i, c)
{
#pragma warning(suppress : 4244)
#ifdef USE_ACML
c(i, 0) = snrm2(n, reinterpret_cast<float*>(bufPtr + i), m);
#else
c(i, 0) = cblas_snrm2(n, reinterpret_cast<float*>(bufPtr + i), m);
#endif
}
}
}
@ -4486,34 +4442,22 @@ void CPUMatrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix
int m, n, k, l;
int lda, ldb, ldc;
#ifdef USE_ACML
char transA, transB;
#else
CBLAS_TRANSPOSE mklTransA;
CBLAS_TRANSPOSE mklTransB;
#endif
if (transposeA)
{
m = (int) a.GetNumCols();
k = (int) a.GetNumRows();
lda = k;
#ifdef USE_ACML
transA = (char) MatrixTranspose::Trans;
#else
mklTransA = CBLAS_TRANSPOSE::CblasTrans;
#endif
}
else
{
m = (int) a.GetNumRows();
k = (int) a.GetNumCols();
lda = m;
#ifdef USE_ACML
transA = (char) MatrixTranspose::NoTrans;
#else
mklTransA = CBLAS_TRANSPOSE::CblasNoTrans;
#endif
}
if (transposeB)
@ -4521,22 +4465,14 @@ void CPUMatrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix
l = (int) b.GetNumCols();
n = (int) b.GetNumRows();
ldb = n;
#ifdef USE_ACML
transB = (char) MatrixTranspose::Trans;
#else
mklTransB = CBLAS_TRANSPOSE::CblasTrans;
#endif
}
else
{
l = (int) b.GetNumRows();
n = (int) b.GetNumCols();
ldb = l;
#ifdef USE_ACML
transB = (char) MatrixTranspose::NoTrans;
#else
mklTransB = CBLAS_TRANSPOSE::CblasNoTrans;
#endif
}
assert(m > 0 && k > 0 && l > 0 && n > 0); // converting from size_t to int may cause overflow
@ -4553,20 +4489,12 @@ void CPUMatrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix
if (sizeof(ElemType) == sizeof(double))
{
#ifdef USE_ACML
dgemm(transA, transB, m, n, k, alpha, reinterpret_cast<double*>(a.Data()), lda, reinterpret_cast<double*>(b.Data()), ldb, beta, reinterpret_cast<double*>(c.Data()), ldc);
#else
cblas_dgemm((CBLAS_ORDER) BLAS_COLMAJOR mklTransA, mklTransB, m, n, k, alpha, reinterpret_cast<double*>(a.Data()), lda, reinterpret_cast<double*>(b.Data()), ldb, beta, reinterpret_cast<double*>(c.Data()), ldc);
#endif
cblas_dgemm((CBLAS_ORDER) (int)MatrixOrder::ColMajor, mklTransA, mklTransB, m, n, k, alpha, reinterpret_cast<double*>(a.Data()), lda, reinterpret_cast<double*>(b.Data()), ldb, beta, reinterpret_cast<double*>(c.Data()), ldc);
}
else
{
#pragma warning(suppress : 4244)
#ifdef USE_ACML
sgemm(BLAS_COLMAJOR transA, transB, m, n, k, alpha, reinterpret_cast<float*>(a.Data()), lda, reinterpret_cast<float*>(b.Data()), ldb, beta, reinterpret_cast<float*>(c.Data()), ldc);
#else
cblas_sgemm((CBLAS_ORDER) BLAS_COLMAJOR mklTransA, mklTransB, m, n, k, alpha, reinterpret_cast<float*>(a.Data()), lda, reinterpret_cast<float*>(b.Data()), ldb, beta, reinterpret_cast<float*>(c.Data()), ldc);
#endif
cblas_sgemm((CBLAS_ORDER) (int)MatrixOrder::ColMajor, mklTransA, mklTransB, m, n, k, alpha, reinterpret_cast<float*>(a.Data()), lda, reinterpret_cast<float*>(b.Data()), ldb, beta, reinterpret_cast<float*>(c.Data()), ldc);
}
}
@ -4611,9 +4539,7 @@ void CPUMatrix<ElemType>::SVD(const CPUMatrix<ElemType>& A, CPUMatrix<ElemType>&
if (sizeof(ElemType) == sizeof(double))
{
#ifdef USE_ACML
dgesvd('A', 'A', (int) m, (int) n, reinterpret_cast<double*>(A.Data()), (int) lda, reinterpret_cast<double*>(SIGMA.Data()), reinterpret_cast<double*>(U.Data()), (int) ldu, reinterpret_cast<double*>(VT.Data()), (int) ldvt, &info);
#elif defined(USE_MKL)
#ifdef USE_MKL
double wkopt;
int lwork = -1;
dgesvd("All", "All", &m, &n, reinterpret_cast<double*>(A.Data()), &lda, reinterpret_cast<double*>(SIGMA.Data()), reinterpret_cast<double*>(U.Data()), &ldu, reinterpret_cast<double*>(VT.Data()), &ldvt, &wkopt, &lwork, &info);
@ -4622,16 +4548,13 @@ void CPUMatrix<ElemType>::SVD(const CPUMatrix<ElemType>& A, CPUMatrix<ElemType>&
dgesvd("All", "All", &m, &n, reinterpret_cast<double*>(A.Data()), &lda, reinterpret_cast<double*>(SIGMA.Data()), reinterpret_cast<double*>(U.Data()), &ldu, reinterpret_cast<double*>(VT.Data()), &ldvt, reinterpret_cast<double*>(W.Data()), &lwork, &info);
#else
std::vector<double> superb(std::max(std::min(m, n) - 1, 1));
info = LAPACKE_dgesvd(BLAS_COLMAJOR 'A', 'A', (int) m, (int) n, reinterpret_cast<double*>(A.Data()), (int) lda, reinterpret_cast<double*>(SIGMA.Data()),
info = LAPACKE_dgesvd((int) MatrixOrder::ColMajor, 'A', 'A', (int) m, (int) n, reinterpret_cast<double*>(A.Data()), (int) lda, reinterpret_cast<double*>(SIGMA.Data()),
reinterpret_cast<double*>(U.Data()), (int) ldu, reinterpret_cast<double*>(VT.Data()), (int) ldvt, &superb[0]);
#endif
}
else
{
#ifdef USE_ACML
#pragma warning(suppress : 4244)
sgesvd('A', 'A', (int) m, (int) n, reinterpret_cast<float*>(A.Data()), (int) lda, reinterpret_cast<float*>(SIGMA.Data()), reinterpret_cast<float*>(U.Data()), (int) ldu, reinterpret_cast<float*>(VT.Data()), (int) ldvt, &info);
#elif defined(USE_MKL)
#ifdef USE_MKL
float wkopt;
int lwork = -1;
sgesvd("All", "All", &m, &n, reinterpret_cast<float*>(A.Data()), &lda, reinterpret_cast<float*>(SIGMA.Data()), reinterpret_cast<float*>(U.Data()), &ldu, reinterpret_cast<float*>(VT.Data()), &ldvt, &wkopt, &lwork, &info);
@ -4640,7 +4563,7 @@ void CPUMatrix<ElemType>::SVD(const CPUMatrix<ElemType>& A, CPUMatrix<ElemType>&
sgesvd("All", "All", &m, &n, reinterpret_cast<float*>(A.Data()), &lda, reinterpret_cast<float*>(SIGMA.Data()), reinterpret_cast<float*>(U.Data()), &ldu, reinterpret_cast<float*>(VT.Data()), &ldvt, reinterpret_cast<float*>(W.Data()), &lwork, &info);
#else
std::vector<float> superb(std::max(std::min(m, n) - 1, 1));
info = LAPACKE_sgesvd(BLAS_COLMAJOR 'A', 'A', (int) m, (int) n, reinterpret_cast<float*>(A.Data()), (int) lda, reinterpret_cast<float*>(SIGMA.Data()),
info = LAPACKE_sgesvd((int) MatrixOrder::ColMajor, 'A', 'A', (int) m, (int) n, reinterpret_cast<float*>(A.Data()), (int) lda, reinterpret_cast<float*>(SIGMA.Data()),
reinterpret_cast<float*>(U.Data()), (int) ldu, reinterpret_cast<float*>(VT.Data()), (int) ldvt, &superb[0]);
#endif
}
@ -4837,20 +4760,12 @@ void CPUMatrix<ElemType>::ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>&
if (sizeof(ElemType) == sizeof(double))
{
#ifdef USE_ACML
daxpy(len, alpha, reinterpret_cast<double*>(a.Data()), incx, reinterpret_cast<double*>(c.Data()), incy);
#else
cblas_daxpy(len, alpha, reinterpret_cast<double*>(a.Data()), incx, reinterpret_cast<double*>(c.Data()), incy);
#endif
}
else
{
#pragma warning(suppress : 4244)
#ifdef USE_ACML
saxpy(len, alpha, reinterpret_cast<float*>(a.Data()), incx, reinterpret_cast<float*>(c.Data()), incy);
#else
cblas_saxpy(len, alpha, reinterpret_cast<float*>(a.Data()), incx, reinterpret_cast<float*>(c.Data()), incy);
#endif
}
}
else if (a.GetNumElements() == 1) // scalar, add to all elements
@ -4889,11 +4804,7 @@ void CPUMatrix<ElemType>::ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>&
#pragma omp parallel for
foreach_column (j, c)
{
#ifdef USE_ACML
daxpy(m, alpha, reinterpret_cast<double*>(aBufPtr), 1, reinterpret_cast<double*>(cBufPtr + c.LocateColumn(j)), 1);
#else
cblas_daxpy(m, alpha, reinterpret_cast<double*>(aBufPtr), 1, reinterpret_cast<double*>(cBufPtr + c.LocateColumn(j)), 1);
#endif
}
}
else
@ -4902,11 +4813,7 @@ void CPUMatrix<ElemType>::ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>&
foreach_column (j, c)
{
#pragma warning(suppress : 4244)
#ifdef USE_ACML
saxpy(m, alpha, reinterpret_cast<float*>(aBufPtr), 1, reinterpret_cast<float*>(cBufPtr + c.LocateColumn(j)), 1);
#else
cblas_saxpy(m, alpha, reinterpret_cast<float*>(aBufPtr), 1, reinterpret_cast<float*>(cBufPtr + c.LocateColumn(j)), 1);
#endif
}
}
}
@ -4925,11 +4832,7 @@ void CPUMatrix<ElemType>::ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>&
#pragma omp parallel for
foreach_row (i, c)
{
#ifdef USE_ACML
daxpy(n, alpha, reinterpret_cast<double*>(aBufPtr), 1, reinterpret_cast<double*>(cBufPtr + i), m);
#else
cblas_daxpy(n, alpha, reinterpret_cast<double*>(aBufPtr), 1, reinterpret_cast<double*>(cBufPtr + i), m);
#endif
}
}
else
@ -4938,11 +4841,7 @@ void CPUMatrix<ElemType>::ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>&
foreach_row (i, c)
{
#pragma warning(suppress : 4244)
#ifdef USE_ACML
saxpy(n, alpha, reinterpret_cast<float*>(aBufPtr), 1, reinterpret_cast<float*>(cBufPtr + i), m);
#else
cblas_saxpy(n, alpha, reinterpret_cast<float*>(aBufPtr), 1, reinterpret_cast<float*>(cBufPtr + i), m);
#endif
}
}
}
@ -5163,20 +5062,12 @@ template <class ElemType>
}
else if (sizeof(ElemType) == sizeof(double))
{
#ifdef USE_ACML
dscal(len, alpha, reinterpret_cast<double*>(a.Data()), incx); // TODO: Use overloads.
#else
cblas_dscal(len, alpha, reinterpret_cast<double*>(a.Data()), incx);
#endif
}
else
{
#pragma warning(suppress : 4244)
#ifdef USE_ACML
sscal(len, alpha, reinterpret_cast<float*>(a.Data()), incx);
#else
cblas_sscal(len, alpha, reinterpret_cast<float*>(a.Data()), incx);
#endif
}
}
@ -5224,11 +5115,7 @@ void CPUMatrix<ElemType>::InnerProduct(const CPUMatrix<ElemType>& a, const CPUMa
#pragma omp parallel for
foreach_column (j, c)
{
#ifdef USE_ACML
c(0, j) = (ElemType) ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn(j)), 1);
#else
c(0, j) = (ElemType) cblas_ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn(j)), 1);
#endif
}
}
else
@ -5237,11 +5124,7 @@ void CPUMatrix<ElemType>::InnerProduct(const CPUMatrix<ElemType>& a, const CPUMa
foreach_column (j, c)
{
#pragma warning(suppress : 4244)
#ifdef USE_ACML
c(0, j) = (ElemType) sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn(j)), 1);
#else
c(0, j) = (ElemType) cblas_sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn(j)), 1);
#endif
}
}
}
@ -5256,11 +5139,7 @@ void CPUMatrix<ElemType>::InnerProduct(const CPUMatrix<ElemType>& a, const CPUMa
#pragma omp parallel for
foreach_row (i, c)
{
#ifdef USE_ACML
c(i, 0) = ddot(n, reinterpret_cast<double*>(aBufPtr + i), m, reinterpret_cast<double*>(bBufPtr + i), m);
#else
c(i, 0) = cblas_ddot(n, reinterpret_cast<double*>(aBufPtr + i), m, reinterpret_cast<double*>(bBufPtr + i), m);
#endif
}
}
else
@ -5269,11 +5148,7 @@ void CPUMatrix<ElemType>::InnerProduct(const CPUMatrix<ElemType>& a, const CPUMa
foreach_row (i, c)
{
#pragma warning(suppress : 4244)
#ifdef USE_ACML
c(i, 0) = sdot(n, reinterpret_cast<float*>(aBufPtr + i), m, reinterpret_cast<float*>(bBufPtr + i), m);
#else
c(i, 0) = cblas_sdot(n, reinterpret_cast<float*>(aBufPtr + i), m, reinterpret_cast<float*>(bBufPtr + i), m);
#endif
}
}
}
@ -5298,20 +5173,12 @@ ElemType CPUMatrix<ElemType>::InnerProductOfMatrices(const CPUMatrix<ElemType>&
if (sizeof(ElemType) == sizeof(double))
{
#ifdef USE_ACML
return (ElemType) ddot((int) a.GetNumElements(), reinterpret_cast<double*>(a.Data()), 1, reinterpret_cast<double*>(b.Data()), 1);
#else
return (ElemType) cblas_ddot((int) a.GetNumElements(), reinterpret_cast<double*>(a.Data()), 1, reinterpret_cast<double*>(b.Data()), 1);
#endif
}
else
{
#pragma warning(suppress : 4244)
#ifdef USE_ACML
return (ElemType) sdot((int) a.GetNumElements(), reinterpret_cast<float*>(a.Data()), 1, reinterpret_cast<float*>(b.Data()), 1);
#else
return (ElemType) cblas_sdot((int) a.GetNumElements(), reinterpret_cast<float*>(a.Data()), 1, reinterpret_cast<float*>(b.Data()), 1);
#endif
}
}
@ -5539,21 +5406,13 @@ void CPUMatrix<ElemType>::InnerProductWithShiftNeg(const CPUMatrix<ElemType>& a,
{
for (long j = 0; j < n; j++)
{
#ifdef USE_ACML
c(0, j) = (ElemType) ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn(j)), 1);
#else
c(0, j) = (ElemType) cblas_ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn(j)), 1);
#endif
}
for (long j = 0; j < n; j++)
{
for (long i = 1; i < negnumber + 1; i++)
{
#ifdef USE_ACML
c(i, j) = (ElemType) ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn((j + shift + i - 1) % n)), 1);
#else
c(i, j) = (ElemType) cblas_ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn((j + shift + i - 1) % n)), 1);
#endif
}
}
}
@ -5561,21 +5420,13 @@ void CPUMatrix<ElemType>::InnerProductWithShiftNeg(const CPUMatrix<ElemType>& a,
{
for (long j = 0; j < n; j++)
{
#ifdef USE_ACML
c(0, j) = (ElemType) sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn(j)), 1);
#else
c(0, j) = (ElemType) cblas_sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn(j)), 1);
#endif
}
for (long j = 0; j < n; j++)
{
for (long i = 1; i < negnumber + 1; i++)
{
#ifdef USE_ACML
c(i, j) = (ElemType) sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn((j + shift + i - 1) % n)), 1);
#else
c(i, j) = (ElemType) cblas_sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn((j + shift + i - 1) % n)), 1);
#endif
}
}
}
@ -5593,11 +5444,7 @@ void CPUMatrix<ElemType>::InnerProductWithShiftNeg(const CPUMatrix<ElemType>& a,
#pragma omp parallel for
foreach_row (i, c)
{
#ifdef USE_ACML
c(i, 0) = (ElemType) ddot(n, reinterpret_cast<double*>(aBufPtr + i), m, reinterpret_cast<double*>(bBufPtr + i), m);
#else
c(i, 0) = (ElemType) cblas_ddot(n, reinterpret_cast<double*>(aBufPtr + i), m, reinterpret_cast<double*>(bBufPtr + i), m);
#endif
}
}
else
@ -5606,11 +5453,7 @@ void CPUMatrix<ElemType>::InnerProductWithShiftNeg(const CPUMatrix<ElemType>& a,
foreach_row (i, c)
{
#pragma warning(suppress : 4244)
#ifdef USE_ACML
c(i, 0) = sdot(n, reinterpret_cast<float*>(aBufPtr + i), m, reinterpret_cast<float*>(bBufPtr + i), m);
#else
c(i, 0) = cblas_sdot(n, reinterpret_cast<float*>(aBufPtr + i), m, reinterpret_cast<float*>(bBufPtr + i), m);
#endif
}
}
}
@ -6025,13 +5868,11 @@ int CPUMatrix<ElemType>::SetNumThreads(int numThreads)
omp_set_num_threads(numThreads);
numThreads = omp_get_max_threads();
#ifdef USE_ACML
acmlsetnumthreads(numThreads);
#elif defined(USE_MKL)
mkl_set_num_threads(numThreads);
#elif defined(USE_OPENBLAS)
openblas_set_num_threads(numThreads);
#endif
#ifdef USE_MKL
mkl_set_num_threads(numThreads);
#elif defined(USE_OPENBLAS)
openblas_set_num_threads(numThreads);
#endif
#endif
return numThreads;
}

View file

@ -23,15 +23,7 @@
#pragma warning(disable : 4127) // conditional expression is constant; "if (sizeof(ElemType)==sizeof(float))" triggers this
#ifdef USE_ACML
// use ACML as default.
// Download ACML 5.3.0 (e.g., acml5.3.0-ifort64.exe) or above
// from http://developer.amd.com/tools/cpu-development/amd-core-math-library-acml/acml-downloads-resources/
// Install the ifort64 variant (compiled with intel compiler) of the library
// Set Environment variable ACML_PATH to C:\AMD\acml5.3.0\ifort64_mp or the folder you installed acml
// to point to your folder for the include file and link library
#include <acml.h> // requires ACML 5.3.0 and above
#elif defined(USE_MKL)
#ifdef USE_MKL
// requires MKL 10.0 and above
#include <mkl.h>
#else
@ -53,12 +45,6 @@
// return 42;
//}
#ifdef USE_ACML // MKL has one additional parameter for different matrix order
#define BLAS_COLMAJOR
#else
#define BLAS_COLMAJOR (int) MatrixOrder::ColMajor,
#endif
// TODO: Move to CommonMatrix.h
#define IDX2C(i, j, ld) (((j) * (ld)) + (i)) // 0 based indexing
@ -261,11 +247,23 @@ void CPUSparseMatrix<ElemType>::SetValue(const CPUSparseMatrix<ElemType>& v)
RequireSizeAndAllocate(v.GetNumRows(), v.GetNumCols(), v.NzSize());
let nz = v.NzCount();
auto matrixFormat = v.GetFormat();
if (((matrixFormat == matrixFormatSparseBlockCol) || (matrixFormat == matrixFormatSparseBlockRow)) && (v.GetBlockIdShift() > 0))
NOT_IMPLEMENTED;
if (nz > 0)
{
memcpy(NzValues(), v.NzValues(), v.NzSize());
memcpy(RowLocation(), v.RowLocation(), v.RowSize());
memcpy(ColLocation(), v.ColLocation(), v.ColSize());
if ((matrixFormat == matrixFormatSparseCSC) || (matrixFormat == matrixFormatSparseCSR))
{
memcpy(RowLocation(), v.RowLocation(), v.RowSize());
memcpy(ColLocation(), v.ColLocation(), v.ColSize());
}
else
{
memcpy(GetBlockIds(), v.GetBlockIds(), v.GetBlockSize());
}
}
if (v.m_sliceViewOffset > 0)
{
@ -384,6 +382,66 @@ CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::DoGatherColumnsOf(ElemType
return *this;
}
// *this[:,idx[j]] = a[:,j] * alpha + *this[:,idx[j]] * beta
template <class ElemType>
CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::DoScatterColumnsOf(ElemType beta, const CPUMatrix<ElemType>& idx, const CPUSparseMatrix<ElemType>& a, ElemType alpha)
{
VerifyWritable(__func__);
if ((a.GetFormat() != matrixFormatSparseCSC) || (GetFormat() != matrixFormatSparseCSC))
NOT_IMPLEMENTED;
if (idx.GetNumRows() != 1) // index is 1-dimensional only
InvalidArgument("DoScatterColumnsOf: Map must be a row vector.");
if (beta != 0)
NOT_IMPLEMENTED;
if (NzCount() != 0)
InvalidArgument("CPUSparseMatrix::DoScatterColumnsOf: The target matrix cannot have pre-existing non-zero values when being scattered into");
size_t numNonZeroElements = a.NzCount();
if (beta == 0)
RequireSizeAndAllocate(GetNumRows(), GetNumCols(), numNonZeroElements);
// Set up the secondary index
std::vector<int> columnElementCounts(GetNumCols(), 0);
size_t numColsToWrite = idx.GetNumCols();
for (long j = 0; j < numColsToWrite; j++)
{
auto jOutF = idx(0, j); // this is the column we need to write to
if (::isnan(jOutF) || (jOutF < 0)) // negative index means gap
continue;
size_t jOut = (size_t)jOutF;
columnElementCounts[jOut] = a.SecondaryIndexLocation()[j + 1] - a.SecondaryIndexLocation()[j];
}
// TODO: Replace with std::exclusive_scan when we switch to C++17
for (size_t i = 1; i <= GetNumCols(); ++i)
SecondaryIndexLocation()[i] = SecondaryIndexLocation()[i - 1] + columnElementCounts[i - 1];
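// Worked example (assuming SecondaryIndexLocation()[0] == 0): columnElementCounts = {2, 0, 3}
// yields SecondaryIndexLocation() = {0, 2, 2, 5}, an exclusive prefix sum, so column j's
// nonzeros occupy [SecondaryIndexLocation()[j], SecondaryIndexLocation()[j + 1]).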
size_t offset = a.SecondaryIndexLocation()[0];
// TODO: Does it make sense to parallelize this?
for (long j = 0; j < numColsToWrite; j++)
{
auto jOutF = idx(0, j); // this is the column we need to write to
if (::isnan(jOutF) || (jOutF < 0)) // negative index means gap
continue;
size_t jOut = (size_t)jOutF;
auto start = SecondaryIndexLocation()[jOut];
auto end = SecondaryIndexLocation()[jOut + 1];
for (auto p = start; p < end; p++, offset++)
{
GetUnCompIndex()[p] = a.GetUnCompIndex()[offset];
Buffer()[p] = a.Buffer()[offset] * alpha;
}
}
return *this;
}
template <class ElemType>
void CPUSparseMatrix<ElemType>::Print(const char* matrixName) const
{
@ -587,13 +645,7 @@ void CPUSparseMatrix<ElemType>::SetMatrixFromCSCFormat(const CPUSPARSE_INDEX_TYP
}
template <class ElemType>
ElemType* CPUSparseMatrix<ElemType>::Data() const
{
return Buffer() + GetCompIndex()[m_sliceViewOffset];
}
template <class ElemType>
ElemType* CPUSparseMatrix<ElemType>::Data()
ElemType* CPUSparseMatrix<ElemType>::Data() const
{
return (Buffer() +
((GetFormat() == matrixFormatSparseCSC || GetFormat() == matrixFormatSparseCSR) ? GetCompIndex()[m_sliceViewOffset] : 0));
@ -1340,20 +1392,12 @@ ElemType CPUSparseMatrix<ElemType>::SumOfAbsElements() const
if (sizeof(ElemType) == sizeof(double))
{
#ifdef USE_ACML
return (ElemType) dasum((int) this->NzCount(), reinterpret_cast<double*>(Data()), 1);
#else
return (ElemType) cblas_dasum((int) this->NzCount(), reinterpret_cast<double*>(Data()), 1);
#endif
}
else
{
#pragma warning(suppress : 4244)
#ifdef USE_ACML
return sasum((int) this->NzCount(), reinterpret_cast<float*>(Data()), 1);
#else
return cblas_sasum((int) this->NzCount(), reinterpret_cast<float*>(Data()), 1);
#endif
}
}
@ -1495,7 +1539,6 @@ template void CPUSparseMatrix<char>::SetValue(size_t, size_t, char);
template void CPUSparseMatrix<char>::SetValue(CPUSparseMatrix<char> const&);
//template void CPUSparseMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
template char* CPUSparseMatrix<char>::Data() const;
template char* CPUSparseMatrix<char>::Data();
template void CPUSparseMatrix<char>::Reset(void);
template void CPUSparseMatrix<char>::Resize(const size_t, const size_t, const size_t, const bool);
template void CPUSparseMatrix<char>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, bool);
@ -1518,7 +1561,6 @@ template void CPUSparseMatrix<short>::SetValue(size_t, size_t, short);
template void CPUSparseMatrix<short>::SetValue(CPUSparseMatrix<short> const&);
//template void CPUSparseMatrix<short>::SetValue(GPUSparseMatrix<short> const&);
template short* CPUSparseMatrix<short>::Data() const;
template short* CPUSparseMatrix<short>::Data();
template void CPUSparseMatrix<short>::Reset(void);
template void CPUSparseMatrix<short>::Resize(const size_t, const size_t, const size_t, const bool);
template void CPUSparseMatrix<short>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, bool);

View file

@ -92,13 +92,13 @@ public:
void MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val);
CPUSparseMatrix<ElemType>& DoGatherColumnsOf(ElemType beta, const CPUMatrix<ElemType>& idx, const CPUSparseMatrix<ElemType>& a, ElemType alpha);
CPUSparseMatrix<ElemType>& DoScatterColumnsOf(ElemType beta, const CPUMatrix<ElemType>& idx, const CPUSparseMatrix<ElemType>& a, ElemType alpha);
size_t BufferSize() const
{
return GetSizeAllocated() * sizeof(ElemType);
}
ElemType* Data() const;
ElemType* Data();
inline size_t GetNumElemAllocated() const
{
return GetSizeAllocated();
@ -262,7 +262,8 @@ public:
CPUSPARSE_INDEX_TYPE* MajorIndexLocation() const
{
return GetUnCompIndex() + GetCompIndex()[m_sliceViewOffset];
return (GetUnCompIndex() +
((GetFormat() == matrixFormatSparseCSC || GetFormat() == matrixFormatSparseCSR) ? GetCompIndex()[m_sliceViewOffset] : 0));
} // this is the major index, row/col ids in CSC/CSR format
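// NOTE: the block formats (SparseBlockCol/SparseBlockRow) carry no compressed index,
// so the slice-view offset only applies to CSC/CSR -- hence the format check above.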
size_t MajorIndexCount() const

View file

@ -237,7 +237,7 @@ std::pair<size_t, size_t> TracingGPUMemoryAllocator::GetFreeAndTotalMemoryInMBs(
// deviceId - the device on which the operation will take place
void PrepareDevice(DEVICEID_TYPE deviceId)
{
static DEVICEID_TYPE currentDevice = DEVICEID_NOTYETDETERMINED;
THREAD_LOCAL static DEVICEID_TYPE currentDevice = DEVICEID_NOTYETDETERMINED;
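// NOTE: the CUDA runtime tracks the current device per host thread, so this cache must be
// thread-local as well; a shared static could make a fresh thread skip the device switch.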
// and if we last set the device to be this device we are good
if (deviceId == currentDevice)
return;

View file

@ -227,6 +227,5 @@
<Target Name="CheckDependencies">
<Error Condition="'$(MathLibrary)' == 'MKL' And '$(CNTK_MKL_PATH)' == ''" Text="CNTK custom MKL location not specified, see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#optional-mkl for instructions." />
<Error Condition="'$(MathLibrary)' == 'MKL' And !Exists('$(CNTKCustomMKLPath)')" Text="CNTK custom MKL not found. See https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#optional-mkl for instructions." />
<Error Condition="'$(MathLibrary)' == 'ACML' And !Exists('$(ACML_PATH)')" Text="ACML not found. See https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#acml for instructions." />
</Target>
</Project>

View file

@ -1081,7 +1081,7 @@ Matrix<ElemType>& Matrix<ElemType>::DoScatterColumnsOf(ElemType beta, const Matr
DISPATCH_MATRIX_ON_FLAG(&a, this,
{ m_CPUMatrix->DoScatterColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUMatrix, alpha); },
{ m_GPUMatrix->DoScatterColumnsOf(beta, *idx.m_GPUMatrix, *a.m_GPUMatrix, alpha); },
{ NOT_IMPLEMENTED; },
{ m_CPUSparseMatrix->DoScatterColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUSparseMatrix, alpha); },
{ NOT_IMPLEMENTED; });
return *this;

View file

@ -377,8 +377,8 @@ void HTKMLFReader<ElemType>::PrepareForTrainingOrTesting(const ConfigRecordType&
// second, remove trailing slash if there is any
// TODO: when gcc -v is 4.9 or greater, this should be: std::regex_replace(rootpath, L"\\/+$", wstring());
size_t stringPos = 0;
for (stringPos = rootpath.length() - 1; stringPos >= 0; stringPos--)
int stringPos = 0;
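// NOTE: the counter must be signed; with size_t the loop condition stringPos >= 0 is
// always true, so the counter would wrap around instead of terminating.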
for (stringPos = (int) (rootpath.length() - 1); stringPos >= 0; stringPos--)
{
if (rootpath[stringPos] != L'/')
{
@ -517,11 +517,11 @@ void HTKMLFReader<ElemType>::PrepareForTrainingOrTesting(const ConfigRecordType&
m_lattices->setverbosity(m_verbosity);
// now get the frame source. This has better randomization and doesn't create temp files
bool minimizeReaderMemoryFootprint = readerConfig(L"minimizeReaderMemoryFootprint", true);
m_frameSource.reset(new msra::dbn::minibatchutterancesourcemulti(infilesmulti, labelsmulti, m_featDims, m_labelDims,
bool useMersenneTwisterRand = readerConfig(L"useMersenneTwisterRand", false);
m_frameSource.reset(new msra::dbn::minibatchutterancesourcemulti(useMersenneTwisterRand, infilesmulti, labelsmulti, m_featDims, m_labelDims,
numContextLeft, numContextRight, randomize,
*m_lattices, m_latticeMap, m_frameMode,
minimizeReaderMemoryFootprint, m_expandToUtt));
m_expandToUtt));
m_frameSource->setverbosity(m_verbosity);
}
else if (EqualCI(readMethod, L"rollingWindow"))

View file

@ -12,7 +12,8 @@
#include "latticearchive.h" // for reading HTK phoneme lattices (MMI training)
#include "minibatchsourcehelpers.h"
#include "minibatchiterator.h"
#include "unordered_set"
#include <unordered_set>
#include <random>
namespace msra { namespace dbn {
@ -38,6 +39,10 @@ class minibatchutterancesourcemulti : public minibatchsource
// const std::vector<unique_ptr<latticesource>> &lattices;
const latticesource &lattices;
// Flag indicating whether to use the Mersenne Twister random generator.
bool m_useMersenneTwister;
std::mt19937_64 m_rng;
// std::vector<latticesource> lattices;
// word-level transcripts (for MMI mode when adding best path to lattices)
const map<wstring, msra::lattices::lattice::htkmlfwordsequence> &allwordtranscripts; // (used for getting word-level transcripts)
@ -413,6 +418,7 @@ class minibatchutterancesourcemulti : public minibatchsource
// When true we use a rolling window of randomized framerefs to minimize memory
// footprint, instead of using a large vector listing all frames in the training corpus
// Functionally, the two methods are identical.
// When it is true, we also use the Mersenne Twister random generator for randomization.
const bool m_minimizeMemoryFootprint;
// [globalt-sweepts] -> (chunk, utt, frame) lookup table for randomized frames --this can be REALLY big!
@ -429,6 +435,10 @@ class minibatchutterancesourcemulti : public minibatchsource
size_t m_currentRangeEndChunkIdx;
size_t m_nextFramePosNotYetRandomized;
// If m_minimizeMemoryFootprint is true, the Mersenne Twister generator is used for randomization
// because rand() is problematic in the distributed case.
std::mt19937_64 m_rng;
public:
framerandomizer(const std::vector<std::vector<chunk>>& randomizedChunks, bool minimizeMemoryFootprint)
: m_randomizedChunks(randomizedChunks), m_minimizeMemoryFootprint(minimizeMemoryFootprint), m_currentRangeBeginChunkIdx(0), m_currentRangeEndChunkIdx(0), m_nextFramePosNotYetRandomized(0)
@ -496,7 +506,9 @@ class minibatchutterancesourcemulti : public minibatchsource
for (;;) // (randomization retry loop)
{
size_t tswap = Microsoft::MSR::CNTK::rand(postbegin, postend); // random frame position within allowed range
size_t tswap = m_minimizeMemoryFootprint ?
Microsoft::MSR::CNTK::RandMT(postbegin, postend, m_rng) :
Microsoft::MSR::CNTK::rand(postbegin, postend); // random frame position within allowed range
// We want to swap 't' to 'tswap' and 'tswap' to 't'.
// - Both may have been swapped before.
// - Both must stay within the randomization window of their respective position.
@ -542,11 +554,11 @@ class minibatchutterancesourcemulti : public minibatchsource
void reset(unsigned int randSeed)
{
srand(randSeed);
size_t sweepts = m_randomizedChunks[0][0].globalts;
size_t totalFrames = m_randomizedChunks[0].back().globalte() - sweepts;
if (m_minimizeMemoryFootprint)
{
m_rng.seed(randSeed);
m_randomizedframerefsWindow.clear();
m_currentRangeBeginChunkIdx = m_randomizedChunks[0][0].windowbegin;
m_currentRangeEndChunkIdx = m_currentRangeBeginChunkIdx;
@ -554,6 +566,7 @@ class minibatchutterancesourcemulti : public minibatchsource
}
else
{
srand(randSeed + 1);
if (m_randomizedframerefs.size() != totalFrames)
m_randomizedframerefs.resize(totalFrames);
@ -866,10 +879,11 @@ public:
// constructor
// Pass empty labels to denote unsupervised training (so getbatch() will not return uids).
// This mode requires utterances with time stamps.
minibatchutterancesourcemulti(const std::vector<std::vector<wstring>> &infiles, const std::vector<map<wstring, std::vector<msra::asr::htkmlfentry>>> &labels,
minibatchutterancesourcemulti(bool useMersenneTwister, const std::vector<std::vector<wstring>> &infiles, const std::vector<map<wstring, std::vector<msra::asr::htkmlfentry>>> &labels,
std::vector<size_t> vdim, std::vector<size_t> udim, std::vector<size_t> leftcontext, std::vector<size_t> rightcontext, size_t randomizationrange,
const latticesource &lattices, const map<wstring, msra::lattices::lattice::htkmlfwordsequence> &allwordtranscripts, const bool framemode, bool minimizeMemoryFootprint, std::vector<bool> expandToUtt)
: vdim(vdim), leftcontext(leftcontext), rightcontext(rightcontext), sampperiod(0), featdim(0), randomizationrange(randomizationrange), currentsweep(SIZE_MAX), lattices(lattices), allwordtranscripts(allwordtranscripts), framemode(framemode), chunksinram(0), timegetbatch(0), verbosity(2), m_generatePhoneBoundaries(!lattices.empty()), m_frameRandomizer(randomizedchunks, minimizeMemoryFootprint), expandToUtt(expandToUtt)
const latticesource &lattices, const map<wstring, msra::lattices::lattice::htkmlfwordsequence> &allwordtranscripts, const bool framemode, std::vector<bool> expandToUtt)
: vdim(vdim), leftcontext(leftcontext), rightcontext(rightcontext), sampperiod(0), featdim(0), randomizationrange(randomizationrange), currentsweep(SIZE_MAX), lattices(lattices), allwordtranscripts(allwordtranscripts), framemode(framemode), chunksinram(0), timegetbatch(0), verbosity(2), m_generatePhoneBoundaries(!lattices.empty()), m_frameRandomizer(randomizedchunks, useMersenneTwister), expandToUtt(expandToUtt),
m_useMersenneTwister(useMersenneTwister)
// [v-hansu] change framemode (lattices.empty()) into framemode (false) to run utterance mode without lattice
// you also need to change another line, search : [v-hansu] comment out to run utterance mode without lattice
{
@ -1251,8 +1265,16 @@ private:
randomizedchunkrefs[i].push_back(allchunks[i].begin() + j);
assert(randomizedchunkrefs[i].size() == allchunks[i].size());
// note that since randomshuffle() uses sweep as seed, this will keep the randomization common across all feature streams
randomshuffle(randomizedchunkrefs[i], sweep); // bring into random order (with random seed depending on sweep)
if (m_useMersenneTwister)
{
m_rng.seed((unsigned long)sweep);
Microsoft::MSR::CNTK::RandomShuffleMT(randomizedchunkrefs[i], m_rng); // bring into random order (with random seed depending on sweep)
}
else
{
// note that since randomshuffle() uses sweep as seed, this will keep the randomization common across all feature streams
randomshuffle(randomizedchunkrefs[i], sweep); // bring into random order (with random seed depending on sweep)
}
}
// place them onto the global timeline -> randomizedchunks[]
@ -1348,7 +1370,7 @@ private:
// check we got those setup right
// we now randomly shuffle randomizedutterancerefs[pos], while considering the constraints of what chunk range needs to be in memory
srand((unsigned int) sweep + 1);
m_useMersenneTwister ? m_rng.seed((unsigned long)sweep) : srand((unsigned int)sweep + 1);
for (size_t i = 0; i < randomizedutterancerefs.size(); i++)
{
// get valid randomization range, expressed in chunks
@ -1364,7 +1386,9 @@ private:
for (;;)
{
// pick a random location
const size_t j = Microsoft::MSR::CNTK::rand(posbegin, posend); // a random number within the window
const size_t j = m_useMersenneTwister ?
Microsoft::MSR::CNTK::RandMT(posbegin, posend, m_rng) :
Microsoft::MSR::CNTK::rand(posbegin, posend); // a random number within the window
if (i == j)
break; // the random gods say "this one points to its original position"... nothing wrong about that, but better not try to swap
@ -1416,7 +1440,7 @@ private:
}
else // frame mode
{
m_frameRandomizer.reset((unsigned int)sweep + 1);
m_frameRandomizer.reset((unsigned int)sweep);
}
return sweep;
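
Throughout this file the legacy srand()/rand() helpers are swapped for an explicitly seeded std::mt19937_64 whenever useMersenneTwister is set; the comment above notes the motivation (rand() shares global state, which breaks reproducibility in the distributed case). The RandMT helper itself lives in the shared reader utilities and is not part of this diff; a minimal sketch of the assumed contract, a uniform draw from the half-open range [begin, end) using a caller-owned engine, would be:

    #include <cstddef>
    #include <random>

    // Sketch only: assumed semantics of Microsoft::MSR::CNTK::RandMT.
    // Draws uniformly from [begin, end); requires begin < end. A
    // caller-owned engine makes the stream reproducible per seed and
    // independent of any global RNG state.
    static size_t RandMT(size_t begin, size_t end, std::mt19937_64& rng)
    {
        std::uniform_int_distribution<size_t> dist(begin, end - 1); // bounds are inclusive
        return dist(rng);
    }

Seeding is also made explicit: reset() now seeds m_rng directly from randSeed in the memory-minimizing path, instead of relying on process-wide srand().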

View file

@ -21,7 +21,7 @@ public:
ByteReader() = default;
virtual ~ByteReader() = default;
virtual void Register(size_t seqId, const std::string& path) = 0;
virtual void Register(const std::map<std::string, size_t>& sequences) = 0;
virtual cv::Mat Read(size_t seqId, const std::string& path, bool grayscale) = 0;
DISABLE_COPY_AND_MOVE(ByteReader);
@ -30,7 +30,7 @@ public:
class FileByteReader : public ByteReader
{
public:
void Register(size_t, const std::string&) override {}
void Register(const std::map<std::string, size_t>&) override {}
cv::Mat Read(size_t seqId, const std::string& path, bool grayscale) override;
};
@ -40,7 +40,7 @@ class ZipByteReader : public ByteReader
public:
ZipByteReader(const std::string& zipPath);
void Register(size_t seqId, const std::string& path) override;
void Register(const std::map<std::string, size_t>& sequences) override;
cv::Mat Read(size_t seqId, const std::string& path, bool grayscale) override;
private:

View file

@ -13,6 +13,7 @@
#include "ImageConfigHelper.h"
#include "StringUtil.h"
#include "ConfigUtil.h"
#include "TimerUtility.h"
namespace Microsoft { namespace MSR { namespace CNTK {
@ -135,6 +136,7 @@ ImageDataDeserializer::ImageDataDeserializer(CorpusDescriptorPtr corpus, const C
}
string precision = (ConfigValue)config("precision", "float");
m_verbosity = config(L"verbosity", 0);
// Feature stream.
ConfigParameters featureSection = inputs(featureNames[0]);
@ -144,6 +146,7 @@ ImageDataDeserializer::ImageDataDeserializer(CorpusDescriptorPtr corpus, const C
features->m_storageType = StorageType::dense;
features->m_elementType = AreEqualIgnoreCase(precision, "float") ? ElementType::tfloat : ElementType::tdouble;
m_streams.push_back(features);
m_featureElementType = features->m_elementType;
// Label stream.
ConfigParameters label = inputs(labelNames[0]);
@ -179,6 +182,8 @@ ImageDataDeserializer::ImageDataDeserializer(const ConfigParameters& config)
const auto& label = m_streams[configHelper.GetLabelStreamId()];
const auto& feature = m_streams[configHelper.GetFeatureStreamId()];
m_verbosity = config(L"verbosity", 0);
// Expect data in HWC.
ImageDimensions dimensions(*feature->m_sampleLayout, configHelper.GetDataFormat());
feature->m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(HWC));
@ -240,9 +245,13 @@ void ImageDataDeserializer::CreateSequenceDescriptions(CorpusDescriptorPtr corpu
size_t curId = 0;
std::string line;
PathReaderMap knownReaders;
ReaderSequenceMap readerSequences;
ImageSequenceDescription description;
description.m_numberOfSamples = 1;
Timer timer;
timer.Start();
auto& stringRegistry = corpus->GetStringRegistry();
for (size_t lineIndex = 0; std::getline(mapFile, line); ++lineIndex)
{
@ -296,9 +305,20 @@ void ImageDataDeserializer::CreateSequenceDescriptions(CorpusDescriptorPtr corpu
m_keyToSequence[description.m_key.m_sequence] = m_imageSequences.size();
m_imageSequences.push_back(description);
RegisterByteReader(description.m_id, description.m_path, knownReaders);
RegisterByteReader(description.m_id, description.m_path, knownReaders, readerSequences);
}
}
for (auto& reader : knownReaders)
{
reader.second->Register(readerSequences[reader.first]);
}
timer.Stop();
if (m_verbosity > 1)
{
fprintf(stderr, "ImageDeserializer: Read information about %d images in %.6g seconds\n", (int)m_imageSequences.size(), timer.ElapsedSeconds());
}
}
ChunkPtr ImageDataDeserializer::GetChunk(ChunkIdType chunkId)
@ -307,7 +327,7 @@ ChunkPtr ImageDataDeserializer::GetChunk(ChunkIdType chunkId)
return std::make_shared<ImageChunk>(sequenceDescription, *this);
}
void ImageDataDeserializer::RegisterByteReader(size_t seqId, const std::string& path, PathReaderMap& knownReaders)
void ImageDataDeserializer::RegisterByteReader(size_t seqId, const std::string& path, PathReaderMap& knownReaders, ReaderSequenceMap& readerSequences)
{
assert(!path.empty());
@ -330,16 +350,19 @@ void ImageDataDeserializer::RegisterByteReader(size_t seqId, const std::string&
{
reader = std::make_shared<ZipByteReader>(containerPath);
knownReaders[containerPath] = reader;
readerSequences[containerPath] = std::map<std::string, size_t>();
}
else
{
reader = (*r).second;
}
reader->Register(seqId, itemPath);
readerSequences[containerPath][itemPath] = seqId;
m_readers[seqId] = reader;
#else
UNUSED(seqId);
UNUSED(knownReaders);
UNUSED(readerSequences);
RuntimeError("The code is built without zip container support. Only plain image files are supported.");
#endif
}
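
Taken together, the changes in this file replace one Register(seqId, path) call per image with a single batched Register per container: while scanning the map file, sequence ids are grouped by container path in readerSequences, and each reader is registered once after the scan. A toy illustration of the grouping (paths and ids are made up):

    #include <cstddef>
    #include <map>
    #include <string>

    // Container path -> (item path inside the container -> sequence id),
    // matching the ReaderSequenceMap typedef in the header below.
    using ReaderSequenceMap = std::map<std::string, std::map<std::string, std::size_t>>;

    int main()
    {
        ReaderSequenceMap readerSequences;
        readerSequences["train.zip"]["images/cat_001.jpg"] = 0; // hypothetical entries
        readerSequences["train.zip"]["images/dog_002.jpg"] = 1;
        // After the scan, one batched call per container:
        // for (auto& reader : knownReaders)
        //     reader.second->Register(readerSequences[reader.first]);
        return 0;
    }

For zip containers this matters because the batched Register (see ZipByteReader.cpp below) resolves all names in one pass over the archive directory instead of issuing a zip_stat() per image.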

View file

@ -72,7 +72,8 @@ private:
// Not using nocase_compare here as it's not correct on Linux.
using PathReaderMap = std::unordered_map<std::string, std::shared_ptr<ByteReader>>;
void RegisterByteReader(size_t seqId, const std::string& path, PathReaderMap& knownReaders);
using ReaderSequenceMap = std::map<std::string, std::map<std::string, size_t>>;
void RegisterByteReader(size_t seqId, const std::string& path, PathReaderMap& knownReaders, ReaderSequenceMap& readerSequences);
cv::Mat ReadImage(size_t seqId, const std::string& path, bool grayscale);
// REVIEW alexeyk: can potentially use vector instead of map. Need to handle default reader and resizing though.
@ -80,6 +81,7 @@ private:
SeqReaderMap m_readers;
FileByteReader m_defaultReader;
int m_verbosity;
};
}}}

View file

@ -44,16 +44,46 @@ ZipByteReader::ZipPtr ZipByteReader::OpenZip()
});
}
void ZipByteReader::Register(size_t seqId, const std::string& path)
void ZipByteReader::Register(const std::map<std::string, size_t>& sequences)
{
auto zipFile = m_zips.pop_or_create([this]() { return OpenZip(); });
zip_stat_t stat;
zip_stat_init(&stat);
int err = zip_stat(zipFile.get(), path.c_str(), 0, &stat);
if (ZIP_ER_OK != err)
RuntimeError("Failed to get file info of %s, zip library error: %s", path.c_str(), GetZipError(err).c_str());
m_seqIdToIndex[seqId] = std::make_pair(stat.index, stat.size);
size_t numberOfEntries = 0;
size_t numEntries = zip_get_num_entries(zipFile.get(), 0);
for (size_t i = 0; i < numEntries; ++i) {
int err = zip_stat_index(zipFile.get(), i, 0, &stat);
if (ZIP_ER_OK != err)
RuntimeError("Failed to get file info for index %d, zip library error: %s", (int)i, GetZipError(err).c_str());
auto sequenceId = sequences.find(std::string(stat.name));
if (sequenceId == sequences.end())
{
continue;
}
else
{
m_seqIdToIndex[sequenceId->second] = std::make_pair(stat.index, stat.size);
numberOfEntries++;
}
}
m_zips.push(std::move(zipFile));
if (numberOfEntries != sequences.size())
{
// Not all sequences have been found. Let's print them out and throw.
for (const auto& s : sequences)
{
auto index = m_seqIdToIndex.find(s.second);
if (index == m_seqIdToIndex.end())
{
fprintf(stderr, "Sequence %s is not found in container %s.\n", s.first.c_str(), m_zipPath.c_str());
}
}
RuntimeError("Cannot retrieve image data for some sequences. For more detail, please see the log file.");
}
}
cv::Mat ZipByteReader::Read(size_t seqId, const std::string& path, bool grayscale)

View file

@ -22,17 +22,10 @@
== Preliminaries ==
To build the CPU version, you have to install the Intel MKL BLAS library
or the ACML library first. Note that ACML is free, whereas MKL may not be.
for MKL:
1. Download from https://software.intel.com/en-us/intel-mkl
for ACML:
1. Download from
http://developer.amd.com/tools-and-sdks/archive/amd-core-math-library-acml/acml-downloads-resources/
We have seen some problems with some versions of the library on Intel
processors, but have had success with acml-5-3-1-ifort-64bit.tgz
for Kaldi:
1. In kaldi-trunk/tools/Makefile, uncomment # OPENFST_VERSION = 1.4.1, and
re-install OpenFst using the makefile.
@ -54,8 +47,7 @@ build in the directory "build" type
(For an in source build, just run configure in the $CNTK directory).
You will see various options for configure, as well as their default
values. CNTK needs a CPU math directory, either acml or mkl. If you
do not specify one and both are available, acml will be used. For GPU
values. CNTK needs a CPU math library (mkl). For GPU
use, a cuda and gdk directory are also required. Similarly, to build
the kaldi plugin a kaldi directory is required. You may also specify
whether you want a debug or release build, as well as add additional

View file

@ -171,6 +171,12 @@ public:
// setup all the state variables and state tables for state machine
void Init();
// convenience function for setting the flags
inline unsigned int SetSequenceFlags()
{
return (m_beginSequence ? seqFlagStartLabel : 0) | (m_endSequence ? seqFlagStopLabel : 0) | seqFlagLineBreak;
}
// Parser destructor
~SequenceParser();
@ -334,8 +340,7 @@ public:
case EndOfLine:
if (seqPos)
{
SequencePosition sequencePos(numbers->size(), labels->size(),
(m_beginSequence ? seqFlagStartLabel : 0) | (m_endSequence ? seqFlagStopLabel : 0) | seqFlagLineBreak);
SequencePosition sequencePos(numbers->size(), labels->size(), SetSequenceFlags());
// add a sequence element to the list
seqPos->push_back(sequencePos);
sequencePositionLast = sequencePos;
@ -429,8 +434,7 @@ public:
// this could probably be fixed by taking another pass through the loop above, but this is easier
if (seqPos)
{
SequencePosition sequencePos(numbers->size(), labels->size(),
m_beginSequence ? seqFlagStartLabel : 0 | m_endSequence ? seqFlagStopLabel : 0 | seqFlagLineBreak);
SequencePosition sequencePos(numbers->size(), labels->size(), SetSequenceFlags());
// add the final sequence element if needed
if (!(sequencePos.labelPos == sequencePositionLast.labelPos && sequencePos.numberPos == sequencePositionLast.numberPos))
{
@ -510,6 +514,7 @@ public:
using SequenceParser<NumType, LabelType>::m_totalNumbersConverted;
using SequenceParser<NumType, LabelType>::m_dimLabelsOut;
using SequenceParser<NumType, LabelType>::m_bufferStart;
using SequenceParser<NumType, LabelType>::SetSequenceFlags;
LMSequenceParser()
{
mFile = nullptr;
@ -594,8 +599,7 @@ public:
labels->push_back(std::move(vstr[i])); // TODO: is this an entire sequence, or multiple columns describing a single token?
// add a sequence element to the list
SequencePosition sequencePos(numbers->size(), labels->size(),
m_beginSequence ? seqFlagStartLabel : 0 | m_endSequence ? seqFlagStopLabel : 0 | seqFlagLineBreak);
SequencePosition sequencePos(numbers->size(), labels->size(), SetSequenceFlags());
seqPos->push_back(sequencePos);
lineCount++;
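
Besides removing duplication, SetSequenceFlags() fixes an operator-precedence bug at two of the three call sites: | binds tighter than ?:, so the unparenthesized flag expression selected exactly one value instead of OR-ing the flags (and usually dropped seqFlagLineBreak). A self-contained demonstration; the flag values here are illustrative, not the parser's actual ones:

    #include <cstdio>

    enum { seqFlagStartLabel = 1, seqFlagStopLabel = 2, seqFlagLineBreak = 4 };

    int main()
    {
        bool begin = true, end = true;
        // Unparenthesized form from the old call sites. It parses as
        //   begin ? seqFlagStartLabel
        //         : ((0 | end) ? seqFlagStopLabel : (0 | seqFlagLineBreak))
        // so at most one flag ever survives. (Compilers warn here with
        // -Wparentheses, which is a good hint.)
        unsigned buggy = begin ? seqFlagStartLabel : 0 | end ? seqFlagStopLabel : 0 | seqFlagLineBreak;
        // Parenthesized form, equivalent to SetSequenceFlags():
        unsigned fixed = (begin ? seqFlagStartLabel : 0) | (end ? seqFlagStopLabel : 0) | seqFlagLineBreak;
        std::printf("buggy=%u fixed=%u\n", buggy, fixed); // prints buggy=1 fixed=7
    }

Note the EndOfLine site already had the parentheses right; the helper simply makes all three sites share the correct version.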

View file

@ -80,8 +80,8 @@ void BlockRandomizer::StartEpoch(const EpochConfiguration& config)
#ifdef _DEBUG
size_t epochStartFrame = config.m_epochIndex * m_epochSize;
fprintf(stderr, "BlockRandomizer::StartEpoch: epoch %" PRIu64 ": frames [%" PRIu64 "..%" PRIu64 "] (first sequence at sample %" PRIu64 "), data subset %" PRIu64 " of %" PRIu64 "\n",
config.m_epochIndex,
fprintf(stderr, "BlockRandomizer::StartEpoch: epoch %" PRIu64 ": samples [%" PRIu64 "..%" PRIu64 "] (first sequence at sample %" PRIu64 "), worker rank %" PRIu64 ", total workers %" PRIu64 "\n",
config.m_epochIndex + 1,
epochStartFrame,
epochStartFrame + m_epochSize,
m_globalSamplePosition,
@ -107,7 +107,7 @@ void BlockRandomizer::PrepareNewSweepIfNeeded(size_t samplePosition)
m_chunkRandomizer->Randomize((unsigned int)m_sweep);
// Resetting sequence randomizer.
m_sequenceRandomizer->Reset(m_sweep + 1);
m_sequenceRandomizer->Reset(m_sweep);
m_lastSeenChunkId = CHUNKID_MAX;
}
}
@ -138,8 +138,8 @@ Sequences BlockRandomizer::GetNextSequences(size_t sampleCount)
if (m_verbosity >= Debug)
fprintf(stderr, "BlockRandomizer::GetNextSequences(): getting %" PRIu64 " out of %" PRIu64 " sequences for %" PRIu64 " requested samples in sweep %" PRIu64 "\n",
sequences.size(),
decimated.size(),
sequences.size(),
sampleCount,
m_sweep);

View file

@ -10,25 +10,6 @@
namespace Microsoft { namespace MSR { namespace CNTK {
// NOTE: This is an old code, used for legacy randomization to make sure we preserve the same behavior for the tests.
// TODO: Deprecate when the new randomizer is in place.
template <typename TVector>
void RandomShuffle(TVector& v, size_t randomSeed)
{
if (v.size() > RAND_MAX * static_cast<size_t>(RAND_MAX))
{
RuntimeError("RandomShuffle: too large set: need to change to different random generator!");
}
srand(static_cast<unsigned int>(randomSeed));
foreach_index(currentLocation, v)
{
// Pick a random location and swap with the current one
const size_t randomLocation = rand(0, v.size());
std::swap(v[currentLocation], v[randomLocation]);
}
}
ChunkRandomizer::ChunkRandomizer(IDataDeserializerPtr deserializer, size_t randomizationRangeInSamples, bool legacy) :
m_deserializer(deserializer), m_legacy(legacy), m_randomizationRangeInSamples(randomizationRangeInSamples)
{
@ -52,15 +33,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
randomizedChunkIndices.push_back(i);
}
if (m_legacy)
{
RandomShuffle(randomizedChunkIndices, seed);
}
else
{
std::mt19937 m_rng(static_cast<int>(seed));
std::shuffle(randomizedChunkIndices.begin(), randomizedChunkIndices.end(), m_rng);
}
m_rng.seed(seed);
RandomShuffleMT(randomizedChunkIndices, m_rng);
// Place randomized chunks on the timeline
m_randomizedChunks.clear();
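
The legacy RandomShuffle helper above is deleted in favor of RandomShuffleMT driven by the member mt19937_64. That helper is not shown in this diff; assuming it is a Fisher-Yates shuffle over a caller-owned engine, a sketch would be:

    #include <cstddef>
    #include <random>
    #include <utility>

    // Sketch only: assumed semantics of RandomShuffleMT. A hand-rolled
    // Fisher-Yates (rather than std::shuffle) yields the same permutation
    // for a given seed on every platform, since std::shuffle's use of the
    // engine is implementation-defined.
    template <typename TVector>
    void RandomShuffleMT(TVector& v, std::mt19937_64& rng)
    {
        for (size_t i = v.size(); i > 1; --i)
        {
            std::uniform_int_distribution<size_t> dist(0, i - 1);
            std::swap(v[i - 1], v[dist(rng)]);
        }
    }

This also removes the RAND_MAX * RAND_MAX size ceiling that the old rand()-based shuffle had to guard against.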

View file

@ -7,6 +7,7 @@
#include <vector>
#include "DataDeserializer.h"
#include <random>
namespace Microsoft { namespace MSR { namespace CNTK {
@ -68,6 +69,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
bool m_legacy;
// Randomization range in samples.
size_t m_randomizationRangeInSamples;
std::mt19937_64 m_rng;
};
typedef std::shared_ptr<ChunkRandomizer> ChunkRandomizerPtr;

View file

@ -45,7 +45,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
// Resets the current sweep according to the randomization seed provided.
void SequenceRandomizer::Reset(size_t randSeed)
{
srand((unsigned int)randSeed);
m_rng.seed((unsigned long)randSeed);
m_sequenceWindow.clear();
m_chunkWindow.clear();
@ -197,7 +197,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
for (;;)
{
// Pick a sequence position from [posBegin, posEnd)
const size_t j = rand(posBegin, posEnd);
const size_t j = RandMT(posBegin, posEnd, m_rng);
// Pick up j sequence.
ChunkIdType jChunkIndex = GetChunkIndexForSequencePosition(j);

View file

@ -11,6 +11,7 @@
#include "DataDeserializer.h"
#include "ChunkRandomizer.h"
#include <deque>
#include <random>
namespace Microsoft { namespace MSR { namespace CNTK {
@ -164,6 +165,8 @@ private:
// General configuration
int m_verbosity;
std::mt19937_64 m_rng;
};
typedef std::shared_ptr<SequenceRandomizer> SequenceRandomizerPtr;

View file

@ -40,30 +40,10 @@ template SGD<double>::SGD(const ScriptableObjects::IConfigRecord&);
// -----------------------------------------------------------------------
template <class ElemType>
void SGD<ElemType>::Train(function<ComputationNetworkPtr(DEVICEID_TYPE)> createNetworkFn, DEVICEID_TYPE deviceId,
void SGD<ElemType>::Train(shared_ptr<ComputationNetwork> net, DEVICEID_TYPE deviceId,
IDataReader* trainSetDataReader,
IDataReader* validationSetDataReader,
const bool makeMode)
IDataReader* validationSetDataReader, int startEpoch, bool loadNetworkFromCheckpoint)
{
// determine which epoch to start with, including recovering a checkpoint if any and 'makeMode' enabled
int startEpoch = DetermineStartEpoch(makeMode);
if (startEpoch == m_maxEpochs)
{
LOGPRINTF(stderr, "No further training is necessary.\n");
return;
}
wstring modelFileName = GetModelNameForEpoch(int(startEpoch) - 1);
bool loadNetworkFromCheckpoint = startEpoch >= 0;
fprintf(stderr, "\n");
if (loadNetworkFromCheckpoint)
LOGPRINTF(stderr, "Starting from checkpoint. Loading network from '%ls'.\n", modelFileName.c_str());
else
LOGPRINTF(stderr, "Creating virgin network.\n");
// create or load from checkpoint
shared_ptr<ComputationNetwork> net = !loadNetworkFromCheckpoint ? createNetworkFn(deviceId) : ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName);
// log the device we are computing on
LOGPRINTF(stderr, "%s model with %d nodes", loadNetworkFromCheckpoint ? "Loaded" : "Created", (int)net->GetTotalNumberOfNodes());
if (net->GetDeviceId() < 0)

View file

@ -110,6 +110,8 @@ struct SGDParams : public ScriptableObjects::Object
// SGDParams(SGDParams&&) = default; // (does not compile in VS 2013; not critical)
size_t GetMaxEpochs() { return m_maxEpochs; }
protected:
// learning rate per sample provided outside
floatargvector m_learningRatesParam;
@ -342,10 +344,9 @@ public:
m_parallelizationMethod = ParallelizationMethod::none;
}
void Train(function<ComputationNetworkPtr(DEVICEID_TYPE)> createNetworkFn, DEVICEID_TYPE deviceId,
void Train(shared_ptr<ComputationNetwork> net, DEVICEID_TYPE deviceId,
IDataReader* trainSetDataReader,
IDataReader* validationSetDataReader,
const bool makeMode = true);
IDataReader* validationSetDataReader, int startEpoch, bool loadNetworkFromCheckpoint);
void Adapt(wstring origModelFileName, wstring refNodeName,
IDataReader* trainSetDataReader,
IDataReader* validationSetDataReader,
@ -483,6 +484,10 @@ public:
const double L1RegWeight,
const bool needAveMultiplier,
const bool useNesterovMomentum);
// return -1 if nothing exists
int DetermineStartEpoch(const bool makeMode);
wstring GetModelNameForEpoch(const int epoch, bool bLastModel = false);
protected:
// UpdateWeights - update the weights in
@ -517,10 +522,6 @@ protected:
/*out*/ size_t& minibatchSize);
wstring GetCheckPointFileNameForEpoch(const int epoch);
wstring GetModelNameForEpoch(const int epoch, bool bLastModel = false);
// return -1 if nothing exists
int DetermineStartEpoch(const bool makeMode);
GradientsUpdateType GradUpdateType() const
{
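
With this refactor (previous file), checkpoint discovery and network creation move out of Train() to the caller; the header change above makes DetermineStartEpoch and GetModelNameForEpoch public to support that. A hypothetical caller mirroring the deleted logic (the wrapper name and glue are assumed; the SGD calls are the ones from this diff):

    // Sketch only; assumes the CNTK SGD/ComputationNetwork headers.
    template <class ElemType>
    void RunTraining(SGD<ElemType>& sgd,
                     function<ComputationNetworkPtr(DEVICEID_TYPE)> createNetworkFn,
                     DEVICEID_TYPE deviceId,
                     IDataReader* trainSetDataReader,
                     IDataReader* validationSetDataReader,
                     bool makeMode)
    {
        int startEpoch = sgd.DetermineStartEpoch(makeMode); // -1 if no checkpoint exists
        if (startEpoch == (int) sgd.GetMaxEpochs())
            return; // no further training is necessary
        bool loadNetworkFromCheckpoint = startEpoch >= 0;
        wstring modelFileName = sgd.GetModelNameForEpoch(startEpoch - 1);
        shared_ptr<ComputationNetwork> net = loadNetworkFromCheckpoint
            ? ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName)
            : createNetworkFn(deviceId);
        sgd.Train(net, deviceId, trainSetDataReader, validationSetDataReader,
                  startEpoch, loadNetworkFromCheckpoint);
    }

This keeps Train() itself free of filesystem concerns and lets callers decide how the initial network is produced.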

View file

@ -180,7 +180,7 @@ public:
m_gradHeader.reset(DistGradHeader::Create(evalNodes.size()), [](DistGradHeader* ptr) {
DistGradHeader::Destroy(ptr);
});
m_distGradAgg = make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, false, m_traceLevel);
m_distGradAgg = make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, false /*useAsyncAggregation*/, 0 /*syncStatsTrace*/);
}
m_gradHeader->numEvalNode = evalNodes.size();

View file

@ -21,6 +21,7 @@ mkdir $DataDir
cp -R $DataSourceDir/MNIST/v0/Train-28x28_cntk_text.txt $DataDir || exit $?
cp -R $DataSourceDir/CIFAR/v0/cifar-10-batches-py $DataDir || exit $?
cp -R $TEST_DIR/../../../../Examples/Other/Simple2d/Data/SimpleDataTrain_cntk_text.txt $DataDir || exit $?
cp -R $TEST_DIR/../../Text/SequenceClassification/Data/Train.ctf $DataDir || exit $?
pushd $DataDir

View file

@ -272,8 +272,8 @@ Post-processing network...
4 roots:
ce = CrossEntropyWithSoftmax()
err = ClassificationError()
errTop1 = ClassificationError()
err = ErrorPrediction()
errTop1 = ErrorPrediction()
ol.z = Plus()
Validating network. 17 nodes to process in pass 1.
@ -292,9 +292,9 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *] -> [10 x 1
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> unnamed81 = LearnableParameter() : -> [1 x 1]
Validating --> errTop1 = ClassificationError (labels, ol.z, unnamed81) : [10 x *], [10 x 1 x *], [1 x 1] -> [1]
Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *], [10 x 1 x *], [1 x 1] -> [1]
Validating network. 9 nodes to process in pass 2.
@ -314,8 +314,8 @@ Post-processing network complete.
05/13/2016 15:10:02: Evaluation criterion node(s):
05/13/2016 15:10:02: errTop1 = ClassificationError
05/13/2016 15:10:02: err = ClassificationError
05/13/2016 15:10:02: errTop1 = ErrorPrediction
05/13/2016 15:10:02: err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
@ -390,8 +390,8 @@ Post-processing network...
4 roots:
ce = CrossEntropyWithSoftmax()
err = ClassificationError()
errTop1 = ClassificationError()
err = ErrorPrediction()
errTop1 = ErrorPrediction()
ol.z = Plus()
Validating network. 17 nodes to process in pass 1.
@ -410,9 +410,9 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *1] -> [10 x 1
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> unnamed81 = LearnableParameter() : -> [1 x 1]
Validating --> errTop1 = ClassificationError (labels, ol.z, unnamed81) : [10 x *1], [10 x 1 x *1], [1 x 1] -> [1]
Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *1], [10 x 1 x *1], [1 x 1] -> [1]
Validating network. 9 nodes to process in pass 2.

View file

@ -270,8 +270,8 @@ Post-processing network...
4 roots:
ce = CrossEntropyWithSoftmax()
err = ClassificationError()
errTop1 = ClassificationError()
err = ErrorPrediction()
errTop1 = ErrorPrediction()
ol.z = Plus()
Validating network. 17 nodes to process in pass 1.
@ -290,9 +290,9 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *] -> [10 x 1
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> unnamed81 = LearnableParameter() : -> [1 x 1]
Validating --> errTop1 = ClassificationError (labels, ol.z, unnamed81) : [10 x *], [10 x 1 x *], [1 x 1] -> [1]
Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *], [10 x 1 x *], [1 x 1] -> [1]
Validating network. 9 nodes to process in pass 2.
@ -312,8 +312,8 @@ Post-processing network complete.
05/13/2016 08:15:53: Evaluation criterion node(s):
05/13/2016 08:15:53: errTop1 = ClassificationError
05/13/2016 08:15:53: err = ClassificationError
05/13/2016 08:15:53: errTop1 = ErrorPrediction
05/13/2016 08:15:53: err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
@ -388,8 +388,8 @@ Post-processing network...
4 roots:
ce = CrossEntropyWithSoftmax()
err = ClassificationError()
errTop1 = ClassificationError()
err = ErrorPrediction()
errTop1 = ErrorPrediction()
ol.z = Plus()
Validating network. 17 nodes to process in pass 1.
@ -408,9 +408,9 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *1] -> [10 x 1
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> unnamed81 = LearnableParameter() : -> [1 x 1]
Validating --> errTop1 = ClassificationError (labels, ol.z, unnamed81) : [10 x *1], [10 x 1 x *1], [1 x 1] -> [1]
Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *1], [10 x 1 x *1], [1 x 1] -> [1]
Validating network. 9 nodes to process in pass 2.

View file

@ -284,7 +284,7 @@ Post-processing network...
3 roots:
ce = CrossEntropyWithSoftmax()
err = ClassificationError()
err = ErrorPrediction()
ol.z = Plus()
Validating network. 27 nodes to process in pass 1.
@ -315,7 +315,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *] -> [10 x 1
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating network. 16 nodes to process in pass 2.
@ -343,7 +343,7 @@ Post-processing network complete.
05/13/2016 15:10:11: Evaluation criterion node(s):
05/13/2016 15:10:11: err = ClassificationError
05/13/2016 15:10:11: err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
@ -429,7 +429,7 @@ Post-processing network...
3 roots:
ce = CrossEntropyWithSoftmax()
err = ClassificationError()
err = ErrorPrediction()
ol.z = Plus()
Validating network. 27 nodes to process in pass 1.
@ -460,7 +460,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *1] -> [10 x 1
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating network. 16 nodes to process in pass 2.

View file

@ -282,7 +282,7 @@ Post-processing network...
3 roots:
ce = CrossEntropyWithSoftmax()
err = ClassificationError()
err = ErrorPrediction()
ol.z = Plus()
Validating network. 27 nodes to process in pass 1.
@ -313,7 +313,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *] -> [10 x 1
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating network. 16 nodes to process in pass 2.
@ -341,7 +341,7 @@ Post-processing network complete.
05/13/2016 08:16:18: Evaluation criterion node(s):
05/13/2016 08:16:18: err = ClassificationError
05/13/2016 08:16:18: err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
@ -427,7 +427,7 @@ Post-processing network...
3 roots:
ce = CrossEntropyWithSoftmax()
err = ClassificationError()
err = ErrorPrediction()
ol.z = Plus()
Validating network. 27 nodes to process in pass 1.
@ -458,7 +458,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *1] -> [10 x 1
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating network. 16 nodes to process in pass 2.

View file

@ -287,7 +287,7 @@ Post-processing network...
3 roots:
ce = CrossEntropyWithSoftmax()
err = ClassificationError()
err = ErrorPrediction()
ol.z = Plus()
Validating network. 36 nodes to process in pass 1.
@ -329,7 +329,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *] -> [10 x *]
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *], [10 x 1] -> [10 x 1 x *]
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating network. 16 nodes to process in pass 2.
@ -363,7 +363,7 @@ Post-processing network complete.
05/13/2016 15:10:29: Evaluation criterion node(s):
05/13/2016 15:10:29: err = ClassificationError
05/13/2016 15:10:29: err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
@ -462,7 +462,7 @@ Post-processing network...
3 roots:
ce = CrossEntropyWithSoftmax()
err = ClassificationError()
err = ErrorPrediction()
ol.z = Plus()
Validating network. 36 nodes to process in pass 1.
@ -502,7 +502,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *1] -> [10 x *1]
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *1], [10 x 1] -> [10 x 1 x *1]
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating network. 16 nodes to process in pass 2.

View file

@ -285,7 +285,7 @@ Post-processing network...
3 roots:
ce = CrossEntropyWithSoftmax()
err = ClassificationError()
err = ErrorPrediction()
ol.z = Plus()
Validating network. 36 nodes to process in pass 1.
@ -327,7 +327,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *] -> [10 x *]
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *], [10 x 1] -> [10 x 1 x *]
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
Validating network. 16 nodes to process in pass 2.
@ -361,7 +361,7 @@ Post-processing network complete.
05/13/2016 08:16:58: Evaluation criterion node(s):
05/13/2016 08:16:58: err = ClassificationError
05/13/2016 08:16:58: err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
@ -460,7 +460,7 @@ Post-processing network...
3 roots:
ce = CrossEntropyWithSoftmax()
err = ClassificationError()
err = ErrorPrediction()
ol.z = Plus()
Validating network. 36 nodes to process in pass 1.
@ -500,7 +500,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *1] -> [10 x *1]
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *1], [10 x 1] -> [10 x 1 x *1]
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating network. 16 nodes to process in pass 2.

View file

@ -1,49 +1,62 @@
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/Config/01_Conv.cntk currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10 OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DeviceId=0 timestamping=true Train=[SGD=[maxEpochs=10]] Train=[SGD=[epochSize=100]] stderr=-
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 264172964 kB
-------------------------------------------------------------------
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Conv.cntk currentDirectory=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData RunDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DataDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10 OutputDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DeviceId=0 timestamping=true Train=[SGD=[maxEpochs=10]] Train=[SGD=[epochSize=100]] stderr=-
-------------------------------------------------------------------
Build info:
Built time: May 13 2016 14:50:25
Last modified date: Thu May 12 14:00:37 2016
Built time: Aug 16 2016 09:41:56
Last modified date: Fri Aug 12 07:32:43 2016
Build type: release
Build target: GPU
With 1bit-SGD: no
Math lib: acml
Math lib: mkl
CUDA_PATH: /usr/local/cuda-7.5
CUB_PATH: /usr/local/cub-1.4.1
CUDNN_PATH: /usr/local/cudnn-4.0
Build Branch: HEAD
Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
Built by philly on d8dc82703b0f
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by philly on f67b30a647de
Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
-------------------------------------------------------------------
Changed current directory to /tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
05/13/2016 15:10:47: Redirecting stderr to file -_Train_Test.log
05/13/2016 15:10:47: -------------------------------------------------------------------
05/13/2016 15:10:47: Build info:
Changed current directory to /tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
08/16/2016 10:50:36: Redirecting stderr to file -_Train_Test.log
08/16/2016 10:50:36: -------------------------------------------------------------------
08/16/2016 10:50:36: Build info:
05/13/2016 15:10:47: Built time: May 13 2016 14:50:25
05/13/2016 15:10:47: Last modified date: Thu May 12 14:00:37 2016
05/13/2016 15:10:47: Build type: release
05/13/2016 15:10:47: Build target: GPU
05/13/2016 15:10:47: With 1bit-SGD: no
05/13/2016 15:10:47: Math lib: acml
05/13/2016 15:10:47: CUDA_PATH: /usr/local/cuda-7.5
05/13/2016 15:10:47: CUB_PATH: /usr/local/cub-1.4.1
05/13/2016 15:10:47: CUDNN_PATH: /usr/local/cudnn-4.0
05/13/2016 15:10:47: Build Branch: HEAD
05/13/2016 15:10:47: Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
05/13/2016 15:10:47: Built by philly on d8dc82703b0f
05/13/2016 15:10:47: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
05/13/2016 15:10:47: -------------------------------------------------------------------
08/16/2016 10:50:36: Built time: Aug 16 2016 09:41:56
08/16/2016 10:50:36: Last modified date: Fri Aug 12 07:32:43 2016
08/16/2016 10:50:36: Build type: release
08/16/2016 10:50:36: Build target: GPU
08/16/2016 10:50:36: With 1bit-SGD: no
08/16/2016 10:50:36: Math lib: mkl
08/16/2016 10:50:36: CUDA_PATH: /usr/local/cuda-7.5
08/16/2016 10:50:36: CUB_PATH: /usr/local/cub-1.4.1
08/16/2016 10:50:36: CUDNN_PATH: /usr/local/cudnn-4.0
08/16/2016 10:50:36: Build Branch: HEAD
08/16/2016 10:50:36: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
08/16/2016 10:50:36: Built by philly on f67b30a647de
08/16/2016 10:50:36: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
08/16/2016 10:50:36: -------------------------------------------------------------------
08/16/2016 10:50:37: -------------------------------------------------------------------
08/16/2016 10:50:37: GPU info:
05/13/2016 15:10:47: Running on localhost at 2016/05/13 15:10:47
05/13/2016 15:10:47: Command line:
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/Config/01_Conv.cntk currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10 OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DeviceId=0 timestamping=true Train=[SGD=[maxEpochs=10]] Train=[SGD=[epochSize=100]] stderr=-
08/16/2016 10:50:37: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:50:37: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:50:37: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:50:37: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:50:37: -------------------------------------------------------------------
08/16/2016 10:50:37: Running on localhost at 2016/08/16 10:50:37
08/16/2016 10:50:37: Command line:
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Conv.cntk currentDirectory=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData RunDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DataDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10 OutputDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DeviceId=0 timestamping=true Train=[SGD=[maxEpochs=10]] Train=[SGD=[epochSize=100]] stderr=-
05/13/2016 15:10:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
05/13/2016 15:10:47: RootDir = "."
08/16/2016 10:50:37: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:50:37: RootDir = "."
ConfigDir = "$RootDir$"
DataDir = "$RootDir$"
OutputDir = "$RootDir$/Output"
@ -53,7 +66,6 @@ precision = "float"
deviceId = 0
imageLayout = "cudnn"
initOnCPUOnly=true
prefetch = "true"
command = Train:Test
modelPath = "$ModelDir$/01_Convolution"
stderr = "$OutputDir$/01_Conv"
@ -86,7 +98,7 @@ Train = [
format = "dense"
]
]
]
]
]
Test = [
action = "test"
@ -104,42 +116,41 @@ Test = [
format = "dense"
]
]
]
]
]
currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
currentDirectory=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
RunDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
DataDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
OutputDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
DeviceId=0
timestamping=true
Train=[SGD=[maxEpochs=10]]
Train=[SGD=[epochSize=100]]
stderr=-
05/13/2016 15:10:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:50:37: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
05/13/2016 15:10:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
05/13/2016 15:10:47: RootDir = "."
08/16/2016 10:50:37: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:50:37: RootDir = "."
ConfigDir = "."
DataDir = "."
OutputDir = "./Output"
ModelDir = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models"
ndlMacros = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl"
ModelDir = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models"
ndlMacros = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl"
precision = "float"
deviceId = 0
imageLayout = "cudnn"
initOnCPUOnly=true
prefetch = "true"
command = Train:Test
modelPath = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution"
stderr = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/01_Conv"
modelPath = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution"
stderr = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/01_Conv"
traceLevel = 1
numMBsToShowResult = 500
Train = [
action = "train"
NDLNetworkBuilder = [
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl"
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl"
]
SGD = [
epochSize = 49984
@ -152,7 +163,7 @@ Train = [
]
reader = [
readerType = "CNTKTextFormatReader"
file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Train_cntk_text.txt"
file = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Train_cntk_text.txt"
input = [
features = [
dim = 3072
@ -163,14 +174,14 @@ Train = [
format = "dense"
]
]
]
]
]
Test = [
action = "test"
minibatchSize = 16
reader = [
readerType = "CNTKTextFormatReader"
file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Test_cntk_text.txt"
file = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Test_cntk_text.txt"
input = [
features = [
dim = 3072
@ -181,45 +192,44 @@ Test = [
format = "dense"
]
]
]
]
]
currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
currentDirectory=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
RunDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
DataDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
OutputDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
DeviceId=0
timestamping=true
Train=[SGD=[maxEpochs=10]]
Train=[SGD=[epochSize=100]]
stderr=-
05/13/2016 15:10:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:50:37: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
05/13/2016 15:10:47: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:50:37: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: 01_Conv.cntk:command=Train:Test
configparameters: 01_Conv.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
configparameters: 01_Conv.cntk:currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
configparameters: 01_Conv.cntk:DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
configparameters: 01_Conv.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
configparameters: 01_Conv.cntk:currentDirectory=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
configparameters: 01_Conv.cntk:DataDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
configparameters: 01_Conv.cntk:deviceId=0
configparameters: 01_Conv.cntk:imageLayout=cudnn
configparameters: 01_Conv.cntk:initOnCPUOnly=true
configparameters: 01_Conv.cntk:ModelDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models
configparameters: 01_Conv.cntk:modelPath=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution
configparameters: 01_Conv.cntk:ndlMacros=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl
configparameters: 01_Conv.cntk:ModelDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models
configparameters: 01_Conv.cntk:modelPath=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution
configparameters: 01_Conv.cntk:ndlMacros=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl
configparameters: 01_Conv.cntk:numMBsToShowResult=500
configparameters: 01_Conv.cntk:OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
configparameters: 01_Conv.cntk:OutputDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
configparameters: 01_Conv.cntk:precision=float
configparameters: 01_Conv.cntk:prefetch=true
configparameters: 01_Conv.cntk:RootDir=.
configparameters: 01_Conv.cntk:RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
configparameters: 01_Conv.cntk:RunDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
configparameters: 01_Conv.cntk:stderr=-
configparameters: 01_Conv.cntk:Test=[
action = "test"
minibatchSize = 16
reader = [
readerType = "CNTKTextFormatReader"
file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Test_cntk_text.txt"
file = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Test_cntk_text.txt"
input = [
features = [
dim = 3072
@ -230,7 +240,7 @@ configparameters: 01_Conv.cntk:Test=[
format = "dense"
]
]
]
]
]
configparameters: 01_Conv.cntk:timestamping=true
@ -238,7 +248,7 @@ configparameters: 01_Conv.cntk:traceLevel=1
configparameters: 01_Conv.cntk:Train=[
action = "train"
NDLNetworkBuilder = [
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl"
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl"
]
SGD = [
epochSize = 49984
@ -251,7 +261,7 @@ configparameters: 01_Conv.cntk:Train=[
]
reader = [
readerType = "CNTKTextFormatReader"
file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Train_cntk_text.txt"
file = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Train_cntk_text.txt"
input = [
features = [
dim = 3072
@ -262,33 +272,57 @@ configparameters: 01_Conv.cntk:Train=[
format = "dense"
]
]
]
]
] [SGD=[maxEpochs=10]] [SGD=[epochSize=100]]
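The bracketed groups appended after the Train block are test-harness overrides layered onto the base config: maxEpochs is forced to 10 and epochSize to 100, which is why every epoch below consumes exactly 100 samples. A rough Python analogy of that last-writer-wins merge follows; the merge semantics are an assumption, not CNTK source, and the base maxEpochs value is a stand-in.

# Later bracketed overrides win per key, section by section.
def apply_overrides(base, *overrides):
    for override in overrides:
        for section, params in override.items():
            base.setdefault(section, {}).update(params)
    return base

config = {"SGD": {"epochSize": 49984, "maxEpochs": 30}}   # 30 is a stand-in
config = apply_overrides(config, {"SGD": {"maxEpochs": 10}},
                         {"SGD": {"epochSize": 100}})
print(config["SGD"])   # {'epochSize': 100, 'maxEpochs': 10}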
05/13/2016 15:10:47: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
05/13/2016 15:10:47: Commands: Train Test
05/13/2016 15:10:47: Precision = "float"
05/13/2016 15:10:47: CNTKModelPath: /tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution
05/13/2016 15:10:47: CNTKCommandTrainInfo: Train : 10
05/13/2016 15:10:47: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 10
08/16/2016 10:50:37: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:50:37: Commands: Train Test
08/16/2016 10:50:37: Precision = "float"
08/16/2016 10:50:37: CNTKModelPath: /tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution
08/16/2016 10:50:37: CNTKCommandTrainInfo: Train : 10
08/16/2016 10:50:37: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 10
05/13/2016 15:10:47: ##############################################################################
05/13/2016 15:10:47: # #
05/13/2016 15:10:47: # Action "train" #
05/13/2016 15:10:47: # #
05/13/2016 15:10:47: ##############################################################################
08/16/2016 10:50:37: ##############################################################################
08/16/2016 10:50:37: # #
08/16/2016 10:50:37: # Action "train" #
08/16/2016 10:50:37: # #
08/16/2016 10:50:37: ##############################################################################
05/13/2016 15:10:47: CNTKCommandTrainBegin: Train
08/16/2016 10:50:37: CNTKCommandTrainBegin: Train
NDLBuilder Using GPU 0
05/13/2016 15:10:47: Creating virgin network.
08/16/2016 10:50:37: Creating virgin network.
Node 'featOffs' (LearnableParameter operation): Initializing Parameter[1 x 1] <- 0.000000.
Node 'conv1_act.W' (LearnableParameter operation): Initializing Parameter[32 x 75] <- 0.000000.
Node 'conv1_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 32] <- 0.000000.
Node 'conv2_act.W' (LearnableParameter operation): Initializing Parameter[32 x 800] <- 0.000000.
Node 'conv2_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 32] <- 0.000000.
Node 'conv3_act.W' (LearnableParameter operation): Initializing Parameter[64 x 800] <- 0.000000.
Node 'conv3_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 64] <- 0.000000.
Node 'h1.W' (LearnableParameter operation): Initializing Parameter[64 x 3 x 3 x 64] <- 0.000000.
Node 'h1.b' (LearnableParameter operation): Initializing Parameter[64 x 1] <- 0.000000.
Node 'OutputNodes.W' (LearnableParameter operation): Initializing Parameter[10 x 64] <- 0.000000.
Node 'OutputNodes.b' (LearnableParameter operation): Initializing Parameter[10] <- 0.000000.
Node 'featOffs' (LearnableParameter operation): Initializing Parameter[1 x 1] <- 128.000000.
Node 'featOffs' (LearnableParameter operation): Initializing Parameter[1 x 1] <- 128.000000.
Node 'featOffs' (LearnableParameter operation): Initializing Parameter[1 x 1] <- 128.000000.
Node 'conv1_act.W' (LearnableParameter operation): Initializing Parameter[32 x 75] <- gaussian(seed=1, range=0.023094*0.004300, onCPU=false).
SetGaussianRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4
Node 'conv1_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 32] <- 0.000000.
Node 'conv2_act.W' (LearnableParameter operation): Initializing Parameter[32 x 800] <- gaussian(seed=2, range=0.007071*1.414000, onCPU=false).
Node 'conv2_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 32] <- 0.000000.
Node 'conv3_act.W' (LearnableParameter operation): Initializing Parameter[64 x 800] <- gaussian(seed=3, range=0.007071*1.414000, onCPU=false).
Node 'conv3_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 64] <- 0.000000.
Node 'h1.W' (LearnableParameter operation): Initializing Parameter[64 x 3 x 3 x 64] <- gaussian(seed=4, range=0.008333*12.000000, onCPU=false).
Node 'h1.b' (LearnableParameter operation): Initializing Parameter[64 x 1] <- 0.000000.
Node 'OutputNodes.W' (LearnableParameter operation): Initializing Parameter[10 x 64] <- gaussian(seed=5, range=0.025000*1.500000, onCPU=false).
Node 'OutputNodes.b' (LearnableParameter operation): Initializing Parameter[10] <- 0.000000.
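The gaussian initializers above print their scale as a product whose first factor is consistent with 0.2/sqrt(fanIn), with the second factor being the layer's initValueScale. The 0.2/sqrt(fanIn) convention is an inference from these numbers, not quoted from CNTK source; the check below reproduces the logged factors.

import math

for name, fan_in, value_scale in [("conv1_act.W", 5 * 5 * 3,  0.0043),
                                  ("conv2_act.W", 5 * 5 * 32, 1.414),
                                  ("conv3_act.W", 5 * 5 * 32, 1.414),
                                  ("h1.W",        3 * 3 * 64, 12.0),
                                  ("OutputNodes.W",       64, 1.5)]:
    print("%s: %.6f*%.6f" % (name, 0.2 / math.sqrt(fan_in), value_scale))
# Prints e.g. 0.023094*0.004300 for conv1_act.W, matching the log above.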
Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 34 nodes to process in pass 1.
@@ -326,7 +360,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1_d) : [10 x 64], [64 x 1
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *], [10] -> [10 x 1 x *]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
Validating network. 21 nodes to process in pass 2.
@@ -334,165 +368,183 @@ Validating network. 21 nodes to process in pass 2.
Validating network, final pass.
Using cuDNN convolution engine for geometry: Input: 32 x 32 x 3, Output: 32 x 32 x 32, Kernel: 5 x 5 x 3, Map: 1 x 1 x 32, Stride: 1 x 1 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
conv1_act.c: using cuDNN convolution engine for geometry: Input: 32 x 32 x 3, Output: 32 x 32 x 32, Kernel: 5 x 5 x 3, Map: 1 x 1 x 32, Stride: 1 x 1 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 32 x 32 x 32, Output: 15 x 15 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
pool1: using cuDNN convolution engine for geometry: Input: 32 x 32 x 32, Output: 15 x 15 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 15 x 15 x 32, Kernel: 5 x 5 x 32, Map: 1 x 1 x 32, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
conv2_act.c: using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 15 x 15 x 32, Kernel: 5 x 5 x 32, Map: 1 x 1 x 32, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 7 x 7 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
pool2: using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 7 x 7 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 7 x 7 x 32, Output: 7 x 7 x 64, Kernel: 5 x 5 x 32, Map: 1 x 1 x 64, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
conv3_act.c: using cuDNN convolution engine for geometry: Input: 7 x 7 x 32, Output: 7 x 7 x 64, Kernel: 5 x 5 x 32, Map: 1 x 1 x 64, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 7 x 7 x 64, Output: 3 x 3 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
pool3: using cuDNN convolution engine for geometry: Input: 7 x 7 x 64, Output: 3 x 3 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
13 out of 34 nodes do not share the minibatch layout with the input data.
Post-processing network complete.
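The geometry lines follow the usual convolution shape arithmetic: with AutoPad the spatial size is preserved at stride 1, and the unpadded 3 x 3, stride-2 pooling gives floor((n - 3)/2) + 1. A quick check against the sizes logged above, using the standard output-size formula (assumed rather than quoted from CNTK):

def out_dim(in_dim, kernel, stride, auto_pad):
    pad = kernel - 1 if auto_pad else 0   # total padding across both sides
    return (in_dim + pad - kernel) // stride + 1

print(out_dim(32, 5, 1, True))    # conv1: 32 -> 32
print(out_dim(32, 3, 2, False))   # pool1: 32 -> 15
print(out_dim(15, 3, 2, False))   # pool2: 15 -> 7
print(out_dim(7, 3, 2, False))    # pool3: 7 -> 3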
05/13/2016 15:10:48: Created model with 34 nodes on GPU 0.
08/16/2016 10:50:38: Created model with 34 nodes on GPU 0.
05/13/2016 15:10:48: Training criterion node(s):
05/13/2016 15:10:48: CE = CrossEntropyWithSoftmax
08/16/2016 10:50:38: Training criterion node(s):
08/16/2016 10:50:38: CE = CrossEntropyWithSoftmax
05/13/2016 15:10:48: Evaluation criterion node(s):
05/13/2016 15:10:48: Err = ClassificationError
08/16/2016 10:50:38: Evaluation criterion node(s):
08/16/2016 10:50:38: Err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
Memory Sharing: Out of 63 matrices, 38 are shared as 17, and 25 are not shared.
(nil): {[Err Gradient[1]] [featOffs Gradient[1 x 1]] [featScaled Gradient[32 x 32 x 3 x *]] [features Gradient[32 x 32 x 3 x *]] [labels Gradient[10 x *]] }
0x2485d28: {[OutputNodes.z Value[10 x 1 x *]] }
0x2485ee8: {[CE Value[1]] }
0x2486168: {[conv1_act.W Gradient[32 x 75]] [conv1_act.p Value[32 x 32 x 32 x *]] }
0x2486328: {[conv1_act.c Gradient[32 x 32 x 32 x *]] [conv1_act.y Value[32 x 32 x 32 x *]] }
0x24864e8: {[conv1_act.p Gradient[32 x 32 x 32 x *]] [pool1 Value[15 x 15 x 32 x *]] }
0x249a638: {[features Value[32 x 32 x 3 x *]] }
0x2975298: {[conv1_act.b Value[1 x 1 x 32]] }
0x2976b48: {[conv2_act.W Value[32 x 800]] }
0x2977ae8: {[conv2_act.b Value[1 x 1 x 32]] }
0x2979668: {[conv3_act.W Value[64 x 800]] }
0x2979f08: {[conv3_act.b Value[1 x 1 x 64]] }
0x297bae8: {[h1.W Value[64 x 3 x 3 x 64]] }
0x297c538: {[h1.b Value[64 x 1]] }
0x297d5c8: {[OutputNodes.W Value[10 x 64]] }
0x297ea98: {[OutputNodes.b Value[10]] }
0x2dd1458: {[featOffs Value[1 x 1]] }
0x2dd2678: {[labels Value[10 x *]] }
0x2dd2eb8: {[conv1_act.W Value[32 x 75]] }
0x7a59dd8: {[Err Value[1]] }
0x7a5d378: {[featScaled Value[32 x 32 x 3 x *]] }
0x7a5d6d8: {[conv1_act.c Value[32 x 32 x 32 x *]] }
0x7a5e478: {[conv2_act.c Value[15 x 15 x 32 x *]] }
0x7a5e638: {[conv1_act.b Gradient[1 x 1 x 32]] [conv1_act.y Gradient[32 x 32 x 32 x *]] }
0x7a5e7f8: {[conv2_act.W Gradient[32 x 800]] [conv2_act.p Value[15 x 15 x 32 x *]] }
0x7a7ade8: {[conv2_act.c Gradient[15 x 15 x 32 x *]] [conv2_act.y Value[15 x 15 x 32 x *]] }
0x7a7afa8: {[conv2_act.p Gradient[15 x 15 x 32 x *]] [pool1 Gradient[15 x 15 x 32 x *]] [pool2 Value[7 x 7 x 32 x *]] }
0x7a7b168: {[conv3_act.c Value[7 x 7 x 64 x *]] }
0x7a7b328: {[conv2_act.b Gradient[1 x 1 x 32]] [conv2_act.y Gradient[15 x 15 x 32 x *]] }
0x7a7b4e8: {[conv3_act.W Gradient[64 x 800]] [conv3_act.p Value[7 x 7 x 64 x *]] }
0x7a7b6a8: {[conv3_act.c Gradient[7 x 7 x 64 x *]] [conv3_act.y Value[7 x 7 x 64 x *]] }
0x7a7b868: {[conv3_act.p Gradient[7 x 7 x 64 x *]] [pool2 Gradient[7 x 7 x 32 x *]] [pool3 Value[3 x 3 x 64 x *]] }
0x7a7ba28: {[conv3_act.b Gradient[1 x 1 x 64]] [conv3_act.y Gradient[7 x 7 x 64 x *]] [h1.t Value[64 x *]] }
0x7a7bbe8: {[h1.W Gradient[64 x 3 x 3 x 64]] [h1.z Value[64 x 1 x *]] }
0x7a7bda8: {[h1.t Gradient[64 x *]] [h1.y Value[64 x 1 x *]] }
0x7a7bf68: {[h1_d Value[64 x 1 x *]] }
0x7a7c128: {[h1.z Gradient[64 x 1 x *]] [pool3 Gradient[3 x 3 x 64 x *]] }
0x7a7c2e8: {[OutputNodes.t Value[10 x 1 x *]] [h1.b Gradient[64 x 1]] [h1.y Gradient[64 x 1 x *]] }
0x7a7cdc8: {[CE Gradient[1]] }
0x7a7cf88: {[OutputNodes.W Gradient[10 x 64]] [OutputNodes.z Gradient[10 x 1 x *]] }
0x7a7d148: {[OutputNodes.t Gradient[10 x 1 x *]] }
0x7a7d308: {[OutputNodes.b Gradient[10]] }
0x7a7d4c8: {[h1_d Gradient[64 x 1 x *]] }
{ conv1_act.W : [32 x 75] (gradient)
conv1_act.p : [32 x 32 x 32 x *] }
{ conv1_act.c : [32 x 32 x 32 x *] (gradient)
conv1_act.y : [32 x 32 x 32 x *] }
{ conv1_act.p : [32 x 32 x 32 x *] (gradient)
pool1 : [15 x 15 x 32 x *] }
{ conv1_act.b : [1 x 1 x 32] (gradient)
conv1_act.y : [32 x 32 x 32 x *] (gradient) }
{ conv2_act.W : [32 x 800] (gradient)
conv2_act.p : [15 x 15 x 32 x *] }
{ conv2_act.c : [15 x 15 x 32 x *] (gradient)
conv2_act.y : [15 x 15 x 32 x *] }
{ conv2_act.p : [15 x 15 x 32 x *] (gradient)
pool1 : [15 x 15 x 32 x *] (gradient)
pool2 : [7 x 7 x 32 x *] }
{ conv2_act.b : [1 x 1 x 32] (gradient)
conv2_act.y : [15 x 15 x 32 x *] (gradient) }
{ conv3_act.W : [64 x 800] (gradient)
conv3_act.p : [7 x 7 x 64 x *] }
{ conv3_act.c : [7 x 7 x 64 x *] (gradient)
conv3_act.y : [7 x 7 x 64 x *] }
{ conv3_act.p : [7 x 7 x 64 x *] (gradient)
pool2 : [7 x 7 x 32 x *] (gradient)
pool3 : [3 x 3 x 64 x *] }
{ conv3_act.b : [1 x 1 x 64] (gradient)
conv3_act.y : [7 x 7 x 64 x *] (gradient)
h1.t : [64 x *] }
{ h1.W : [64 x 3 x 3 x 64] (gradient)
h1.z : [64 x 1 x *] }
{ h1.t : [64 x *] (gradient)
h1.y : [64 x 1 x *] }
{ h1.z : [64 x 1 x *] (gradient)
pool3 : [3 x 3 x 64 x *] (gradient) }
{ OutputNodes.t : [10 x 1 x *]
h1.b : [64 x 1] (gradient)
h1.y : [64 x 1 x *] (gradient) }
{ OutputNodes.W : [10 x 64] (gradient)
OutputNodes.z : [10 x 1 x *] (gradient) }
05/13/2016 15:10:48: No PreCompute nodes found, skipping PreCompute step.
05/13/2016 15:10:48: Starting Epoch 1: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
08/16/2016 10:50:38: Training 116906 parameters in 10 out of 10 parameter tensors and 29 nodes with gradient:
05/13/2016 15:10:48: Starting minibatch loop.
05/13/2016 15:10:51: Finished Epoch[ 1 of 10]: [Training] CE = 2.30242050 * 100; Err = 0.88000000 * 100; totalSamplesSeen = 100; learningRatePerSample = 0.00015625; epochTime=3.55904s
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.1'
08/16/2016 10:50:38: Node 'OutputNodes.W' (LearnableParameter operation) : [10 x 64]
08/16/2016 10:50:38: Node 'OutputNodes.b' (LearnableParameter operation) : [10]
08/16/2016 10:50:38: Node 'conv1_act.W' (LearnableParameter operation) : [32 x 75]
08/16/2016 10:50:38: Node 'conv1_act.b' (LearnableParameter operation) : [1 x 1 x 32]
08/16/2016 10:50:38: Node 'conv2_act.W' (LearnableParameter operation) : [32 x 800]
08/16/2016 10:50:38: Node 'conv2_act.b' (LearnableParameter operation) : [1 x 1 x 32]
08/16/2016 10:50:38: Node 'conv3_act.W' (LearnableParameter operation) : [64 x 800]
08/16/2016 10:50:38: Node 'conv3_act.b' (LearnableParameter operation) : [1 x 1 x 64]
08/16/2016 10:50:38: Node 'h1.W' (LearnableParameter operation) : [64 x 3 x 3 x 64]
08/16/2016 10:50:38: Node 'h1.b' (LearnableParameter operation) : [64 x 1]
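The "116906 parameters in 10 out of 10 parameter tensors" figure is simply the sum of the products of the ten shapes listed above:

import math

shapes = [(10, 64), (10,), (32, 75), (1, 1, 32), (32, 800),
          (1, 1, 32), (64, 800), (1, 1, 64), (64, 3, 3, 64), (64, 1)]
print(sum(math.prod(s) for s in shapes))   # 116906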
05/13/2016 15:10:51: Starting Epoch 2: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
08/16/2016 10:50:38: No PreCompute nodes found, or all already computed. Skipping pre-computation step.
05/13/2016 15:10:51: Starting minibatch loop.
05/13/2016 15:10:51: Finished Epoch[ 2 of 10]: [Training] CE = 2.30175842 * 100; Err = 0.94000000 * 100; totalSamplesSeen = 200; learningRatePerSample = 0.00015625; epochTime=0.011903s
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.2'
08/16/2016 10:50:38: Starting Epoch 1: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 0: frames [0..100] (first sequence at sample 0), data subset 0 of 1
05/13/2016 15:10:51: Starting Epoch 3: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
08/16/2016 10:50:38: Starting minibatch loop.
08/16/2016 10:50:41: Finished Epoch[ 1 of 10]: [Training] CE = 2.30223602 * 100; Err = 0.90000000 * 100; totalSamplesSeen = 100; learningRatePerSample = 0.00015625; epochTime=3.51082s
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.1'
05/13/2016 15:10:51: Starting minibatch loop.
05/13/2016 15:10:51: Finished Epoch[ 3 of 10]: [Training] CE = 2.30054413 * 100; Err = 0.90000000 * 100; totalSamplesSeen = 300; learningRatePerSample = 0.00015625; epochTime=0.012701s
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.3'
08/16/2016 10:50:41: Starting Epoch 2: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 1: frames [100..200] (first sequence at sample 100), data subset 0 of 1
05/13/2016 15:10:51: Starting Epoch 4: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
08/16/2016 10:50:41: Starting minibatch loop.
08/16/2016 10:50:41: Finished Epoch[ 2 of 10]: [Training] CE = 2.30189240 * 100; Err = 0.87000000 * 100; totalSamplesSeen = 200; learningRatePerSample = 0.00015625; epochTime=0.012555s
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.2'
05/13/2016 15:10:51: Starting minibatch loop.
05/13/2016 15:10:51: Finished Epoch[ 4 of 10]: [Training] CE = 2.30022812 * 100; Err = 0.88000000 * 100; totalSamplesSeen = 400; learningRatePerSample = 0.00015625; epochTime=0.01144s
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.4'
08/16/2016 10:50:41: Starting Epoch 3: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 2: frames [200..300] (first sequence at sample 200), data subset 0 of 1
05/13/2016 15:10:51: Starting Epoch 5: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
08/16/2016 10:50:41: Starting minibatch loop.
08/16/2016 10:50:41: Finished Epoch[ 3 of 10]: [Training] CE = 2.29965256 * 100; Err = 0.86000000 * 100; totalSamplesSeen = 300; learningRatePerSample = 0.00015625; epochTime=0.012394s
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.3'
05/13/2016 15:10:51: Starting minibatch loop.
05/13/2016 15:10:51: Finished Epoch[ 5 of 10]: [Training] CE = 2.29579636 * 100; Err = 0.87000000 * 100; totalSamplesSeen = 500; learningRatePerSample = 0.00015625; epochTime=0.011529s
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.5'
08/16/2016 10:50:41: Starting Epoch 4: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 3: frames [300..400] (first sequence at sample 300), data subset 0 of 1
08/16/2016 10:50:41: Starting minibatch loop.
08/16/2016 10:50:41: Finished Epoch[ 4 of 10]: [Training] CE = 2.29966064 * 100; Err = 0.91000000 * 100; totalSamplesSeen = 400; learningRatePerSample = 0.00015625; epochTime=0.0124s
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.4'
08/16/2016 10:50:41: Starting Epoch 5: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 4: frames [400..500] (first sequence at sample 400), data subset 0 of 1
08/16/2016 10:50:41: Starting minibatch loop.
08/16/2016 10:50:41: Finished Epoch[ 5 of 10]: [Training] CE = 2.30450394 * 100; Err = 0.94000000 * 100; totalSamplesSeen = 500; learningRatePerSample = 0.00015625; epochTime=0.012302s
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.5'
Setting dropout rate to 0.5.
05/13/2016 15:10:51: Starting Epoch 6: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
08/16/2016 10:50:41: Starting Epoch 6: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 5: frames [500..600] (first sequence at sample 500), data subset 0 of 1
05/13/2016 15:10:51: Starting minibatch loop.
08/16/2016 10:50:41: Starting minibatch loop.
(GPU): creating curand object with seed 5
05/13/2016 15:10:51: Finished Epoch[ 6 of 10]: [Training] CE = 2.30121231 * 100; Err = 0.84000000 * 100; totalSamplesSeen = 600; learningRatePerSample = 0.00015625; epochTime=0.012276s
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.6'
08/16/2016 10:50:41: Finished Epoch[ 6 of 10]: [Training] CE = 2.29013916 * 100; Err = 0.81000000 * 100; totalSamplesSeen = 600; learningRatePerSample = 0.00015625; epochTime=0.012412s
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.6'
05/13/2016 15:10:51: Starting Epoch 7: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
08/16/2016 10:50:41: Starting Epoch 7: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 6: frames [600..700] (first sequence at sample 600), data subset 0 of 1
05/13/2016 15:10:51: Starting minibatch loop.
08/16/2016 10:50:41: Starting minibatch loop.
(GPU): creating curand object with seed 6
05/13/2016 15:10:52: Finished Epoch[ 7 of 10]: [Training] CE = 2.28975647 * 100; Err = 0.93000000 * 100; totalSamplesSeen = 700; learningRatePerSample = 0.00015625; epochTime=0.011495s
05/13/2016 15:10:52: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.7'
08/16/2016 10:50:41: Finished Epoch[ 7 of 10]: [Training] CE = 2.29815765 * 100; Err = 0.93000000 * 100; totalSamplesSeen = 700; learningRatePerSample = 0.00015625; epochTime=0.012303s
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.7'
05/13/2016 15:10:52: Starting Epoch 8: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
08/16/2016 10:50:41: Starting Epoch 8: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 7: frames [700..800] (first sequence at sample 700), data subset 0 of 1
05/13/2016 15:10:52: Starting minibatch loop.
08/16/2016 10:50:41: Starting minibatch loop.
(GPU): creating curand object with seed 7
05/13/2016 15:10:52: Finished Epoch[ 8 of 10]: [Training] CE = 2.29035095 * 100; Err = 0.91000000 * 100; totalSamplesSeen = 800; learningRatePerSample = 0.00015625; epochTime=0.012157s
05/13/2016 15:10:52: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.8'
08/16/2016 10:50:41: Finished Epoch[ 8 of 10]: [Training] CE = 2.28805603 * 100; Err = 0.89000000 * 100; totalSamplesSeen = 800; learningRatePerSample = 0.00015625; epochTime=0.012517s
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.8'
05/13/2016 15:10:52: Starting Epoch 9: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
08/16/2016 10:50:41: Starting Epoch 9: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 8: frames [800..900] (first sequence at sample 800), data subset 0 of 1
05/13/2016 15:10:52: Starting minibatch loop.
08/16/2016 10:50:41: Starting minibatch loop.
(GPU): creating curand object with seed 8
05/13/2016 15:10:52: Finished Epoch[ 9 of 10]: [Training] CE = 2.29797729 * 100; Err = 0.87000000 * 100; totalSamplesSeen = 900; learningRatePerSample = 0.00015625; epochTime=0.011451s
05/13/2016 15:10:52: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.9'
08/16/2016 10:50:41: Finished Epoch[ 9 of 10]: [Training] CE = 2.29380524 * 100; Err = 0.88000000 * 100; totalSamplesSeen = 900; learningRatePerSample = 0.00015625; epochTime=0.012463s
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.9'
05/13/2016 15:10:52: Starting Epoch 10: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
08/16/2016 10:50:41: Starting Epoch 10: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
BlockRandomizer::StartEpoch: epoch 9: frames [900..1000] (first sequence at sample 900), data subset 0 of 1
05/13/2016 15:10:52: Starting minibatch loop.
08/16/2016 10:50:41: Starting minibatch loop.
(GPU): creating curand object with seed 9
05/13/2016 15:10:52: Finished Epoch[10 of 10]: [Training] CE = 2.29764435 * 100; Err = 0.87000000 * 100; totalSamplesSeen = 1000; learningRatePerSample = 0.00015625; epochTime=0.012689s
05/13/2016 15:10:52: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution'
05/13/2016 15:10:52: CNTKCommandTrainEnd: Train
08/16/2016 10:50:41: Finished Epoch[10 of 10]: [Training] CE = 2.27814423 * 100; Err = 0.87000000 * 100; totalSamplesSeen = 1000; learningRatePerSample = 0.00015625; epochTime=0.012432s
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution'
08/16/2016 10:50:41: CNTKCommandTrainEnd: Train
05/13/2016 15:10:52: Action "train" complete.
08/16/2016 10:50:41: Action "train" complete.
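Each epoch header pairs "effective momentum = 0.900000" with "momentum as time constant = 607.4 samples". Treating momentum as a per-sample decay exp(-1/T) compounded over one minibatch reproduces both numbers and implies a training minibatch size of 64; that size is derived from the printed values here, since it is not shown in this excerpt.

import math

minibatch_size, effective_momentum = 64, 0.9
time_constant = -minibatch_size / math.log(effective_momentum)
print(round(time_constant, 1))   # 607.4 samples, as logged above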
05/13/2016 15:10:52: ##############################################################################
05/13/2016 15:10:52: # #
05/13/2016 15:10:52: # Action "test" #
05/13/2016 15:10:52: # #
05/13/2016 15:10:52: ##############################################################################
08/16/2016 10:50:41: ##############################################################################
08/16/2016 10:50:41: # #
08/16/2016 10:50:41: # Action "test" #
08/16/2016 10:50:41: # #
08/16/2016 10:50:41: ##############################################################################
Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 34 nodes to process in pass 1.
@@ -530,7 +582,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1_d) : [10 x 64], [64 x 1
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *1], [10] -> [10 x 1 x *1]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating network. 21 nodes to process in pass 2.
@@ -538,17 +590,17 @@ Validating network. 21 nodes to process in pass 2.
Validating network, final pass.
Using cuDNN convolution engine for geometry: Input: 32 x 32 x 3, Output: 32 x 32 x 32, Kernel: 5 x 5 x 3, Map: 1 x 1 x 32, Stride: 1 x 1 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
conv1_act.c: using cuDNN convolution engine for geometry: Input: 32 x 32 x 3, Output: 32 x 32 x 32, Kernel: 5 x 5 x 3, Map: 1 x 1 x 32, Stride: 1 x 1 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 32 x 32 x 32, Output: 15 x 15 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
pool1: using cuDNN convolution engine for geometry: Input: 32 x 32 x 32, Output: 15 x 15 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 15 x 15 x 32, Kernel: 5 x 5 x 32, Map: 1 x 1 x 32, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
conv2_act.c: using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 15 x 15 x 32, Kernel: 5 x 5 x 32, Map: 1 x 1 x 32, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 7 x 7 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
pool2: using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 7 x 7 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 7 x 7 x 32, Output: 7 x 7 x 64, Kernel: 5 x 5 x 32, Map: 1 x 1 x 64, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
conv3_act.c: using cuDNN convolution engine for geometry: Input: 7 x 7 x 32, Output: 7 x 7 x 64, Kernel: 5 x 5 x 32, Map: 1 x 1 x 64, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
Using cuDNN convolution engine for geometry: Input: 7 x 7 x 64, Output: 3 x 3 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
pool3: using cuDNN convolution engine for geometry: Input: 7 x 7 x 64, Output: 3 x 3 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
13 out of 34 nodes do not share the minibatch layout with the input data.
@@ -560,46 +612,14 @@ evalNodeNames are not specified, using all the default evalnodes and training cr
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
Memory Sharing: Out of 34 matrices, 0 are shared as 0, and 34 are not shared.
(nil): {[CE Gradient[1]] [Err Gradient[1]] [OutputNodes.W Gradient[10 x 64]] [OutputNodes.b Gradient[10]] [OutputNodes.t Gradient[10 x 1 x *1]] [OutputNodes.z Gradient[10 x 1 x *1]] [conv1_act.W Gradient[32 x 75]] [conv1_act.b Gradient[1 x 1 x 32]] [conv1_act.c Gradient[32 x 32 x 32 x *1]] [conv1_act.p Gradient[32 x 32 x 32 x *1]] [conv1_act.y Gradient[32 x 32 x 32 x *1]] [conv2_act.W Gradient[32 x 800]] [conv2_act.b Gradient[1 x 1 x 32]] [conv2_act.c Gradient[15 x 15 x 32 x *1]] [conv2_act.p Gradient[15 x 15 x 32 x *1]] [conv2_act.y Gradient[15 x 15 x 32 x *1]] [conv3_act.W Gradient[64 x 800]] [conv3_act.b Gradient[1 x 1 x 64]] [conv3_act.c Gradient[7 x 7 x 64 x *1]] [conv3_act.p Gradient[7 x 7 x 64 x *1]] [conv3_act.y Gradient[7 x 7 x 64 x *1]] [featOffs Gradient[1 x 1]] [featScaled Gradient[32 x 32 x 3 x *1]] [features Gradient[32 x 32 x 3 x *1]] [h1.W Gradient[64 x 3 x 3 x 64]] [h1.b Gradient[64 x 1]] [h1.t Gradient[64 x *1]] [h1.y Gradient[64 x 1 x *1]] [h1.z Gradient[64 x 1 x *1]] [h1_d Gradient[64 x 1 x *1]] [labels Gradient[10 x *1]] [pool1 Gradient[15 x 15 x 32 x *1]] [pool2 Gradient[7 x 7 x 32 x *1]] [pool3 Gradient[3 x 3 x 64 x *1]] }
0x7fc883e04ba8: {[conv1_act.b Value[1 x 1 x 32]] }
0x7fc883e05fc8: {[conv1_act.W Value[32 x 75]] }
0x7fc883e06768: {[conv2_act.b Value[1 x 1 x 32]] }
0x7fc883e06928: {[conv2_act.W Value[32 x 800]] }
0x7fc883e085b8: {[conv3_act.b Value[1 x 1 x 64]] }
0x7fc883e09528: {[conv3_act.W Value[64 x 800]] }
0x7fc883e0b568: {[featOffs Value[1 x 1]] }
0x7fc883e0c1e8: {[features Value[32 x 32 x 3 x *1]] }
0x7fc883e0cc38: {[h1.b Value[64 x 1]] }
0x7fc883e0cf08: {[h1.W Value[64 x 3 x 3 x 64]] }
0x7fc883e0eb48: {[labels Value[10 x *1]] }
0x7fc883e0f558: {[OutputNodes.b Value[10]] }
0x7fc883e10068: {[OutputNodes.W Value[10 x 64]] }
0x7fc883e286b8: {[Err Value[1]] }
0x7fc883e2bd28: {[CE Value[1]] }
0x7fc883e2bfa8: {[conv1_act.y Value[32 x 32 x 32 x *1]] }
0x7fc883e54728: {[conv1_act.c Value[32 x 32 x 32 x *1]] }
0x7fc883e54a88: {[featScaled Value[32 x 32 x 3 x *1]] }
0x7fc883e54c18: {[conv1_act.p Value[32 x 32 x 32 x *1]] }
0x7fc883e71a78: {[pool1 Value[15 x 15 x 32 x *1]] }
0x7fc883e71c38: {[conv2_act.c Value[15 x 15 x 32 x *1]] }
0x7fc883e71fb8: {[conv2_act.p Value[15 x 15 x 32 x *1]] }
0x7fc883e72178: {[conv2_act.y Value[15 x 15 x 32 x *1]] }
0x7fc883e72338: {[pool2 Value[7 x 7 x 32 x *1]] }
0x7fc883e724f8: {[conv3_act.c Value[7 x 7 x 64 x *1]] }
0x7fc883e72878: {[conv3_act.p Value[7 x 7 x 64 x *1]] }
0x7fc883e72a38: {[conv3_act.y Value[7 x 7 x 64 x *1]] }
0x7fc883e72bf8: {[pool3 Value[3 x 3 x 64 x *1]] }
0x7fc883e72db8: {[h1.t Value[64 x *1]] }
0x7fc883e72f78: {[h1.z Value[64 x 1 x *1]] }
0x7fc883e73138: {[h1.y Value[64 x 1 x *1]] }
0x7fc883e732f8: {[h1_d Value[64 x 1 x *1]] }
0x7fc883e73678: {[OutputNodes.t Value[10 x 1 x *1]] }
0x7fc883e73838: {[OutputNodes.z Value[10 x 1 x *1]] }
05/13/2016 15:10:58: Final Results: Minibatch[1-625]: Err = 0.86430000 * 10000; CE = 2.28476029 * 10000; perplexity = 9.82333117
BlockRandomizer::StartEpoch: epoch 0: frames [0..10000] (first sequence at sample 0), data subset 0 of 1
08/16/2016 10:50:43: Minibatch[1-500]: Err = 0.86125000 * 8000; CE = 2.28389484 * 8000
08/16/2016 10:50:43: Minibatch[501-625]: Err = 0.86350000 * 2000; CE = 2.28027481 * 2000
08/16/2016 10:50:43: Final Results: Minibatch[1-625]: Err = 0.86170000 * 10000; CE = 2.28317084 * 10000; perplexity = 9.80772986
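The final "perplexity" is consistent with exp(average cross entropy per sample); the exp(CE) relation is inferred from the printed numbers rather than quoted from CNTK source.

import math
print(math.exp(2.28317084))   # ~9.8077, matching perplexity = 9.80772986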
05/13/2016 15:10:58: Action "test" complete.
08/16/2016 10:50:43: Action "test" complete.
05/13/2016 15:10:58: __COMPLETED__
08/16/2016 10:50:43: __COMPLETED__

View file

@@ -286,7 +286,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 34 nodes to process in pass 1.
@@ -324,7 +324,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1_d) : [10 x 64], [64 x 1
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *], [10] -> [10 x 1 x *]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
Validating network. 21 nodes to process in pass 2.
@@ -356,7 +356,7 @@ Post-processing network complete.
05/13/2016 08:17:53: Evaluation criterion node(s):
05/13/2016 08:17:53: Err = ClassificationError
05/13/2016 08:17:53: Err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
@@ -490,7 +490,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 34 nodes to process in pass 1.
@@ -528,7 +528,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1_d) : [10 x 64], [64 x 1
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *1], [10] -> [10 x 1 x *1]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
Validating network. 21 nodes to process in pass 2.

View file

@@ -291,7 +291,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 45 nodes to process in pass 1.
@@ -340,7 +340,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1.y) : [10 x 64], [64 x *]
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating network. 20 nodes to process in pass 2.
@@ -380,7 +380,7 @@ Post-processing network complete.
05/13/2016 15:10:59: Evaluation criterion node(s):
05/13/2016 15:10:59: Err = ClassificationError
05/13/2016 15:10:59: Err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
@@ -491,7 +491,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 45 nodes to process in pass 1.
@@ -540,7 +540,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1.y) : [10 x 64], [64 x *1
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating network. 20 nodes to process in pass 2.

View file

@@ -289,7 +289,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 45 nodes to process in pass 1.
@@ -338,7 +338,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1.y) : [10 x 64], [64 x *]
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating network. 20 nodes to process in pass 2.
@@ -378,7 +378,7 @@ Post-processing network complete.
05/13/2016 08:18:26: Evaluation criterion node(s):
05/13/2016 08:18:26: Err = ClassificationError
05/13/2016 08:18:26: Err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
@@ -489,7 +489,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 45 nodes to process in pass 1.
@@ -538,7 +538,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1.y) : [10 x 64], [64 x *1
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating network. 20 nodes to process in pass 2.

View file

@@ -356,7 +356,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 184 nodes to process in pass 1.
@@ -546,7 +546,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating network. 75 nodes to process in pass 2.
@@ -652,7 +652,7 @@ Post-processing network complete.
05/03/2016 18:13:08: Evaluation criterion node(s):
05/03/2016 18:13:08: Err = ClassificationError
05/03/2016 18:13:08: Err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
@@ -907,7 +907,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 184 nodes to process in pass 1.
@@ -1095,7 +1095,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating network. 75 nodes to process in pass 2.

View file

@@ -354,7 +354,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 184 nodes to process in pass 1.
@@ -544,7 +544,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating network. 75 nodes to process in pass 2.
@@ -650,7 +650,7 @@ Post-processing network complete.
05/03/2016 14:04:12: Evaluation criterion node(s):
05/03/2016 14:04:12: Err = ClassificationError
05/03/2016 14:04:12: Err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
@@ -905,7 +905,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 184 nodes to process in pass 1.
@@ -1093,7 +1093,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating network. 75 nodes to process in pass 2.

View file

@@ -356,7 +356,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 949 nodes to process in pass 1.
@@ -1311,7 +1311,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating network. 390 nodes to process in pass 2.
@@ -1777,7 +1777,7 @@ Post-processing network complete.
05/03/2016 18:17:55: Evaluation criterion node(s):
05/03/2016 18:17:55: Err = ClassificationError
05/03/2016 18:17:55: Err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
@@ -2932,7 +2932,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 949 nodes to process in pass 1.
@@ -3885,7 +3885,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating network. 390 nodes to process in pass 2.

View file

@@ -354,7 +354,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 949 nodes to process in pass 1.
@@ -1309,7 +1309,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating network. 390 nodes to process in pass 2.
@@ -1775,7 +1775,7 @@ Post-processing network complete.
05/03/2016 14:05:00: Evaluation criterion node(s):
05/03/2016 14:05:00: Err = ClassificationError
05/03/2016 14:05:00: Err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
@@ -2930,7 +2930,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 949 nodes to process in pass 1.
@@ -3883,7 +3883,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating network. 390 nodes to process in pass 2.

View file

@@ -282,7 +282,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 32 nodes to process in pass 1.
@@ -318,7 +318,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, conv4.y) : [10 x 7 x 7 x 32
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating network. 19 nodes to process in pass 2.
@@ -350,7 +350,7 @@ Post-processing network complete.
05/13/2016 15:11:11: Evaluation criterion node(s):
05/13/2016 15:11:11: Err = ClassificationError
05/13/2016 15:11:11: Err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
@@ -446,7 +446,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 32 nodes to process in pass 1.
@@ -482,7 +482,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, conv4.y) : [10 x 7 x 7 x 32
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating network. 19 nodes to process in pass 2.

View file

@@ -280,7 +280,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 32 nodes to process in pass 1.
@@ -316,7 +316,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, conv4.y) : [10 x 7 x 7 x 32
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
Validating network. 19 nodes to process in pass 2.
@@ -348,7 +348,7 @@ Post-processing network complete.
05/13/2016 08:19:02: Evaluation criterion node(s):
05/13/2016 08:19:02: Err = ClassificationError
05/13/2016 08:19:02: Err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
@@ -444,7 +444,7 @@ Post-processing network...
3 roots:
CE = CrossEntropyWithSoftmax()
Err = ClassificationError()
Err = ErrorPrediction()
OutputNodes.z = Plus()
Validating network. 32 nodes to process in pass 1.
@@ -480,7 +480,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, conv4.y) : [10 x 7 x 7 x 32
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
Validating network. 19 nodes to process in pass 2.

View file

@@ -68,7 +68,7 @@ Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -169,7 +169,7 @@ Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -302,7 +302,7 @@ configparameters: Multigpu.cntk:Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
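layerSizes = 2:50*2:2 describes a 2-input network with two sigmoid hidden layers of 50 units and 2 outputs, with mean/variance normalization applied to the features, matching the W0/H1/W1/HLast nodes validated below. Here is a NumPy sketch of that forward pass with random stand-in weights; the shapes follow the validation lines, but the code is illustrative, not SimpleNetworkBuilder's own implementation.

import numpy as np

rng = np.random.default_rng(0)
sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))

def forward(x, mean, inv_std, layers):
    h = (x - mean) * inv_std              # MVNormalizedFeatures
    for W, b in layers[:-1]:              # sigmoid hidden layers
        h = sigmoid(W @ h + b)
    W, b = layers[-1]
    return W @ h + b                      # HLast: pre-softmax logits

layers = [(rng.standard_normal((50, 2)),  np.zeros((50, 1))),   # W0, B0
          (rng.standard_normal((50, 50)), np.zeros((50, 1))),   # W1, B1
          (rng.standard_normal((2, 50)),  np.zeros((2, 1)))]    # W2, B2
x = rng.standard_normal((2, 1))
print(forward(x, 0.0, 1.0, layers).shape)   # (2, 1); zero-mean/unit-scale stand-ins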
@@ -370,7 +370,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -399,7 +399,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -423,14 +423,14 @@ Post-processing network complete.
05/03/2016 15:21:43: Evaluation criterion node(s):
05/03/2016 15:21:43: EvalClassificationError = ClassificationError
05/03/2016 15:21:43: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
(nil): {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
(nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
0x1abc7c8: {[InvStdOfFeatures Value[2]] }
0x1b40348: {[features Value[2 x *]] }
0x1b408b8: {[MeanOfFeatures Value[2]] }
@@ -443,7 +443,7 @@ Memory Sharing Structure:
0x1b46708: {[labels Value[2 x *]] }
0x1b473e8: {[Prior Value[2]] }
0x1b4b138: {[ScaledLogLikelihood Value[2 x 1 x *]] }
0x1b4cc28: {[EvalClassificationError Value[1]] }
0x1b4cc28: {[EvalErrorPrediction Value[1]] }
0x1b4cea8: {[CrossEntropyWithSoftmax Value[1]] }
0x1b4d388: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
0x1b4d548: {[W0*features+B0 Gradient[50 x 1 x *]] [W1*H1 Value[50 x 1 x *]] }
@@ -473,139 +473,139 @@ Memory Sharing Structure:
05/03/2016 15:21:44: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:21:44: Starting minibatch loop.
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69966235 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0538s; samplesPerSecond = 4647.4
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70639648 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.1073s; samplesPerSecond = 2329.6
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70470264 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0631s; samplesPerSecond = 3961.3
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69813501 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0747s; samplesPerSecond = 3346.9
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73551416 * 250; EvalClassificationError = 0.57600000 * 250; time = 0.0900s; samplesPerSecond = 2778.4
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72432324 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0605s; samplesPerSecond = 4135.0
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73327588 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0619s; samplesPerSecond = 4039.0
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70092627 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0769s; samplesPerSecond = 3249.9
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72354980 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0799s; samplesPerSecond = 3129.0
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72148096 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0620s; samplesPerSecond = 4031.5
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69814941 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.1278s; samplesPerSecond = 1955.9
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70699121 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0821s; samplesPerSecond = 3044.1
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69898437 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0755s; samplesPerSecond = 3312.4
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71712695 * 250; EvalClassificationError = 0.54000000 * 250; time = 0.0657s; samplesPerSecond = 3804.8
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69470703 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.1049s; samplesPerSecond = 2382.9
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71375879 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.1180s; samplesPerSecond = 2117.9
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70381641 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.1065s; samplesPerSecond = 2347.9
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71748633 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.2709s; samplesPerSecond = 922.9
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71863281 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1375s; samplesPerSecond = 1818.4
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70715234 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.1143s; samplesPerSecond = 2186.6
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70401074 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.1079s; samplesPerSecond = 2317.1
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70599414 * 250; EvalClassificationError = 0.48400000 * 250; time = 0.0917s; samplesPerSecond = 2727.7
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69628711 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0923s; samplesPerSecond = 2707.6
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75920898 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0887s; samplesPerSecond = 2819.0
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70542578 * 250; EvalClassificationError = 0.43600000 * 250; time = 0.0634s; samplesPerSecond = 3945.8
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70643945 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0885s; samplesPerSecond = 2823.7
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72481641 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0601s; samplesPerSecond = 4162.6
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71133594 * 250; EvalClassificationError = 0.55600000 * 250; time = 0.0630s; samplesPerSecond = 3968.1
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68605664 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0849s; samplesPerSecond = 2944.1
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69535352 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0879s; samplesPerSecond = 2844.6
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.68741797 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0752s; samplesPerSecond = 3325.7
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.67916406 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0958s; samplesPerSecond = 2610.3
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.67841992 * 250; EvalClassificationError = 0.44800000 * 250; time = 0.1009s; samplesPerSecond = 2478.7
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68038477 * 250; EvalClassificationError = 0.49200000 * 250; time = 0.1607s; samplesPerSecond = 1555.6
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.61937109 * 250; EvalClassificationError = 0.30400000 * 250; time = 0.1131s; samplesPerSecond = 2211.4
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.57844141 * 250; EvalClassificationError = 0.27200000 * 250; time = 0.1047s; samplesPerSecond = 2388.5
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.49124023 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0896s; samplesPerSecond = 2791.5
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.39071289 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0727s; samplesPerSecond = 3438.8
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.27650586 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.2624s; samplesPerSecond = 952.6
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.26430078 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0842s; samplesPerSecond = 2967.7
05/03/2016 15:21:47: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.66664150 * 10000; EvalClassificationError = 0.44430000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=3.93174s
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69966235 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0538s; samplesPerSecond = 4647.4
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70639648 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.1073s; samplesPerSecond = 2329.6
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70470264 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0631s; samplesPerSecond = 3961.3
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69813501 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0747s; samplesPerSecond = 3346.9
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73551416 * 250; EvalErrorPrediction = 0.57600000 * 250; time = 0.0900s; samplesPerSecond = 2778.4
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72432324 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0605s; samplesPerSecond = 4135.0
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73327588 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0619s; samplesPerSecond = 4039.0
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70092627 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0769s; samplesPerSecond = 3249.9
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72354980 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0799s; samplesPerSecond = 3129.0
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72148096 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0620s; samplesPerSecond = 4031.5
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69814941 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.1278s; samplesPerSecond = 1955.9
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70699121 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0821s; samplesPerSecond = 3044.1
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69898437 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0755s; samplesPerSecond = 3312.4
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71712695 * 250; EvalErrorPrediction = 0.54000000 * 250; time = 0.0657s; samplesPerSecond = 3804.8
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69470703 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.1049s; samplesPerSecond = 2382.9
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71375879 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.1180s; samplesPerSecond = 2117.9
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70381641 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.1065s; samplesPerSecond = 2347.9
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71748633 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.2709s; samplesPerSecond = 922.9
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71863281 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.1375s; samplesPerSecond = 1818.4
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70715234 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.1143s; samplesPerSecond = 2186.6
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70401074 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.1079s; samplesPerSecond = 2317.1
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70599414 * 250; EvalErrorPrediction = 0.48400000 * 250; time = 0.0917s; samplesPerSecond = 2727.7
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69628711 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0923s; samplesPerSecond = 2707.6
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75920898 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0887s; samplesPerSecond = 2819.0
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70542578 * 250; EvalErrorPrediction = 0.43600000 * 250; time = 0.0634s; samplesPerSecond = 3945.8
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70643945 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0885s; samplesPerSecond = 2823.7
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72481641 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0601s; samplesPerSecond = 4162.6
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71133594 * 250; EvalErrorPrediction = 0.55600000 * 250; time = 0.0630s; samplesPerSecond = 3968.1
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68605664 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0849s; samplesPerSecond = 2944.1
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69535352 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0879s; samplesPerSecond = 2844.6
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.68741797 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0752s; samplesPerSecond = 3325.7
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.67916406 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0958s; samplesPerSecond = 2610.3
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.67841992 * 250; EvalErrorPrediction = 0.44800000 * 250; time = 0.1009s; samplesPerSecond = 2478.7
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68038477 * 250; EvalErrorPrediction = 0.49200000 * 250; time = 0.1607s; samplesPerSecond = 1555.6
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.61937109 * 250; EvalErrorPrediction = 0.30400000 * 250; time = 0.1131s; samplesPerSecond = 2211.4
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.57844141 * 250; EvalErrorPrediction = 0.27200000 * 250; time = 0.1047s; samplesPerSecond = 2388.5
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.49124023 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0896s; samplesPerSecond = 2791.5
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.39071289 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0727s; samplesPerSecond = 3438.8
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.27650586 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.2624s; samplesPerSecond = 952.6
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.26430078 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0842s; samplesPerSecond = 2967.7
05/03/2016 15:21:47: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.66664150 * 10000; EvalErrorPrediction = 0.44430000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=3.93174s
05/03/2016 15:21:47: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn.1'
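Each minibatch line above is a per-sample average scaled by its sample count, and the epoch summary is the sample-weighted mean of those averages; samplesPerSecond is simply the sample count divided by the reported time. A quick sanity check against the first epoch-1 line (the printed time is rounded, so the rate only matches approximately):

samples = 250                  # every line above covers 250 samples
print(samples / 0.0538)        # -> ~4646.8, vs. the logged 4647.4 (time is rounded)

# Likewise, "EvalErrorPrediction = 0.44430000 * 10000" in the epoch summary is
# sum(rate_i * 250) over the 40 minibatch lines, divided by the 10000 samples.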
05/03/2016 15:21:47: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:21:47: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
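NumGradientBits = 1 in the line above selects the 1-bit-SGD data-parallel mode: each worker quantizes its gradient to a single bit per value before exchange and carries the quantization error forward into the next minibatch. The sketch below only illustrates that error-feedback idea under simple assumptions (a single per-tensor reconstruction magnitude); CNTK's actual quantization, tiling, and aggregation differ.

import numpy as np

def one_bit_quantize(grad, residual):
    carried = grad + residual                   # add back last step's quantization error
    scale = np.mean(np.abs(carried))            # one reconstruction magnitude (an assumption)
    quantized = np.where(carried >= 0, scale, -scale)
    return quantized, carried - quantized       # new residual = error feedback

rng = np.random.default_rng(0)
residual = np.zeros((4, 4))
for step in range(3):
    grad = rng.normal(size=(4, 4))
    update, residual = one_bit_quantize(grad, residual)
    # "update" is what a worker would exchange; "residual" stays local.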
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.20720006 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0545s; samplesPerSecond = 4583.4
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.19690290 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0641s; samplesPerSecond = 3899.7
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.16064646 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0770s; samplesPerSecond = 3247.1
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13547171 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0640s; samplesPerSecond = 3904.2
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.18000261 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0732s; samplesPerSecond = 3413.6
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17787841 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0790s; samplesPerSecond = 3164.0
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16821879 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0880s; samplesPerSecond = 2839.4
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16363456 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0854s; samplesPerSecond = 2926.8
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.19533907 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0774s; samplesPerSecond = 3228.6
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19318692 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0820s; samplesPerSecond = 3049.5
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.12726279 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0766s; samplesPerSecond = 3261.6
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.18620067 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0773s; samplesPerSecond = 3235.5
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.11547500 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0797s; samplesPerSecond = 3136.6
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16675950 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0833s; samplesPerSecond = 2999.8
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.15807389 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0822s; samplesPerSecond = 3042.5
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18389093 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0726s; samplesPerSecond = 3443.0
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18269750 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0897s; samplesPerSecond = 2787.7
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18737841 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0963s; samplesPerSecond = 2597.3
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20174757 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0811s; samplesPerSecond = 3081.1
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13336708 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0732s; samplesPerSecond = 3414.6
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13851332 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0879s; samplesPerSecond = 2843.0
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15422288 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0821s; samplesPerSecond = 3044.3
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15478799 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0815s; samplesPerSecond = 3069.2
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14530201 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0810s; samplesPerSecond = 3086.3
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12192809 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.2596s; samplesPerSecond = 962.9
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.13975597 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0569s; samplesPerSecond = 4394.5
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12566363 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0911s; samplesPerSecond = 2744.6
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18963051 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0765s; samplesPerSecond = 3267.2
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17955467 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0914s; samplesPerSecond = 2736.4
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18862103 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0772s; samplesPerSecond = 3236.7
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17503073 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0775s; samplesPerSecond = 3225.8
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14741998 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0774s; samplesPerSecond = 3230.1
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13803981 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0726s; samplesPerSecond = 3443.0
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.14139232 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0820s; samplesPerSecond = 3048.4
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13886877 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0766s; samplesPerSecond = 3264.1
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.15025864 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0852s; samplesPerSecond = 2933.5
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14659342 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0903s; samplesPerSecond = 2767.4
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13078795 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0784s; samplesPerSecond = 3187.6
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19832882 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0772s; samplesPerSecond = 3240.4
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15828904 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0721s; samplesPerSecond = 3468.7
05/03/2016 15:21:51: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16210811 * 10000; EvalClassificationError = 0.07480000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=3.34279s
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.20720006 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0545s; samplesPerSecond = 4583.4
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.19690290 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0641s; samplesPerSecond = 3899.7
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.16064646 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0770s; samplesPerSecond = 3247.1
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13547171 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0640s; samplesPerSecond = 3904.2
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.18000261 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0732s; samplesPerSecond = 3413.6
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17787841 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0790s; samplesPerSecond = 3164.0
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16821879 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0880s; samplesPerSecond = 2839.4
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16363456 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0854s; samplesPerSecond = 2926.8
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.19533907 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0774s; samplesPerSecond = 3228.6
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19318692 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0820s; samplesPerSecond = 3049.5
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.12726279 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0766s; samplesPerSecond = 3261.6
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.18620067 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0773s; samplesPerSecond = 3235.5
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.11547500 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0797s; samplesPerSecond = 3136.6
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16675950 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0833s; samplesPerSecond = 2999.8
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.15807389 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0822s; samplesPerSecond = 3042.5
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18389093 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0726s; samplesPerSecond = 3443.0
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18269750 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0897s; samplesPerSecond = 2787.7
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18737841 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0963s; samplesPerSecond = 2597.3
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20174757 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0811s; samplesPerSecond = 3081.1
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13336708 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0732s; samplesPerSecond = 3414.6
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13851332 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0879s; samplesPerSecond = 2843.0
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15422288 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0821s; samplesPerSecond = 3044.3
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15478799 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0815s; samplesPerSecond = 3069.2
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14530201 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0810s; samplesPerSecond = 3086.3
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12192809 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.2596s; samplesPerSecond = 962.9
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.13975597 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0569s; samplesPerSecond = 4394.5
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12566363 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0911s; samplesPerSecond = 2744.6
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18963051 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0765s; samplesPerSecond = 3267.2
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17955467 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0914s; samplesPerSecond = 2736.4
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18862103 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0772s; samplesPerSecond = 3236.7
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17503073 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0775s; samplesPerSecond = 3225.8
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14741998 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0774s; samplesPerSecond = 3230.1
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13803981 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0726s; samplesPerSecond = 3443.0
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.14139232 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0820s; samplesPerSecond = 3048.4
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13886877 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0766s; samplesPerSecond = 3264.1
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.15025864 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0852s; samplesPerSecond = 2933.5
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14659342 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0903s; samplesPerSecond = 2767.4
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13078795 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0784s; samplesPerSecond = 3187.6
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19832882 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0772s; samplesPerSecond = 3240.4
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15828904 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0721s; samplesPerSecond = 3468.7
05/03/2016 15:21:51: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16210811 * 10000; EvalErrorPrediction = 0.07480000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=3.34279s
05/03/2016 15:21:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn.2'
05/03/2016 15:21:51: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:21:51: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.19031988 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0960s; samplesPerSecond = 2604.5
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13920714 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0967s; samplesPerSecond = 2585.3
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14595162 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0869s; samplesPerSecond = 2877.8
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13324012 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0817s; samplesPerSecond = 3060.5
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17358728 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0804s; samplesPerSecond = 3109.2
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17949159 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0660s; samplesPerSecond = 3788.1
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.15009323 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0653s; samplesPerSecond = 3829.5
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17060954 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0660s; samplesPerSecond = 3787.3
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10410764 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0762s; samplesPerSecond = 3280.0
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20572259 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.2571s; samplesPerSecond = 972.5
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16519130 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0640s; samplesPerSecond = 3906.2
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.14908187 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0593s; samplesPerSecond = 4213.2
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19227612 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0688s; samplesPerSecond = 3632.8
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13670934 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0532s; samplesPerSecond = 4700.3
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.21113164 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0693s; samplesPerSecond = 3609.4
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.13129944 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0882s; samplesPerSecond = 2833.6
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17304376 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0840s; samplesPerSecond = 2975.2
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16479250 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0685s; samplesPerSecond = 3648.5
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14591786 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0976s; samplesPerSecond = 2561.0
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12562012 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0969s; samplesPerSecond = 2580.7
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13442773 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0959s; samplesPerSecond = 2607.8
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.17125328 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0754s; samplesPerSecond = 3314.6
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22482522 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.1037s; samplesPerSecond = 2410.8
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18291792 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0650s; samplesPerSecond = 3844.3
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.20296558 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0823s; samplesPerSecond = 3038.9
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22849719 * 250; EvalClassificationError = 0.12400000 * 250; time = 0.0828s; samplesPerSecond = 3020.2
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12500068 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0864s; samplesPerSecond = 2894.1
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15719802 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0840s; samplesPerSecond = 2976.4
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11520810 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0687s; samplesPerSecond = 3636.7
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14159592 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0974s; samplesPerSecond = 2567.1
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18509569 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0721s; samplesPerSecond = 3465.4
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15008345 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0905s; samplesPerSecond = 2763.6
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12866435 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0902s; samplesPerSecond = 2770.5
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17640526 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0896s; samplesPerSecond = 2789.2
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14982110 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.2845s; samplesPerSecond = 878.8
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11472753 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0867s; samplesPerSecond = 2882.5
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16524783 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0755s; samplesPerSecond = 3312.4
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14961037 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0958s; samplesPerSecond = 2608.8
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.15972387 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0972s; samplesPerSecond = 2572.7
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17867958 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0969s; samplesPerSecond = 2581.0
05/03/2016 15:21:54: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.16073358 * 10000; EvalClassificationError = 0.07780000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=3.65495s
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.19031988 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0960s; samplesPerSecond = 2604.5
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13920714 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0967s; samplesPerSecond = 2585.3
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14595162 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0869s; samplesPerSecond = 2877.8
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13324012 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0817s; samplesPerSecond = 3060.5
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17358728 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0804s; samplesPerSecond = 3109.2
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17949159 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0660s; samplesPerSecond = 3788.1
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.15009323 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0653s; samplesPerSecond = 3829.5
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17060954 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0660s; samplesPerSecond = 3787.3
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10410764 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0762s; samplesPerSecond = 3280.0
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20572259 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.2571s; samplesPerSecond = 972.5
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16519130 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0640s; samplesPerSecond = 3906.2
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.14908187 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0593s; samplesPerSecond = 4213.2
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19227612 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0688s; samplesPerSecond = 3632.8
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13670934 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0532s; samplesPerSecond = 4700.3
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.21113164 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0693s; samplesPerSecond = 3609.4
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.13129944 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0882s; samplesPerSecond = 2833.6
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17304376 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0840s; samplesPerSecond = 2975.2
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16479250 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0685s; samplesPerSecond = 3648.5
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14591786 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0976s; samplesPerSecond = 2561.0
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12562012 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0969s; samplesPerSecond = 2580.7
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13442773 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0959s; samplesPerSecond = 2607.8
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.17125328 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0754s; samplesPerSecond = 3314.6
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22482522 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.1037s; samplesPerSecond = 2410.8
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18291792 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0650s; samplesPerSecond = 3844.3
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.20296558 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0823s; samplesPerSecond = 3038.9
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22849719 * 250; EvalErrorPrediction = 0.12400000 * 250; time = 0.0828s; samplesPerSecond = 3020.2
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12500068 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0864s; samplesPerSecond = 2894.1
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15719802 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0840s; samplesPerSecond = 2976.4
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11520810 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0687s; samplesPerSecond = 3636.7
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14159592 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0974s; samplesPerSecond = 2567.1
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18509569 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0721s; samplesPerSecond = 3465.4
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15008345 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0905s; samplesPerSecond = 2763.6
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12866435 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0902s; samplesPerSecond = 2770.5
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17640526 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0896s; samplesPerSecond = 2789.2
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14982110 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.2845s; samplesPerSecond = 878.8
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11472753 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0867s; samplesPerSecond = 2882.5
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16524783 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0755s; samplesPerSecond = 3312.4
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14961037 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0958s; samplesPerSecond = 2608.8
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.15972387 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0972s; samplesPerSecond = 2572.7
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17867958 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0969s; samplesPerSecond = 2581.0
05/03/2016 15:21:54: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.16073358 * 10000; EvalErrorPrediction = 0.07780000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=3.65495s
05/03/2016 15:21:54: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn'
05/03/2016 15:21:54: CNTKCommandTrainEnd: Multigpu_Demo_Train
@@ -623,7 +623,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -652,7 +652,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
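The ErrorPrediction node validated above (a [2 x *1] label stream against a [2 x 1 x *1] prediction) computes the fraction of samples whose predicted class, the argmax over the network output, disagrees with the label's argmax; the "rate * count" form in the log lines follows from that. A minimal sketch with hypothetical data:

import numpy as np

def error_prediction(labels, hlast):
    # labels, hlast: [num_classes, num_samples]
    return np.mean(np.argmax(hlast, axis=0) != np.argmax(labels, axis=0))

labels = np.array([[1, 0, 1], [0, 1, 0]])              # one-hot, 3 samples
hlast = np.array([[2.0, 0.5, -1.0], [0.1, 1.5, 3.0]])  # raw network outputs
print(error_prediction(labels, hlast))                 # -> 0.333..., last sample wrong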
@@ -676,7 +676,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
0x1abbf28: {[B0 Value[50 x 1]] }
0x1b47908: {[W1 Value[50 x 50]] }
0x1b48278: {[W2 Value[2 x 50]] }
@@ -688,7 +688,7 @@ Memory Sharing Structure:
0x1b50cd8: {[Prior Value[2]] }
0x1b514f8: {[W0 Value[50 x 2]] }
0x1b53938: {[B1 Value[50 x 1]] }
0x1c0fd98: {[EvalClassificationError Value[1]] }
0x1c0fd98: {[EvalErrorPrediction Value[1]] }
0x1c0fef8: {[CrossEntropyWithSoftmax Value[1]] }
0x1c10438: {[LogOfPrior Value[2]] }
0x1c11f48: {[MVNormalizedFeatures Value[2 x *1]] }
@@ -701,7 +701,7 @@ Memory Sharing Structure:
0x1c12d78: {[W2*H1 Value[2 x 1 x *1]] }
0x1c12f38: {[HLast Value[2 x 1 x *1]] }
05/03/2016 15:21:55: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05804312 * 603; CrossEntropyWithSoftmax = 0.12790061 * 603; perplexity = 1.13644005
05/03/2016 15:21:55: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05804312 * 603; CrossEntropyWithSoftmax = 0.12790061 * 603; perplexity = 1.13644005
05/03/2016 15:21:55: Action "test" complete.
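The perplexity in the test summary is just the exponential of the per-sample cross entropy on the same line:

import math
print(math.exp(0.12790061))   # -> 1.13644..., matching "perplexity = 1.13644005"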

View file

@@ -68,7 +68,7 @@ Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
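The SimpleNetworkBuilder stanza above declares a 2:50*2:2 network: 2 inputs, two sigmoid hidden layers of 50 units, and 2 outputs, with mean/variance normalization of the features and the two criteria named in the log. A minimal NumPy sketch of the forward pass it builds, reusing the node names validated elsewhere in this log (weights are random placeholders; initValueScale, the readers, and training are omitted):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.default_rng(0)
W0, B0 = rng.normal(size=(50, 2)), np.zeros((50, 1))
W1, B1 = rng.normal(size=(50, 50)), np.zeros((50, 1))
W2, B2 = rng.normal(size=(2, 50)), np.zeros((2, 1))

features = rng.normal(size=(2, 8))                    # [2 x *]: 8 samples
mvn = (features - features.mean(axis=1, keepdims=True)) \
      / features.std(axis=1, keepdims=True)           # applyMeanVarNorm = true
H1 = sigmoid(W0 @ mvn + B0)                           # W0*features+B0
H2 = sigmoid(W1 @ H1 + B1)                            # W1*H1+B1
HLast = W2 @ H2 + B2                                  # fed to CrossEntropyWithSoftmax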
@@ -169,7 +169,7 @@ Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -302,7 +302,7 @@ configparameters: Multigpu.cntk:Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -371,7 +371,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -400,7 +400,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
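The Prior/LogOfPrior chain above estimates the class prior as the mean of the one-hot labels and takes its log. Assuming the elided node computes ScaledLogLikelihood = HLast - LogOfPrior (conventional for this example, though the Minus node itself is not shown in this excerpt), this is Bayes' rule in the log domain, up to a per-sample normalization constant:

\log p(x \mid c) \;\propto\; \log P(c \mid x) - \log P(c)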
@@ -424,14 +424,14 @@ Post-processing network complete.
05/03/2016 15:21:55: Evaluation criterion node(s):
05/03/2016 15:21:55: EvalClassificationError = ClassificationError
05/03/2016 15:21:55: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
(nil): {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
(nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
0x12a62e8: {[features Value[2 x *]] }
0x20202b8: {[MeanOfFeatures Value[2]] }
0x20207c8: {[InvStdOfFeatures Value[2]] }
@@ -444,7 +444,7 @@ Memory Sharing Structure:
0x278ae18: {[Prior Value[2]] }
0x278c158: {[LogOfPrior Value[2]] }
0x27908f8: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
0x2790a18: {[EvalClassificationError Value[1]] }
0x2790a18: {[EvalErrorPrediction Value[1]] }
0x2790d18: {[ScaledLogLikelihood Value[2 x 1 x *]] }
0x2790e78: {[CrossEntropyWithSoftmax Value[1]] }
0x27966e8: {[B0 Value[50 x 1]] }
@@ -474,139 +474,139 @@ Memory Sharing Structure:
05/03/2016 15:21:56: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:21:56: Starting minibatch loop.
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70004456 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0059s; samplesPerSecond = 42038.0
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70309900 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0049s; samplesPerSecond = 50525.5
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70606104 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0050s; samplesPerSecond = 50423.6
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69845532 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0049s; samplesPerSecond = 50689.4
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73496533 * 250; EvalClassificationError = 0.57600000 * 250; time = 0.0050s; samplesPerSecond = 50261.4
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72522827 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0050s; samplesPerSecond = 50454.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73287500 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0049s; samplesPerSecond = 50576.6
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70135547 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0049s; samplesPerSecond = 50566.3
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72466504 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0049s; samplesPerSecond = 50515.3
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72187500 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0049s; samplesPerSecond = 50730.5
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69799023 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0049s; samplesPerSecond = 50751.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70696387 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0050s; samplesPerSecond = 50454.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69863965 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0050s; samplesPerSecond = 50393.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71772461 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0048s; samplesPerSecond = 51899.5
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69526270 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0047s; samplesPerSecond = 53544.7
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71436426 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0047s; samplesPerSecond = 53498.8
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70399316 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0047s; samplesPerSecond = 53694.2
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71745508 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 53879.3
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71963184 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0047s; samplesPerSecond = 53521.7
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70689941 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0047s; samplesPerSecond = 53602.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70425098 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 53890.9
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70622754 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0047s; samplesPerSecond = 53728.8
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69729492 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 53786.6
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75974219 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54265.2
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70631250 * 250; EvalClassificationError = 0.43600000 * 250; time = 0.0047s; samplesPerSecond = 53659.6
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70705664 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0047s; samplesPerSecond = 53602.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72660352 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54124.3
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71369727 * 250; EvalClassificationError = 0.55600000 * 250; time = 0.0047s; samplesPerSecond = 53441.6
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68916602 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0047s; samplesPerSecond = 53659.6
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69964844 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0047s; samplesPerSecond = 53339.0
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.69387891 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 53832.9
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.68885742 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0047s; samplesPerSecond = 53350.4
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69388867 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0047s; samplesPerSecond = 53430.2
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.70363867 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 53960.7
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65449219 * 250; EvalClassificationError = 0.44400000 * 250; time = 0.0047s; samplesPerSecond = 53544.7
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64607031 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0047s; samplesPerSecond = 53453.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.59492969 * 250; EvalClassificationError = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 53972.4
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.53965820 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0047s; samplesPerSecond = 53636.6
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.43681445 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0047s; samplesPerSecond = 52854.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37407422 * 250; EvalClassificationError = 0.12000000 * 250; time = 0.0047s; samplesPerSecond = 53521.7
05/03/2016 15:21:56: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68409629 * 10000; EvalClassificationError = 0.45780000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.194983s
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70004456 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0059s; samplesPerSecond = 42038.0
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70309900 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0049s; samplesPerSecond = 50525.5
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70606104 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0050s; samplesPerSecond = 50423.6
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69845532 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0049s; samplesPerSecond = 50689.4
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73496533 * 250; EvalErrorPrediction = 0.57600000 * 250; time = 0.0050s; samplesPerSecond = 50261.4
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72522827 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0050s; samplesPerSecond = 50454.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73287500 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0049s; samplesPerSecond = 50576.6
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70135547 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0049s; samplesPerSecond = 50566.3
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72466504 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0049s; samplesPerSecond = 50515.3
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72187500 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0049s; samplesPerSecond = 50730.5
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69799023 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0049s; samplesPerSecond = 50751.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70696387 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0050s; samplesPerSecond = 50454.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69863965 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0050s; samplesPerSecond = 50393.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71772461 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0048s; samplesPerSecond = 51899.5
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69526270 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0047s; samplesPerSecond = 53544.7
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71436426 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0047s; samplesPerSecond = 53498.8
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70399316 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0047s; samplesPerSecond = 53694.2
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71745508 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 53879.3
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71963184 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0047s; samplesPerSecond = 53521.7
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70689941 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0047s; samplesPerSecond = 53602.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70425098 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 53890.9
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70622754 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0047s; samplesPerSecond = 53728.8
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69729492 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 53786.6
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75974219 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54265.2
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70631250 * 250; EvalErrorPrediction = 0.43600000 * 250; time = 0.0047s; samplesPerSecond = 53659.6
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70705664 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0047s; samplesPerSecond = 53602.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72660352 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54124.3
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71369727 * 250; EvalErrorPrediction = 0.55600000 * 250; time = 0.0047s; samplesPerSecond = 53441.6
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68916602 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0047s; samplesPerSecond = 53659.6
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69964844 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0047s; samplesPerSecond = 53339.0
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.69387891 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 53832.9
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.68885742 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0047s; samplesPerSecond = 53350.4
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69388867 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0047s; samplesPerSecond = 53430.2
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.70363867 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 53960.7
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65449219 * 250; EvalErrorPrediction = 0.44400000 * 250; time = 0.0047s; samplesPerSecond = 53544.7
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64607031 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0047s; samplesPerSecond = 53453.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.59492969 * 250; EvalErrorPrediction = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 53972.4
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.53965820 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0047s; samplesPerSecond = 53636.6
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.43681445 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0047s; samplesPerSecond = 52854.1
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37407422 * 250; EvalErrorPrediction = 0.12000000 * 250; time = 0.0047s; samplesPerSecond = 53521.7
05/03/2016 15:21:56: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68409629 * 10000; EvalErrorPrediction = 0.45780000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.194983s
05/03/2016 15:21:56: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn.1'
05/03/2016 15:21:56: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:21:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27919647 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0093s; samplesPerSecond = 26818.3
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24468611 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0080s; samplesPerSecond = 31063.6
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.19639892 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30982.8
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16397861 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0080s; samplesPerSecond = 31222.7
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.19745002 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30944.4
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.19548896 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0081s; samplesPerSecond = 30871.8
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.18230148 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30910.0
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17531255 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0080s; samplesPerSecond = 31059.8
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20166559 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0081s; samplesPerSecond = 30944.4
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19749058 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0081s; samplesPerSecond = 31055.9
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.13463336 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0081s; samplesPerSecond = 30963.6
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.19006259 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0080s; samplesPerSecond = 31063.6
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.12234776 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0079s; samplesPerSecond = 31605.6
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16962922 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32649.9
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16091639 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32743.9
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18624030 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32748.2
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18465726 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18514518 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0077s; samplesPerSecond = 32620.0
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20127224 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13418547 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32701.1
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13995001 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15602538 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32907.7
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15448171 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32864.5
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14780067 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32894.7
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12361633 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0077s; samplesPerSecond = 32628.6
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14079766 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32632.8
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12624363 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18913222 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32894.7
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17952681 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0076s; samplesPerSecond = 32786.9
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18825452 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17517656 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32942.4
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14744161 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13888184 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32795.5
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.14156678 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32855.8
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13990591 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0077s; samplesPerSecond = 32607.3
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.15059729 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32855.8
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14720846 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0076s; samplesPerSecond = 32799.8
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13021243 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32912.1
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19704037 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0076s; samplesPerSecond = 33029.5
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15858146 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32860.1
05/03/2016 15:21:56: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16938752 * 10000; EvalClassificationError = 0.07430000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.313881s
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27919647 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0093s; samplesPerSecond = 26818.3
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24468611 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0080s; samplesPerSecond = 31063.6
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.19639892 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30982.8
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16397861 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0080s; samplesPerSecond = 31222.7
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.19745002 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30944.4
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.19548896 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0081s; samplesPerSecond = 30871.8
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.18230148 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30910.0
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17531255 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0080s; samplesPerSecond = 31059.8
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20166559 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0081s; samplesPerSecond = 30944.4
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19749058 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0081s; samplesPerSecond = 31055.9
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.13463336 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0081s; samplesPerSecond = 30963.6
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.19006259 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0080s; samplesPerSecond = 31063.6
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.12234776 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0079s; samplesPerSecond = 31605.6
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16962922 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32649.9
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16091639 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32743.9
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18624030 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32748.2
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18465726 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18514518 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0077s; samplesPerSecond = 32620.0
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20127224 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13418547 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32701.1
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13995001 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15602538 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32907.7
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15448171 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32864.5
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14780067 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32894.7
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12361633 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0077s; samplesPerSecond = 32628.6
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14079766 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32632.8
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12624363 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18913222 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32894.7
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17952681 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0076s; samplesPerSecond = 32786.9
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18825452 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17517656 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32942.4
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14744161 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13888184 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32795.5
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.14156678 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32855.8
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13990591 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0077s; samplesPerSecond = 32607.3
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.15059729 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32855.8
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14720846 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0076s; samplesPerSecond = 32799.8
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13021243 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32912.1
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19704037 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0076s; samplesPerSecond = 33029.5
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15858146 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32860.1
05/03/2016 15:21:56: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16938752 * 10000; EvalErrorPrediction = 0.07430000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.313881s
05/03/2016 15:21:56: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn.2'
05/03/2016 15:21:56: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:21:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18888809 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0078s; samplesPerSecond = 32129.5
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.14084978 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32756.8
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14561895 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32666.9
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13238169 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32752.5
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17465335 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32765.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17752616 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32821.3
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.15030556 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0077s; samplesPerSecond = 32645.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17118019 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0077s; samplesPerSecond = 32611.5
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10379908 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0077s; samplesPerSecond = 32637.1
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20636150 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32782.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16606704 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0077s; samplesPerSecond = 32543.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.14937580 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32446.5
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19161901 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32731.1
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13684752 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32696.8
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.21095939 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32688.3
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.13216461 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32769.7
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17341094 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0077s; samplesPerSecond = 32586.0
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16532641 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0076s; samplesPerSecond = 32868.8
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14614740 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32696.8
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12551177 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32705.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13419939 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32782.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.17050096 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22579789 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18219666 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0078s; samplesPerSecond = 32220.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.20347898 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22972656 * 250; EvalClassificationError = 0.12000000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12621914 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0076s; samplesPerSecond = 32890.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15674728 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32808.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11517532 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0077s; samplesPerSecond = 32658.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14187870 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32860.1
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18496784 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32929.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15026403 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32942.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12862609 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32925.1
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17651362 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32778.3
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14975908 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0076s; samplesPerSecond = 32981.5
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11465866 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16513610 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32808.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14972374 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32977.2
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.15995582 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17898927 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32756.8
05/03/2016 15:21:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.16083773 * 10000; EvalClassificationError = 0.07760000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.307973s
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18888809 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0078s; samplesPerSecond = 32129.5
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.14084978 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32756.8
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14561895 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32666.9
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13238169 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32752.5
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17465335 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32765.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17752616 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32821.3
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.15030556 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0077s; samplesPerSecond = 32645.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17118019 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0077s; samplesPerSecond = 32611.5
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10379908 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0077s; samplesPerSecond = 32637.1
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20636150 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32782.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16606704 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0077s; samplesPerSecond = 32543.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.14937580 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32446.5
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19161901 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32731.1
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13684752 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32696.8
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.21095939 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32688.3
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.13216461 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32769.7
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17341094 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0077s; samplesPerSecond = 32586.0
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16532641 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0076s; samplesPerSecond = 32868.8
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14614740 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32696.8
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12551177 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32705.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13419939 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32782.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.17050096 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22579789 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18219666 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0078s; samplesPerSecond = 32220.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.20347898 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22972656 * 250; EvalErrorPrediction = 0.12000000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12621914 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0076s; samplesPerSecond = 32890.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15674728 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32808.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11517532 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0077s; samplesPerSecond = 32658.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14187870 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32860.1
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18496784 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32929.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15026403 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32942.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12862609 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32925.1
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17651362 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32778.3
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14975908 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0076s; samplesPerSecond = 32981.5
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11465866 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16513610 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32808.4
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14972374 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32977.2
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.15995582 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17898927 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32756.8
05/03/2016 15:21:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.16083773 * 10000; EvalErrorPrediction = 0.07760000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.307973s
05/03/2016 15:21:56: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn'
05/03/2016 15:21:56: CNTKCommandTrainEnd: Multigpu_Demo_Train
@@ -624,7 +624,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -653,7 +653,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -677,7 +677,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
0x1222268: {[InvStdOfFeatures Value[2]] }
0x1223258: {[W2 Value[2 x 50]] }
0x12a56c8: {[B0 Value[50 x 1]] }
@@ -697,12 +697,12 @@ Memory Sharing Structure:
0x2adcc08: {[W0*features Value[50 x *1]] }
0x2add0a8: {[W0 Value[50 x 2]] }
0x2ae0518: {[W1 Value[50 x 50]] }
0x68bf228: {[EvalClassificationError Value[1]] }
0x68bf228: {[EvalErrorPrediction Value[1]] }
0x68bf388: {[CrossEntropyWithSoftmax Value[1]] }
0x68bf988: {[LogOfPrior Value[2]] }
0x68d0438: {[features Value[2 x *1]] }
05/03/2016 15:21:57: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05804312 * 603; CrossEntropyWithSoftmax = 0.12736577 * 603; perplexity = 1.13583240
05/03/2016 15:21:57: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05804312 * 603; CrossEntropyWithSoftmax = 0.12736577 * 603; perplexity = 1.13583240
05/03/2016 15:21:57: Action "test" complete.
View file
@@ -66,7 +66,7 @@ Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -167,7 +167,7 @@ Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -300,7 +300,7 @@ configparameters: Multigpu.cntk:Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -368,7 +368,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -397,7 +397,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -421,14 +421,14 @@ Post-processing network complete.
05/03/2016 15:29:48: Evaluation criterion node(s):
05/03/2016 15:29:48: EvalClassificationError = ClassificationError
05/03/2016 15:29:48: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
000000CDDFBEECA0: {[features Value[2 x *]] }
000000CDDFC7B170: {[W0*features+B0 Gradient[50 x 1 x *]] [W1*H1 Value[50 x 1 x *]] }
000000CDDFC7B490: {[HLast Value[2 x 1 x *]] [W2 Gradient[2 x 50]] }
@@ -438,7 +438,7 @@ Memory Sharing Structure:
000000CDDFC7B990: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
000000CDDFC7BC10: {[LogOfPrior Value[2]] }
000000CDDFC7BCB0: {[MVNormalizedFeatures Value[2 x *]] }
000000CDDFC7BD50: {[EvalClassificationError Value[1]] }
000000CDDFC7BD50: {[EvalErrorPrediction Value[1]] }
000000CDDFC7BDF0: {[W0 Gradient[50 x 2]] [W0*features+B0 Value[50 x 1 x *]] }
000000CDDFC7BF30: {[ScaledLogLikelihood Value[2 x 1 x *]] }
000000CDDFC7C070: {[H2 Value[50 x 1 x *]] [W1*H1 Gradient[50 x 1 x *]] }
@@ -471,139 +471,139 @@ Memory Sharing Structure:
05/03/2016 15:29:48: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:29:48: Starting minibatch loop.
05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70511987 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0377s; samplesPerSecond = 6637.8
05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69754895 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0300s; samplesPerSecond = 8341.4
05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71056921 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0285s; samplesPerSecond = 8758.7
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72951074 * 250; EvalClassificationError = 0.56000000 * 250; time = 0.0290s; samplesPerSecond = 8610.3
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70946655 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0285s; samplesPerSecond = 8776.9
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72656787 * 250; EvalClassificationError = 0.54400000 * 250; time = 0.0289s; samplesPerSecond = 8652.6
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69337402 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0288s; samplesPerSecond = 8670.9
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73605176 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0277s; samplesPerSecond = 9033.4
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71453076 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0271s; samplesPerSecond = 9209.5
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75191992 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0247s; samplesPerSecond = 10134.6
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75975146 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0270s; samplesPerSecond = 9243.5
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73172168 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0268s; samplesPerSecond = 9333.9
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76840820 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0265s; samplesPerSecond = 9435.7
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70464746 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0269s; samplesPerSecond = 9309.3
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70557227 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0253s; samplesPerSecond = 9880.3
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72711816 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0267s; samplesPerSecond = 9357.7
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70076660 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0270s; samplesPerSecond = 9264.1
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69409766 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0257s; samplesPerSecond = 9716.3
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69139941 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0257s; samplesPerSecond = 9742.4
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73361621 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0295s; samplesPerSecond = 8477.4
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72225879 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0273s; samplesPerSecond = 9161.9
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70356348 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0261s; samplesPerSecond = 9562.8
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69928613 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0254s; samplesPerSecond = 9848.7
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72360938 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0252s; samplesPerSecond = 9924.6
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69871875 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0262s; samplesPerSecond = 9530.7
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69114844 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9720.1
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68648047 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0273s; samplesPerSecond = 9161.9
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69657227 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0270s; samplesPerSecond = 9259.9
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71585547 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0264s; samplesPerSecond = 9486.2
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69730664 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0261s; samplesPerSecond = 9595.1
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70432422 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0244s; samplesPerSecond = 10248.8
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.69991797 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0220s; samplesPerSecond = 11388.0
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.68696875 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0222s; samplesPerSecond = 11277.0
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.67331445 * 250; EvalClassificationError = 0.37200000 * 250; time = 0.0245s; samplesPerSecond = 10192.4
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65711328 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0240s; samplesPerSecond = 10429.3
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64534375 * 250; EvalClassificationError = 0.44800000 * 250; time = 0.0243s; samplesPerSecond = 10305.0
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.61021875 * 250; EvalClassificationError = 0.36400000 * 250; time = 0.0236s; samplesPerSecond = 10606.3
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.54191016 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0236s; samplesPerSecond = 10578.4
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.45624414 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0232s; samplesPerSecond = 10762.4
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37636133 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0235s; samplesPerSecond = 10623.8
05/03/2016 15:29:49: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68695688 * 10000; EvalClassificationError = 0.45550000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=1.06166s
05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70511987 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0377s; samplesPerSecond = 6637.8
05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69754895 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0300s; samplesPerSecond = 8341.4
05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71056921 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0285s; samplesPerSecond = 8758.7
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72951074 * 250; EvalErrorPrediction = 0.56000000 * 250; time = 0.0290s; samplesPerSecond = 8610.3
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70946655 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0285s; samplesPerSecond = 8776.9
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72656787 * 250; EvalErrorPrediction = 0.54400000 * 250; time = 0.0289s; samplesPerSecond = 8652.6
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69337402 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0288s; samplesPerSecond = 8670.9
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73605176 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0277s; samplesPerSecond = 9033.4
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71453076 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0271s; samplesPerSecond = 9209.5
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75191992 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0247s; samplesPerSecond = 10134.6
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75975146 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0270s; samplesPerSecond = 9243.5
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73172168 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0268s; samplesPerSecond = 9333.9
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76840820 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0265s; samplesPerSecond = 9435.7
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70464746 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0269s; samplesPerSecond = 9309.3
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70557227 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0253s; samplesPerSecond = 9880.3
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72711816 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0267s; samplesPerSecond = 9357.7
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70076660 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0270s; samplesPerSecond = 9264.1
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69409766 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0257s; samplesPerSecond = 9716.3
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69139941 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0257s; samplesPerSecond = 9742.4
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73361621 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0295s; samplesPerSecond = 8477.4
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72225879 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0273s; samplesPerSecond = 9161.9
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70356348 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0261s; samplesPerSecond = 9562.8
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69928613 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0254s; samplesPerSecond = 9848.7
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72360938 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0252s; samplesPerSecond = 9924.6
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69871875 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0262s; samplesPerSecond = 9530.7
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69114844 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9720.1
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68648047 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0273s; samplesPerSecond = 9161.9
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69657227 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0270s; samplesPerSecond = 9259.9
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71585547 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0264s; samplesPerSecond = 9486.2
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69730664 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0261s; samplesPerSecond = 9595.1
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70432422 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0244s; samplesPerSecond = 10248.8
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.69991797 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0220s; samplesPerSecond = 11388.0
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.68696875 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0222s; samplesPerSecond = 11277.0
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.67331445 * 250; EvalErrorPrediction = 0.37200000 * 250; time = 0.0245s; samplesPerSecond = 10192.4
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65711328 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0240s; samplesPerSecond = 10429.3
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64534375 * 250; EvalErrorPrediction = 0.44800000 * 250; time = 0.0243s; samplesPerSecond = 10305.0
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.61021875 * 250; EvalErrorPrediction = 0.36400000 * 250; time = 0.0236s; samplesPerSecond = 10606.3
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.54191016 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0236s; samplesPerSecond = 10578.4
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.45624414 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0232s; samplesPerSecond = 10762.4
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37636133 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0235s; samplesPerSecond = 10623.8
05/03/2016 15:29:49: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68695688 * 10000; EvalErrorPrediction = 0.45550000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=1.06166s
05/03/2016 15:29:49: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn.1'
05/03/2016 15:29:49: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:29:49: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
05/03/2016 15:29:49: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.28780429 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0246s; samplesPerSecond = 10181.2
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.28222478 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0246s; samplesPerSecond = 10178.3
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.23589864 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0255s; samplesPerSecond = 9796.2
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.21209458 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0242s; samplesPerSecond = 10312.3
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.20285913 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10283.0
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.21300948 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0252s; samplesPerSecond = 9928.5
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.17835594 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0256s; samplesPerSecond = 9753.8
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.18830077 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0257s; samplesPerSecond = 9740.1
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.14198478 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0250s; samplesPerSecond = 10019.2
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.15895022 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0237s; samplesPerSecond = 10566.8
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.21062646 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0238s; samplesPerSecond = 10517.9
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.16081948 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0223s; samplesPerSecond = 11186.7
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15635713 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10700.2
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13008516 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0239s; samplesPerSecond = 10453.7
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16625347 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0234s; samplesPerSecond = 10674.2
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15001793 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0245s; samplesPerSecond = 10223.7
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22343917 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0234s; samplesPerSecond = 10692.4
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18006735 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0245s; samplesPerSecond = 10194.5
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15361620 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0235s; samplesPerSecond = 10636.9
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17039588 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0246s; samplesPerSecond = 10177.1
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15516786 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0237s; samplesPerSecond = 10544.1
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15969617 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0225s; samplesPerSecond = 11102.2
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15939439 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10697.9
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15300194 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0233s; samplesPerSecond = 10729.2
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14902476 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0231s; samplesPerSecond = 10811.7
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15043256 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0231s; samplesPerSecond = 10823.4
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15531360 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0229s; samplesPerSecond = 10936.1
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17990796 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0248s; samplesPerSecond = 10088.4
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22925668 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0229s; samplesPerSecond = 10913.7
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16843626 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0234s; samplesPerSecond = 10682.8
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18045325 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0236s; samplesPerSecond = 10585.6
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13337526 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0221s; samplesPerSecond = 11308.6
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14332977 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0245s; samplesPerSecond = 10219.9
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18749446 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0242s; samplesPerSecond = 10326.7
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15505967 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0236s; samplesPerSecond = 10587.8
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.19616616 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0228s; samplesPerSecond = 10980.3
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17305907 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10610.3
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15197365 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0249s; samplesPerSecond = 10033.3
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12102416 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0238s; samplesPerSecond = 10483.5
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15278496 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0235s; samplesPerSecond = 10646.9
05/03/2016 15:29:50: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.17643784 * 10000; EvalClassificationError = 0.07560000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.957696s
05/03/2016 15:29:49: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.28780429 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0246s; samplesPerSecond = 10181.2
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.28222478 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0246s; samplesPerSecond = 10178.3
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.23589864 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0255s; samplesPerSecond = 9796.2
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.21209458 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0242s; samplesPerSecond = 10312.3
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.20285913 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10283.0
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.21300948 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0252s; samplesPerSecond = 9928.5
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.17835594 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0256s; samplesPerSecond = 9753.8
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.18830077 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0257s; samplesPerSecond = 9740.1
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.14198478 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0250s; samplesPerSecond = 10019.2
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.15895022 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0237s; samplesPerSecond = 10566.8
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.21062646 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0238s; samplesPerSecond = 10517.9
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.16081948 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0223s; samplesPerSecond = 11186.7
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15635713 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10700.2
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13008516 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0239s; samplesPerSecond = 10453.7
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16625347 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0234s; samplesPerSecond = 10674.2
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15001793 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0245s; samplesPerSecond = 10223.7
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22343917 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0234s; samplesPerSecond = 10692.4
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18006735 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0245s; samplesPerSecond = 10194.5
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15361620 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0235s; samplesPerSecond = 10636.9
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17039588 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0246s; samplesPerSecond = 10177.1
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15516786 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0237s; samplesPerSecond = 10544.1
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15969617 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0225s; samplesPerSecond = 11102.2
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15939439 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10697.9
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15300194 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0233s; samplesPerSecond = 10729.2
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14902476 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0231s; samplesPerSecond = 10811.7
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15043256 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0231s; samplesPerSecond = 10823.4
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15531360 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0229s; samplesPerSecond = 10936.1
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17990796 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0248s; samplesPerSecond = 10088.4
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22925668 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0229s; samplesPerSecond = 10913.7
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16843626 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0234s; samplesPerSecond = 10682.8
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18045325 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0236s; samplesPerSecond = 10585.6
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13337526 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0221s; samplesPerSecond = 11308.6
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14332977 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0245s; samplesPerSecond = 10219.9
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18749446 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0242s; samplesPerSecond = 10326.7
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15505967 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0236s; samplesPerSecond = 10587.8
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.19616616 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0228s; samplesPerSecond = 10980.3
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17305907 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10610.3
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15197365 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0249s; samplesPerSecond = 10033.3
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12102416 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0238s; samplesPerSecond = 10483.5
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15278496 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0235s; samplesPerSecond = 10646.9
05/03/2016 15:29:50: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.17643784 * 10000; EvalErrorPrediction = 0.07560000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.957696s
05/03/2016 15:29:50: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn.2'
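Throughout these logs the samplesPerSecond field is just the block's sample count divided by its measured time; the printed time is rounded to four decimals, so recomputing reproduces the logged rate only to within rounding. A quick check in Python against the 391-400 block above (250 samples in 0.0235 s, logged as 10646.9 samples/s):

    # Assumes the fields mean what their names suggest; the rounded time
    # makes this only an approximate reconstruction of the logged value.
    print(250 / 0.0235)   # ~10638.3 samples/s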
05/03/2016 15:29:50: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:29:50: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
05/03/2016 15:29:50: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10623312 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10637.4
05/03/2016 15:29:50: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17519442 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0236s; samplesPerSecond = 10608.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14133983 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0240s; samplesPerSecond = 10404.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16278491 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0233s; samplesPerSecond = 10749.0
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11783558 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0232s; samplesPerSecond = 10780.0
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16342188 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0243s; samplesPerSecond = 10305.9
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16272195 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0239s; samplesPerSecond = 10476.9
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19401477 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0241s; samplesPerSecond = 10370.0
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20186661 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0229s; samplesPerSecond = 10903.2
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13672539 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10631.1
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20069212 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0234s; samplesPerSecond = 10681.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17729039 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9928.1
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15906107 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0251s; samplesPerSecond = 9941.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16281632 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0247s; samplesPerSecond = 10121.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19834981 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0248s; samplesPerSecond = 10067.7
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10217642 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0247s; samplesPerSecond = 10105.1
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17011383 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0258s; samplesPerSecond = 9692.2
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16599137 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9911.6
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12648996 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0254s; samplesPerSecond = 9848.7
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11920298 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0248s; samplesPerSecond = 10091.2
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12883164 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0272s; samplesPerSecond = 9205.1
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18222479 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0250s; samplesPerSecond = 9988.0
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13443351 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0246s; samplesPerSecond = 10149.4
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19720325 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0244s; samplesPerSecond = 10230.8
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15586137 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0254s; samplesPerSecond = 9860.4
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11854887 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0250s; samplesPerSecond = 9991.6
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13705285 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0249s; samplesPerSecond = 10050.7
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20009941 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0240s; samplesPerSecond = 10411.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.19078680 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0233s; samplesPerSecond = 10741.6
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16505705 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0238s; samplesPerSecond = 10507.7
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.12232722 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0239s; samplesPerSecond = 10472.1
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16342047 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0238s; samplesPerSecond = 10514.4
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.15875107 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10688.3
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12248772 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0232s; samplesPerSecond = 10793.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13457009 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0238s; samplesPerSecond = 10521.4
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20976565 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0238s; samplesPerSecond = 10494.9
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16519102 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0230s; samplesPerSecond = 10862.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14971420 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0247s; samplesPerSecond = 10106.3
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16456633 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0230s; samplesPerSecond = 10858.2
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16971407 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0239s; samplesPerSecond = 10473.0
05/03/2016 15:29:51: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15787325 * 10000; EvalClassificationError = 0.07430000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.972052s
05/03/2016 15:29:50: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10623312 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10637.4
05/03/2016 15:29:50: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17519442 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0236s; samplesPerSecond = 10608.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14133983 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0240s; samplesPerSecond = 10404.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16278491 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0233s; samplesPerSecond = 10749.0
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11783558 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0232s; samplesPerSecond = 10780.0
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16342188 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0243s; samplesPerSecond = 10305.9
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16272195 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0239s; samplesPerSecond = 10476.9
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19401477 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0241s; samplesPerSecond = 10370.0
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20186661 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0229s; samplesPerSecond = 10903.2
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13672539 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10631.1
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20069212 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0234s; samplesPerSecond = 10681.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17729039 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9928.1
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15906107 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0251s; samplesPerSecond = 9941.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16281632 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0247s; samplesPerSecond = 10121.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19834981 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0248s; samplesPerSecond = 10067.7
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10217642 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0247s; samplesPerSecond = 10105.1
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17011383 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0258s; samplesPerSecond = 9692.2
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16599137 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9911.6
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12648996 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0254s; samplesPerSecond = 9848.7
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11920298 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0248s; samplesPerSecond = 10091.2
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12883164 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0272s; samplesPerSecond = 9205.1
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18222479 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0250s; samplesPerSecond = 9988.0
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13443351 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0246s; samplesPerSecond = 10149.4
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19720325 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0244s; samplesPerSecond = 10230.8
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15586137 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0254s; samplesPerSecond = 9860.4
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11854887 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0250s; samplesPerSecond = 9991.6
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13705285 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0249s; samplesPerSecond = 10050.7
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20009941 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0240s; samplesPerSecond = 10411.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.19078680 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0233s; samplesPerSecond = 10741.6
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16505705 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0238s; samplesPerSecond = 10507.7
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.12232722 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0239s; samplesPerSecond = 10472.1
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16342047 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0238s; samplesPerSecond = 10514.4
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.15875107 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10688.3
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12248772 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0232s; samplesPerSecond = 10793.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13457009 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0238s; samplesPerSecond = 10521.4
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20976565 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0238s; samplesPerSecond = 10494.9
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16519102 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0230s; samplesPerSecond = 10862.5
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14971420 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0247s; samplesPerSecond = 10106.3
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16456633 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0230s; samplesPerSecond = 10858.2
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16971407 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0239s; samplesPerSecond = 10473.0
05/03/2016 15:29:51: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15787325 * 10000; EvalErrorPrediction = 0.07430000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.972052s
05/03/2016 15:29:51: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn'
05/03/2016 15:29:51: CNTKCommandTrainEnd: Multigpu_Demo_Train
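The "momentum as time constant = 237.3 samples" figure in the Starting Epoch lines follows from the effective momentum and the minibatch size. A minimal sketch, assuming 25 samples per minibatch (inferred from the 250-sample blocks of 10 minibatches logged above):

    import math

    mb_size = 25      # assumption: 250 samples per 10 logged minibatches
    momentum = 0.9    # "effective momentum = 0.900000" from the log

    # The time constant T satisfies momentum_per_sample = exp(-1/T),
    # i.e. T = -mb_size / ln(momentum) for a per-minibatch momentum.
    t = -mb_size / math.log(momentum)
    print(round(t, 1))   # 237.3, matching the log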
@@ -621,7 +621,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -650,7 +650,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -674,7 +674,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
000000CDDFC7B490: {[W0 Value[50 x 2]] }
000000CDDFC7B530: {[features Value[2 x *1]] }
000000CDDFC7B710: {[W1 Value[50 x 50]] }
@@ -690,7 +690,7 @@ Memory Sharing Structure:
000000CDDFC8C2B0: {[W1*H1+B1 Value[50 x 1 x *1]] }
000000CDDFC8C490: {[CrossEntropyWithSoftmax Value[1]] }
000000CDDFC8C5D0: {[LogOfPrior Value[2]] }
000000CDDFC8C670: {[EvalClassificationError Value[1]] }
000000CDDFC8C670: {[EvalErrorPrediction Value[1]] }
000000CDDFC8C990: {[MVNormalizedFeatures Value[2 x *1]] }
000000CDDFC8CA30: {[H2 Value[50 x 1 x *1]] }
000000CDDFC8CC10: {[W1*H1 Value[50 x 1 x *1]] }
@@ -699,7 +699,7 @@ Memory Sharing Structure:
000000CDDFC8D610: {[HLast Value[2 x 1 x *1]] }
000000CDDFC8D750: {[W0*features+B0 Value[50 x 1 x *1]] }
05/03/2016 15:29:52: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05306799 * 603; CrossEntropyWithSoftmax = 0.11782631 * 603; perplexity = 1.12504868
05/03/2016 15:29:52: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05306799 * 603; CrossEntropyWithSoftmax = 0.11782631 * 603; perplexity = 1.12504868
05/03/2016 15:29:52: Action "test" complete.
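The perplexity in the Final Results line above is the exponential of the per-sample cross-entropy, which can be checked directly:

    import math

    avg_ce = 0.11782631      # CrossEntropyWithSoftmax per sample, from the line above
    print(math.exp(avg_ce))  # 1.1250486..., the reported perplexity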
View file
@@ -66,7 +66,7 @@ Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -167,7 +167,7 @@ Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -300,7 +300,7 @@ configparameters: Multigpu.cntk:Multigpu_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
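For orientation, layerSizes = 2:50*2:2 with layerTypes = "Sigmoid" describes a 2-input network with two 50-unit sigmoid hidden layers and 2 output logits, matching the W0 [50 x 2], W1 [50 x 50], and W2 [2 x 50] shapes in the Validating lines. A minimal numpy sketch of that forward pass (illustrative only, not CNTK code; the random initialization merely stands in for initValueScale = 1.0):

    import numpy as np

    rng = np.random.default_rng(0)
    W0, b0 = rng.standard_normal((50, 2)), np.zeros((50, 1))
    W1, b1 = rng.standard_normal((50, 50)), np.zeros((50, 1))
    W2, b2 = rng.standard_normal((2, 50)), np.zeros((2, 1))

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    def forward(features, mean, inv_std):
        x = (features - mean) * inv_std   # applyMeanVarNorm = true
        h1 = sigmoid(W0 @ x + b0)         # H1: [50 x 1]
        h2 = sigmoid(W1 @ h1 + b1)        # H2: [50 x 1]
        return W2 @ h2 + b2               # HLast: logits for softmax/CE

    logits = forward(np.zeros((2, 1)), np.zeros((2, 1)), np.ones((2, 1)))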
@@ -369,7 +369,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -398,7 +398,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
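Besides the criteria, the graph exposes PosteriorProb = Softmax(HLast), the class prior Prior = Mean(labels), and LogOfPrior = Log(Prior); a ScaledLogLikelihood node also appears in the memory-sharing lists below. Assuming the standard Simple2d wiring (the node's defining Minus falls outside the shown hunks), ScaledLogLikelihood is HLast minus LogOfPrior, the usual hybrid-model conversion from posteriors to scaled likelihoods:

    \log p(x \mid c) \;\propto\; \log p(c \mid x) - \log p(c)

with p(c|x) taken from the softmax posterior and p(c) estimated by Mean(labels).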
@@ -422,14 +422,14 @@ Post-processing network complete.
05/03/2016 15:29:53: Evaluation criterion node(s):
05/03/2016 15:29:53: EvalClassificationError = ClassificationError
05/03/2016 15:29:53: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
000000572B66ECA0: {[features Value[2 x *]] }
00000057420A1700: {[W1 Value[50 x 50]] }
00000057420A1980: {[MeanOfFeatures Value[2]] }
@@ -448,7 +448,7 @@ Memory Sharing Structure:
00000057439283E0: {[LogOfPrior Value[2]] }
00000057439285C0: {[W0 Gradient[50 x 2]] [W0*features+B0 Value[50 x 1 x *]] }
0000005743928660: {[B1 Gradient[50 x 1]] [H2 Gradient[50 x 1 x *]] [HLast Gradient[2 x 1 x *]] }
00000057439287A0: {[EvalClassificationError Value[1]] }
00000057439287A0: {[EvalErrorPrediction Value[1]] }
0000005743928980: {[CrossEntropyWithSoftmax Value[1]] }
0000005743928A20: {[B2 Gradient[2 x 1]] }
0000005743928E80: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
@@ -472,139 +472,139 @@ Memory Sharing Structure:
05/03/2016 15:29:54: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:29:54: Starting minibatch loop.
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70650452 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0115s; samplesPerSecond = 21832.2
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69701831 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0095s; samplesPerSecond = 26326.9
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71089587 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0100s; samplesPerSecond = 25067.7
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72980273 * 250; EvalClassificationError = 0.56000000 * 250; time = 0.0096s; samplesPerSecond = 26079.7
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70902783 * 250; EvalClassificationError = 0.52800000 * 250; time = 0.0115s; samplesPerSecond = 21692.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72657300 * 250; EvalClassificationError = 0.54400000 * 250; time = 0.0124s; samplesPerSecond = 20127.2
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69319678 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0091s; samplesPerSecond = 27439.4
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73563477 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0112s; samplesPerSecond = 22246.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71463281 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0115s; samplesPerSecond = 21739.1
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75213428 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0105s; samplesPerSecond = 23814.1
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75931445 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0115s; samplesPerSecond = 21763.7
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73075293 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0120s; samplesPerSecond = 20835.1
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76701953 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0130s; samplesPerSecond = 19305.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70451270 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0108s; samplesPerSecond = 23184.6
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70539941 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0117s; samplesPerSecond = 21385.8
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72700293 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0120s; samplesPerSecond = 20917.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70096191 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0112s; samplesPerSecond = 22301.5
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69437305 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0113s; samplesPerSecond = 22079.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69161621 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0116s; samplesPerSecond = 21514.6
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73388281 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0107s; samplesPerSecond = 23406.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72255664 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0116s; samplesPerSecond = 21546.2
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70414551 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0115s; samplesPerSecond = 21756.2
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69976758 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0113s; samplesPerSecond = 22065.3
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72419141 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0114s; samplesPerSecond = 22018.7
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69943945 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0111s; samplesPerSecond = 22604.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69206445 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0111s; samplesPerSecond = 22504.3
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68771680 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0113s; samplesPerSecond = 22118.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69878516 * 250; EvalClassificationError = 0.44000000 * 250; time = 0.0130s; samplesPerSecond = 19278.2
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71889844 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0127s; samplesPerSecond = 19632.5
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.70086523 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0095s; samplesPerSecond = 26329.6
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70878320 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0112s; samplesPerSecond = 22361.4
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.70674414 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0130s; samplesPerSecond = 19168.8
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69707422 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0094s; samplesPerSecond = 26729.4
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68588281 * 250; EvalClassificationError = 0.40800000 * 250; time = 0.0112s; samplesPerSecond = 22365.4
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.67734766 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0128s; samplesPerSecond = 19583.3
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.67958008 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0092s; samplesPerSecond = 27144.4
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.66424805 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0114s; samplesPerSecond = 21864.6
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.62412500 * 250; EvalClassificationError = 0.20400000 * 250; time = 0.0116s; samplesPerSecond = 21475.8
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.58007422 * 250; EvalClassificationError = 0.16000000 * 250; time = 0.0094s; samplesPerSecond = 26567.5
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.52764648 * 250; EvalClassificationError = 0.19200000 * 250; time = 0.0132s; samplesPerSecond = 18988.3
05/03/2016 15:29:54: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.69975483 * 10000; EvalClassificationError = 0.46850000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.453807s
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70650452 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0115s; samplesPerSecond = 21832.2
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69701831 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0095s; samplesPerSecond = 26326.9
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71089587 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0100s; samplesPerSecond = 25067.7
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72980273 * 250; EvalErrorPrediction = 0.56000000 * 250; time = 0.0096s; samplesPerSecond = 26079.7
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70902783 * 250; EvalErrorPrediction = 0.52800000 * 250; time = 0.0115s; samplesPerSecond = 21692.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72657300 * 250; EvalErrorPrediction = 0.54400000 * 250; time = 0.0124s; samplesPerSecond = 20127.2
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69319678 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0091s; samplesPerSecond = 27439.4
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73563477 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0112s; samplesPerSecond = 22246.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71463281 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0115s; samplesPerSecond = 21739.1
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75213428 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0105s; samplesPerSecond = 23814.1
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75931445 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0115s; samplesPerSecond = 21763.7
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73075293 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0120s; samplesPerSecond = 20835.1
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76701953 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0130s; samplesPerSecond = 19305.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70451270 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0108s; samplesPerSecond = 23184.6
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70539941 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0117s; samplesPerSecond = 21385.8
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72700293 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0120s; samplesPerSecond = 20917.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70096191 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0112s; samplesPerSecond = 22301.5
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69437305 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0113s; samplesPerSecond = 22079.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69161621 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0116s; samplesPerSecond = 21514.6
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73388281 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0107s; samplesPerSecond = 23406.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72255664 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0116s; samplesPerSecond = 21546.2
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70414551 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0115s; samplesPerSecond = 21756.2
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69976758 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0113s; samplesPerSecond = 22065.3
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72419141 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0114s; samplesPerSecond = 22018.7
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69943945 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0111s; samplesPerSecond = 22604.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69206445 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0111s; samplesPerSecond = 22504.3
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68771680 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0113s; samplesPerSecond = 22118.0
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69878516 * 250; EvalErrorPrediction = 0.44000000 * 250; time = 0.0130s; samplesPerSecond = 19278.2
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71889844 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0127s; samplesPerSecond = 19632.5
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.70086523 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0095s; samplesPerSecond = 26329.6
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70878320 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0112s; samplesPerSecond = 22361.4
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.70674414 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0130s; samplesPerSecond = 19168.8
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69707422 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0094s; samplesPerSecond = 26729.4
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68588281 * 250; EvalErrorPrediction = 0.40800000 * 250; time = 0.0112s; samplesPerSecond = 22365.4
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.67734766 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0128s; samplesPerSecond = 19583.3
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.67958008 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0092s; samplesPerSecond = 27144.4
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.66424805 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0114s; samplesPerSecond = 21864.6
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.62412500 * 250; EvalErrorPrediction = 0.20400000 * 250; time = 0.0116s; samplesPerSecond = 21475.8
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.58007422 * 250; EvalErrorPrediction = 0.16000000 * 250; time = 0.0094s; samplesPerSecond = 26567.5
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.52764648 * 250; EvalErrorPrediction = 0.19200000 * 250; time = 0.0132s; samplesPerSecond = 18988.3
05/03/2016 15:29:54: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.69975483 * 10000; EvalErrorPrediction = 0.46850000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.453807s
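[Editor's note on the log format, for anyone diffing these baselines by hand: every metric is printed as "average * sampleCount", and the epoch summary is the sample-weighted mean of the per-block averages. In this run each logged block covers n_i = 250 samples and the epoch covers 40 blocks (10000 samples), so, as a sketch of the bookkeeping (not CNTK code):

    \bar{L}_{\text{epoch}} = \frac{\sum_{i=1}^{40} n_i\,\bar{L}_i}{\sum_{i=1}^{40} n_i}
                           = \frac{250 \sum_{i=1}^{40} \bar{L}_i}{10000}
                           \approx 0.69975483

which is the value reported on the "Finished Epoch" line above.]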
05/03/2016 15:29:54: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn.1'
05/03/2016 15:29:54: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
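[Editor's note: the "momentum as time constant" figure follows from the effective momentum and the minibatch size, which is 25 samples here (each logged block is 10 minibatches over 250 samples). Treating momentum as a per-sample exponential decay, m = e^{-N/\tau}, hence:

    \tau = -\frac{N}{\ln m} = -\frac{25}{\ln 0.9} \approx 237.3\ \text{samples}

matching the line above.]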
05/03/2016 15:29:54: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
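[Editor's note: NumGradientBits = 1 indicates 1-bit SGD gradient quantization for data-parallel aggregation. As a hedged sketch only (block and option names per the public CNTK setup documentation; the exact config driving this test is not shown in this excerpt), this mode is typically enabled with a parallelTrain section like:

    SGD = [
        parallelTrain = [
            parallelizationMethod = "DataParallelSGD"
            distributedMBReading = true
            dataParallelSGD = [
                gradientBits = 1    # quantize aggregated gradients to 1 bit per value
            ]
        ]
    ]

With NumNodes = 1, as in this run, the quantization path is exercised but there is no cross-node traffic to compress.]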
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.45075654 * 250; EvalClassificationError = 0.15200000 * 250; time = 0.0250s; samplesPerSecond = 10002.4
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.40775497 * 250; EvalClassificationError = 0.14400000 * 250; time = 0.0219s; samplesPerSecond = 11420.2
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.34165228 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0230s; samplesPerSecond = 10859.6
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.29708900 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0198s; samplesPerSecond = 12604.0
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.26669365 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0211s; samplesPerSecond = 11860.7
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.25328680 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0212s; samplesPerSecond = 11817.0
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.21017820 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0237s; samplesPerSecond = 10540.1
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.21483054 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0214s; samplesPerSecond = 11699.7
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.16626513 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0213s; samplesPerSecond = 11757.5
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.17672434 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0239s; samplesPerSecond = 10454.6
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.22140190 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0208s; samplesPerSecond = 12033.1
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17048554 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0237s; samplesPerSecond = 10553.4
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16438517 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10662.3
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13782141 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0218s; samplesPerSecond = 11449.0
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16909663 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0244s; samplesPerSecond = 10228.7
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15419129 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0229s; samplesPerSecond = 10924.7
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22229924 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0242s; samplesPerSecond = 10340.4
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18134995 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0236s; samplesPerSecond = 10579.3
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15616904 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0236s; samplesPerSecond = 10594.6
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17162733 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0262s; samplesPerSecond = 9530.3
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15676289 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0262s; samplesPerSecond = 9554.4
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16159542 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0262s; samplesPerSecond = 9558.8
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.16102246 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0284s; samplesPerSecond = 8800.3
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15392923 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0248s; samplesPerSecond = 10089.6
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14898334 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0269s; samplesPerSecond = 9279.5
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15087969 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0285s; samplesPerSecond = 8785.2
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15494578 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0247s; samplesPerSecond = 10101.4
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17878713 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0250s; samplesPerSecond = 9986.0
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22845049 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0249s; samplesPerSecond = 10045.4
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16884430 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0241s; samplesPerSecond = 10376.5
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17970282 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0237s; samplesPerSecond = 10533.9
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13292468 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0257s; samplesPerSecond = 9721.6
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14167778 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0226s; samplesPerSecond = 11048.3
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18716852 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0237s; samplesPerSecond = 10534.7
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15480385 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0258s; samplesPerSecond = 9705.0
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.19482328 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0247s; samplesPerSecond = 10115.7
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17488171 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0249s; samplesPerSecond = 10048.2
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15164433 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0281s; samplesPerSecond = 8901.2
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12142463 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0222s; samplesPerSecond = 11279.0
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15287631 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0238s; samplesPerSecond = 10489.7
05/03/2016 15:29:55: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.19475469 * 10000; EvalClassificationError = 0.07830000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.964496s
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.45075654 * 250; EvalErrorPrediction = 0.15200000 * 250; time = 0.0250s; samplesPerSecond = 10002.4
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.40775497 * 250; EvalErrorPrediction = 0.14400000 * 250; time = 0.0219s; samplesPerSecond = 11420.2
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.34165228 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0230s; samplesPerSecond = 10859.6
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.29708900 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0198s; samplesPerSecond = 12604.0
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.26669365 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0211s; samplesPerSecond = 11860.7
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.25328680 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0212s; samplesPerSecond = 11817.0
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.21017820 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0237s; samplesPerSecond = 10540.1
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.21483054 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0214s; samplesPerSecond = 11699.7
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.16626513 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0213s; samplesPerSecond = 11757.5
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.17672434 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0239s; samplesPerSecond = 10454.6
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.22140190 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0208s; samplesPerSecond = 12033.1
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17048554 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0237s; samplesPerSecond = 10553.4
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16438517 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10662.3
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13782141 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0218s; samplesPerSecond = 11449.0
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16909663 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0244s; samplesPerSecond = 10228.7
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15419129 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0229s; samplesPerSecond = 10924.7
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22229924 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0242s; samplesPerSecond = 10340.4
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18134995 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0236s; samplesPerSecond = 10579.3
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15616904 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0236s; samplesPerSecond = 10594.6
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17162733 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0262s; samplesPerSecond = 9530.3
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15676289 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0262s; samplesPerSecond = 9554.4
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16159542 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0262s; samplesPerSecond = 9558.8
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.16102246 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0284s; samplesPerSecond = 8800.3
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15392923 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0248s; samplesPerSecond = 10089.6
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14898334 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0269s; samplesPerSecond = 9279.5
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15087969 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0285s; samplesPerSecond = 8785.2
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15494578 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0247s; samplesPerSecond = 10101.4
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17878713 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0250s; samplesPerSecond = 9986.0
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22845049 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0249s; samplesPerSecond = 10045.4
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16884430 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0241s; samplesPerSecond = 10376.5
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17970282 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0237s; samplesPerSecond = 10533.9
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13292468 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0257s; samplesPerSecond = 9721.6
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14167778 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0226s; samplesPerSecond = 11048.3
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18716852 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0237s; samplesPerSecond = 10534.7
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15480385 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0258s; samplesPerSecond = 9705.0
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.19482328 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0247s; samplesPerSecond = 10115.7
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17488171 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0249s; samplesPerSecond = 10048.2
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15164433 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0281s; samplesPerSecond = 8901.2
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12142463 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0222s; samplesPerSecond = 11279.0
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15287631 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0238s; samplesPerSecond = 10489.7
05/03/2016 15:29:55: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.19475469 * 10000; EvalErrorPrediction = 0.07830000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.964496s
05/03/2016 15:29:55: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn.2'
05/03/2016 15:29:55: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:29:55: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10717578 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0253s; samplesPerSecond = 9869.7
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17521929 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0234s; samplesPerSecond = 10701.1
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14088211 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0250s; samplesPerSecond = 9986.8
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16281337 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10287.6
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11778386 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0234s; samplesPerSecond = 10666.9
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16295400 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0266s; samplesPerSecond = 9385.8
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16287201 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0233s; samplesPerSecond = 10746.2
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19482140 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0242s; samplesPerSecond = 10312.3
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20113689 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0235s; samplesPerSecond = 10643.3
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13748570 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0238s; samplesPerSecond = 10484.4
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20080420 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0236s; samplesPerSecond = 10600.9
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17730590 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0268s; samplesPerSecond = 9342.3
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15851029 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0233s; samplesPerSecond = 10743.0
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16257260 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0250s; samplesPerSecond = 10012.8
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19772537 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0224s; samplesPerSecond = 11143.3
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10259204 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0235s; samplesPerSecond = 10626.1
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17093073 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0244s; samplesPerSecond = 10230.0
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16628544 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9936.8
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12690716 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0246s; samplesPerSecond = 10171.7
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11894288 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0233s; samplesPerSecond = 10718.1
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12815907 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0246s; samplesPerSecond = 10151.0
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18265773 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0225s; samplesPerSecond = 11131.9
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13388730 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0231s; samplesPerSecond = 10807.5
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19787903 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0251s; samplesPerSecond = 9951.4
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15563315 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0241s; samplesPerSecond = 10373.0
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11837055 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0240s; samplesPerSecond = 10429.3
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13732942 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10689.7
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20012115 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0253s; samplesPerSecond = 9872.4
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.19086846 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0238s; samplesPerSecond = 10525.4
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16492589 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10272.8
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.12141157 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0238s; samplesPerSecond = 10509.5
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16335481 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10579.3
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.15923900 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0241s; samplesPerSecond = 10358.0
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12315803 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10617.1
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13481532 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0260s; samplesPerSecond = 9612.4
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20958008 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0223s; samplesPerSecond = 11232.4
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16519713 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0255s; samplesPerSecond = 9814.3
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14990733 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0239s; samplesPerSecond = 10481.3
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16508552 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0255s; samplesPerSecond = 9789.3
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16941540 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0240s; samplesPerSecond = 10435.4
05/03/2016 15:29:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15791792 * 10000; EvalClassificationError = 0.07460000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.970059s
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10717578 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0253s; samplesPerSecond = 9869.7
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17521929 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0234s; samplesPerSecond = 10701.1
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14088211 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0250s; samplesPerSecond = 9986.8
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16281337 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10287.6
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11778386 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0234s; samplesPerSecond = 10666.9
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16295400 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0266s; samplesPerSecond = 9385.8
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16287201 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0233s; samplesPerSecond = 10746.2
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19482140 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0242s; samplesPerSecond = 10312.3
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20113689 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0235s; samplesPerSecond = 10643.3
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13748570 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0238s; samplesPerSecond = 10484.4
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20080420 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0236s; samplesPerSecond = 10600.9
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17730590 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0268s; samplesPerSecond = 9342.3
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15851029 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0233s; samplesPerSecond = 10743.0
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16257260 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0250s; samplesPerSecond = 10012.8
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19772537 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0224s; samplesPerSecond = 11143.3
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10259204 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0235s; samplesPerSecond = 10626.1
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17093073 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0244s; samplesPerSecond = 10230.0
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16628544 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9936.8
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12690716 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0246s; samplesPerSecond = 10171.7
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11894288 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0233s; samplesPerSecond = 10718.1
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12815907 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0246s; samplesPerSecond = 10151.0
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18265773 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0225s; samplesPerSecond = 11131.9
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13388730 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0231s; samplesPerSecond = 10807.5
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19787903 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0251s; samplesPerSecond = 9951.4
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15563315 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0241s; samplesPerSecond = 10373.0
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11837055 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0240s; samplesPerSecond = 10429.3
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13732942 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10689.7
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20012115 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0253s; samplesPerSecond = 9872.4
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.19086846 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0238s; samplesPerSecond = 10525.4
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16492589 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10272.8
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.12141157 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0238s; samplesPerSecond = 10509.5
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16335481 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10579.3
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.15923900 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0241s; samplesPerSecond = 10358.0
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12315803 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10617.1
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13481532 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0260s; samplesPerSecond = 9612.4
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20958008 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0223s; samplesPerSecond = 11232.4
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16519713 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0255s; samplesPerSecond = 9814.3
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14990733 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0239s; samplesPerSecond = 10481.3
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16508552 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0255s; samplesPerSecond = 9789.3
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16941540 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0240s; samplesPerSecond = 10435.4
05/03/2016 15:29:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15791792 * 10000; EvalErrorPrediction = 0.07460000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.970059s
05/03/2016 15:29:56: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn'
05/03/2016 15:29:56: CNTKCommandTrainEnd: Multigpu_Demo_Train
@@ -622,7 +622,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -651,7 +651,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -675,7 +675,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
0000005743925BB0: {[HLast Value[2 x 1 x *1]] }
0000005743925D90: {[MVNormalizedFeatures Value[2 x *1]] }
0000005743925E30: {[CrossEntropyWithSoftmax Value[1]] }
@@ -688,7 +688,7 @@ Memory Sharing Structure:
00000057439265B0: {[W0*features+B0 Value[50 x 1 x *1]] }
0000005743926650: {[W1*H1 Value[50 x 1 x *1]] }
0000005743926970: {[H2 Value[50 x 1 x *1]] }
0000005743926AB0: {[EvalClassificationError Value[1]] }
0000005743926AB0: {[EvalErrorPrediction Value[1]] }
000000574B7FAD10: {[W0 Value[50 x 2]] }
000000574B7FB170: {[InvStdOfFeatures Value[2]] }
000000574B7FB210: {[MeanOfFeatures Value[2]] }
@@ -700,7 +700,7 @@ Memory Sharing Structure:
000000574D960E50: {[B2 Value[2 x 1]] }
000000574D9610D0: {[B0 Value[50 x 1]] }
05/03/2016 15:29:56: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12022919 * 603; perplexity = 1.12775529
05/03/2016 15:29:56: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12022919 * 603; perplexity = 1.12775529
05/03/2016 15:29:56: Action "test" complete.

View file

@@ -58,7 +58,7 @@ Simple_Demo_Train = [
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
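[Editor's note: layerSizes = 2:50*2:2 expands to a 2-dimensional input, two Sigmoid hidden layers of 50 units each, and a 2-class output. With applyMeanVarNorm, the network validated below is, in equations (weight shapes as confirmed by the Validating lines and the memory-sharing dump: W_0 is 50x2, W_1 is 50x50, W_2 is 2x50):

    x' = (x - \mu) \oslash s, \quad
    h_1 = \operatorname{sig}(W_0 x' + b_0), \quad
    h_2 = \operatorname{sig}(W_1 h_1 + b_1), \quad
    z = W_2 h_2 + b_2

with CrossEntropyWithSoftmax(labels, z) as the training criterion and ErrorPrediction(labels, z) as the evaluation criterion.]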
@@ -157,7 +157,7 @@ Simple_Demo_Train = [
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -300,7 +300,7 @@ configparameters: Simple.cntk:Simple_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -355,7 +355,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -384,7 +384,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -408,14 +408,14 @@ Post-processing network complete.
05/03/2016 15:21:15: Evaluation criterion node(s):
05/03/2016 15:21:15: EvalClassificationError = ClassificationError
05/03/2016 15:21:15: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
(nil): {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
(nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
0x2e7f338: {[features Value[2 x *]] }
0x2e82908: {[MeanOfFeatures Value[2]] }
0x2e84f08: {[InvStdOfFeatures Value[2]] }
@@ -427,7 +427,7 @@ Memory Sharing Structure:
0x2e8b718: {[B2 Value[2 x 1]] }
0x2e8c1e8: {[labels Value[2 x *]] }
0x2e8cf38: {[Prior Value[2]] }
0x2e926f8: {[EvalClassificationError Value[1]] }
0x2e926f8: {[EvalErrorPrediction Value[1]] }
0x2e92858: {[ScaledLogLikelihood Value[2 x 1 x *]] }
0x2e929b8: {[CrossEntropyWithSoftmax Value[1]] }
0x2e93218: {[LogOfPrior Value[2]] }
@@ -458,139 +458,139 @@ Memory Sharing Structure:
05/03/2016 15:21:17: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:21:17: Starting minibatch loop.
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69966235 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0806s; samplesPerSecond = 3103.4
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70639648 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0489s; samplesPerSecond = 5107.5
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70470264 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0598s; samplesPerSecond = 4180.0
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69813501 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0581s; samplesPerSecond = 4306.3
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73551416 * 250; EvalClassificationError = 0.57600000 * 250; time = 0.0618s; samplesPerSecond = 4045.4
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72432324 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0579s; samplesPerSecond = 4314.7
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73327588 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.2699s; samplesPerSecond = 926.3
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70092627 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0620s; samplesPerSecond = 4035.0
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72354980 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0826s; samplesPerSecond = 3027.2
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72148096 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0811s; samplesPerSecond = 3082.2
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69814941 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0895s; samplesPerSecond = 2793.1
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70699121 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0482s; samplesPerSecond = 5187.9
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69898437 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0567s; samplesPerSecond = 4408.3
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71712695 * 250; EvalClassificationError = 0.54000000 * 250; time = 0.0586s; samplesPerSecond = 4266.7
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69470703 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0546s; samplesPerSecond = 4575.3
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71375879 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0640s; samplesPerSecond = 3907.4
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70381641 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0756s; samplesPerSecond = 3307.9
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71748633 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0598s; samplesPerSecond = 4178.1
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71863281 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0813s; samplesPerSecond = 3075.3
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70715234 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0811s; samplesPerSecond = 3082.9
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70401074 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0673s; samplesPerSecond = 3717.1
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70599414 * 250; EvalClassificationError = 0.48400000 * 250; time = 0.0819s; samplesPerSecond = 3052.5
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69628711 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0909s; samplesPerSecond = 2749.3
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75920898 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0752s; samplesPerSecond = 3323.1
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70542578 * 250; EvalClassificationError = 0.43600000 * 250; time = 0.0734s; samplesPerSecond = 3406.2
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70643945 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0869s; samplesPerSecond = 2875.4
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72481641 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0893s; samplesPerSecond = 2798.7
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71133594 * 250; EvalClassificationError = 0.55600000 * 250; time = 0.0814s; samplesPerSecond = 3072.2
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68605664 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0812s; samplesPerSecond = 3077.4
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69535352 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0895s; samplesPerSecond = 2792.1
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.68741797 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0831s; samplesPerSecond = 3008.7
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.67916406 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0818s; samplesPerSecond = 3056.5
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.67841992 * 250; EvalClassificationError = 0.44800000 * 250; time = 0.2681s; samplesPerSecond = 932.5
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68038477 * 250; EvalClassificationError = 0.49200000 * 250; time = 0.0513s; samplesPerSecond = 4869.4
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.61937109 * 250; EvalClassificationError = 0.30400000 * 250; time = 0.0680s; samplesPerSecond = 3678.3
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.57844141 * 250; EvalClassificationError = 0.27200000 * 250; time = 0.0758s; samplesPerSecond = 3296.3
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.49124023 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0664s; samplesPerSecond = 3763.4
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.39071289 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0505s; samplesPerSecond = 4955.3
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.27650586 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0515s; samplesPerSecond = 4855.7
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.26430078 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0517s; samplesPerSecond = 4834.4
05/03/2016 15:21:20: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.66664150 * 10000; EvalClassificationError = 0.44430000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=3.21314s
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69966235 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0806s; samplesPerSecond = 3103.4
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70639648 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0489s; samplesPerSecond = 5107.5
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70470264 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0598s; samplesPerSecond = 4180.0
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69813501 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0581s; samplesPerSecond = 4306.3
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73551416 * 250; EvalErrorPrediction = 0.57600000 * 250; time = 0.0618s; samplesPerSecond = 4045.4
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72432324 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0579s; samplesPerSecond = 4314.7
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73327588 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.2699s; samplesPerSecond = 926.3
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70092627 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0620s; samplesPerSecond = 4035.0
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72354980 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0826s; samplesPerSecond = 3027.2
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72148096 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0811s; samplesPerSecond = 3082.2
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69814941 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0895s; samplesPerSecond = 2793.1
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70699121 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0482s; samplesPerSecond = 5187.9
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69898437 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0567s; samplesPerSecond = 4408.3
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71712695 * 250; EvalErrorPrediction = 0.54000000 * 250; time = 0.0586s; samplesPerSecond = 4266.7
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69470703 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0546s; samplesPerSecond = 4575.3
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71375879 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0640s; samplesPerSecond = 3907.4
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70381641 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0756s; samplesPerSecond = 3307.9
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71748633 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0598s; samplesPerSecond = 4178.1
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71863281 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0813s; samplesPerSecond = 3075.3
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70715234 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0811s; samplesPerSecond = 3082.9
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70401074 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0673s; samplesPerSecond = 3717.1
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70599414 * 250; EvalErrorPrediction = 0.48400000 * 250; time = 0.0819s; samplesPerSecond = 3052.5
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69628711 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0909s; samplesPerSecond = 2749.3
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75920898 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0752s; samplesPerSecond = 3323.1
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70542578 * 250; EvalErrorPrediction = 0.43600000 * 250; time = 0.0734s; samplesPerSecond = 3406.2
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70643945 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0869s; samplesPerSecond = 2875.4
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72481641 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0893s; samplesPerSecond = 2798.7
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71133594 * 250; EvalErrorPrediction = 0.55600000 * 250; time = 0.0814s; samplesPerSecond = 3072.2
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68605664 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0812s; samplesPerSecond = 3077.4
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69535352 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0895s; samplesPerSecond = 2792.1
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.68741797 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0831s; samplesPerSecond = 3008.7
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.67916406 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0818s; samplesPerSecond = 3056.5
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.67841992 * 250; EvalErrorPrediction = 0.44800000 * 250; time = 0.2681s; samplesPerSecond = 932.5
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68038477 * 250; EvalErrorPrediction = 0.49200000 * 250; time = 0.0513s; samplesPerSecond = 4869.4
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.61937109 * 250; EvalErrorPrediction = 0.30400000 * 250; time = 0.0680s; samplesPerSecond = 3678.3
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.57844141 * 250; EvalErrorPrediction = 0.27200000 * 250; time = 0.0758s; samplesPerSecond = 3296.3
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.49124023 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0664s; samplesPerSecond = 3763.4
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.39071289 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0505s; samplesPerSecond = 4955.3
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.27650586 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0515s; samplesPerSecond = 4855.7
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.26430078 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0517s; samplesPerSecond = 4834.4
05/03/2016 15:21:20: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.66664150 * 10000; EvalErrorPrediction = 0.44430000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=3.21314s
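The epoch summary is the sample-weighted mean of the minibatch figures above: each "CrossEntropyWithSoftmax = x * 250" line is a per-sample average over 250 samples, and the "* 10000" epoch value is the same average taken over all 400 minibatches. A minimal sketch of that bookkeeping, in Python (not part of the log; the values are copied from the first three minibatch lines above):

mb_ce = [0.69966235, 0.70639648, 0.70470264]  # per-sample averages, 250 samples each
total = sum(ce * 250 for ce in mb_ce)         # summed loss over these minibatches
print(total / (250 * len(mb_ce)))             # recovers the per-sample average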
05/03/2016 15:21:20: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_cpu/Models/simple.dnn.1'
05/03/2016 15:21:20: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
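The "momentum as time constant = 237.3 samples" figure follows from the other two numbers on this line, assuming CNTK's momentum-as-time-constant convention: an effective momentum of 0.9 per minibatch of 25 samples (10000 samples / 400 minibatches) corresponds to a per-sample decay of 0.9^(1/25), whose time constant is -1/ln of that decay. A quick check in Python:

import math
mb_size = 10000 // 400                   # 25 samples per minibatch
m = 0.9                                  # effective momentum per minibatch
tau = -1 / math.log(m ** (1 / mb_size))  # = mb_size / -ln(m)
print(round(tau, 1))                     # 237.3, as logged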
05/03/2016 15:21:20: Starting minibatch loop.
05/03/2016 15:21:20: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.20732678 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0782s; samplesPerSecond = 3196.0
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.19684015 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0812s; samplesPerSecond = 3079.4
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.16083588 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0796s; samplesPerSecond = 3141.3
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13558752 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0811s; samplesPerSecond = 3083.5
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17992950 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0814s; samplesPerSecond = 3070.9
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17858063 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0812s; samplesPerSecond = 3079.3
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16847546 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0688s; samplesPerSecond = 3631.6
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16359399 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0547s; samplesPerSecond = 4572.7
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.19534705 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0521s; samplesPerSecond = 4796.2
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19363660 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0758s; samplesPerSecond = 3297.5
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.12703638 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0682s; samplesPerSecond = 3667.7
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.18622827 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0576s; samplesPerSecond = 4344.0
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.11595044 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0599s; samplesPerSecond = 4171.2
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16689380 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0650s; samplesPerSecond = 3845.2
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.15822559 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0631s; samplesPerSecond = 3964.2
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18381909 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0638s; samplesPerSecond = 3920.5
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18274048 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0642s; samplesPerSecond = 3893.2
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18638428 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0564s; samplesPerSecond = 4431.5
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20111572 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0528s; samplesPerSecond = 4733.8
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13185034 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0504s; samplesPerSecond = 4962.1
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13692554 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0559s; samplesPerSecond = 4468.8
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15396802 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0672s; samplesPerSecond = 3719.4
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15347241 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0818s; samplesPerSecond = 3057.6
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14583887 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.2662s; samplesPerSecond = 939.1
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12333276 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0738s; samplesPerSecond = 3389.0
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.13958154 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0778s; samplesPerSecond = 3211.3
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12539844 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0772s; samplesPerSecond = 3239.1
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.19014404 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0475s; samplesPerSecond = 5259.1
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17959521 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0780s; samplesPerSecond = 3206.4
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18899121 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0469s; samplesPerSecond = 5333.6
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17525586 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0625s; samplesPerSecond = 4003.1
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14735645 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0940s; samplesPerSecond = 2658.9
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13705518 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0543s; samplesPerSecond = 4600.2
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.13610693 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0752s; samplesPerSecond = 3324.2
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13555811 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0583s; samplesPerSecond = 4291.1
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.14883594 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0598s; samplesPerSecond = 4180.7
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14724707 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0599s; samplesPerSecond = 4172.4
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13130469 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0664s; samplesPerSecond = 3764.2
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19636084 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0644s; samplesPerSecond = 3884.1
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15681836 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0651s; samplesPerSecond = 3841.0
05/03/2016 15:21:23: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16173864 * 10000; EvalClassificationError = 0.07520000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=2.87283s
05/03/2016 15:21:20: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.20732678 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0782s; samplesPerSecond = 3196.0
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.19684015 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0812s; samplesPerSecond = 3079.4
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.16083588 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0796s; samplesPerSecond = 3141.3
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13558752 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0811s; samplesPerSecond = 3083.5
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17992950 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0814s; samplesPerSecond = 3070.9
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17858063 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0812s; samplesPerSecond = 3079.3
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16847546 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0688s; samplesPerSecond = 3631.6
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16359399 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0547s; samplesPerSecond = 4572.7
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.19534705 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0521s; samplesPerSecond = 4796.2
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19363660 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0758s; samplesPerSecond = 3297.5
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.12703638 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0682s; samplesPerSecond = 3667.7
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.18622827 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0576s; samplesPerSecond = 4344.0
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.11595044 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0599s; samplesPerSecond = 4171.2
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16689380 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0650s; samplesPerSecond = 3845.2
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.15822559 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0631s; samplesPerSecond = 3964.2
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18381909 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0638s; samplesPerSecond = 3920.5
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18274048 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0642s; samplesPerSecond = 3893.2
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18638428 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0564s; samplesPerSecond = 4431.5
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20111572 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0528s; samplesPerSecond = 4733.8
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13185034 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0504s; samplesPerSecond = 4962.1
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13692554 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0559s; samplesPerSecond = 4468.8
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15396802 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0672s; samplesPerSecond = 3719.4
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15347241 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0818s; samplesPerSecond = 3057.6
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14583887 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.2662s; samplesPerSecond = 939.1
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12333276 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0738s; samplesPerSecond = 3389.0
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.13958154 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0778s; samplesPerSecond = 3211.3
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12539844 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0772s; samplesPerSecond = 3239.1
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.19014404 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0475s; samplesPerSecond = 5259.1
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17959521 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0780s; samplesPerSecond = 3206.4
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18899121 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0469s; samplesPerSecond = 5333.6
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17525586 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0625s; samplesPerSecond = 4003.1
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14735645 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0940s; samplesPerSecond = 2658.9
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13705518 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0543s; samplesPerSecond = 4600.2
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.13610693 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0752s; samplesPerSecond = 3324.2
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13555811 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0583s; samplesPerSecond = 4291.1
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.14883594 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0598s; samplesPerSecond = 4180.7
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14724707 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0599s; samplesPerSecond = 4172.4
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13130469 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0664s; samplesPerSecond = 3764.2
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19636084 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0644s; samplesPerSecond = 3884.1
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15681836 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0651s; samplesPerSecond = 3841.0
05/03/2016 15:21:23: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16173864 * 10000; EvalErrorPrediction = 0.07520000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=2.87283s
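The learningRatePerSample printed here, 0.0080000004, is the configured 0.008 after a round trip through single precision: 0.008 has no exact binary representation, and the nearest 32-bit float prints with that trailing digit. A quick check (NumPy assumed):

import numpy as np
print(f"{np.float32(0.008):.10f}")  # 0.0080000004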
05/03/2016 15:21:23: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_cpu/Models/simple.dnn.2'
05/03/2016 15:21:23: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:21:23: Starting minibatch loop.
05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18214960 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0604s; samplesPerSecond = 4138.7
05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13526825 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0622s; samplesPerSecond = 4020.6
05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14344995 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0640s; samplesPerSecond = 3906.0
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.12557471 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0628s; samplesPerSecond = 3978.7
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17627924 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0639s; samplesPerSecond = 3914.6
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17585291 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0644s; samplesPerSecond = 3884.2
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.14716791 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0628s; samplesPerSecond = 3979.1
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16757751 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0643s; samplesPerSecond = 3885.5
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10314917 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0642s; samplesPerSecond = 3895.3
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20322217 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0650s; samplesPerSecond = 3848.0
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16604797 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0642s; samplesPerSecond = 3892.3
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.15105725 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0651s; samplesPerSecond = 3839.4
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19206934 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0640s; samplesPerSecond = 3903.9
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13667065 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.2688s; samplesPerSecond = 930.0
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.20713037 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0472s; samplesPerSecond = 5299.3
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.12862158 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0625s; samplesPerSecond = 3998.5
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17174683 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0465s; samplesPerSecond = 5381.7
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16493628 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0526s; samplesPerSecond = 4753.8
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14843726 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0505s; samplesPerSecond = 4952.5
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12574292 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0505s; samplesPerSecond = 4951.4
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13455151 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0614s; samplesPerSecond = 4072.8
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16762988 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0495s; samplesPerSecond = 5055.0
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22347461 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0523s; samplesPerSecond = 4780.1
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18213623 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0542s; samplesPerSecond = 4611.6
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.19970923 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0539s; samplesPerSecond = 4638.8
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22695947 * 250; EvalClassificationError = 0.12800000 * 250; time = 0.0542s; samplesPerSecond = 4609.7
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12664502 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0541s; samplesPerSecond = 4625.3
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15838037 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0538s; samplesPerSecond = 4648.8
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11555566 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0581s; samplesPerSecond = 4305.4
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14157520 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0544s; samplesPerSecond = 4595.2
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18558350 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0541s; samplesPerSecond = 4622.4
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15083594 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0540s; samplesPerSecond = 4632.9
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12831787 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0541s; samplesPerSecond = 4624.1
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17656494 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0545s; samplesPerSecond = 4587.6
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14956396 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0625s; samplesPerSecond = 4000.3
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11451660 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0496s; samplesPerSecond = 5040.3
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16392383 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0496s; samplesPerSecond = 5036.0
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14811230 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0505s; samplesPerSecond = 4955.0
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16003760 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0588s; samplesPerSecond = 4255.2
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17969775 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0482s; samplesPerSecond = 5185.4
05/03/2016 15:21:26: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15964808 * 10000; EvalClassificationError = 0.07750000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=2.49695s
05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18214960 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0604s; samplesPerSecond = 4138.7
05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13526825 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0622s; samplesPerSecond = 4020.6
05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14344995 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0640s; samplesPerSecond = 3906.0
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.12557471 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0628s; samplesPerSecond = 3978.7
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17627924 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0639s; samplesPerSecond = 3914.6
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17585291 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0644s; samplesPerSecond = 3884.2
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.14716791 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0628s; samplesPerSecond = 3979.1
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16757751 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0643s; samplesPerSecond = 3885.5
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10314917 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0642s; samplesPerSecond = 3895.3
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20322217 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0650s; samplesPerSecond = 3848.0
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16604797 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0642s; samplesPerSecond = 3892.3
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.15105725 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0651s; samplesPerSecond = 3839.4
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19206934 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0640s; samplesPerSecond = 3903.9
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13667065 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.2688s; samplesPerSecond = 930.0
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.20713037 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0472s; samplesPerSecond = 5299.3
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.12862158 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0625s; samplesPerSecond = 3998.5
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17174683 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0465s; samplesPerSecond = 5381.7
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16493628 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0526s; samplesPerSecond = 4753.8
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14843726 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0505s; samplesPerSecond = 4952.5
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12574292 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0505s; samplesPerSecond = 4951.4
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13455151 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0614s; samplesPerSecond = 4072.8
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16762988 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0495s; samplesPerSecond = 5055.0
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22347461 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0523s; samplesPerSecond = 4780.1
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18213623 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0542s; samplesPerSecond = 4611.6
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.19970923 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0539s; samplesPerSecond = 4638.8
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22695947 * 250; EvalErrorPrediction = 0.12800000 * 250; time = 0.0542s; samplesPerSecond = 4609.7
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12664502 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0541s; samplesPerSecond = 4625.3
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15838037 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0538s; samplesPerSecond = 4648.8
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11555566 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0581s; samplesPerSecond = 4305.4
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14157520 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0544s; samplesPerSecond = 4595.2
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18558350 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0541s; samplesPerSecond = 4622.4
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15083594 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0540s; samplesPerSecond = 4632.9
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12831787 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0541s; samplesPerSecond = 4624.1
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17656494 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0545s; samplesPerSecond = 4587.6
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14956396 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0625s; samplesPerSecond = 4000.3
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11451660 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0496s; samplesPerSecond = 5040.3
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16392383 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0496s; samplesPerSecond = 5036.0
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14811230 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0505s; samplesPerSecond = 4955.0
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16003760 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0588s; samplesPerSecond = 4255.2
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17969775 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0482s; samplesPerSecond = 5185.4
05/03/2016 15:21:26: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15964808 * 10000; EvalErrorPrediction = 0.07750000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=2.49695s
05/03/2016 15:21:26: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_cpu/Models/simple.dnn'
05/03/2016 15:21:26: CNTKCommandTrainEnd: Simple_Demo_Train
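The samplesPerSecond field in the minibatch lines is simply the sample count divided by the elapsed time; recomputing it from the rounded "time" value reproduces the logged figure to within print precision. For the first epoch-3 minibatch above (a hypothetical recomputation, not part of the run):

print(250 / 0.0604)  # ~4139, logged as 4138.7 (the printed time is rounded)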
@@ -608,7 +608,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -637,7 +637,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -661,7 +661,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
0x2e83eb8: {[W2 Value[2 x 50]] }
0x2e87ac8: {[MVNormalizedFeatures Value[2 x *1]] }
0x2e87e78: {[W0*features Value[50 x *1]] }
@@ -676,7 +676,7 @@ Memory Sharing Structure:
0x2e8d298: {[B2 Value[2 x 1]] }
0x2e8f2c8: {[labels Value[2 x *1]] }
0x2e8f8e8: {[MeanOfFeatures Value[2]] }
0x2e91598: {[EvalClassificationError Value[1]] }
0x2e91598: {[EvalErrorPrediction Value[1]] }
0x2e916f8: {[CrossEntropyWithSoftmax Value[1]] }
0x2e91bb8: {[LogOfPrior Value[2]] }
0x2e93758: {[B0 Value[50 x 1]] }
@@ -686,7 +686,7 @@ Memory Sharing Structure:
0x2e985f8: {[W1 Value[50 x 50]] }
0x2e99178: {[features Value[2 x *1]] }
05/03/2016 15:21:26: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05970149 * 603; CrossEntropyWithSoftmax = 0.13085309 * 603; perplexity = 1.13980032
05/03/2016 15:21:26: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05970149 * 603; CrossEntropyWithSoftmax = 0.13085309 * 603; perplexity = 1.13980032
05/03/2016 15:21:26: Action "test" complete.
@@ -702,7 +702,7 @@ Post-processing network...
8 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -732,7 +732,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *2] -> [2 x 1 x *2]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *2], [2 x 1] -> [2 x 1 x *2]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *2] -> [2 x 1 x *2]
Validating --> Prior = Mean (labels) : [2 x *2] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -755,7 +755,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalClassificationError Gradient[1]] [EvalClassificationError Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalErrorPrediction Gradient[1]] [EvalErrorPrediction Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
0x2e82858: {[PosteriorProb Value[2 x 1 x *2]] }
0x2e83b58: {[labels Value[2 x *2]] }
0x2e84318: {[MeanOfFeatures Value[2]] }

View file

@@ -58,7 +58,7 @@ Simple_Demo_Train = [
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
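layerSizes = 2:50*2:2 expands to a 2-input network with two 50-unit hidden layers and a 2-class output; with layerTypes = "Sigmoid" and applyMeanVarNorm = true this is the W0/W1/W2 topology validated later in the log. A minimal NumPy sketch of that forward pass, with random placeholder weights and normalization statistics rather than the trained values:

import numpy as np

rng = np.random.default_rng(0)
sizes = [2, 50, 50, 2]                                  # layerSizes = 2:50*2:2
Ws = [rng.standard_normal((m, n)) for n, m in zip(sizes, sizes[1:])]
bs = [np.zeros((m, 1)) for m in sizes[1:]]
sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))

def forward(x, mean, inv_std):
    h = (x - mean) * inv_std                            # applyMeanVarNorm = true
    for W, b in zip(Ws[:-1], bs[:-1]):
        h = sigmoid(W @ h + b)                          # Sigmoid hidden layers
    return Ws[-1] @ h + bs[-1]                          # HLast, fed to the criteria

x = rng.standard_normal((2, 1))                         # one 2-dimensional sample
print(forward(x, np.zeros((2, 1)), np.ones((2, 1))).shape)  # (2, 1)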
@@ -157,7 +157,7 @@ Simple_Demo_Train = [
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -300,7 +300,7 @@ configparameters: Simple.cntk:Simple_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -356,7 +356,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -385,7 +385,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -409,14 +409,14 @@ Post-processing network complete.
05/03/2016 15:21:27: Evaluation criterion node(s):
05/03/2016 15:21:27: EvalClassificationError = ClassificationError
05/03/2016 15:21:27: EvalErrorPrediction = ErrorPrediction
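ErrorPrediction is the older name of the ClassificationError node that this diff restores in the baselines; both measure the fraction of samples whose highest-scoring output class differs from the label's class. A sketch of that computation (NumPy, column-per-sample layout as in the shapes above):

import numpy as np

def classification_error(outputs, labels):
    # outputs, labels: [num_classes x num_samples]; labels one-hot
    return np.mean(outputs.argmax(axis=0) != labels.argmax(axis=0))

outputs = np.array([[2.0, 0.1],
                    [0.5, 1.2]])        # 2 classes x 2 samples
labels  = np.array([[1.0, 1.0],
                    [0.0, 0.0]])        # both samples are class 0
print(classification_error(outputs, labels))  # 0.5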
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
(nil): {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
(nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
0x1ef9338: {[features Value[2 x *]] }
0x2b32ad8: {[MeanOfFeatures Value[2]] }
0x2b32fe8: {[InvStdOfFeatures Value[2]] }
@@ -429,7 +429,7 @@ Memory Sharing Structure:
0x3185898: {[Prior Value[2]] }
0x3186bd8: {[LogOfPrior Value[2]] }
0x318b378: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
0x318b498: {[EvalClassificationError Value[1]] }
0x318b498: {[EvalErrorPrediction Value[1]] }
0x318b798: {[ScaledLogLikelihood Value[2 x 1 x *]] }
0x318b8f8: {[CrossEntropyWithSoftmax Value[1]] }
0x3191148: {[B0 Value[50 x 1]] }
@@ -459,139 +459,139 @@ Memory Sharing Structure:
05/03/2016 15:21:28: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:21:28: Starting minibatch loop.
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70004456 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0055s; samplesPerSecond = 45495.9
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70309900 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54347.8
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70606104 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54241.7
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69845532 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54549.4
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73496533 * 250; EvalClassificationError = 0.57600000 * 250; time = 0.0046s; samplesPerSecond = 54136.0
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72522827 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0046s; samplesPerSecond = 54359.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73287500 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 54466.2
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70135547 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54872.7
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72466504 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 54194.7
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72187500 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0046s; samplesPerSecond = 54501.9
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69799023 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54788.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70696387 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0046s; samplesPerSecond = 54371.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69863965 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54300.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71772461 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0046s; samplesPerSecond = 54644.8
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69526270 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54525.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71436426 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70399316 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0046s; samplesPerSecond = 54573.2
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71745508 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 54716.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71963184 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0046s; samplesPerSecond = 54537.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70689941 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54336.0
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70425098 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54692.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70622754 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69729492 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54537.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75974219 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54680.7
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70631250 * 250; EvalClassificationError = 0.43600000 * 250; time = 0.0046s; samplesPerSecond = 54288.8
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70705664 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72660352 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71369727 * 250; EvalClassificationError = 0.55600000 * 250; time = 0.0046s; samplesPerSecond = 54537.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68916602 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0046s; samplesPerSecond = 54371.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69964844 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0046s; samplesPerSecond = 54218.2
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.69387891 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.68885742 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0046s; samplesPerSecond = 54573.2
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69388867 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54454.4
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.70363867 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65449219 * 250; EvalClassificationError = 0.44400000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64607031 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 54347.8
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.59492969 * 250; EvalClassificationError = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 54764.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.53965820 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54609.0
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.43681445 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0046s; samplesPerSecond = 54525.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37407422 * 250; EvalClassificationError = 0.12000000 * 250; time = 0.0046s; samplesPerSecond = 54466.2
05/03/2016 15:21:28: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68409629 * 10000; EvalClassificationError = 0.45780000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.1879s
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70004456 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0055s; samplesPerSecond = 45495.9
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70309900 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54347.8
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70606104 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54241.7
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69845532 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54549.4
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73496533 * 250; EvalErrorPrediction = 0.57600000 * 250; time = 0.0046s; samplesPerSecond = 54136.0
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72522827 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0046s; samplesPerSecond = 54359.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73287500 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 54466.2
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70135547 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54872.7
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72466504 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 54194.7
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72187500 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0046s; samplesPerSecond = 54501.9
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69799023 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54788.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70696387 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0046s; samplesPerSecond = 54371.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69863965 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54300.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71772461 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0046s; samplesPerSecond = 54644.8
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69526270 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54525.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71436426 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70399316 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0046s; samplesPerSecond = 54573.2
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71745508 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 54716.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71963184 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0046s; samplesPerSecond = 54537.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70689941 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54336.0
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70425098 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54692.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70622754 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69729492 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54537.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75974219 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54680.7
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70631250 * 250; EvalErrorPrediction = 0.43600000 * 250; time = 0.0046s; samplesPerSecond = 54288.8
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70705664 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72660352 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71369727 * 250; EvalErrorPrediction = 0.55600000 * 250; time = 0.0046s; samplesPerSecond = 54537.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68916602 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0046s; samplesPerSecond = 54371.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69964844 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0046s; samplesPerSecond = 54218.2
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.69387891 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.68885742 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0046s; samplesPerSecond = 54573.2
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69388867 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54454.4
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.70363867 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65449219 * 250; EvalErrorPrediction = 0.44400000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64607031 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 54347.8
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.59492969 * 250; EvalErrorPrediction = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 54764.5
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.53965820 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54609.0
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.43681445 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0046s; samplesPerSecond = 54525.6
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37407422 * 250; EvalErrorPrediction = 0.12000000 * 250; time = 0.0046s; samplesPerSecond = 54466.2
05/03/2016 15:21:28: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68409629 * 10000; EvalErrorPrediction = 0.45780000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.1879s
05/03/2016 15:21:28: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_gpu/Models/simple.dnn.1'
05/03/2016 15:21:28: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:21:28: Starting minibatch loop.
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27895840 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0046s; samplesPerSecond = 53902.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24395615 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54933.0
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.19587115 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16368213 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 55126.8
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.19700140 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54933.0
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.19580530 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54585.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.18257983 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17520911 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54752.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20164514 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0046s; samplesPerSecond = 54752.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19787024 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0046s; samplesPerSecond = 54466.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.13437573 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0045s; samplesPerSecond = 55090.3
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.19004956 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54848.6
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.12287280 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 54957.1
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16975903 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55175.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16102686 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54513.7
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18611646 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54800.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18469507 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 55334.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18472339 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54908.9
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20064648 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54597.1
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13324683 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13878418 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0045s; samplesPerSecond = 55078.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15587354 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54920.9
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15337378 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54812.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14797070 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 55199.8
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12512891 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0046s; samplesPerSecond = 54383.3
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14058545 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 54993.4
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12611963 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18970605 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54884.7
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17965479 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18866455 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0046s; samplesPerSecond = 54836.6
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17539941 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14742432 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54848.6
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13789502 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0046s; samplesPerSecond = 54788.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.13652100 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0045s; samplesPerSecond = 55224.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13619336 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54920.9
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.14909424 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54478.1
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14762256 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 55139.0
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13142578 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54860.7
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19570459 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0046s; samplesPerSecond = 54764.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15718604 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55005.5
05/03/2016 15:21:28: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16901047 * 10000; EvalClassificationError = 0.07510000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.184798s
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27895840 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0046s; samplesPerSecond = 53902.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24395615 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54933.0
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.19587115 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16368213 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 55126.8
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.19700140 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54933.0
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.19580530 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54585.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.18257983 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17520911 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54752.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20164514 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0046s; samplesPerSecond = 54752.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19787024 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0046s; samplesPerSecond = 54466.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.13437573 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0045s; samplesPerSecond = 55090.3
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.19004956 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54848.6
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.12287280 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 54957.1
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16975903 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55175.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16102686 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54513.7
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18611646 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54800.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18469507 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 55334.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18472339 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54908.9
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20064648 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54597.1
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13324683 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13878418 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0045s; samplesPerSecond = 55078.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15587354 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54920.9
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15337378 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54812.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14797070 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 55199.8
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12512891 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0046s; samplesPerSecond = 54383.3
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14058545 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 54993.4
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12611963 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18970605 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54884.7
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17965479 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18866455 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0046s; samplesPerSecond = 54836.6
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17539941 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14742432 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54848.6
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13789502 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0046s; samplesPerSecond = 54788.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.13652100 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0045s; samplesPerSecond = 55224.2
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13619336 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54920.9
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.14909424 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54478.1
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14762256 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 55139.0
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13142578 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54860.7
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19570459 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0046s; samplesPerSecond = 54764.5
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15718604 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55005.5
05/03/2016 15:21:28: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16901047 * 10000; EvalErrorPrediction = 0.07510000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.184798s
05/03/2016 15:21:28: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_gpu/Models/simple.dnn.2'
05/03/2016 15:21:28: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 15:21:28: Starting minibatch loop.
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18133401 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54124.3
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13605756 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0046s; samplesPerSecond = 54884.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14345651 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54668.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.12512610 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17690991 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54800.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17504150 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54740.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.14723834 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 55224.2
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16752893 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0045s; samplesPerSecond = 54993.4
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10317773 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0046s; samplesPerSecond = 54800.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20306372 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16637036 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 55066.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.15126868 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19167224 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54884.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13687085 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 55420.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.20709912 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0046s; samplesPerSecond = 54740.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.12918774 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0045s; samplesPerSecond = 54981.3
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17185107 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 55322.0
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16523242 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54908.9
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14880249 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54728.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12590967 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 54957.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13443018 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54872.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16726147 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54836.6
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22407422 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0045s; samplesPerSecond = 55041.8
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18191553 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0045s; samplesPerSecond = 55078.2
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.19983057 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54680.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22728223 * 250; EvalClassificationError = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 54692.6
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12720459 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0045s; samplesPerSecond = 55151.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15842871 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11558691 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14163428 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18560596 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0045s; samplesPerSecond = 54993.4
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15099561 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 55078.2
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12822461 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54395.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17662500 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0045s; samplesPerSecond = 55309.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14950781 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0046s; samplesPerSecond = 54945.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11450977 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0046s; samplesPerSecond = 54908.9
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16386768 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0045s; samplesPerSecond = 55260.8
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14811523 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 54981.3
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16021143 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54764.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17989551 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0045s; samplesPerSecond = 55151.1
05/03/2016 15:21:28: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15971016 * 10000; EvalClassificationError = 0.07740000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.184406s
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18133401 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54124.3
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13605756 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0046s; samplesPerSecond = 54884.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14345651 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54668.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.12512610 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17690991 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54800.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17504150 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54740.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.14723834 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 55224.2
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16752893 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0045s; samplesPerSecond = 54993.4
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10317773 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0046s; samplesPerSecond = 54800.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20306372 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16637036 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 55066.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.15126868 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19167224 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54884.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13687085 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 55420.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.20709912 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0046s; samplesPerSecond = 54740.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.12918774 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0045s; samplesPerSecond = 54981.3
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17185107 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 55322.0
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16523242 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54908.9
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14880249 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54728.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12590967 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 54957.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13443018 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54872.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16726147 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54836.6
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22407422 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0045s; samplesPerSecond = 55041.8
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18191553 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0045s; samplesPerSecond = 55078.2
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.19983057 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54680.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22728223 * 250; EvalErrorPrediction = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 54692.6
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12720459 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0045s; samplesPerSecond = 55151.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15842871 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11558691 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14163428 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18560596 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0045s; samplesPerSecond = 54993.4
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15099561 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 55078.2
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12822461 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54395.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17662500 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0045s; samplesPerSecond = 55309.7
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14950781 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0046s; samplesPerSecond = 54945.1
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11450977 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0046s; samplesPerSecond = 54908.9
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16386768 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0045s; samplesPerSecond = 55260.8
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14811523 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 54981.3
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16021143 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54764.5
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17989551 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0045s; samplesPerSecond = 55151.1
05/03/2016 15:21:28: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15971016 * 10000; EvalErrorPrediction = 0.07740000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.184406s
05/03/2016 15:21:28: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_gpu/Models/simple.dnn'
05/03/2016 15:21:29: CNTKCommandTrainEnd: Simple_Demo_Train
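
A note on reading the baseline lines above: each minibatch entry reports its criterion as `average * sampleCount` (here 10 minibatches of 25 samples each, hence `* 250`), and the epoch summary is the sample-weighted mean of those averages. The `momentum as time constant = 237.3 samples` lines are likewise consistent with tau = -mbSize / ln(momentum) for a 25-sample minibatch. A minimal sketch of both calculations (function names are illustrative, not CNTK API):

import math

# Roll per-minibatch (average, sampleCount) pairs up into the epoch average,
# mirroring the "criterion = average * sampleCount" lines above.
def epoch_average(minibatches):
    total = sum(avg * count for avg, count in minibatches)
    samples = sum(count for _, count in minibatches)
    return total / samples

print(epoch_average([(0.72522827, 250), (0.73287500, 250)]))  # ~0.729

# Momentum expressed as a time constant in samples: -mbSize / ln(momentum).
print(-25 / math.log(0.9))  # ~237.3, matching the epoch-start lines
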
@@ -609,7 +609,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -638,7 +638,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -662,11 +662,11 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
0x1efcc08: {[B2 Value[2 x 1]] }
0x1efd8c8: {[W0 Value[50 x 2]] }
0x1efee68: {[InvStdOfFeatures Value[2]] }
0x2b337e8: {[EvalClassificationError Value[1]] }
0x2b337e8: {[EvalErrorPrediction Value[1]] }
0x2b33948: {[CrossEntropyWithSoftmax Value[1]] }
0x2b33f08: {[LogOfPrior Value[2]] }
0x31808e8: {[W2 Value[2 x 50]] }
@@ -687,7 +687,7 @@ Memory Sharing Structure:
0x7273058: {[W2*H1 Value[2 x 1 x *1]] }
0x7273218: {[HLast Value[2 x 1 x *1]] }
05/03/2016 15:21:29: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05970149 * 603; CrossEntropyWithSoftmax = 0.13093129 * 603; perplexity = 1.13988946
05/03/2016 15:21:29: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05970149 * 603; CrossEntropyWithSoftmax = 0.13093129 * 603; perplexity = 1.13988946
05/03/2016 15:21:29: Action "test" complete.
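
The test-set perplexity reported above is exp of the per-sample average cross entropy: exp(0.13093129) ≈ 1.13988946, matching the logged value. A one-line check:

import math

# Perplexity = exp(average cross entropy per sample), as reported above.
print(math.exp(0.13093129))  # ≈ 1.1398894..., matching the logged perplexity
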
@@ -703,7 +703,7 @@ Post-processing network...
8 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -733,7 +733,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *2] -> [2 x 1 x *2]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *2], [2 x 1] -> [2 x 1 x *2]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *2] -> [2 x 1 x *2]
Validating --> Prior = Mean (labels) : [2 x *2] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -756,7 +756,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalClassificationError Gradient[1]] [EvalClassificationError Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalErrorPrediction Gradient[1]] [EvalErrorPrediction Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
0x1efcef8: {[features Value[2 x *2]] }
0x1efe2c8: {[labels Value[2 x *2]] }
0x1eff188: {[PosteriorProb Value[2 x 1 x *2]] }

View file

@@ -56,7 +56,7 @@ Simple_Demo_Train = [
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -155,7 +155,7 @@ Simple_Demo_Train = [
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -298,7 +298,7 @@ configparameters: Simple.cntk:Simple_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
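
For orientation, the SimpleNetworkBuilder blocks above (layerSizes = 2:50*2:2, Sigmoid layers, applyMeanVarNorm = true) describe a 2-input, two-hidden-layer (50 sigmoid units each), 2-class network with mean/variance input normalization and a softmax cross-entropy criterion, which matches the Validating output below (W0*features+B0 -> Sigmoid -> ... -> HLast). A rough NumPy sketch of that forward pass, not CNTK code; weight names follow the log, initialization and training are omitted:

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def forward(x, params, mean, inv_std):
    # applyMeanVarNorm = true: normalize features first
    x = (x - mean) * inv_std
    h1 = sigmoid(params["W0"] @ x + params["B0"])   # 2 -> 50
    h2 = sigmoid(params["W1"] @ h1 + params["B1"])  # 50 -> 50
    hlast = params["W2"] @ h2 + params["B2"]        # 50 -> 2 (logits, "HLast")
    # CrossEntropyWithSoftmax pairs these logits with a softmax over 2 classes
    p = np.exp(hlast - hlast.max())
    return p / p.sum()

rng = np.random.default_rng(0)
params = {"W0": rng.normal(size=(50, 2)), "B0": np.zeros(50),
          "W1": rng.normal(size=(50, 50)), "B1": np.zeros(50),
          "W2": rng.normal(size=(2, 50)), "B2": np.zeros(2)}
print(forward(np.array([0.1, -0.2]), params, mean=np.zeros(2), inv_std=np.ones(2)))
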
@@ -353,7 +353,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -382,7 +382,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -406,14 +406,14 @@ Post-processing network complete.
05/03/2016 13:12:46: Evaluation criterion node(s):
05/03/2016 13:12:46: EvalClassificationError = ClassificationError
05/03/2016 13:12:46: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
000000702B410E90: {[features Value[2 x *]] }
000000702B44E0C0: {[W0 Value[50 x 2]] }
000000702B4D76F0: {[H2 Value[50 x 1 x *]] [W1*H1 Gradient[50 x 1 x *]] }
@@ -428,7 +428,7 @@ Memory Sharing Structure:
000000702B4D8690: {[B0 Gradient[50 x 1]] [H1 Gradient[50 x 1 x *]] [W1*H1+B1 Gradient[50 x 1 x *]] [W2*H1 Value[2 x 1 x *]] }
000000702B4D8730: {[HLast Value[2 x 1 x *]] [W2 Gradient[2 x 50]] }
000000702B4D89B0: {[CrossEntropyWithSoftmax Value[1]] }
000000702B4D8AF0: {[EvalClassificationError Value[1]] }
000000702B4D8AF0: {[EvalErrorPrediction Value[1]] }
000000702B4D8B90: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
000000702B4D8F50: {[B2 Gradient[2 x 1]] }
000000702B4D91D0: {[ScaledLogLikelihood Value[2 x 1 x *]] }
@@ -456,139 +456,139 @@ Memory Sharing Structure:
05/03/2016 13:12:47: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 13:12:47: Starting minibatch loop.
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70511987 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0327s; samplesPerSecond = 7657.0
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69754895 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9726.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71056921 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0248s; samplesPerSecond = 10096.1
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72951074 * 250; EvalClassificationError = 0.56000000 * 250; time = 0.0245s; samplesPerSecond = 10210.3
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70946655 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0249s; samplesPerSecond = 10032.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72656787 * 250; EvalClassificationError = 0.54400000 * 250; time = 0.0248s; samplesPerSecond = 10065.2
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69337402 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0256s; samplesPerSecond = 9766.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73605176 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0259s; samplesPerSecond = 9662.6
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71453076 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0239s; samplesPerSecond = 10469.0
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75191992 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0255s; samplesPerSecond = 9802.0
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75975146 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0248s; samplesPerSecond = 10100.6
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73172168 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0255s; samplesPerSecond = 9808.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76840820 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0261s; samplesPerSecond = 9593.2
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70464746 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0255s; samplesPerSecond = 9807.4
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70557227 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0242s; samplesPerSecond = 10340.4
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72711816 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0249s; samplesPerSecond = 10049.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70076660 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0247s; samplesPerSecond = 10117.4
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69409766 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0254s; samplesPerSecond = 9834.0
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69139941 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0243s; samplesPerSecond = 10275.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73361621 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0255s; samplesPerSecond = 9802.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72225879 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0246s; samplesPerSecond = 10146.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70356348 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0243s; samplesPerSecond = 10286.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69928613 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0252s; samplesPerSecond = 9909.2
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72360938 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0244s; samplesPerSecond = 10227.0
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69871875 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0244s; samplesPerSecond = 10243.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69114844 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0248s; samplesPerSecond = 10081.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68648047 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0254s; samplesPerSecond = 9844.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69657227 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0258s; samplesPerSecond = 9679.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71585547 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0255s; samplesPerSecond = 9798.2
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69730664 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0260s; samplesPerSecond = 9609.1
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70432422 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0265s; samplesPerSecond = 9431.1
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.69991797 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9722.7
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.68696875 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0259s; samplesPerSecond = 9647.3
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.67331445 * 250; EvalClassificationError = 0.37200000 * 250; time = 0.0267s; samplesPerSecond = 9364.7
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65711328 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0258s; samplesPerSecond = 9700.1
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64534375 * 250; EvalClassificationError = 0.44800000 * 250; time = 0.0260s; samplesPerSecond = 9608.0
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.61021875 * 250; EvalClassificationError = 0.36400000 * 250; time = 0.0263s; samplesPerSecond = 9515.5
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.54191016 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0229s; samplesPerSecond = 10907.5
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.45624414 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0239s; samplesPerSecond = 10479.5
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37636133 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0229s; samplesPerSecond = 10917.0
05/03/2016 13:12:48: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68695688 * 10000; EvalClassificationError = 0.45550000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=1.01718s
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70511987 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0327s; samplesPerSecond = 7657.0
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69754895 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9726.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71056921 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0248s; samplesPerSecond = 10096.1
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72951074 * 250; EvalErrorPrediction = 0.56000000 * 250; time = 0.0245s; samplesPerSecond = 10210.3
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70946655 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0249s; samplesPerSecond = 10032.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72656787 * 250; EvalErrorPrediction = 0.54400000 * 250; time = 0.0248s; samplesPerSecond = 10065.2
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69337402 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0256s; samplesPerSecond = 9766.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73605176 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0259s; samplesPerSecond = 9662.6
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71453076 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0239s; samplesPerSecond = 10469.0
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75191992 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0255s; samplesPerSecond = 9802.0
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75975146 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0248s; samplesPerSecond = 10100.6
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73172168 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0255s; samplesPerSecond = 9808.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76840820 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0261s; samplesPerSecond = 9593.2
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70464746 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0255s; samplesPerSecond = 9807.4
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70557227 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0242s; samplesPerSecond = 10340.4
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72711816 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0249s; samplesPerSecond = 10049.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70076660 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0247s; samplesPerSecond = 10117.4
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69409766 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0254s; samplesPerSecond = 9834.0
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69139941 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0243s; samplesPerSecond = 10275.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73361621 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0255s; samplesPerSecond = 9802.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72225879 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0246s; samplesPerSecond = 10146.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70356348 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0243s; samplesPerSecond = 10286.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69928613 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0252s; samplesPerSecond = 9909.2
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72360938 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0244s; samplesPerSecond = 10227.0
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69871875 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0244s; samplesPerSecond = 10243.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69114844 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0248s; samplesPerSecond = 10081.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68648047 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0254s; samplesPerSecond = 9844.5
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69657227 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0258s; samplesPerSecond = 9679.8
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71585547 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0255s; samplesPerSecond = 9798.2
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69730664 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0260s; samplesPerSecond = 9609.1
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70432422 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0265s; samplesPerSecond = 9431.1
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.69991797 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9722.7
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.68696875 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0259s; samplesPerSecond = 9647.3
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.67331445 * 250; EvalErrorPrediction = 0.37200000 * 250; time = 0.0267s; samplesPerSecond = 9364.7
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65711328 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0258s; samplesPerSecond = 9700.1
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64534375 * 250; EvalErrorPrediction = 0.44800000 * 250; time = 0.0260s; samplesPerSecond = 9608.0
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.61021875 * 250; EvalErrorPrediction = 0.36400000 * 250; time = 0.0263s; samplesPerSecond = 9515.5
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.54191016 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0229s; samplesPerSecond = 10907.5
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.45624414 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0239s; samplesPerSecond = 10479.5
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37636133 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0229s; samplesPerSecond = 10917.0
05/03/2016 13:12:48: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68695688 * 10000; EvalErrorPrediction = 0.45550000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=1.01718s
05/03/2016 13:12:48: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503141245.787579\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_cpu/Models/simple.dnn.1'
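Each progress line reports criteria as "average * sampleCount", so the "Finished Epoch" summary is simply the sample-weighted mean of the minibatch averages (here every minibatch group holds 250 samples), and samplesPerSecond is sampleCount divided by the printed time, up to rounding. A small parsing sketch, assuming exactly this log format:

    import re

    # Matches fragments such as "CrossEntropyWithSoftmax = 0.70511987 * 250".
    METRIC = re.compile(r"(\w+) = ([0-9.]+) \* (\d+)")

    def epoch_average(lines, name):
        """Sample-weighted mean of one criterion over an epoch's progress lines."""
        total, samples = 0.0, 0
        for line in lines:
            for metric, avg, count in METRIC.findall(line):
                if metric == name:
                    total += float(avg) * int(count)
                    samples += int(count)
        return total / samples

    # Fed the 40 epoch-1 progress lines above, this reproduces the summary:
    # CrossEntropyWithSoftmax ~ 0.68695688 over 10000 samples.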
05/03/2016 13:12:48: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 13:12:48: Starting minibatch loop.
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.28579105 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0228s; samplesPerSecond = 10943.3
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.27768619 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0230s; samplesPerSecond = 10860.1
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.23309790 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0223s; samplesPerSecond = 11187.2
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.20937585 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0221s; samplesPerSecond = 11327.1
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.20192059 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0225s; samplesPerSecond = 11116.5
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.21303992 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0232s; samplesPerSecond = 10762.9
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.17823340 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0247s; samplesPerSecond = 10120.6
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.18892688 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0231s; samplesPerSecond = 10816.4
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.14161328 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0225s; samplesPerSecond = 11100.8
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.15813574 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0226s; samplesPerSecond = 11077.1
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.21082446 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0233s; samplesPerSecond = 10728.2
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.16117041 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0229s; samplesPerSecond = 10928.0
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15665234 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0223s; samplesPerSecond = 11195.2
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13067773 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0226s; samplesPerSecond = 11047.3
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16602710 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0212s; samplesPerSecond = 11796.9
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.14975708 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0215s; samplesPerSecond = 11641.4
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22351709 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0214s; samplesPerSecond = 11708.5
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18010474 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0207s; samplesPerSecond = 12085.5
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15341577 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0207s; samplesPerSecond = 12072.6
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17195337 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0209s; samplesPerSecond = 11976.6
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15546069 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0217s; samplesPerSecond = 11534.6
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16008325 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0214s; samplesPerSecond = 11689.3
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15944043 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0209s; samplesPerSecond = 11981.2
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15336865 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0207s; samplesPerSecond = 12102.4
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14822266 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0212s; samplesPerSecond = 11766.4
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14999512 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0211s; samplesPerSecond = 11833.2
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15481982 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0208s; samplesPerSecond = 11992.7
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17656738 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0204s; samplesPerSecond = 12229.1
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22373242 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0213s; samplesPerSecond = 11738.7
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16403760 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0211s; samplesPerSecond = 11856.8
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17322168 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0211s; samplesPerSecond = 11868.0
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13165430 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0205s; samplesPerSecond = 12202.3
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14016992 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0208s; samplesPerSecond = 11993.9
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18369678 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0214s; samplesPerSecond = 11657.7
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15161035 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0215s; samplesPerSecond = 11612.8
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.18919824 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0215s; samplesPerSecond = 11632.8
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17373975 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0212s; samplesPerSecond = 11818.1
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15033740 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0208s; samplesPerSecond = 12036.6
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12107568 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0207s; samplesPerSecond = 12075.5
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15386328 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0227s; samplesPerSecond = 10997.7
05/03/2016 13:12:48: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.17515541 * 10000; EvalClassificationError = 0.07440000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.87149s
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.28579105 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0228s; samplesPerSecond = 10943.3
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.27768619 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0230s; samplesPerSecond = 10860.1
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.23309790 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0223s; samplesPerSecond = 11187.2
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.20937585 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0221s; samplesPerSecond = 11327.1
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.20192059 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0225s; samplesPerSecond = 11116.5
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.21303992 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0232s; samplesPerSecond = 10762.9
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.17823340 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0247s; samplesPerSecond = 10120.6
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.18892688 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0231s; samplesPerSecond = 10816.4
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.14161328 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0225s; samplesPerSecond = 11100.8
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.15813574 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0226s; samplesPerSecond = 11077.1
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.21082446 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0233s; samplesPerSecond = 10728.2
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.16117041 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0229s; samplesPerSecond = 10928.0
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15665234 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0223s; samplesPerSecond = 11195.2
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13067773 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0226s; samplesPerSecond = 11047.3
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16602710 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0212s; samplesPerSecond = 11796.9
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.14975708 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0215s; samplesPerSecond = 11641.4
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22351709 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0214s; samplesPerSecond = 11708.5
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18010474 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0207s; samplesPerSecond = 12085.5
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15341577 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0207s; samplesPerSecond = 12072.6
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17195337 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0209s; samplesPerSecond = 11976.6
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15546069 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0217s; samplesPerSecond = 11534.6
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16008325 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0214s; samplesPerSecond = 11689.3
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15944043 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0209s; samplesPerSecond = 11981.2
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15336865 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0207s; samplesPerSecond = 12102.4
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14822266 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0212s; samplesPerSecond = 11766.4
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14999512 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0211s; samplesPerSecond = 11833.2
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15481982 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0208s; samplesPerSecond = 11992.7
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17656738 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0204s; samplesPerSecond = 12229.1
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22373242 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0213s; samplesPerSecond = 11738.7
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16403760 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0211s; samplesPerSecond = 11856.8
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17322168 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0211s; samplesPerSecond = 11868.0
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13165430 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0205s; samplesPerSecond = 12202.3
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14016992 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0208s; samplesPerSecond = 11993.9
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18369678 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0214s; samplesPerSecond = 11657.7
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15161035 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0215s; samplesPerSecond = 11612.8
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.18919824 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0215s; samplesPerSecond = 11632.8
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17373975 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0212s; samplesPerSecond = 11818.1
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15033740 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0208s; samplesPerSecond = 12036.6
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12107568 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0207s; samplesPerSecond = 12075.5
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15386328 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0227s; samplesPerSecond = 10997.7
05/03/2016 13:12:48: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.17515541 * 10000; EvalErrorPrediction = 0.07440000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.87149s
05/03/2016 13:12:48: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503141245.787579\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_cpu/Models/simple.dnn.2'
05/03/2016 13:12:48: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 13:12:48: Starting minibatch loop.
05/03/2016 13:12:48: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10671188 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0217s; samplesPerSecond = 11511.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17609265 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0205s; samplesPerSecond = 12183.8
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14152701 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0208s; samplesPerSecond = 12001.9
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16348053 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0213s; samplesPerSecond = 11748.1
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11764551 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0219s; samplesPerSecond = 11435.4
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16246954 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0212s; samplesPerSecond = 11811.4
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16140149 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0207s; samplesPerSecond = 12078.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19747632 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0202s; samplesPerSecond = 12391.0
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20041309 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0214s; samplesPerSecond = 11659.9
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13657080 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0208s; samplesPerSecond = 12033.7
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20124377 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0203s; samplesPerSecond = 12293.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17898120 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0206s; samplesPerSecond = 12144.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16037830 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0232s; samplesPerSecond = 10779.1
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16276050 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0214s; samplesPerSecond = 11704.7
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19882275 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0218s; samplesPerSecond = 11454.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10263354 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0208s; samplesPerSecond = 12041.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17038770 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0213s; samplesPerSecond = 11725.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16624731 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0209s; samplesPerSecond = 11958.3
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12664160 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0213s; samplesPerSecond = 11723.3
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11944995 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0213s; samplesPerSecond = 11733.8
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12949756 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0208s; samplesPerSecond = 11996.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18147778 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0222s; samplesPerSecond = 11242.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13172412 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0233s; samplesPerSecond = 10719.0
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19600269 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0238s; samplesPerSecond = 10521.0
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15840479 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0226s; samplesPerSecond = 11084.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11888281 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0225s; samplesPerSecond = 11129.9
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13710742 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0222s; samplesPerSecond = 11251.1
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20026318 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0233s; samplesPerSecond = 10730.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.18824951 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0223s; samplesPerSecond = 11227.9
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16653223 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0225s; samplesPerSecond = 11096.3
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.11935254 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0229s; samplesPerSecond = 10918.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16085400 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0225s; samplesPerSecond = 11132.9
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.16112646 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0219s; samplesPerSecond = 11439.6
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12345313 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0229s; samplesPerSecond = 10904.6
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13502686 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0226s; samplesPerSecond = 11075.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20874756 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0224s; samplesPerSecond = 11185.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16650537 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0227s; samplesPerSecond = 11009.3
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14995752 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0206s; samplesPerSecond = 12134.7
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16497070 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0209s; samplesPerSecond = 11953.7
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16843018 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0210s; samplesPerSecond = 11912.1
05/03/2016 13:12:49: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15794755 * 10000; EvalClassificationError = 0.07480000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.871499s
05/03/2016 13:12:48: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10671188 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0217s; samplesPerSecond = 11511.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17609265 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0205s; samplesPerSecond = 12183.8
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14152701 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0208s; samplesPerSecond = 12001.9
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16348053 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0213s; samplesPerSecond = 11748.1
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11764551 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0219s; samplesPerSecond = 11435.4
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16246954 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0212s; samplesPerSecond = 11811.4
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16140149 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0207s; samplesPerSecond = 12078.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19747632 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0202s; samplesPerSecond = 12391.0
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20041309 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0214s; samplesPerSecond = 11659.9
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13657080 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0208s; samplesPerSecond = 12033.7
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20124377 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0203s; samplesPerSecond = 12293.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17898120 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0206s; samplesPerSecond = 12144.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16037830 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0232s; samplesPerSecond = 10779.1
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16276050 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0214s; samplesPerSecond = 11704.7
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19882275 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0218s; samplesPerSecond = 11454.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10263354 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0208s; samplesPerSecond = 12041.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17038770 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0213s; samplesPerSecond = 11725.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16624731 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0209s; samplesPerSecond = 11958.3
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12664160 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0213s; samplesPerSecond = 11723.3
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11944995 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0213s; samplesPerSecond = 11733.8
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12949756 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0208s; samplesPerSecond = 11996.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18147778 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0222s; samplesPerSecond = 11242.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13172412 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0233s; samplesPerSecond = 10719.0
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19600269 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0238s; samplesPerSecond = 10521.0
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15840479 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0226s; samplesPerSecond = 11084.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11888281 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0225s; samplesPerSecond = 11129.9
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13710742 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0222s; samplesPerSecond = 11251.1
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20026318 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0233s; samplesPerSecond = 10730.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.18824951 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0223s; samplesPerSecond = 11227.9
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16653223 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0225s; samplesPerSecond = 11096.3
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.11935254 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0229s; samplesPerSecond = 10918.5
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16085400 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0225s; samplesPerSecond = 11132.9
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.16112646 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0219s; samplesPerSecond = 11439.6
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12345313 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0229s; samplesPerSecond = 10904.6
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13502686 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0226s; samplesPerSecond = 11075.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20874756 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0224s; samplesPerSecond = 11185.2
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16650537 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0227s; samplesPerSecond = 11009.3
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14995752 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0206s; samplesPerSecond = 12134.7
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16497070 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0209s; samplesPerSecond = 11953.7
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16843018 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0210s; samplesPerSecond = 11912.1
05/03/2016 13:12:49: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15794755 * 10000; EvalErrorPrediction = 0.07480000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.871499s
05/03/2016 13:12:49: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503141245.787579\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_cpu/Models/simple.dnn'
05/03/2016 13:12:49: CNTKCommandTrainEnd: Simple_Demo_Train
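Epochs 2 and 3 log learningRatePerSample = 0.0080000004 although the configured rate is 0.008; that is just 0.008 rounded to single precision. A quick check, assuming the rate is stored as a 32-bit float (which is what the printed digits suggest):

    import numpy as np

    lr = np.float32(0.008)            # the configured 0.008 in single precision
    print(f"{float(lr):.10g}")        # 0.00800000038 -- not exactly 0.008
    print(f"{float(lr):.8g}")         # 0.0080000004  -- matches the log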
@@ -606,7 +606,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -635,7 +635,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -659,7 +659,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
00000070343C5200: {[InvStdOfFeatures Value[2]] }
00000070343C5340: {[Prior Value[2]] }
00000070343C53E0: {[W0 Value[50 x 2]] }
@@ -671,7 +671,7 @@ Memory Sharing Structure:
000000703442D030: {[HLast Value[2 x 1 x *1]] }
000000703442D0D0: {[W0*features Value[50 x *1]] }
000000703442D170: {[W1*H1+B1 Value[50 x 1 x *1]] }
000000703442D2B0: {[EvalClassificationError Value[1]] }
000000703442D2B0: {[EvalErrorPrediction Value[1]] }
000000703442D530: {[CrossEntropyWithSoftmax Value[1]] }
000000703442D5D0: {[W2 Value[2 x 50]] }
000000703442D670: {[LogOfPrior Value[2]] }
@@ -684,7 +684,7 @@ Memory Sharing Structure:
0000007034432340: {[B0 Value[50 x 1]] }
0000007034432480: {[B2 Value[2 x 1]] }
05/03/2016 13:12:50: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12474995 * 603; perplexity = 1.13286515
05/03/2016 13:12:50: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12474995 * 603; perplexity = 1.13286515
05/03/2016 13:12:50: Action "test" complete.
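The perplexity on the final results line is the exponential of the average cross entropy, worked out here from the logged values (agreement is up to rounding of the printed cross entropy):

\[
\text{perplexity} = e^{\overline{\mathrm{CE}}} = e^{0.12474995} \approx 1.13286515 .
\]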
@@ -700,7 +700,7 @@ Post-processing network...
8 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -730,7 +730,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *2] -> [2 x 1 x *2]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *2], [2 x 1] -> [2 x 1 x *2]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *2] -> [2 x 1 x *2]
Validating --> Prior = Mean (labels) : [2 x *2] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -753,7 +753,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalClassificationError Gradient[1]] [EvalClassificationError Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalErrorPrediction Gradient[1]] [EvalErrorPrediction Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
000000702E3275E0: {[H2 Value[50 x 1 x *2]] }
000000702E327680: {[W2*H1 Value[2 x 1 x *2]] }
000000702E3277C0: {[LogOfPrior Value[2]] }

View File

@@ -56,7 +56,7 @@ Simple_Demo_Train = [
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
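This SimpleNetworkBuilder block encodes the whole topology: layerSizes = 2:50*2:2 means a 2-dimensional input, two Sigmoid hidden layers of 50 units each, and a 2-class output, trained with cross entropy after softmax and scored with the classification-error criterion; the "Validating" lines elsewhere in this log spell out the same shapes. A minimal numpy sketch of that forward pass with hypothetical parameter values (the demo also mean/variance-normalizes the features first, omitted here):

    import numpy as np

    rng = np.random.default_rng(0)
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))

    # Shapes as validated: W0 [50 x 2], W1 [50 x 50], W2 [2 x 50].
    W0, b0 = rng.standard_normal((50, 2)), np.zeros((50, 1))
    W1, b1 = rng.standard_normal((50, 50)), np.zeros((50, 1))
    W2, b2 = rng.standard_normal((2, 50)), np.zeros((2, 1))

    x  = rng.standard_normal((2, 1))         # one feature vector
    h1 = sigmoid(W0 @ x + b0)                # H1 = Sigmoid(W0*features + B0)
    h2 = sigmoid(W1 @ h1 + b1)               # H2 = Sigmoid(W1*H1 + B1)
    z  = W2 @ h2 + b2                        # HLast = W2*H1 + B2
    p  = np.exp(z - z.max()); p /= p.sum()   # PosteriorProb = Softmax(HLast)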
@@ -155,7 +155,7 @@ Simple_Demo_Train = [
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -298,7 +298,7 @@ configparameters: Simple.cntk:Simple_Demo_Train=[
SimpleNetworkBuilder = [
layerSizes = 2:50*2:2
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
initValueScale = 1.0
applyMeanVarNorm = true
@@ -354,7 +354,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -383,7 +383,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
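In these shape signatures, *, *1 and *2 denote the dynamic (minibatch/sequence) axis, numbered separately per network instance; tensors without it broadcast across that axis, which is why Plus([2 x 1 x *], [2 x 1]) -> [2 x 1 x *] validates. A numpy analogue, treating the dynamic axis as a trailing batch dimension of hypothetical size 250:

    import numpy as np

    batch = 250                          # the dynamic axis "*" for one minibatch
    w2h1  = np.zeros((2, 1, batch))      # [2 x 1 x *]
    b2    = np.ones((2, 1))              # [2 x 1], no dynamic axis
    hlast = w2h1 + b2[:, :, None]        # bias broadcasts over "*"
    print(hlast.shape)                   # (2, 1, 250)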
@@ -407,14 +407,14 @@ Post-processing network complete.
05/03/2016 13:01:59: Evaluation criterion node(s):
05/03/2016 13:01:59: EvalClassificationError = ClassificationError
05/03/2016 13:01:59: EvalErrorPrediction = ErrorPrediction
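ErrorPrediction — the same node the other baseline lines call ClassificationError — reports the fraction of samples whose predicted class (the argmax over HLast) differs from the label's argmax; an entry like "EvalErrorPrediction = 0.55200000 * 250" therefore means 138 of 250 samples were misclassified. A sketch of that computation, not CNTK's code:

    import numpy as np

    def classification_error(labels, hlast):
        """labels, hlast: [numClasses x numSamples] arrays; returns the
        fraction of columns whose argmaxes disagree."""
        return float(np.mean(labels.argmax(axis=0) != hlast.argmax(axis=0)))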
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
000000501A590FF0: {[W2 Value[2 x 50]] }
000000501A591090: {[W0 Value[50 x 2]] }
000000501A5919F0: {[B1 Value[50 x 1]] }
@@ -427,7 +427,7 @@ Memory Sharing Structure:
000000501A5A1180: {[ScaledLogLikelihood Value[2 x 1 x *]] }
000000501A5A1220: {[B0 Gradient[50 x 1]] [H1 Gradient[50 x 1 x *]] [W1*H1+B1 Gradient[50 x 1 x *]] [W2*H1 Value[2 x 1 x *]] }
000000501A5A17C0: {[W0 Gradient[50 x 2]] [W0*features+B0 Value[50 x 1 x *]] }
000000501A5A1900: {[EvalClassificationError Value[1]] }
000000501A5A1900: {[EvalErrorPrediction Value[1]] }
000000501A5A19A0: {[W0*features Value[50 x *]] }
000000501A5A1A40: {[W2*H1 Gradient[2 x 1 x *]] }
000000501A5A1F40: {[MVNormalizedFeatures Value[2 x *]] }
@@ -457,139 +457,139 @@ Memory Sharing Structure:
05/03/2016 13:01:59: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 13:01:59: Starting minibatch loop.
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70650452 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0123s; samplesPerSecond = 20247.8
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69701831 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0112s; samplesPerSecond = 22393.4
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71089587 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0126s; samplesPerSecond = 19907.6
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72980273 * 250; EvalClassificationError = 0.56000000 * 250; time = 0.0113s; samplesPerSecond = 22042.0
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70902783 * 250; EvalClassificationError = 0.52800000 * 250; time = 0.0131s; samplesPerSecond = 19088.3
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72657300 * 250; EvalClassificationError = 0.54400000 * 250; time = 0.0138s; samplesPerSecond = 18059.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69319678 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0148s; samplesPerSecond = 16917.0
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73563477 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0164s; samplesPerSecond = 15236.5
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71463281 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0123s; samplesPerSecond = 20321.9
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75213428 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0167s; samplesPerSecond = 14944.1
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75931445 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0131s; samplesPerSecond = 19105.8
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73075293 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0132s; samplesPerSecond = 18886.5
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76701953 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0128s; samplesPerSecond = 19574.1
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70451270 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0128s; samplesPerSecond = 19467.4
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70539941 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0143s; samplesPerSecond = 17444.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72700293 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0123s; samplesPerSecond = 20391.5
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70096191 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0143s; samplesPerSecond = 17465.4
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69437305 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0117s; samplesPerSecond = 21367.5
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69161621 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0137s; samplesPerSecond = 18200.3
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73388281 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0115s; samplesPerSecond = 21782.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72255664 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0127s; samplesPerSecond = 19745.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70414551 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0131s; samplesPerSecond = 19017.2
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69976758 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0137s; samplesPerSecond = 18191.1
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72419141 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0143s; samplesPerSecond = 17444.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69943945 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0109s; samplesPerSecond = 22891.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69206445 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0133s; samplesPerSecond = 18739.2
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68771680 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0130s; samplesPerSecond = 19291.6
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69878516 * 250; EvalClassificationError = 0.44000000 * 250; time = 0.0130s; samplesPerSecond = 19230.8
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71889844 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0118s; samplesPerSecond = 21168.5
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.70086523 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0128s; samplesPerSecond = 19577.1
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70878320 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0129s; samplesPerSecond = 19432.6
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.70674414 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0126s; samplesPerSecond = 19767.5
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69707422 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0121s; samplesPerSecond = 20736.6
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68588281 * 250; EvalClassificationError = 0.40800000 * 250; time = 0.0124s; samplesPerSecond = 20109.4
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.67734766 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0127s; samplesPerSecond = 19727.0
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.67958008 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0127s; samplesPerSecond = 19615.5
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.66424805 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0117s; samplesPerSecond = 21292.9
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.62412500 * 250; EvalClassificationError = 0.20400000 * 250; time = 0.0127s; samplesPerSecond = 19624.8
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.58007422 * 250; EvalClassificationError = 0.16000000 * 250; time = 0.0130s; samplesPerSecond = 19157.1
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.52764648 * 250; EvalClassificationError = 0.19200000 * 250; time = 0.0143s; samplesPerSecond = 17521.7
05/03/2016 13:02:00: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.69975483 * 10000; EvalClassificationError = 0.46850000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.526194s
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70650452 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0123s; samplesPerSecond = 20247.8
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69701831 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0112s; samplesPerSecond = 22393.4
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71089587 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0126s; samplesPerSecond = 19907.6
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72980273 * 250; EvalErrorPrediction = 0.56000000 * 250; time = 0.0113s; samplesPerSecond = 22042.0
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70902783 * 250; EvalErrorPrediction = 0.52800000 * 250; time = 0.0131s; samplesPerSecond = 19088.3
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72657300 * 250; EvalErrorPrediction = 0.54400000 * 250; time = 0.0138s; samplesPerSecond = 18059.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69319678 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0148s; samplesPerSecond = 16917.0
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73563477 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0164s; samplesPerSecond = 15236.5
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71463281 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0123s; samplesPerSecond = 20321.9
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75213428 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0167s; samplesPerSecond = 14944.1
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75931445 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0131s; samplesPerSecond = 19105.8
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73075293 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0132s; samplesPerSecond = 18886.5
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76701953 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0128s; samplesPerSecond = 19574.1
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70451270 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0128s; samplesPerSecond = 19467.4
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70539941 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0143s; samplesPerSecond = 17444.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72700293 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0123s; samplesPerSecond = 20391.5
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70096191 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0143s; samplesPerSecond = 17465.4
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69437305 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0117s; samplesPerSecond = 21367.5
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69161621 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0137s; samplesPerSecond = 18200.3
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73388281 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0115s; samplesPerSecond = 21782.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72255664 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0127s; samplesPerSecond = 19745.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70414551 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0131s; samplesPerSecond = 19017.2
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69976758 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0137s; samplesPerSecond = 18191.1
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72419141 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0143s; samplesPerSecond = 17444.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69943945 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0109s; samplesPerSecond = 22891.7
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69206445 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0133s; samplesPerSecond = 18739.2
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68771680 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0130s; samplesPerSecond = 19291.6
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69878516 * 250; EvalErrorPrediction = 0.44000000 * 250; time = 0.0130s; samplesPerSecond = 19230.8
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71889844 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0118s; samplesPerSecond = 21168.5
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.70086523 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0128s; samplesPerSecond = 19577.1
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70878320 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0129s; samplesPerSecond = 19432.6
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.70674414 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0126s; samplesPerSecond = 19767.5
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69707422 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0121s; samplesPerSecond = 20736.6
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68588281 * 250; EvalErrorPrediction = 0.40800000 * 250; time = 0.0124s; samplesPerSecond = 20109.4
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.67734766 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0127s; samplesPerSecond = 19727.0
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.67958008 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0127s; samplesPerSecond = 19615.5
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.66424805 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0117s; samplesPerSecond = 21292.9
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.62412500 * 250; EvalErrorPrediction = 0.20400000 * 250; time = 0.0127s; samplesPerSecond = 19624.8
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.58007422 * 250; EvalErrorPrediction = 0.16000000 * 250; time = 0.0130s; samplesPerSecond = 19157.1
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.52764648 * 250; EvalErrorPrediction = 0.19200000 * 250; time = 0.0143s; samplesPerSecond = 17521.7
05/03/2016 13:02:00: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.69975483 * 10000; EvalErrorPrediction = 0.46850000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.526194s
05/03/2016 13:02:00: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503140157.802427\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_gpu/Models/simple.dnn.1'
05/03/2016 13:02:00: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 13:02:00: Starting minibatch loop.
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.44832977 * 250; EvalClassificationError = 0.15200000 * 250; time = 0.0124s; samplesPerSecond = 20205.3
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.40085291 * 250; EvalClassificationError = 0.12400000 * 250; time = 0.0142s; samplesPerSecond = 17631.7
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.33487201 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0129s; samplesPerSecond = 19405.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.29081885 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0125s; samplesPerSecond = 20016.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.26279236 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0118s; samplesPerSecond = 21188.2
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.25220630 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0138s; samplesPerSecond = 18158.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.20988293 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0129s; samplesPerSecond = 19447.7
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.21577441 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0148s; samplesPerSecond = 16846.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.16622900 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0157s; samplesPerSecond = 15967.3
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.17637866 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0144s; samplesPerSecond = 17315.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.22185278 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0123s; samplesPerSecond = 20366.6
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17055811 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0151s; samplesPerSecond = 16564.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16481055 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0140s; samplesPerSecond = 17910.9
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13871704 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0156s; samplesPerSecond = 16005.1
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16922363 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0143s; samplesPerSecond = 17454.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15403345 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0135s; samplesPerSecond = 18485.7
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22255859 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0108s; samplesPerSecond = 23079.8
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18146851 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0133s; samplesPerSecond = 18843.7
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15611523 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0131s; samplesPerSecond = 19081.1
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17320215 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0137s; samplesPerSecond = 18192.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15727930 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0117s; samplesPerSecond = 21404.1
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16195410 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0119s; samplesPerSecond = 21088.1
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.16121338 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0128s; samplesPerSecond = 19546.5
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15427100 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0125s; samplesPerSecond = 20011.2
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14844775 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0141s; samplesPerSecond = 17743.1
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15055713 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0108s; samplesPerSecond = 23067.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15467627 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0132s; samplesPerSecond = 18965.3
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17615869 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0140s; samplesPerSecond = 17872.5
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22356104 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0121s; samplesPerSecond = 20650.9
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16514209 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0109s; samplesPerSecond = 22946.3
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17355859 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0129s; samplesPerSecond = 19372.3
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13117578 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0138s; samplesPerSecond = 18151.5
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13956104 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0121s; samplesPerSecond = 20743.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18397363 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0105s; samplesPerSecond = 23741.7
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15222656 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0126s; samplesPerSecond = 19909.2
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.18856396 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0145s; samplesPerSecond = 17207.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17513330 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0130s; samplesPerSecond = 19199.8
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15008252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0108s; samplesPerSecond = 23043.6
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12125342 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0127s; samplesPerSecond = 19668.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15408496 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0141s; samplesPerSecond = 17788.5
05/03/2016 13:02:00: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.19333879 * 10000; EvalClassificationError = 0.07700000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.525411s
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.44832977 * 250; EvalErrorPrediction = 0.15200000 * 250; time = 0.0124s; samplesPerSecond = 20205.3
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.40085291 * 250; EvalErrorPrediction = 0.12400000 * 250; time = 0.0142s; samplesPerSecond = 17631.7
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.33487201 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0129s; samplesPerSecond = 19405.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.29081885 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0125s; samplesPerSecond = 20016.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.26279236 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0118s; samplesPerSecond = 21188.2
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.25220630 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0138s; samplesPerSecond = 18158.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.20988293 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0129s; samplesPerSecond = 19447.7
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.21577441 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0148s; samplesPerSecond = 16846.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.16622900 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0157s; samplesPerSecond = 15967.3
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.17637866 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0144s; samplesPerSecond = 17315.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.22185278 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0123s; samplesPerSecond = 20366.6
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17055811 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0151s; samplesPerSecond = 16564.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16481055 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0140s; samplesPerSecond = 17910.9
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13871704 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0156s; samplesPerSecond = 16005.1
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16922363 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0143s; samplesPerSecond = 17454.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15403345 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0135s; samplesPerSecond = 18485.7
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22255859 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0108s; samplesPerSecond = 23079.8
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18146851 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0133s; samplesPerSecond = 18843.7
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15611523 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0131s; samplesPerSecond = 19081.1
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17320215 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0137s; samplesPerSecond = 18192.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15727930 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0117s; samplesPerSecond = 21404.1
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16195410 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0119s; samplesPerSecond = 21088.1
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.16121338 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0128s; samplesPerSecond = 19546.5
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15427100 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0125s; samplesPerSecond = 20011.2
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14844775 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0141s; samplesPerSecond = 17743.1
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15055713 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0108s; samplesPerSecond = 23067.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15467627 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0132s; samplesPerSecond = 18965.3
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17615869 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0140s; samplesPerSecond = 17872.5
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22356104 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0121s; samplesPerSecond = 20650.9
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16514209 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0109s; samplesPerSecond = 22946.3
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17355859 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0129s; samplesPerSecond = 19372.3
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13117578 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0138s; samplesPerSecond = 18151.5
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13956104 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0121s; samplesPerSecond = 20743.4
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18397363 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0105s; samplesPerSecond = 23741.7
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15222656 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0126s; samplesPerSecond = 19909.2
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.18856396 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0145s; samplesPerSecond = 17207.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17513330 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0130s; samplesPerSecond = 19199.8
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15008252 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0108s; samplesPerSecond = 23043.6
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12125342 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0127s; samplesPerSecond = 19668.0
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15408496 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0141s; samplesPerSecond = 17788.5
05/03/2016 13:02:00: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.19333879 * 10000; EvalErrorPrediction = 0.07700000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.525411s
05/03/2016 13:02:00: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503140157.802427\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_gpu/Models/simple.dnn.2'
05/03/2016 13:02:00: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
05/03/2016 13:02:00: Starting minibatch loop.
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10746781 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0126s; samplesPerSecond = 19806.7
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17648278 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0122s; samplesPerSecond = 20429.8
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14106094 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0126s; samplesPerSecond = 19838.1
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16348077 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0127s; samplesPerSecond = 19745.7
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11767151 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0110s; samplesPerSecond = 22787.3
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16217944 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0137s; samplesPerSecond = 18292.2
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16171204 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0147s; samplesPerSecond = 16977.9
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19844067 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0130s; samplesPerSecond = 19285.7
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.19984509 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0116s; samplesPerSecond = 21585.2
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13727051 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0133s; samplesPerSecond = 18839.5
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20126648 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0150s; samplesPerSecond = 16709.0
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17913672 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0138s; samplesPerSecond = 18066.2
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15983582 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0138s; samplesPerSecond = 18131.7
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16260010 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0126s; samplesPerSecond = 19798.8
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19813428 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0122s; samplesPerSecond = 20453.2
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10295117 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0124s; samplesPerSecond = 20091.6
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17117065 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0127s; samplesPerSecond = 19762.8
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16661938 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0127s; samplesPerSecond = 19620.2
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12718042 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0108s; samplesPerSecond = 23156.7
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11923853 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0139s; samplesPerSecond = 17989.5
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12890332 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0129s; samplesPerSecond = 19340.9
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18205469 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0124s; samplesPerSecond = 20182.4
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13154199 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0111s; samplesPerSecond = 22599.9
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19668359 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0139s; samplesPerSecond = 17922.4
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15817578 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0126s; samplesPerSecond = 19915.6
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11871240 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0136s; samplesPerSecond = 18378.3
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13730908 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0107s; samplesPerSecond = 23384.2
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20024854 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0134s; samplesPerSecond = 18719.6
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.18850244 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0131s; samplesPerSecond = 19151.2
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16640479 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0108s; samplesPerSecond = 23086.2
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.11872168 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0107s; samplesPerSecond = 23347.0
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16090430 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0127s; samplesPerSecond = 19730.1
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.16162939 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0137s; samplesPerSecond = 18205.7
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12408594 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0109s; samplesPerSecond = 22839.4
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13544434 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0126s; samplesPerSecond = 19893.4
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20890771 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0129s; samplesPerSecond = 19366.3
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16674365 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0146s; samplesPerSecond = 17116.3
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15033398 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0131s; samplesPerSecond = 19152.7
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16547705 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0120s; samplesPerSecond = 20752.1
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16792480 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0129s; samplesPerSecond = 19450.7
05/03/2016 13:02:01: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15806136 * 10000; EvalClassificationError = 0.07470000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.511151s
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10746781 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0126s; samplesPerSecond = 19806.7
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17648278 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0122s; samplesPerSecond = 20429.8
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14106094 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0126s; samplesPerSecond = 19838.1
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16348077 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0127s; samplesPerSecond = 19745.7
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11767151 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0110s; samplesPerSecond = 22787.3
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16217944 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0137s; samplesPerSecond = 18292.2
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16171204 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0147s; samplesPerSecond = 16977.9
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19844067 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0130s; samplesPerSecond = 19285.7
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.19984509 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0116s; samplesPerSecond = 21585.2
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13727051 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0133s; samplesPerSecond = 18839.5
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20126648 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0150s; samplesPerSecond = 16709.0
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17913672 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0138s; samplesPerSecond = 18066.2
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15983582 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0138s; samplesPerSecond = 18131.7
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16260010 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0126s; samplesPerSecond = 19798.8
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19813428 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0122s; samplesPerSecond = 20453.2
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10295117 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0124s; samplesPerSecond = 20091.6
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17117065 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0127s; samplesPerSecond = 19762.8
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16661938 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0127s; samplesPerSecond = 19620.2
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12718042 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0108s; samplesPerSecond = 23156.7
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11923853 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0139s; samplesPerSecond = 17989.5
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12890332 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0129s; samplesPerSecond = 19340.9
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18205469 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0124s; samplesPerSecond = 20182.4
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13154199 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0111s; samplesPerSecond = 22599.9
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19668359 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0139s; samplesPerSecond = 17922.4
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15817578 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0126s; samplesPerSecond = 19915.6
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11871240 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0136s; samplesPerSecond = 18378.3
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13730908 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0107s; samplesPerSecond = 23384.2
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20024854 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0134s; samplesPerSecond = 18719.6
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.18850244 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0131s; samplesPerSecond = 19151.2
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16640479 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0108s; samplesPerSecond = 23086.2
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.11872168 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0107s; samplesPerSecond = 23347.0
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16090430 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0127s; samplesPerSecond = 19730.1
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.16162939 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0137s; samplesPerSecond = 18205.7
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12408594 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0109s; samplesPerSecond = 22839.4
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13544434 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0126s; samplesPerSecond = 19893.4
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20890771 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0129s; samplesPerSecond = 19366.3
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16674365 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0146s; samplesPerSecond = 17116.3
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15033398 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0131s; samplesPerSecond = 19152.7
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16547705 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0120s; samplesPerSecond = 20752.1
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16792480 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0129s; samplesPerSecond = 19450.7
05/03/2016 13:02:01: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15806136 * 10000; EvalErrorPrediction = 0.07470000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.511151s
05/03/2016 13:02:01: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503140157.802427\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_gpu/Models/simple.dnn'
05/03/2016 13:02:01: CNTKCommandTrainEnd: Simple_Demo_Train
@@ -607,7 +607,7 @@ Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -636,7 +636,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -660,7 +660,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
000000501A591090: {[W0*features+B0 Value[50 x 1 x *1]] }
000000501A591130: {[W1*H1 Value[50 x 1 x *1]] }
000000501A5916D0: {[W1*H1+B1 Value[50 x 1 x *1]] }
@@ -672,7 +672,7 @@ Memory Sharing Structure:
000000501A592850: {[LogOfPrior Value[2]] }
000000501A5928F0: {[H2 Value[50 x 1 x *1]] }
000000501A592B70: {[W2 Value[2 x 50]] }
000000501A592D50: {[EvalClassificationError Value[1]] }
000000501A592D50: {[EvalErrorPrediction Value[1]] }
000000501A592DF0: {[CrossEntropyWithSoftmax Value[1]] }
0000005024E60C70: {[W1 Value[50 x 50]] }
0000005024E613F0: {[W0 Value[50 x 2]] }
@@ -685,7 +685,7 @@ Memory Sharing Structure:
0000005024E62430: {[features Value[2 x *1]] }
0000005024E624D0: {[B1 Value[50 x 1]] }
05/03/2016 13:02:01: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12740351 * 603; perplexity = 1.13587526
05/03/2016 13:02:01: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12740351 * 603; perplexity = 1.13587526
05/03/2016 13:02:01: Action "test" complete.
@@ -701,7 +701,7 @@ Post-processing network...
8 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -731,7 +731,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *2] -> [2 x 1 x *2]
Validating --> B2 = LearnableParameter() : -> [2 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *2], [2 x 1] -> [2 x 1 x *2]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *2] -> [2 x 1 x *2]
Validating --> Prior = Mean (labels) : [2 x *2] -> [2]
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
@@ -754,7 +754,7 @@ Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalClassificationError Gradient[1]] [EvalClassificationError Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalErrorPrediction Gradient[1]] [EvalErrorPrediction Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
000000501A5914F0: {[InvStdOfFeatures Value[2]] }
000000501A591590: {[MeanOfFeatures Value[2]] }
000000501A5916D0: {[labels Value[2 x *2]] }

View file

@@ -0,0 +1,434 @@
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 264172964 kB
-------------------------------------------------------------------
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config/FeedForward.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
-------------------------------------------------------------------
Build info:
Built time: Aug 16 2016 09:41:57
Last modified date: Mon Aug 15 23:39:17 2016
Build type: release
Build target: GPU
With 1bit-SGD: yes
Math lib: mkl
CUDA_PATH: /usr/local/cuda-7.5
CUB_PATH: /usr/local/cub-1.4.1
CUDNN_PATH: /usr/local/cudnn-4.0
Build Branch: HEAD
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by philly on 643085f7f8c2
Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
-------------------------------------------------------------------
Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
MPIWrapper: initializing MPI
ping [requestnodes (before change)]: 1 nodes pinging each other
ping [requestnodes (before change)]: all 1 nodes responded
requestnodes [MPIWrapper]: using 1 out of 1 MPI nodes (1 requested); we (0) are in (participating)
ping [requestnodes (after change)]: 1 nodes pinging each other
ping [requestnodes (after change)]: all 1 nodes responded
mpihelper: only one MPI process: MPI operation will be boring
ping [mpihelper]: 1 nodes pinging each other
ping [mpihelper]: all 1 nodes responded
08/16/2016 10:01:41: -------------------------------------------------------------------
08/16/2016 10:01:41: Build info:
08/16/2016 10:01:41: Built time: Aug 16 2016 09:41:57
08/16/2016 10:01:41: Last modified date: Mon Aug 15 23:39:17 2016
08/16/2016 10:01:41: Build type: release
08/16/2016 10:01:41: Build target: GPU
08/16/2016 10:01:41: With 1bit-SGD: yes
08/16/2016 10:01:41: Math lib: mkl
08/16/2016 10:01:41: CUDA_PATH: /usr/local/cuda-7.5
08/16/2016 10:01:41: CUB_PATH: /usr/local/cub-1.4.1
08/16/2016 10:01:41: CUDNN_PATH: /usr/local/cudnn-4.0
08/16/2016 10:01:41: Build Branch: HEAD
08/16/2016 10:01:41: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
08/16/2016 10:01:41: Built by philly on 643085f7f8c2
08/16/2016 10:01:41: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
08/16/2016 10:01:41: -------------------------------------------------------------------
08/16/2016 10:01:42: -------------------------------------------------------------------
08/16/2016 10:01:42: GPU info:
08/16/2016 10:01:42: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:42: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:42: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:42: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:42: -------------------------------------------------------------------
08/16/2016 10:01:42: Running on localhost at 2016/08/16 10:01:42
08/16/2016 10:01:42: Command line:
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config/FeedForward.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
08/16/2016 10:01:42: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:01:42: RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = "1"
modelPath = "$ModelDir$/cntkSpeechFF.dnn"
parallelTrain = true
speechTrain = [
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
]
SGD = [
epochSize = 20480
minibatchSize = 256:1024:2048
learningRatesPerMB = 1.0:0.5:0.1
numMBsToShowResult = 10
momentumPerMB = 0.9:0.656119
maxEpochs = 3
keepCheckPointFiles = true
parallelTrain = [
parallelizationMethod = "DataParallelSGD"
distributedMBReading = true
dataParallelSGD = [
gradientBits = 1
]
]
autoAdjust=[
autoAdjustMinibatch = true
minibatchSizeTuningFrequency = 1
minibatchSearchCriterionErrorMargin = 2
]
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "$DataDir$/glob_0000.scp"
]
labels = [
mlfFile = "$DataDir$/glob_0000.mlf"
labelMappingFile = "$DataDir$/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
DeviceId=-1
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=2048]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
08/16/2016 10:01:42: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:01:42: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:01:42: RootDir = ".."
ConfigDir = "../Config"
DataDir = "../Data"
OutputDir = "../Output"
ModelDir = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = "1"
modelPath = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn"
parallelTrain = true
speechTrain = [
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
]
SGD = [
epochSize = 20480
minibatchSize = 256:1024:2048
learningRatesPerMB = 1.0:0.5:0.1
numMBsToShowResult = 10
momentumPerMB = 0.9:0.656119
maxEpochs = 3
keepCheckPointFiles = true
parallelTrain = [
parallelizationMethod = "DataParallelSGD"
distributedMBReading = true
dataParallelSGD = [
gradientBits = 1
]
]
autoAdjust=[
autoAdjustMinibatch = true
minibatchSizeTuningFrequency = 1
minibatchSearchCriterionErrorMargin = 2
]
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
DeviceId=-1
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=2048]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
08/16/2016 10:01:42: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
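The two RAW CONFIG dumps differ only in macro expansion: $Name$ references are substituted transitively (ModelDir = "$OutputDir$/Models" itself refers to a macro), and command-line assignments override the in-file ones, which is why OutputDir and ModelDir resolve under the /tmp run directory instead of ../Output. A minimal Python sketch of that substitution step, under those assumptions and with hypothetical names:

import re

def expand(value, cfg):
    # Repeatedly substitute $Name$ references until none remain.
    macro = re.compile(r"\$(\w+)\$")
    while True:
        m = macro.search(value)
        if m is None:
            return value
        value = value[:m.start()] + str(cfg[m.group(1)]) + value[m.end():]

cfg = {
    "RootDir": "..",
    "OutputDir": "/tmp/run",          # command-line override replaces "$RootDir$/Output"
    "ModelDir": "$OutputDir$/Models",
}
print(expand(cfg["ModelDir"], cfg))   # -> /tmp/run/Models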
08/16/2016 10:01:42: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: FeedForward.cntk:command=speechTrain
configparameters: FeedForward.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
configparameters: FeedForward.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: FeedForward.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: FeedForward.cntk:deviceId=-1
configparameters: FeedForward.cntk:ModelDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models
configparameters: FeedForward.cntk:modelPath=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
configparameters: FeedForward.cntk:OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
configparameters: FeedForward.cntk:parallelTrain=true
configparameters: FeedForward.cntk:precision=float
configparameters: FeedForward.cntk:RootDir=..
configparameters: FeedForward.cntk:RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
configparameters: FeedForward.cntk:speechTrain=[
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
]
SGD = [
epochSize = 20480
minibatchSize = 256:1024:2048
learningRatesPerMB = 1.0:0.5:0.1
numMBsToShowResult = 10
momentumPerMB = 0.9:0.656119
maxEpochs = 3
keepCheckPointFiles = true
parallelTrain = [
parallelizationMethod = "DataParallelSGD"
distributedMBReading = true
dataParallelSGD = [
gradientBits = 1
]
]
autoAdjust=[
autoAdjustMinibatch = true
minibatchSizeTuningFrequency = 1
minibatchSearchCriterionErrorMargin = 2
]
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]] [reader=[useMersenneTwisterRand=true]]
configparameters: FeedForward.cntk:timestamping=true
configparameters: FeedForward.cntk:traceLevel=1
08/16/2016 10:01:42: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:01:42: Commands: speechTrain
08/16/2016 10:01:42: Precision = "float"
08/16/2016 10:01:42: CNTKModelPath: /tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
08/16/2016 10:01:42: CNTKCommandTrainInfo: speechTrain : 1
08/16/2016 10:01:42: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
08/16/2016 10:01:42: ##############################################################################
08/16/2016 10:01:42: # #
08/16/2016 10:01:42: # Action "train" #
08/16/2016 10:01:42: # #
08/16/2016 10:01:42: ##############################################################################
08/16/2016 10:01:42: CNTKCommandTrainBegin: speechTrain
SimpleNetworkBuilder Using CPU
reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp ... 948 entries
total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list
htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf ... total 948 entries
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
label set 0: 129 classes
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
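The chunk statistics above are just the corpus totals divided evenly across the 3 chunks:

948 / 3 = 316.0 \ \text{utterances per chunk}, \qquad 252734 / 3 \approx 84244.7 \ \text{frames per chunk}

Chunks are the unit the blockRandomize read method shuffles, presumably so that only a window of chunks has to be resident at a time rather than the whole corpus.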
08/16/2016 10:01:42: Creating virgin network.
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000.
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000.
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000.
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
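Each learnable parameter is logged twice: zero-filled on creation, then (for the weights) overwritten with per-parameter-seeded uniform noise of range 0.05; the biases stay at zero. A rough numpy equivalent, where the [-range, range) interval convention is an assumption (the GPU build draws through curand instead, per the second baseline below):

import numpy as np

def init_uniform(shape, seed, rng_range=0.05):
    # Mirrors the log's "uniform(seed=k, range=0.050000*1.000000)".
    rng = np.random.RandomState(seed)
    return rng.uniform(-rng_range, rng_range, size=shape).astype(np.float32)

W0 = init_uniform((512, 363), seed=1)   # weights get seeded noise
B0 = np.zeros((512, 1), np.float32)     # biases are left at zero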
Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
Prior = Mean()
ScaledLogLikelihood = Minus()
Validating network. 25 nodes to process in pass 1.
Validating --> labels = InputValue() : -> [132 x *]
Validating --> W2 = LearnableParameter() : -> [132 x 512]
Validating --> W1 = LearnableParameter() : -> [512 x 512]
Validating --> W0 = LearnableParameter() : -> [512 x 363]
Validating --> features = InputValue() : -> [363 x *]
Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363]
Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363]
Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *]
Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *]
Validating --> B0 = LearnableParameter() : -> [512 x 1]
Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *]
Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *]
Validating --> B1 = LearnableParameter() : -> [512 x 1]
Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *]
Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [132 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *]
Validating --> Prior = Mean (labels) : [132 x *] -> [132]
Validating --> LogOfPrior = Log (Prior) : [132] -> [132]
Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
Validating network. 17 nodes to process in pass 2.
Validating network, final pass.
12 out of 25 nodes do not share the minibatch layout with the input data.
Post-processing network complete.
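The validation listing pins down the whole forward pass: mean/variance-normalized features, two sigmoid layers, a linear output, softmax posteriors, and prior-scaled log-likelihoods for decoding. A numpy sketch of the same graph (shapes as logged, with the frame axis * as the column axis):

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def forward(x, W0, B0, W1, B1, W2, B2, mean, invstd, prior):
    xn = (x - mean[:, None]) * invstd[:, None]    # MVNormalizedFeatures [363 x *]
    h1 = sigmoid(W0 @ xn + B0)                    # H1 [512 x *]
    h2 = sigmoid(W1 @ h1 + B1)                    # H2 [512 x *]
    z  = W2 @ h2 + B2                             # HLast [132 x *]
    e  = np.exp(z - z.max(axis=0, keepdims=True))
    posterior = e / e.sum(axis=0, keepdims=True)  # PosteriorProb (softmax)
    sll = z - np.log(prior)[:, None]              # ScaledLogLikelihood = HLast - LogOfPrior
    return z, posterior, sll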
08/16/2016 10:01:42: Created model with 25 nodes on CPU.
08/16/2016 10:01:42: Training criterion node(s):
08/16/2016 10:01:42: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
08/16/2016 10:01:42: Evaluation criterion node(s):
08/16/2016 10:01:42: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared.
{ W1 : [512 x 512] (gradient)
W1*H1+B1 : [512 x 1 x *] }
{ H2 : [512 x 1 x *]
W1*H1 : [512 x 1 x *] (gradient) }
{ B0 : [512 x 1] (gradient)
H1 : [512 x 1 x *] (gradient)
W1*H1+B1 : [512 x 1 x *] (gradient)
W2*H1 : [132 x 1 x *] }
{ HLast : [132 x 1 x *]
W2 : [132 x 512] (gradient) }
{ B1 : [512 x 1] (gradient)
H2 : [512 x 1 x *] (gradient)
HLast : [132 x 1 x *] (gradient) }
{ W0 : [512 x 363] (gradient)
W0*features+B0 : [512 x 1 x *] }
{ H1 : [512 x 1 x *]
W0*features : [512 x *] (gradient) }
{ W0*features+B0 : [512 x 1 x *] (gradient)
W1*H1 : [512 x 1 x *] }
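"19 are shared as 8" means 19 of the 40 value/gradient matrices are packed into the 8 physical buffers listed in braces: a buffer can be reused once its previous occupant's last consumer has run, e.g. W1's gradient can alias the W1*H1+B1 activation because their lifetimes never overlap. A toy greedy allocator over lifetimes, ignoring shape and type compatibility, as a sketch of the idea:

def share_buffers(lifetimes):
    # lifetimes: (name, first_use, last_use) steps in execution order.
    pools = []   # pools[i] = step at which buffer i becomes free again
    where = {}
    for name, first, last in sorted(lifetimes, key=lambda t: t[1]):
        for i, busy_until in enumerate(pools):
            if busy_until < first:    # previous occupant is dead: reuse
                pools[i] = last
                where[name] = i
                break
        else:
            pools.append(last)        # nothing free: allocate a new buffer
            where[name] = len(pools) - 1
    return where

# share_buffers([("W1*H1+B1", 1, 3), ("W1.grad", 5, 6)]) puts both in pool 0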
08/16/2016 10:01:42: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient:
08/16/2016 10:01:42: Node 'B0' (LearnableParameter operation) : [512 x 1]
08/16/2016 10:01:42: Node 'B1' (LearnableParameter operation) : [512 x 1]
08/16/2016 10:01:42: Node 'B2' (LearnableParameter operation) : [132 x 1]
08/16/2016 10:01:42: Node 'W0' (LearnableParameter operation) : [512 x 363]
08/16/2016 10:01:42: Node 'W1' (LearnableParameter operation) : [512 x 512]
08/16/2016 10:01:42: Node 'W2' (LearnableParameter operation) : [132 x 512]
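The 516740 figure matches the six tensors just listed:

512 \cdot 363 + 512 \cdot 512 + 132 \cdot 512 + 512 + 512 + 132 = 185856 + 262144 + 67584 + 1156 = 516740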
08/16/2016 10:01:42: Precomputing --> 3 PreCompute nodes found.
08/16/2016 10:01:42: MeanOfFeatures = Mean()
08/16/2016 10:01:42: InvStdOfFeatures = InvStdDev()
08/16/2016 10:01:42: Prior = Mean()
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
08/16/2016 10:01:43: Precomputing --> Completed.
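The three PreCompute nodes are constants estimated in a single data pass before SGD starts: the per-dimension feature mean and inverse standard deviation feeding the normalization layer, and the label prior feeding LogOfPrior. Roughly, assuming one-hot label frames (array names hypothetical):

import numpy as np

def precompute(X, Y, eps=1e-8):
    # X: [363 x N] feature frames, Y: [132 x N] one-hot labels
    mean   = X.mean(axis=1)               # MeanOfFeatures
    invstd = 1.0 / (X.std(axis=1) + eps)  # InvStdOfFeatures (eps is an assumption)
    prior  = Y.mean(axis=1)               # Prior = relative state frequency
    return mean, invstd, prior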
08/16/2016 10:01:43: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
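The per-sample numbers follow from the first entries of the schedules in the config (learningRatesPerMB = 1.0, momentumPerMB = 0.9, minibatchSize = 256):

\mathrm{lr}_{\text{per sample}} = \frac{1.0}{256} = 0.00390625, \qquad \tau = \frac{-256}{\ln 0.9} \approx 2429.8 \ \text{samples}

where \tau is the logged "momentum as time constant", i.e. the per-minibatch momentum re-expressed as the time constant of the equivalent exponential average over samples.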
minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
08/16/2016 10:01:43: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
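NumGradientBits = 1 selects 1-bit SGD: each aggregated gradient entry crosses the wire as one sign bit plus a shared scale, and the quantization error is kept and added back into the next minibatch's gradient (error feedback), which is what keeps the compression from biasing training. A simplified single-tensor sketch; the per-column mean-absolute scale is an assumption, not CNTK's exact reconstruction rule:

import numpy as np

def quantize_1bit(grad, residual):
    g = grad + residual                            # fold in last step's error
    scale = np.abs(g).mean(axis=0, keepdims=True)  # one scale per column (assumed)
    q = np.where(g >= 0.0, scale, -scale)          # 1 bit per entry + the scale
    return q, g - q                                # quantized gradient, new residual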
08/16/2016 10:01:44: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.45117986 * 2048; EvalErrorPrediction = 0.92187500 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.209966s
08/16/2016 10:01:44: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn'
08/16/2016 10:01:44: CNTKCommandTrainEnd: speechTrain
08/16/2016 10:01:44: Action "train" complete.
08/16/2016 10:01:44: __COMPLETED__
~MPIWrapper

View File

@ -1 +0,0 @@
__COMPLETED__

View File

@ -1 +0,0 @@
__COMPLETED__

View File

@ -0,0 +1,435 @@
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 264172964 kB
-------------------------------------------------------------------
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config/FeedForward.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
-------------------------------------------------------------------
Build info:
Built time: Aug 16 2016 09:41:57
Last modified date: Mon Aug 15 23:39:17 2016
Build type: release
Build target: GPU
With 1bit-SGD: yes
Math lib: mkl
CUDA_PATH: /usr/local/cuda-7.5
CUB_PATH: /usr/local/cub-1.4.1
CUDNN_PATH: /usr/local/cudnn-4.0
Build Branch: HEAD
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by philly on 643085f7f8c2
Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
-------------------------------------------------------------------
Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
MPIWrapper: initializing MPI
ping [requestnodes (before change)]: 1 nodes pinging each other
ping [requestnodes (before change)]: all 1 nodes responded
requestnodes [MPIWrapper]: using 1 out of 1 MPI nodes (1 requested); we (0) are in (participating)
ping [requestnodes (after change)]: 1 nodes pinging each other
ping [requestnodes (after change)]: all 1 nodes responded
mpihelper: only one MPI process: MPI operation will be boring
ping [mpihelper]: 1 nodes pinging each other
ping [mpihelper]: all 1 nodes responded
08/16/2016 10:01:45: -------------------------------------------------------------------
08/16/2016 10:01:45: Build info:
08/16/2016 10:01:45: Built time: Aug 16 2016 09:41:57
08/16/2016 10:01:45: Last modified date: Mon Aug 15 23:39:17 2016
08/16/2016 10:01:45: Build type: release
08/16/2016 10:01:45: Build target: GPU
08/16/2016 10:01:45: With 1bit-SGD: yes
08/16/2016 10:01:45: Math lib: mkl
08/16/2016 10:01:45: CUDA_PATH: /usr/local/cuda-7.5
08/16/2016 10:01:45: CUB_PATH: /usr/local/cub-1.4.1
08/16/2016 10:01:45: CUDNN_PATH: /usr/local/cudnn-4.0
08/16/2016 10:01:45: Build Branch: HEAD
08/16/2016 10:01:45: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
08/16/2016 10:01:45: Built by philly on 643085f7f8c2
08/16/2016 10:01:45: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
08/16/2016 10:01:45: -------------------------------------------------------------------
08/16/2016 10:01:46: -------------------------------------------------------------------
08/16/2016 10:01:46: GPU info:
08/16/2016 10:01:46: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:46: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:46: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:46: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:46: -------------------------------------------------------------------
08/16/2016 10:01:46: Running on localhost at 2016/08/16 10:01:46
08/16/2016 10:01:46: Command line:
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config/FeedForward.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
08/16/2016 10:01:46: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:01:46: RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = "1"
modelPath = "$ModelDir$/cntkSpeechFF.dnn"
parallelTrain = true
speechTrain = [
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
]
SGD = [
epochSize = 20480
minibatchSize = 256:1024:2048
learningRatesPerMB = 1.0:0.5:0.1
numMBsToShowResult = 10
momentumPerMB = 0.9:0.656119
maxEpochs = 3
keepCheckPointFiles = true
parallelTrain = [
parallelizationMethod = "DataParallelSGD"
distributedMBReading = true
dataParallelSGD = [
gradientBits = 1
]
]
autoAdjust=[
autoAdjustMinibatch = true
minibatchSizeTuningFrequency = 1
minibatchSearchCriterionErrorMargin = 2
]
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "$DataDir$/glob_0000.scp"
]
labels = [
mlfFile = "$DataDir$/glob_0000.mlf"
labelMappingFile = "$DataDir$/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
DeviceId=0
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=2048]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
08/16/2016 10:01:46: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:01:46: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:01:46: RootDir = ".."
ConfigDir = "../Config"
DataDir = "../Data"
OutputDir = "../Output"
ModelDir = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = "1"
modelPath = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn"
parallelTrain = true
speechTrain = [
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
]
SGD = [
epochSize = 20480
minibatchSize = 256:1024:2048
learningRatesPerMB = 1.0:0.5:0.1
numMBsToShowResult = 10
momentumPerMB = 0.9:0.656119
maxEpochs = 3
keepCheckPointFiles = true
parallelTrain = [
parallelizationMethod = "DataParallelSGD"
distributedMBReading = true
dataParallelSGD = [
gradientBits = 1
]
]
autoAdjust=[
autoAdjustMinibatch = true
minibatchSizeTuningFrequency = 1
minibatchSearchCriterionErrorMargin = 2
]
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
DeviceId=0
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=2048]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
08/16/2016 10:01:46: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:01:46: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: FeedForward.cntk:command=speechTrain
configparameters: FeedForward.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
configparameters: FeedForward.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: FeedForward.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: FeedForward.cntk:deviceId=0
configparameters: FeedForward.cntk:ModelDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models
configparameters: FeedForward.cntk:modelPath=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
configparameters: FeedForward.cntk:OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
configparameters: FeedForward.cntk:parallelTrain=true
configparameters: FeedForward.cntk:precision=float
configparameters: FeedForward.cntk:RootDir=..
configparameters: FeedForward.cntk:RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
configparameters: FeedForward.cntk:speechTrain=[
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
]
SGD = [
epochSize = 20480
minibatchSize = 256:1024:2048
learningRatesPerMB = 1.0:0.5:0.1
numMBsToShowResult = 10
momentumPerMB = 0.9:0.656119
maxEpochs = 3
keepCheckPointFiles = true
parallelTrain = [
parallelizationMethod = "DataParallelSGD"
distributedMBReading = true
dataParallelSGD = [
gradientBits = 1
]
]
autoAdjust=[
autoAdjustMinibatch = true
minibatchSizeTuningFrequency = 1
minibatchSearchCriterionErrorMargin = 2
]
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]] [reader=[useMersenneTwisterRand=true]]
configparameters: FeedForward.cntk:timestamping=true
configparameters: FeedForward.cntk:traceLevel=1
08/16/2016 10:01:46: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:01:46: Commands: speechTrain
08/16/2016 10:01:46: Precision = "float"
08/16/2016 10:01:46: CNTKModelPath: /tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
08/16/2016 10:01:46: CNTKCommandTrainInfo: speechTrain : 1
08/16/2016 10:01:46: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
08/16/2016 10:01:46: ##############################################################################
08/16/2016 10:01:46: # #
08/16/2016 10:01:46: # Action "train" #
08/16/2016 10:01:46: # #
08/16/2016 10:01:46: ##############################################################################
08/16/2016 10:01:46: CNTKCommandTrainBegin: speechTrain
SimpleNetworkBuilder Using GPU 0
reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp ... 948 entries
total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list
htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf ... total 948 entries
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
label set 0: 129 classes
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
08/16/2016 10:01:46: Creating virgin network.
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000.
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000.
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000.
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
Prior = Mean()
ScaledLogLikelihood = Minus()
Validating network. 25 nodes to process in pass 1.
Validating --> labels = InputValue() : -> [132 x *]
Validating --> W2 = LearnableParameter() : -> [132 x 512]
Validating --> W1 = LearnableParameter() : -> [512 x 512]
Validating --> W0 = LearnableParameter() : -> [512 x 363]
Validating --> features = InputValue() : -> [363 x *]
Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363]
Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363]
Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *]
Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *]
Validating --> B0 = LearnableParameter() : -> [512 x 1]
Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *]
Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *]
Validating --> B1 = LearnableParameter() : -> [512 x 1]
Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *]
Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *]
Validating --> B2 = LearnableParameter() : -> [132 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *]
Validating --> Prior = Mean (labels) : [132 x *] -> [132]
Validating --> LogOfPrior = Log (Prior) : [132] -> [132]
Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
Validating network. 17 nodes to process in pass 2.
Validating network, final pass.
12 out of 25 nodes do not share the minibatch layout with the input data.
Post-processing network complete.
08/16/2016 10:01:46: Created model with 25 nodes on GPU 0.
08/16/2016 10:01:46: Training criterion node(s):
08/16/2016 10:01:46: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
08/16/2016 10:01:46: Evaluation criterion node(s):
08/16/2016 10:01:46: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared.
{ W0 : [512 x 363] (gradient)
W0*features+B0 : [512 x 1 x *] }
{ H1 : [512 x 1 x *]
W0*features : [512 x *] (gradient) }
{ W0*features+B0 : [512 x 1 x *] (gradient)
W1*H1 : [512 x 1 x *] }
{ W1 : [512 x 512] (gradient)
W1*H1+B1 : [512 x 1 x *] }
{ H2 : [512 x 1 x *]
W1*H1 : [512 x 1 x *] (gradient) }
{ B0 : [512 x 1] (gradient)
H1 : [512 x 1 x *] (gradient)
W1*H1+B1 : [512 x 1 x *] (gradient)
W2*H1 : [132 x 1 x *] }
{ HLast : [132 x 1 x *]
W2 : [132 x 512] (gradient) }
{ B1 : [512 x 1] (gradient)
H2 : [512 x 1 x *] (gradient)
HLast : [132 x 1 x *] (gradient) }
08/16/2016 10:01:46: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient:
08/16/2016 10:01:46: Node 'B0' (LearnableParameter operation) : [512 x 1]
08/16/2016 10:01:46: Node 'B1' (LearnableParameter operation) : [512 x 1]
08/16/2016 10:01:46: Node 'B2' (LearnableParameter operation) : [132 x 1]
08/16/2016 10:01:46: Node 'W0' (LearnableParameter operation) : [512 x 363]
08/16/2016 10:01:46: Node 'W1' (LearnableParameter operation) : [512 x 512]
08/16/2016 10:01:46: Node 'W2' (LearnableParameter operation) : [132 x 512]
08/16/2016 10:01:46: Precomputing --> 3 PreCompute nodes found.
08/16/2016 10:01:46: MeanOfFeatures = Mean()
08/16/2016 10:01:46: InvStdOfFeatures = InvStdDev()
08/16/2016 10:01:46: Prior = Mean()
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
08/16/2016 10:01:46: Precomputing --> Completed.
08/16/2016 10:01:46: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
08/16/2016 10:01:46: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
08/16/2016 10:01:46: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.41144794 * 2048; EvalErrorPrediction = 0.92773438 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.023072s
08/16/2016 10:01:46: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn'
08/16/2016 10:01:46: CNTKCommandTrainEnd: speechTrain
08/16/2016 10:01:46: Action "train" complete.
08/16/2016 10:01:46: __COMPLETED__
~MPIWrapper
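Comparing this GPU baseline against the CPU one above: the epoch criteria differ (4.41144794 vs 4.45117986) despite identical seeds, which is expected because the GPU build initializes weights through curand while the CPU build uses a host generator, so the baselines are tracked per device. The epoch times give the rough speedup:

\frac{0.209966\ \text{s (CPU)}}{0.023072\ \text{s (GPU)}} \approx 9.1\times

Both losses sit just below \ln 132 \approx 4.88, the cross entropy of a uniform posterior over the 132 output states, which is plausible after only 2048 training frames.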

View File

@ -1 +0,0 @@
__COMPLETED__

View File

@ -1 +0,0 @@
__COMPLETED__

View File

@ -1,18 +1,24 @@
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]]
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 268381192 kB
-------------------------------------------------------------------
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
-------------------------------------------------------------------
Build info:
Built time: May 3 2016 13:15:46
Last modified date: Tue Apr 26 23:35:31 2016
Built time: Aug 16 2016 03:09:16
Last modified date: Fri Aug 12 05:28:23 2016
Build type: Release
Build target: GPU
With 1bit-SGD: no
With 1bit-SGD: yes
Math lib: mkl
CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
CUB_PATH: c:\src\cub-1.4.1
CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
Build Branch: HEAD
Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12
Built by svcphil on cntk-muc01
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by svcphil on Philly-Pool1
Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
-------------------------------------------------------------------
Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
@ -25,31 +31,39 @@ ping [requestnodes (after change)]: all 1 nodes responded
mpihelper: only one MPI process: MPI operation will be boring
ping [mpihelper]: 1 nodes pinging each other
ping [mpihelper]: all 1 nodes responded
05/03/2016 13:22:22: -------------------------------------------------------------------
05/03/2016 13:22:22: Build info:
08/16/2016 03:20:10: -------------------------------------------------------------------
08/16/2016 03:20:10: Build info:
05/03/2016 13:22:22: Built time: May 3 2016 13:15:46
05/03/2016 13:22:22: Last modified date: Tue Apr 26 23:35:31 2016
05/03/2016 13:22:22: Build type: Release
05/03/2016 13:22:22: Build target: GPU
05/03/2016 13:22:22: With 1bit-SGD: no
05/03/2016 13:22:22: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
05/03/2016 13:22:22: CUB_PATH: c:\src\cub-1.4.1
05/03/2016 13:22:22: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
05/03/2016 13:22:22: Build Branch: HEAD
05/03/2016 13:22:22: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12
05/03/2016 13:22:22: Built by svcphil on cntk-muc01
05/03/2016 13:22:22: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
05/03/2016 13:22:22: -------------------------------------------------------------------
08/16/2016 03:20:10: Built time: Aug 16 2016 03:09:16
08/16/2016 03:20:10: Last modified date: Fri Aug 12 05:28:23 2016
08/16/2016 03:20:10: Build type: Release
08/16/2016 03:20:10: Build target: GPU
08/16/2016 03:20:10: With 1bit-SGD: yes
08/16/2016 03:20:10: Math lib: mkl
08/16/2016 03:20:10: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
08/16/2016 03:20:10: CUB_PATH: c:\src\cub-1.4.1
08/16/2016 03:20:10: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
08/16/2016 03:20:10: Build Branch: HEAD
08/16/2016 03:20:10: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
08/16/2016 03:20:10: Built by svcphil on Philly-Pool1
08/16/2016 03:20:10: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
08/16/2016 03:20:10: -------------------------------------------------------------------
08/16/2016 03:20:12: -------------------------------------------------------------------
08/16/2016 03:20:12: GPU info:
05/03/2016 13:22:22: Running on DPHAIM-22 at 2016/05/03 13:22:22
05/03/2016 13:22:22: Command line:
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]]
08/16/2016 03:20:12: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:12: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:12: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:12: -------------------------------------------------------------------
08/16/2016 03:20:12: Running on DPHAIM-25 at 2016/08/16 03:20:12
08/16/2016 03:20:12: Command line:
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
05/03/2016 13:22:22: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
05/03/2016 13:22:22: RootDir = ".."
08/16/2016 03:20:12: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
08/16/2016 03:20:12: RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
@ -65,7 +79,7 @@ speechTrain = [
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
@ -111,35 +125,36 @@ speechTrain = [
]
]
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
DeviceId=-1
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=2048]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
05/03/2016 13:22:22: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:12: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
05/03/2016 13:22:22: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
05/03/2016 13:22:22: RootDir = ".."
08/16/2016 03:20:12: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 03:20:12: RootDir = ".."
ConfigDir = "../Config"
DataDir = "../Data"
OutputDir = "../Output"
ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models"
ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = "1"
modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn"
modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn"
parallelTrain = true
speechTrain = [
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
@ -185,36 +200,37 @@ speechTrain = [
]
]
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
DeviceId=-1
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=2048]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
05/03/2016 13:22:22: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:12: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
05/03/2016 13:22:22: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 03:20:12: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: FeedForward.cntk:command=speechTrain
configparameters: FeedForward.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
configparameters: FeedForward.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
configparameters: FeedForward.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
configparameters: FeedForward.cntk:deviceId=-1
configparameters: FeedForward.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models
configparameters: FeedForward.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
configparameters: FeedForward.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu
configparameters: FeedForward.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models
configparameters: FeedForward.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
configparameters: FeedForward.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
configparameters: FeedForward.cntk:parallelTrain=true
configparameters: FeedForward.cntk:precision=float
configparameters: FeedForward.cntk:RootDir=..
configparameters: FeedForward.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu
configparameters: FeedForward.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
configparameters: FeedForward.cntk:speechTrain=[
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
@ -258,24 +274,24 @@ configparameters: FeedForward.cntk:speechTrain=[
labelType = "category"
]
]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]] [reader=[useMersenneTwisterRand=true]]
configparameters: FeedForward.cntk:timestamping=true
configparameters: FeedForward.cntk:traceLevel=1
05/03/2016 13:22:22: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
05/03/2016 13:22:22: Commands: speechTrain
05/03/2016 13:22:22: Precision = "float"
05/03/2016 13:22:22: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
05/03/2016 13:22:22: CNTKCommandTrainInfo: speechTrain : 1
05/03/2016 13:22:22: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
08/16/2016 03:20:12: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:12: Commands: speechTrain
08/16/2016 03:20:12: Precision = "float"
08/16/2016 03:20:12: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
08/16/2016 03:20:12: CNTKCommandTrainInfo: speechTrain : 1
08/16/2016 03:20:12: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
05/03/2016 13:22:22: ##############################################################################
05/03/2016 13:22:22: # #
05/03/2016 13:22:22: # Action "train" #
05/03/2016 13:22:22: # #
05/03/2016 13:22:22: ##############################################################################
08/16/2016 03:20:12: ##############################################################################
08/16/2016 03:20:12: # #
08/16/2016 03:20:12: # Action "train" #
08/16/2016 03:20:12: # #
08/16/2016 03:20:12: ##############################################################################
05/03/2016 13:22:22: CNTKCommandTrainBegin: speechTrain
08/16/2016 03:20:12: CNTKCommandTrainBegin: speechTrain
SimpleNetworkBuilder Using CPU
reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp ... 948 entries
total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list
@ -284,13 +300,25 @@ htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Example
label set 0: 129 classes
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
05/03/2016 13:22:23: Creating virgin network.
08/16/2016 03:20:12: Creating virgin network.
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000.
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000.
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000.
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@ -319,7 +347,7 @@ Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x
Validating --> B2 = LearnableParameter() : -> [132 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *]
Validating --> Prior = Mean (labels) : [132 x *] -> [132]
Validating --> LogOfPrior = Log (Prior) : [132] -> [132]
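Prior, LogOfPrior and PosteriorProb exist to turn the network output into something an HMM decoder can use: the ScaledLogLikelihood root seen in the node lists subtracts the log state prior from the unnormalized log posterior, which is Bayes' rule with the per-frame term dropped:

\[ \log p(x_t \mid s) \;=\; \log p(s \mid x_t) \;-\; \log p(s) \;+\; \underbrace{\log p(x_t)}_{\text{constant per frame}} \]

so ScaledLogLikelihood = HLast - LogOfPrior up to per-frame constants (HLast is the pre-softmax activation, hence "scaled").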
@@ -336,70 +364,70 @@ Validating network, final pass.
Post-processing network complete.
05/03/2016 13:22:23: Created model with 25 nodes on CPU.
08/16/2016 03:20:12: Created model with 25 nodes on CPU.
05/03/2016 13:22:23: Training criterion node(s):
05/03/2016 13:22:23: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
08/16/2016 03:20:12: Training criterion node(s):
08/16/2016 03:20:12: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
05/03/2016 13:22:23: Evaluation criterion node(s):
05/03/2016 13:22:23: EvalClassificationError = ClassificationError
08/16/2016 03:20:12: Evaluation criterion node(s):
08/16/2016 03:20:12: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared.
0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] }
000000BDD334C430: {[features Value[363 x *]] }
000000BDD334C4D0: {[W0 Value[512 x 363]] }
000000BDD334C610: {[MeanOfFeatures Value[363]] }
000000BDD334C890: {[B0 Value[512 x 1]] }
000000BDD334CCF0: {[W1 Value[512 x 512]] }
000000BDD334CE30: {[B1 Value[512 x 1]] }
000000BDD334D1F0: {[InvStdOfFeatures Value[363]] }
000000BDD5BCA080: {[Prior Value[132]] }
000000BDD5BCA120: {[EvalClassificationError Value[1]] }
000000BDD5BCA260: {[W2 Value[132 x 512]] }
000000BDD5BCA440: {[labels Value[132 x *]] }
000000BDD5BCA6C0: {[MVNormalizedFeatures Value[363 x *]] }
000000BDD5BCAE40: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] }
000000BDD5BCAEE0: {[CrossEntropyWithSoftmax Gradient[1]] }
000000BDD5BCAF80: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] }
000000BDD5BCB0C0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] }
000000BDD5BCB160: {[ScaledLogLikelihood Value[132 x 1 x *]] }
000000BDD5BCB340: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] }
000000BDD5BCB520: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] }
000000BDD5BCB5C0: {[B2 Gradient[132 x 1]] }
000000BDD5BCB700: {[W0*features Value[512 x *]] }
000000BDD5BCB7A0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] }
000000BDD5BCB8E0: {[LogOfPrior Value[132]] }
000000BDD5BCB980: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] }
000000BDD5BCBAC0: {[B2 Value[132 x 1]] }
000000BDD5BCBB60: {[CrossEntropyWithSoftmax Value[1]] }
000000BDD5BCBC00: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] }
000000BDD5BCBCA0: {[W2*H1 Gradient[132 x 1 x *]] }
{ W0*features+B0 : [512 x 1 x *] (gradient)
W1*H1 : [512 x 1 x *] }
{ W0 : [512 x 363] (gradient)
W0*features+B0 : [512 x 1 x *] }
{ H1 : [512 x 1 x *]
W0*features : [512 x *] (gradient) }
{ W1 : [512 x 512] (gradient)
W1*H1+B1 : [512 x 1 x *] }
{ H2 : [512 x 1 x *]
W1*H1 : [512 x 1 x *] (gradient) }
{ HLast : [132 x 1 x *]
W2 : [132 x 512] (gradient) }
{ B0 : [512 x 1] (gradient)
H1 : [512 x 1 x *] (gradient)
W1*H1+B1 : [512 x 1 x *] (gradient)
W2*H1 : [132 x 1 x *] }
{ B1 : [512 x 1] (gradient)
H2 : [512 x 1 x *] (gradient)
HLast : [132 x 1 x *] (gradient) }
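The groups above read as reuse sets: a value buffer can be recycled as a gradient buffer (and vice versa) once its last consumer has run, which is how 19 of the 40 matrices fold into 8 allocations. A minimal sketch of the idea, greedy reuse over hypothetical lifetimes in schedule order (not CNTK's actual allocator):

#include <algorithm>
#include <cstdio>
#include <string>
#include <vector>

// A matrix needs a buffer from firstUse to lastUse (inclusive), measured in
// execution steps of the forward/backward schedule.
struct Lifetime { std::string name; int firstUse, lastUse; };

int main() {
    std::vector<Lifetime> mats = {
        {"W1*H1 : value",             2, 3},
        {"HLast : value",             4, 8},
        {"W0*features+B0 : gradient", 6, 7},
        {"W2 : gradient",             9, 9},
    };
    std::sort(mats.begin(), mats.end(),
              [](const Lifetime& a, const Lifetime& b) { return a.firstUse < b.firstUse; });
    std::vector<int> freeAt;  // per physical buffer: last step at which it is still in use
    for (const auto& m : mats) {
        int chosen = -1;
        for (std::size_t b = 0; b < freeAt.size(); ++b)
            if (freeAt[b] < m.firstUse) { chosen = static_cast<int>(b); break; }  // disjoint lifetime: reuse
        if (chosen < 0) { chosen = static_cast<int>(freeAt.size()); freeAt.push_back(0); }  // new buffer
        freeAt[static_cast<std::size_t>(chosen)] = m.lastUse;
        std::printf("%-28s -> buffer %d\n", m.name.c_str(), chosen);
    }
    return 0;
}

With these lifetimes, HLast's value and W2's gradient end up sharing buffer 0 (the same pairing the address map above shows), while the overlapping gradient forces a second allocation: four matrices in two buffers.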
05/03/2016 13:22:23: Precomputing --> 3 PreCompute nodes found.
08/16/2016 03:20:12: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient:
05/03/2016 13:22:23: MeanOfFeatures = Mean()
05/03/2016 13:22:23: InvStdOfFeatures = InvStdDev()
05/03/2016 13:22:23: Prior = Mean()
08/16/2016 03:20:12: Node 'B0' (LearnableParameter operation) : [512 x 1]
08/16/2016 03:20:12: Node 'B1' (LearnableParameter operation) : [512 x 1]
08/16/2016 03:20:12: Node 'B2' (LearnableParameter operation) : [132 x 1]
08/16/2016 03:20:12: Node 'W0' (LearnableParameter operation) : [512 x 363]
08/16/2016 03:20:12: Node 'W1' (LearnableParameter operation) : [512 x 512]
08/16/2016 03:20:12: Node 'W2' (LearnableParameter operation) : [132 x 512]
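The count checks out against the six tensors just listed:

\[ 512\cdot 363 + 512\cdot 512 + 132\cdot 512 + 512 + 512 + 132 \;=\; 185856 + 262144 + 67584 + 1156 \;=\; 516740 \]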
08/16/2016 03:20:12: Precomputing --> 3 PreCompute nodes found.
08/16/2016 03:20:12: MeanOfFeatures = Mean()
08/16/2016 03:20:12: InvStdOfFeatures = InvStdDev()
08/16/2016 03:20:12: Prior = Mean()
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
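The dimensions are consistent with the network's 363-dimensional input layer: 363 / 33 = 11, i.e. each 33-dimensional 'USER' frame is presumably spliced with a context window of 5 neighbor frames on each side:

\[ 33 \times (2 \cdot 5 + 1) \;=\; 33 \times 11 \;=\; 363 \]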
05/03/2016 13:22:24: Precomputing --> Completed.
08/16/2016 03:20:15: Precomputing --> Completed.
05/03/2016 13:22:24: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
08/16/2016 03:20:15: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
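Both derived quantities in this header can be checked by hand, assuming the config's minibatchSize is 256 (consistent with the reported per-sample learning rate, 1/256 = 0.00390625): a per-minibatch momentum of 0.9 corresponds to a per-sample decay with time constant

\[ \tau \;=\; -\frac{1}{\ln\!\big(0.9^{1/256}\big)} \;=\; -\frac{256}{\ln 0.9} \;\approx\; 2429.8 \ \text{samples.} \]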
minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
05/03/2016 13:22:24: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
05/03/2016 13:22:25: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.48531419 * 2048; EvalClassificationError = 0.90722656 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.288909s
05/03/2016 13:22:25: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn'
05/03/2016 13:22:25: CNTKCommandTrainEnd: speechTrain
08/16/2016 03:20:15: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
08/16/2016 03:20:15: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.46427900 * 2048; EvalErrorPrediction = 0.91259766 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.28059s
08/16/2016 03:20:15: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn'
08/16/2016 03:20:15: CNTKCommandTrainEnd: speechTrain
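In the epoch summary, each figure of the form "X * 2048" is a criterion accumulated over the 2048 frames seen, i.e. a per-sample average of X. For a 132-class frame classification task the chance levels are

\[ \ln 132 \approx 4.883 \ \text{nats of cross-entropy}, \qquad 1 - \tfrac{1}{132} \approx 0.992 \ \text{error rate,} \]

so a single 2048-frame pass (cross-entropy 4.464, error 0.913) has moved only slightly below chance, as expected for this little data.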
05/03/2016 13:22:25: Action "train" complete.
08/16/2016 03:20:15: Action "train" complete.
05/03/2016 13:22:25: __COMPLETED__
08/16/2016 03:20:15: __COMPLETED__
~MPIWrapper
View file
@@ -1 +0,0 @@
__COMPLETED__
View file
@@ -1 +0,0 @@
__COMPLETED__
View file
@@ -1,18 +1,24 @@
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]]
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 268381192 kB
-------------------------------------------------------------------
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
-------------------------------------------------------------------
Build info:
Built time: May 3 2016 13:15:46
Last modified date: Tue Apr 26 23:35:31 2016
Built time: Aug 16 2016 03:09:16
Last modified date: Fri Aug 12 05:28:23 2016
Build type: Release
Build target: GPU
With 1bit-SGD: no
With 1bit-SGD: yes
Math lib: mkl
CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
CUB_PATH: c:\src\cub-1.4.1
CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
Build Branch: HEAD
Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12
Built by svcphil on cntk-muc01
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by svcphil on Philly-Pool1
Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
-------------------------------------------------------------------
Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
@@ -25,31 +31,39 @@ ping [requestnodes (after change)]: all 1 nodes responded
mpihelper: only one MPI process: MPI operation will be boring
ping [mpihelper]: 1 nodes pinging each other
ping [mpihelper]: all 1 nodes responded
05/03/2016 13:22:25: -------------------------------------------------------------------
05/03/2016 13:22:25: Build info:
08/16/2016 03:20:17: -------------------------------------------------------------------
08/16/2016 03:20:17: Build info:
05/03/2016 13:22:25: Built time: May 3 2016 13:15:46
05/03/2016 13:22:25: Last modified date: Tue Apr 26 23:35:31 2016
05/03/2016 13:22:25: Build type: Release
05/03/2016 13:22:25: Build target: GPU
05/03/2016 13:22:25: With 1bit-SGD: no
05/03/2016 13:22:25: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
05/03/2016 13:22:25: CUB_PATH: c:\src\cub-1.4.1
05/03/2016 13:22:25: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
05/03/2016 13:22:25: Build Branch: HEAD
05/03/2016 13:22:25: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12
05/03/2016 13:22:25: Built by svcphil on cntk-muc01
05/03/2016 13:22:25: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
05/03/2016 13:22:25: -------------------------------------------------------------------
08/16/2016 03:20:17: Built time: Aug 16 2016 03:09:16
08/16/2016 03:20:17: Last modified date: Fri Aug 12 05:28:23 2016
08/16/2016 03:20:17: Build type: Release
08/16/2016 03:20:17: Build target: GPU
08/16/2016 03:20:17: With 1bit-SGD: yes
08/16/2016 03:20:17: Math lib: mkl
08/16/2016 03:20:17: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
08/16/2016 03:20:17: CUB_PATH: c:\src\cub-1.4.1
08/16/2016 03:20:17: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
08/16/2016 03:20:17: Build Branch: HEAD
08/16/2016 03:20:17: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
08/16/2016 03:20:17: Built by svcphil on Philly-Pool1
08/16/2016 03:20:17: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
08/16/2016 03:20:17: -------------------------------------------------------------------
08/16/2016 03:20:19: -------------------------------------------------------------------
08/16/2016 03:20:19: GPU info:
05/03/2016 13:22:25: Running on DPHAIM-22 at 2016/05/03 13:22:25
05/03/2016 13:22:25: Command line:
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]]
08/16/2016 03:20:19: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:19: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:19: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:19: -------------------------------------------------------------------
08/16/2016 03:20:19: Running on DPHAIM-25 at 2016/08/16 03:20:19
08/16/2016 03:20:19: Command line:
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
05/03/2016 13:22:25: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
05/03/2016 13:22:25: RootDir = ".."
08/16/2016 03:20:19: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
08/16/2016 03:20:19: RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
@@ -65,7 +79,7 @@ speechTrain = [
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
@@ -111,35 +125,36 @@ speechTrain = [
]
]
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
DeviceId=0
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=2048]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
05/03/2016 13:22:25: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:19: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
05/03/2016 13:22:25: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
05/03/2016 13:22:25: RootDir = ".."
08/16/2016 03:20:19: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 03:20:19: RootDir = ".."
ConfigDir = "../Config"
DataDir = "../Data"
OutputDir = "../Output"
ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models"
ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = "1"
modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn"
modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn"
parallelTrain = true
speechTrain = [
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
@@ -185,36 +200,37 @@ speechTrain = [
]
]
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
DeviceId=0
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=2048]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
05/03/2016 13:22:25: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:19: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
05/03/2016 13:22:25: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 03:20:19: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: FeedForward.cntk:command=speechTrain
configparameters: FeedForward.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
configparameters: FeedForward.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
configparameters: FeedForward.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
configparameters: FeedForward.cntk:deviceId=0
configparameters: FeedForward.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models
configparameters: FeedForward.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
configparameters: FeedForward.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
configparameters: FeedForward.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models
configparameters: FeedForward.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
configparameters: FeedForward.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
configparameters: FeedForward.cntk:parallelTrain=true
configparameters: FeedForward.cntk:precision=float
configparameters: FeedForward.cntk:RootDir=..
configparameters: FeedForward.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
configparameters: FeedForward.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
configparameters: FeedForward.cntk:speechTrain=[
action = "train"
SimpleNetworkBuilder = [
layerSizes = 363:512:512:132
trainingCriterion = "CrossEntropyWithSoftmax"
evalCriterion = "ClassificationError"
evalCriterion = "ErrorPrediction"
layerTypes = "Sigmoid"
applyMeanVarNorm = true
needPrior = true
@@ -258,24 +274,24 @@ configparameters: FeedForward.cntk:speechTrain=[
labelType = "category"
]
]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]] [reader=[useMersenneTwisterRand=true]]
configparameters: FeedForward.cntk:timestamping=true
configparameters: FeedForward.cntk:traceLevel=1
05/03/2016 13:22:25: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
05/03/2016 13:22:25: Commands: speechTrain
05/03/2016 13:22:25: Precision = "float"
05/03/2016 13:22:25: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
05/03/2016 13:22:25: CNTKCommandTrainInfo: speechTrain : 1
05/03/2016 13:22:25: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
08/16/2016 03:20:19: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:19: Commands: speechTrain
08/16/2016 03:20:19: Precision = "float"
08/16/2016 03:20:19: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
08/16/2016 03:20:19: CNTKCommandTrainInfo: speechTrain : 1
08/16/2016 03:20:19: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
05/03/2016 13:22:25: ##############################################################################
05/03/2016 13:22:25: # #
05/03/2016 13:22:25: # Action "train" #
05/03/2016 13:22:25: # #
05/03/2016 13:22:25: ##############################################################################
08/16/2016 03:20:19: ##############################################################################
08/16/2016 03:20:19: # #
08/16/2016 03:20:19: # Action "train" #
08/16/2016 03:20:19: # #
08/16/2016 03:20:19: ##############################################################################
05/03/2016 13:22:25: CNTKCommandTrainBegin: speechTrain
08/16/2016 03:20:19: CNTKCommandTrainBegin: speechTrain
SimpleNetworkBuilder Using GPU 0
reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp ... 948 entries
total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list
@@ -284,14 +300,26 @@ htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Example
label set 0: 129 classes
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
05/03/2016 13:22:25: Creating virgin network.
08/16/2016 03:20:19: Creating virgin network.
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000.
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
Microsoft::MSR::CNTK::GPUMatrix<ElemType>::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000.
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000.
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Post-processing network...
7 roots:
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
EvalClassificationError = ClassificationError()
EvalErrorPrediction = ErrorPrediction()
InvStdOfFeatures = InvStdDev()
MeanOfFeatures = Mean()
PosteriorProb = Softmax()
@@ -320,7 +348,7 @@ Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x
Validating --> B2 = LearnableParameter() : -> [132 x 1]
Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *]
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *]
Validating --> Prior = Mean (labels) : [132 x *] -> [132]
Validating --> LogOfPrior = Log (Prior) : [132] -> [132]
@@ -337,70 +365,70 @@ Validating network, final pass.
Post-processing network complete.
05/03/2016 13:22:26: Created model with 25 nodes on GPU 0.
08/16/2016 03:20:20: Created model with 25 nodes on GPU 0.
05/03/2016 13:22:26: Training criterion node(s):
05/03/2016 13:22:26: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
08/16/2016 03:20:20: Training criterion node(s):
08/16/2016 03:20:20: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
05/03/2016 13:22:26: Evaluation criterion node(s):
05/03/2016 13:22:26: EvalClassificationError = ClassificationError
08/16/2016 03:20:20: Evaluation criterion node(s):
08/16/2016 03:20:20: EvalErrorPrediction = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing Structure:
Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared.
0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] }
00000087D360C610: {[features Value[363 x *]] }
00000087EB4FEEF0: {[W0 Value[512 x 363]] }
00000087EB4FF530: {[B1 Value[512 x 1]] }
00000087EB4FF850: {[W1 Value[512 x 512]] }
00000087EB4FFC10: {[W2 Value[132 x 512]] }
00000087EB500070: {[B2 Value[132 x 1]] }
00000087EB5001B0: {[MeanOfFeatures Value[363]] }
00000087EB500250: {[InvStdOfFeatures Value[363]] }
00000087EB5004D0: {[B0 Value[512 x 1]] }
00000087EDA2B150: {[labels Value[132 x *]] }
00000087EDA2B330: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] }
00000087EDA2B3D0: {[Prior Value[132]] }
00000087EDA2B6F0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] }
00000087EDA2B8D0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] }
00000087EDA2BB50: {[CrossEntropyWithSoftmax Value[1]] }
00000087EDA2BC90: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] }
00000087EDA2C0F0: {[EvalClassificationError Value[1]] }
00000087EDA2C190: {[W0*features Value[512 x *]] }
00000087EDA2C2D0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] }
00000087EDA2C370: {[W2*H1 Gradient[132 x 1 x *]] }
00000087EDA2C410: {[B2 Gradient[132 x 1]] }
00000087EDA2C730: {[ScaledLogLikelihood Value[132 x 1 x *]] }
00000087EDA2C7D0: {[LogOfPrior Value[132]] }
00000087EDA2CAF0: {[MVNormalizedFeatures Value[363 x *]] }
00000087EDA2CB90: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] }
00000087EDA2CCD0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] }
00000087EDA2CEB0: {[CrossEntropyWithSoftmax Gradient[1]] }
00000087EDA2CFF0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] }
{ W0*features+B0 : [512 x 1 x *] (gradient)
W1*H1 : [512 x 1 x *] }
{ H2 : [512 x 1 x *]
W1*H1 : [512 x 1 x *] (gradient) }
{ HLast : [132 x 1 x *]
W2 : [132 x 512] (gradient) }
{ W0 : [512 x 363] (gradient)
W0*features+B0 : [512 x 1 x *] }
{ B0 : [512 x 1] (gradient)
H1 : [512 x 1 x *] (gradient)
W1*H1+B1 : [512 x 1 x *] (gradient)
W2*H1 : [132 x 1 x *] }
{ H1 : [512 x 1 x *]
W0*features : [512 x *] (gradient) }
{ W1 : [512 x 512] (gradient)
W1*H1+B1 : [512 x 1 x *] }
{ B1 : [512 x 1] (gradient)
H2 : [512 x 1 x *] (gradient)
HLast : [132 x 1 x *] (gradient) }
05/03/2016 13:22:26: Precomputing --> 3 PreCompute nodes found.
08/16/2016 03:20:20: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient:
05/03/2016 13:22:26: MeanOfFeatures = Mean()
05/03/2016 13:22:26: InvStdOfFeatures = InvStdDev()
05/03/2016 13:22:26: Prior = Mean()
08/16/2016 03:20:20: Node 'B0' (LearnableParameter operation) : [512 x 1]
08/16/2016 03:20:20: Node 'B1' (LearnableParameter operation) : [512 x 1]
08/16/2016 03:20:20: Node 'B2' (LearnableParameter operation) : [132 x 1]
08/16/2016 03:20:20: Node 'W0' (LearnableParameter operation) : [512 x 363]
08/16/2016 03:20:20: Node 'W1' (LearnableParameter operation) : [512 x 512]
08/16/2016 03:20:20: Node 'W2' (LearnableParameter operation) : [132 x 512]
08/16/2016 03:20:20: Precomputing --> 3 PreCompute nodes found.
08/16/2016 03:20:20: MeanOfFeatures = Mean()
08/16/2016 03:20:20: InvStdOfFeatures = InvStdDev()
08/16/2016 03:20:20: Prior = Mean()
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
05/03/2016 13:22:27: Precomputing --> Completed.
08/16/2016 03:20:21: Precomputing --> Completed.
05/03/2016 13:22:27: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
08/16/2016 03:20:21: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
05/03/2016 13:22:27: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
05/03/2016 13:22:27: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.42832291 * 2048; EvalClassificationError = 0.91357422 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.052947s
05/03/2016 13:22:27: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn'
05/03/2016 13:22:27: CNTKCommandTrainEnd: speechTrain
08/16/2016 03:20:21: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
08/16/2016 03:20:21: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.41144794 * 2048; EvalErrorPrediction = 0.92773438 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.05551s
08/16/2016 03:20:21: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn'
08/16/2016 03:20:21: CNTKCommandTrainEnd: speechTrain
05/03/2016 13:22:27: Action "train" complete.
08/16/2016 03:20:21: Action "train" complete.
05/03/2016 13:22:27: __COMPLETED__
08/16/2016 03:20:21: __COMPLETED__
~MPIWrapper
View file
@@ -5,5 +5,5 @@
ConfigDir=$TEST_DIR/../../../../../../Examples/Speech/AN4/Config
# cntkrun <CNTK config file name> <additional CNTK args>
cntkrun FeedForward.cntk "speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]]" || exit $?
cntkrun FeedForward.cntk "speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]" || exit $?
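The flag added here, useMersenneTwisterRand=true, presumably pins the reader's randomization to a Mersenne Twister so that the shuffle is identical across the Windows and Linux baselines in this commit: unlike rand(), std::mt19937 is fully specified by the C++ standard, as this self-contained check illustrates (the tie to the CNTK flag is an assumption based on its name):

// With the default seed (5489), the 10000th draw of std::mt19937 must be
// 4123659995 on every conforming implementation ([rand.predef] in the standard).
#include <cassert>
#include <random>

int main() {
    std::mt19937 gen;              // default-seeded Mersenne Twister
    gen.discard(9999);             // skip the first 9999 outputs
    assert(gen() == 4123659995u);  // standard-mandated value
    return 0;
}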
View file
@@ -0,0 +1,682 @@
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 264172964 kB
-------------------------------------------------------------------
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/LSTM-NDL.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
-------------------------------------------------------------------
Build info:
Built time: Aug 16 2016 09:41:57
Last modified date: Mon Aug 15 23:39:17 2016
Build type: release
Build target: GPU
With 1bit-SGD: yes
Math lib: mkl
CUDA_PATH: /usr/local/cuda-7.5
CUB_PATH: /usr/local/cub-1.4.1
CUDNN_PATH: /usr/local/cudnn-4.0
Build Branch: HEAD
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by philly on 643085f7f8c2
Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
-------------------------------------------------------------------
Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
08/16/2016 10:01:47: -------------------------------------------------------------------
08/16/2016 10:01:47: Build info:
08/16/2016 10:01:47: Built time: Aug 16 2016 09:41:57
08/16/2016 10:01:47: Last modified date: Mon Aug 15 23:39:17 2016
08/16/2016 10:01:47: Build type: release
08/16/2016 10:01:47: Build target: GPU
08/16/2016 10:01:47: With 1bit-SGD: yes
08/16/2016 10:01:47: Math lib: mkl
08/16/2016 10:01:47: CUDA_PATH: /usr/local/cuda-7.5
08/16/2016 10:01:47: CUB_PATH: /usr/local/cub-1.4.1
08/16/2016 10:01:47: CUDNN_PATH: /usr/local/cudnn-4.0
08/16/2016 10:01:47: Build Branch: HEAD
08/16/2016 10:01:47: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
08/16/2016 10:01:47: Built by philly on 643085f7f8c2
08/16/2016 10:01:47: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
08/16/2016 10:01:47: -------------------------------------------------------------------
08/16/2016 10:01:47: -------------------------------------------------------------------
08/16/2016 10:01:47: GPU info:
08/16/2016 10:01:47: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:47: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:47: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:47: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:01:47: -------------------------------------------------------------------
08/16/2016 10:01:47: Running on localhost at 2016/08/16 10:01:47
08/16/2016 10:01:47: Command line:
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/LSTM-NDL.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
08/16/2016 10:01:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:01:47: RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "$ModelDir$/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "$ConfigDir$/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "$DataDir$/glob_0000.scp"
]
labels = [
mlfFile = "$DataDir$/glob_0000.mlf"
labelMappingFile = "$DataDir$/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
DeviceId=-1
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 10:01:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
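Note the combination frameMode=false, truncated=true, nbrUttsIneachRecurrentIter=16, minibatchSize=16: under CNTK v1 semantics this runs truncated BPTT, with minibatchSize acting as the truncation length and 16 utterances advanced in parallel (an interpretation of these knobs, not stated in the log itself), so each minibatch carries

\[ 16 \times 16 \;=\; 256 \ \text{frames.} \]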
08/16/2016 10:01:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:01:47: RootDir = ".."
ConfigDir = "../Config"
DataDir = "../Data"
OutputDir = "../Output"
ModelDir = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
DeviceId=-1
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 10:01:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:01:47: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: LSTM-NDL.cntk:command=speechTrain
configparameters: LSTM-NDL.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
configparameters: LSTM-NDL.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: LSTM-NDL.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: LSTM-NDL.cntk:deviceId=-1
configparameters: LSTM-NDL.cntk:frameMode=false
configparameters: LSTM-NDL.cntk:ModelDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models
configparameters: LSTM-NDL.cntk:modelPath=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn
configparameters: LSTM-NDL.cntk:OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
configparameters: LSTM-NDL.cntk:parallelTrain=false
configparameters: LSTM-NDL.cntk:precision=float
configparameters: LSTM-NDL.cntk:RootDir=..
configparameters: LSTM-NDL.cntk:RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
configparameters: LSTM-NDL.cntk:speechTrain=[
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=64]] [reader=[useMersenneTwisterRand=true]]
configparameters: LSTM-NDL.cntk:timestamping=true
configparameters: LSTM-NDL.cntk:traceLevel=1
configparameters: LSTM-NDL.cntk:truncated=true
08/16/2016 10:01:47: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:01:47: Commands: speechTrain
08/16/2016 10:01:47: Precision = "float"
08/16/2016 10:01:47: CNTKModelPath: /tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn
08/16/2016 10:01:47: CNTKCommandTrainInfo: speechTrain : 1
08/16/2016 10:01:47: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
08/16/2016 10:01:47: ##############################################################################
08/16/2016 10:01:47: # #
08/16/2016 10:01:47: # Action "train" #
08/16/2016 10:01:47: # #
08/16/2016 10:01:47: ##############################################################################
08/16/2016 10:01:47: CNTKCommandTrainBegin: speechTrain
NDLBuilder Using CPU
reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp ... 948 entries
total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list
htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf ... total 948 entries
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
label set 0: 129 classes
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
useParallelTrain option is not enabled. ParallelTrain config will be ignored.
08/16/2016 10:01:48: Creating virgin network.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=4, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=5, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=6, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=9, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=10, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=11, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=12, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=15, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=16, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=17, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=18, range=0.050000*1.000000, onCPU=false).
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 0] as uniform later when dimensions are fully known.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Post-processing network...
6 roots:
ce = CrossEntropyWithSoftmax()
err = ErrorPrediction()
featNorm.xMean = Mean()
featNorm.xStdDev = InvStdDev()
logPrior.prior = Mean()
scaledLogLikelihood = Minus()
Loop[0] --> Loop_LSTMoutput1.output -> 24 nodes
LSTMoutput1.dh LSTMoutput1.whh LSTMoutput1.wxxpbpwhh
LSTMoutput1.G4 LSTMoutput1.G3 LSTMoutput1.dc
LSTMoutput1.Wcfdc LSTMoutput1.unnamed165 LSTMoutput1.ft
LSTMoutput1.bft LSTMoutput1.G1 LSTMoutput1.Wcidc
LSTMoutput1.unnamed163 LSTMoutput1.it LSTMoutput1.G2
LSTMoutput1.unnamed164 LSTMoutput1.bit LSTMoutput1.ct
LSTMoutput1.Wcoct LSTMoutput1.unnamed166 LSTMoutput1.ot
LSTMoutput1.unnamed167 LSTMoutput1.mt LSTMoutput1.output
Loop[1] --> Loop_LSTMoutput2.output -> 24 nodes
LSTMoutput2.dh LSTMoutput2.whh LSTMoutput2.wxxpbpwhh
LSTMoutput2.G4 LSTMoutput2.G3 LSTMoutput2.dc
LSTMoutput2.Wcfdc LSTMoutput2.unnamed175 LSTMoutput2.ft
LSTMoutput2.bft LSTMoutput2.G1 LSTMoutput2.Wcidc
LSTMoutput2.unnamed173 LSTMoutput2.it LSTMoutput2.G2
LSTMoutput2.unnamed174 LSTMoutput2.bit LSTMoutput2.ct
LSTMoutput2.Wcoct LSTMoutput2.unnamed176 LSTMoutput2.ot
LSTMoutput2.unnamed177 LSTMoutput2.mt LSTMoutput2.output
Loop[2] --> Loop_LSTMoutput3.output -> 24 nodes
LSTMoutput3.dh LSTMoutput3.whh LSTMoutput3.wxxpbpwhh
LSTMoutput3.G4 LSTMoutput3.G3 LSTMoutput3.dc
LSTMoutput3.Wcfdc LSTMoutput3.unnamed185 LSTMoutput3.ft
LSTMoutput3.bft LSTMoutput3.G1 LSTMoutput3.Wcidc
LSTMoutput3.unnamed183 LSTMoutput3.it LSTMoutput3.G2
LSTMoutput3.unnamed184 LSTMoutput3.bit LSTMoutput3.ct
LSTMoutput3.Wcoct LSTMoutput3.unnamed186 LSTMoutput3.ot
LSTMoutput3.unnamed187 LSTMoutput3.mt LSTMoutput3.output
Validating network. 113 nodes to process in pass 1.
Validating --> labels = InputValue() : -> [132 x *]
Validating --> W = LearnableParameter() : -> [132 x 0]
Validating --> LSTMoutput3.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput3.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput2.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput1.wx = LearnableParameter() : -> [4096 x 0]
Validating --> features = InputValue() : -> [363 x *]
Validating --> featNorm.xMean = Mean (features) : [363 x *] -> [363]
Validating --> featNorm.xStdDev = InvStdDev (features) : [363 x *] -> [363]
Validating --> featNorm.xNorm = PerDimMeanVarNormalization (features, featNorm.xMean, featNorm.xStdDev) : [363 x *], [363], [363] -> [363 x *]
Node 'LSTMoutput1.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 363].
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.wxx = Times (LSTMoutput1.wx, featNorm.xNorm) : [4096 x 363], [363 x *] -> [4096 x *]
Validating --> LSTMoutput1.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput1.wxxpb = Plus (LSTMoutput1.wxx, LSTMoutput1.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput1.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput1.wxxpbpwhh = Plus (LSTMoutput1.wxxpb, LSTMoutput1.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput1.G4 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G3 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed165 = Plus (LSTMoutput1.G3, LSTMoutput1.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ft = Sigmoid (LSTMoutput1.unnamed165) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bft = ElementTimes (LSTMoutput1.ft, LSTMoutput1.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G1 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed163 = Plus (LSTMoutput1.G1, LSTMoutput1.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.it = Sigmoid (LSTMoutput1.unnamed163) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G2 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed164 = Tanh (LSTMoutput1.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bit = ElementTimes (LSTMoutput1.it, LSTMoutput1.unnamed164) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ct = Plus (LSTMoutput1.bft, LSTMoutput1.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcoct = DiagTimes (LSTMoutput1.Wco, LSTMoutput1.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed166 = Plus (LSTMoutput1.G4, LSTMoutput1.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ot = Sigmoid (LSTMoutput1.unnamed166) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed167 = Tanh (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.mt = ElementTimes (LSTMoutput1.ot, LSTMoutput1.unnamed167) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.output = Times (LSTMoutput1.Wmr, LSTMoutput1.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
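Taken together, the layer-1 validation lines above spell out a peephole LSTM with a 512-dimensional output projection. Writing z_t = W_x x_t + b + W_h h_{t-1} (the wxxpbpwhh node) and G_1..G_4 for its four 1024-row slices, with dc = c_{t-1} and dh = h_{t-1}:

\[
\begin{aligned}
i_t &= \sigma(G_1 + W_{ci} \odot c_{t-1}) \\
f_t &= \sigma(G_3 + W_{cf} \odot c_{t-1}) \\
c_t &= f_t \odot c_{t-1} + i_t \odot \tanh(G_2) \\
o_t &= \sigma(G_4 + W_{co} \odot c_t) \\
h_t &= W_{mr}\,\big(o_t \odot \tanh(c_t)\big)
\end{aligned}
\]

Layers 2 and 3 repeat the same cell on the previous layer's 512-dimensional output.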
Node 'LSTMoutput2.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=7, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.wxx = Times (LSTMoutput2.wx, LSTMoutput1.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput2.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput2.wxxpb = Plus (LSTMoutput2.wxx, LSTMoutput2.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput2.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=8, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput2.wxxpbpwhh = Plus (LSTMoutput2.wxxpb, LSTMoutput2.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput2.G4 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G3 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed175 = Plus (LSTMoutput2.G3, LSTMoutput2.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ft = Sigmoid (LSTMoutput2.unnamed175) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bft = ElementTimes (LSTMoutput2.ft, LSTMoutput2.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G1 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed173 = Plus (LSTMoutput2.G1, LSTMoutput2.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.it = Sigmoid (LSTMoutput2.unnamed173) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G2 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed174 = Tanh (LSTMoutput2.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bit = ElementTimes (LSTMoutput2.it, LSTMoutput2.unnamed174) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ct = Plus (LSTMoutput2.bft, LSTMoutput2.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcoct = DiagTimes (LSTMoutput2.Wco, LSTMoutput2.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed176 = Plus (LSTMoutput2.G4, LSTMoutput2.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ot = Sigmoid (LSTMoutput2.unnamed176) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed177 = Tanh (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.mt = ElementTimes (LSTMoutput2.ot, LSTMoutput2.unnamed177) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.output = Times (LSTMoutput2.Wmr, LSTMoutput2.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'LSTMoutput3.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=13, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.wxx = Times (LSTMoutput3.wx, LSTMoutput2.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput3.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput3.wxxpb = Plus (LSTMoutput3.wxx, LSTMoutput3.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput3.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput3.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput3.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=14, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput3.wxxpbpwhh = Plus (LSTMoutput3.wxxpb, LSTMoutput3.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput3.G4 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G3 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed185 = Plus (LSTMoutput3.G3, LSTMoutput3.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ft = Sigmoid (LSTMoutput3.unnamed185) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bft = ElementTimes (LSTMoutput3.ft, LSTMoutput3.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G1 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed183 = Plus (LSTMoutput3.G1, LSTMoutput3.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.it = Sigmoid (LSTMoutput3.unnamed183) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G2 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed184 = Tanh (LSTMoutput3.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bit = ElementTimes (LSTMoutput3.it, LSTMoutput3.unnamed184) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ct = Plus (LSTMoutput3.bft, LSTMoutput3.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcoct = DiagTimes (LSTMoutput3.Wco, LSTMoutput3.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed186 = Plus (LSTMoutput3.G4, LSTMoutput3.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ot = Sigmoid (LSTMoutput3.unnamed186) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed187 = Tanh (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.mt = ElementTimes (LSTMoutput3.ot, LSTMoutput3.unnamed187) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.output = Times (LSTMoutput3.Wmr, LSTMoutput3.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'W' (LearnableParameter operation) operation: Tensor shape was inferred as [132 x 512 x 1].
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 512 x 1] <- uniform(seed=19, range=0.050000*1.000000, onCPU=false).
Validating --> unnamed193 = Times (W, LSTMoutput3.output) : [132 x 512 x 1], [512 x 1 x *] -> [132 x *]
Validating --> b = LearnableParameter() : -> [132 x 1]
Validating --> LSTMoutputW = Plus (unnamed193, b) : [132 x *], [132 x 1] -> [132 x 1 x *]
Validating --> ce = CrossEntropyWithSoftmax (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> err = ErrorPrediction (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> logPrior.prior = Mean (labels) : [132 x *] -> [132]
Validating --> logPrior.logPrior = Log (logPrior.prior) : [132] -> [132]
Validating --> scaledLogLikelihood = Minus (LSTMoutputW, logPrior.logPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
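scaledLogLikelihood is the usual hybrid-ASR conversion of posteriors into scaled acoustic likelihoods: by Bayes' rule \log p(x_t \mid s) = \log P(s \mid x_t) - \log P(s) + \log p(x_t), and the last term is the same for every state s, so it can be dropped for decoding. With the state prior estimated as logPrior.prior = Mean(labels), the root computes

\mathrm{scaledLogLikelihood}_s(t) \;=\; \mathrm{LSTMoutputW}_s(t) \;-\; \log P(s)

(LSTMoutputW is the pre-softmax output, which differs from \log P(s \mid x_t) only by a per-frame constant, so the difference is equivalent for decoding.)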
Validating network. 88 nodes to process in pass 2.
Validating --> LSTMoutput1.dh = PastValue (LSTMoutput1.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput1.dc = PastValue (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.dh = PastValue (LSTMoutput2.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput2.dc = PastValue (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.dh = PastValue (LSTMoutput3.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput3.dc = PastValue (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating network. 15 nodes to process in pass 3.
Validating network, final pass.
29 out of 113 nodes do not share the minibatch layout with the input data.
Post-processing network complete.
08/16/2016 10:01:48: Created model with 113 nodes on CPU.
08/16/2016 10:01:48: Training criterion node(s):
08/16/2016 10:01:48: ce = CrossEntropyWithSoftmax
08/16/2016 10:01:48: Evaluation criterion node(s):
08/16/2016 10:01:48: err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing: Out of 217 matrices, 125 are shared as 56, and 92 are not shared.
{ LSTMoutput1.dh : [512 x 1 x *]
LSTMoutput1.wxx : [4096 x *] (gradient) }
{ LSTMoutput2.Wco : [1024] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] }
{ LSTMoutput1.Wmr : [512 x 1024] (gradient)
LSTMoutput2.wxx : [4096 x *] }
{ LSTMoutput2.wx : [4096 x 512 x 1] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] }
{ LSTMoutput1.ot : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] }
{ LSTMoutput1.ct : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput1.G4 : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] }
{ LSTMoutput1.unnamed164 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput1.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] }
{ LSTMoutput1.G1 : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] }
{ LSTMoutput1.Wci : [1024] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] }
{ LSTMoutput1.G3 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] }
{ LSTMoutput1.Wcf : [1024] (gradient)
LSTMoutput2.it : [1024 x 1 x *] }
{ LSTMoutput1.whh : [4096 x 1 x *] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] }
{ LSTMoutput1.b : [4096 x 1] (gradient)
LSTMoutput1.dh : [512 x 1 x *] (gradient)
LSTMoutput2.unnamed174 : [1024 x 1 x *] }
{ LSTMoutput2.Wmr : [512 x 1024] (gradient)
LSTMoutput3.wxx : [4096 x *] }
{ LSTMoutput3.wx : [4096 x 512 x 1] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] }
{ LSTMoutput2.ot : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] }
{ LSTMoutput2.ct : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput1.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] }
{ LSTMoutput2.unnamed174 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput1.unnamed166 : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] }
{ LSTMoutput1.dc : [1024 x 1 x *] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] }
{ LSTMoutput1.unnamed165 : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] }
{ LSTMoutput2.Wci : [1024] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] }
{ LSTMoutput2.G3 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] }
{ LSTMoutput1.it : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] }
{ LSTMoutput2.Wcf : [1024] (gradient)
LSTMoutput3.it : [1024 x 1 x *] }
{ LSTMoutput1.unnamed167 : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] }
{ LSTMoutput2.b : [4096 x 1] (gradient)
LSTMoutput2.dh : [512 x 1 x *] (gradient)
LSTMoutput3.unnamed184 : [1024 x 1 x *] }
{ LSTMoutput3.Wmr : [512 x 1024] (gradient)
unnamed193 : [132 x *] }
{ LSTMoutputW : [132 x 1 x *]
W : [132 x 512 x 1] (gradient) }
{ LSTMoutput1.mt : [1024 x 1 x *] (gradient)
LSTMoutput2.dh : [512 x 1 x *]
LSTMoutput2.wxx : [4096 x *] (gradient) }
{ LSTMoutput1.wx : [4096 x 363] (gradient)
LSTMoutput1.wxxpb : [4096 x 1 x *] }
{ LSTMoutput2.mt : [1024 x 1 x *] (gradient)
LSTMoutput3.dh : [512 x 1 x *]
LSTMoutput3.wxx : [4096 x *] (gradient) }
{ LSTMoutput3.output : [512 x 1 x *] (gradient)
LSTMoutputW : [132 x 1 x *] (gradient) }
{ LSTMoutput3.mt : [1024 x 1 x *] (gradient)
unnamed193 : [132 x *] (gradient) }
{ LSTMoutput2.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.ft : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.output : [512 x 1 x *] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput3.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.Wh : [4096 x 512] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed176 : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] (gradient) }
{ LSTMoutput1.bit : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.bft : [1024 x 1 x *] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.G2 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.unnamed163 : [1024 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.bft : [1024 x 1 x *] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed173 : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wh : [4096 x 512] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput2.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.output : [512 x 1 x *] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed177 : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] (gradient) }
{ LSTMoutput3.b : [4096 x 1] (gradient)
LSTMoutput3.dh : [512 x 1 x *] (gradient) }
{ LSTMoutput1.Wco : [1024] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] }
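Each brace group above is a set of matrices whose lifetimes in the forward/backward schedule never overlap, so all members can alias one allocation; that is how 125 of the 217 matrices collapse into 56 buffers. A minimal sketch of the idea, greedy reuse over non-overlapping [first-use, last-use] intervals (illustrative only, not CNTK's allocator; a real one also tracks byte capacity and grows buffers as needed):

    # Python sketch: group matrices whose lifetimes do not overlap so they can
    # share one buffer. 'lifetimes' maps a matrix name to (first_use, last_use)
    # positions in a topologically ordered compute schedule.
    def share_buffers(lifetimes):
        pools = []  # each pool: {"free_at": step, "members": [names]}
        for name, (start, end) in sorted(lifetimes.items(), key=lambda kv: kv[1]):
            for pool in pools:
                if pool["free_at"] < start:   # previous tenant dead before we start
                    pool["members"].append(name)
                    pool["free_at"] = end
                    break
            else:
                pools.append({"free_at": end, "members": [name]})
        return [p["members"] for p in pools]

    # Toy lifetimes (hypothetical step numbers, just to show the grouping):
    print(share_buffers({
        "LSTMoutput1.dh":             (0, 3),
        "LSTMoutput1.ct":             (2, 9),   # overlaps dh -> gets its own buffer
        "LSTMoutput1.wxx (gradient)": (5, 8),   # starts after dh dies -> shares
    }))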
08/16/2016 10:01:48: Training 13634692 parameters in 23 out of 23 parameter tensors and 104 nodes with gradient:
08/16/2016 10:01:48: Node 'LSTMoutput1.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput1.Wci' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput1.Wco' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput1.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 10:01:48: Node 'LSTMoutput1.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 10:01:48: Node 'LSTMoutput1.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 10:01:48: Node 'LSTMoutput1.wx' (LearnableParameter operation) : [4096 x 363]
08/16/2016 10:01:48: Node 'LSTMoutput2.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput2.Wci' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput2.Wco' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput2.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 10:01:48: Node 'LSTMoutput2.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 10:01:48: Node 'LSTMoutput2.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 10:01:48: Node 'LSTMoutput2.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 10:01:48: Node 'LSTMoutput3.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput3.Wci' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput3.Wco' (LearnableParameter operation) : [1024]
08/16/2016 10:01:48: Node 'LSTMoutput3.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 10:01:48: Node 'LSTMoutput3.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 10:01:48: Node 'LSTMoutput3.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 10:01:48: Node 'LSTMoutput3.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 10:01:48: Node 'W' (LearnableParameter operation) : [132 x 512 x 1]
08/16/2016 10:01:48: Node 'b' (LearnableParameter operation) : [132 x 1]
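The 13634692 total is consistent with the shapes just listed:

\begin{aligned}
\text{layer 1:}&\; 4096\cdot363 + 4096\cdot512 + 512\cdot1024 + 4096 + 3\cdot1024 = 4\,115\,456\\
\text{layers 2, 3:}&\; 4096\cdot512 + 4096\cdot512 + 512\cdot1024 + 4096 + 3\cdot1024 = 4\,725\,760 \text{ each}\\
\text{output:}&\; 132\cdot512 + 132 = 67\,716\\
\text{total:}&\; 4\,115\,456 + 2\cdot4\,725\,760 + 67\,716 = 13\,634\,692
\end{aligned}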
08/16/2016 10:01:48: Precomputing --> 3 PreCompute nodes found.
08/16/2016 10:01:48: featNorm.xMean = Mean()
08/16/2016 10:01:48: featNorm.xStdDev = InvStdDev()
08/16/2016 10:01:48: logPrior.prior = Mean()
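The featNorm pair accumulates per-dimension statistics over the whole training set before any SGD step; PerDimMeanVarNormalization then applies them frame by frame. A sketch of that computation (illustrative Python, not CNTK's code; note the second node is the *inverse* standard deviation, so normalization is a multiply):

    import numpy as np

    def precompute_norm(frames):                # frames: [T, 363] training features
        mean = frames.mean(axis=0)              # featNorm.xMean
        inv_std = 1.0 / frames.std(axis=0)      # featNorm.xStdDev (inverse std dev)
        return mean, inv_std

    def per_dim_mean_var_normalization(x, mean, inv_std):
        return (x - mean) * inv_std             # featNorm.xNorm

    frames = np.random.randn(1000, 363).astype(np.float32)  # stand-in data
    mean, inv_std = precompute_norm(frames)
    x_norm = per_dim_mean_var_normalization(frames[0], mean, inv_std)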
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
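The 363-dimensional network input is consistent with these 33-dimensional frames being spliced with a context window, since

363 = 33 \times 11

i.e. each input very likely stacks the current frame with 5 neighbor frames on each side (an inference from the dimensions; the log does not state it).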
08/16/2016 10:01:49: Precomputing --> Completed.
08/16/2016 10:01:50: Starting Epoch 1: learning rate per sample = 0.001953 effective momentum = 0.000000 momentum as time constant = 0.0 samples
minibatchiterator: epoch 0: frames [0..64] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
08/16/2016 10:01:50: Starting minibatch loop.
08/16/2016 10:01:53: Epoch[ 1 of 1]-Minibatch[ 1- 10, 250.00%]: ce = 4.87313957 * 160; err = 0.90625000 * 160; time = 3.3910s; samplesPerSecond = 47.2
08/16/2016 10:01:56: Epoch[ 1 of 1]-Minibatch[ 11- 20, 500.00%]: ce = 4.84521751 * 160; err = 0.69375000 * 160; time = 2.9626s; samplesPerSecond = 54.0
08/16/2016 10:01:58: Finished Epoch[ 1 of 1]: [Training] ce = 4.85644356 * 418; err = 0.80382775 * 418; totalSamplesSeen = 418; learningRatePerSample = 0.001953125; epochTime=8.39953s
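Two numbers here are worth decoding. The percentage is progress relative to the requested epochSize of 64 samples: after 10 minibatches 160 samples have been seen, and 160/64 = 250.00%. The per-sample learning rate follows from spreading the configured learningRatesPerMB = 0.5 over the effective truncated-BPTT minibatch of 16 frames times 16 parallel utterances (reading minibatchSize and nbrUttsIneachRecurrentIter that way is an inference consistent with the log):

0.5 / (16 \cdot 16) = 0.5 / 256 = 0.001953125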
08/16/2016 10:01:59: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn'
08/16/2016 10:01:59: CNTKCommandTrainEnd: speechTrain
08/16/2016 10:01:59: Action "train" complete.
08/16/2016 10:01:59: __COMPLETED__

View file

@@ -1 +0,0 @@
__COMPLETED__

View file

@@ -1 +0,0 @@
__COMPLETED__

View file

@@ -0,0 +1,683 @@
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 264172964 kB
-------------------------------------------------------------------
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/LSTM-NDL.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
-------------------------------------------------------------------
Build info:
Built time: Aug 16 2016 09:41:57
Last modified date: Mon Aug 15 23:39:17 2016
Build type: release
Build target: GPU
With 1bit-SGD: yes
Math lib: mkl
CUDA_PATH: /usr/local/cuda-7.5
CUB_PATH: /usr/local/cub-1.4.1
CUDNN_PATH: /usr/local/cudnn-4.0
Build Branch: HEAD
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by philly on 643085f7f8c2
Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
-------------------------------------------------------------------
Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
08/16/2016 10:02:00: -------------------------------------------------------------------
08/16/2016 10:02:00: Build info:
08/16/2016 10:02:00: Built time: Aug 16 2016 09:41:57
08/16/2016 10:02:00: Last modified date: Mon Aug 15 23:39:17 2016
08/16/2016 10:02:00: Build type: release
08/16/2016 10:02:00: Build target: GPU
08/16/2016 10:02:00: With 1bit-SGD: yes
08/16/2016 10:02:00: Math lib: mkl
08/16/2016 10:02:00: CUDA_PATH: /usr/local/cuda-7.5
08/16/2016 10:02:00: CUB_PATH: /usr/local/cub-1.4.1
08/16/2016 10:02:00: CUDNN_PATH: /usr/local/cudnn-4.0
08/16/2016 10:02:00: Build Branch: HEAD
08/16/2016 10:02:00: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
08/16/2016 10:02:00: Built by philly on 643085f7f8c2
08/16/2016 10:02:00: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
08/16/2016 10:02:00: -------------------------------------------------------------------
08/16/2016 10:02:01: -------------------------------------------------------------------
08/16/2016 10:02:01: GPU info:
08/16/2016 10:02:01: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:02:01: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:02:01: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:02:01: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
08/16/2016 10:02:01: -------------------------------------------------------------------
08/16/2016 10:02:01: Running on localhost at 2016/08/16 10:02:01
08/16/2016 10:02:01: Command line:
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/LSTM-NDL.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
08/16/2016 10:02:01: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:02:01: RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "$ModelDir$/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "$ConfigDir$/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "$DataDir$/glob_0000.scp"
]
labels = [
mlfFile = "$DataDir$/glob_0000.mlf"
labelMappingFile = "$DataDir$/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
DeviceId=0
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 10:02:01: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:02:01: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 10:02:01: RootDir = ".."
ConfigDir = "../Config"
DataDir = "../Data"
OutputDir = "../Output"
ModelDir = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
DeviceId=0
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 10:02:01: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 10:02:01: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: LSTM-NDL.cntk:command=speechTrain
configparameters: LSTM-NDL.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
configparameters: LSTM-NDL.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: LSTM-NDL.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
configparameters: LSTM-NDL.cntk:deviceId=0
configparameters: LSTM-NDL.cntk:frameMode=false
configparameters: LSTM-NDL.cntk:ModelDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models
configparameters: LSTM-NDL.cntk:modelPath=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn
configparameters: LSTM-NDL.cntk:OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
configparameters: LSTM-NDL.cntk:parallelTrain=false
configparameters: LSTM-NDL.cntk:precision=float
configparameters: LSTM-NDL.cntk:RootDir=..
configparameters: LSTM-NDL.cntk:RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
configparameters: LSTM-NDL.cntk:speechTrain=[
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
]
labels = [
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
labelDim = 132
labelType = "category"
]
]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=64]] [reader=[useMersenneTwisterRand=true]]
configparameters: LSTM-NDL.cntk:timestamping=true
configparameters: LSTM-NDL.cntk:traceLevel=1
configparameters: LSTM-NDL.cntk:truncated=true
08/16/2016 10:02:01: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
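Note how the command-line arguments reappear at the end of the processed speechTrain section as ` [SGD=[maxEpochs=1]] [SGD=[epochSize=64]] [reader=[useMersenneTwisterRand=true]]`: each `name=[...]` argument is layered on top of the file's section, with later values winning (maxEpochs 4 -> 1). A toy last-wins nested merge showing the effect (hypothetical helper, not CNTK's config parser):

    def merge(base, override):
        """Recursively merge 'override' into 'base'; later (override) values win."""
        out = dict(base)
        for key, val in override.items():
            if isinstance(val, dict) and isinstance(out.get(key), dict):
                out[key] = merge(out[key], val)   # descend into nested sections
            else:
                out[key] = val
        return out

    cfg = {"SGD": {"epochSize": 0, "minibatchSize": 16, "maxEpochs": 4}}
    for patch in ({"SGD": {"maxEpochs": 1}}, {"SGD": {"epochSize": 64}}):
        cfg = merge(cfg, patch)
    print(cfg)  # {'SGD': {'epochSize': 64, 'minibatchSize': 16, 'maxEpochs': 1}}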
08/16/2016 10:02:01: Commands: speechTrain
08/16/2016 10:02:01: Precision = "float"
08/16/2016 10:02:01: CNTKModelPath: /tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn
08/16/2016 10:02:01: CNTKCommandTrainInfo: speechTrain : 1
08/16/2016 10:02:01: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
08/16/2016 10:02:01: ##############################################################################
08/16/2016 10:02:01: # #
08/16/2016 10:02:01: # Action "train" #
08/16/2016 10:02:01: # #
08/16/2016 10:02:01: ##############################################################################
08/16/2016 10:02:01: CNTKCommandTrainBegin: speechTrain
NDLBuilder Using GPU 0
reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp ... 948 entries
total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list
htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf ... total 948 entries
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
label set 0: 129 classes
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
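The chunk figures are straight division: 948 / 3 = 316.0 utterances per chunk and 252734 / 3 \approx 84244.7 frames per chunk. Chunking presumably lets blockRandomize shuffle over a window of resident chunks instead of loading the whole corpus at once.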
useParallelTrain option is not enabled. ParallelTrain config will be ignored.
08/16/2016 10:02:01: Creating virgin network.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
SetUniformRandomValue (GPU): creating curand object with seed 3, sizeof(ElemType)==4
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=4, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=5, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=6, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=9, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=10, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=11, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=12, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=15, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=16, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=17, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=18, range=0.050000*1.000000, onCPU=false).
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 0] as uniform later when dimensions are fully known.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
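Parameters printed as [4096 x 0] or [132 x 0] are only registered at this point; their input dimension depends on nodes not yet validated, so the uniform fill is deferred until shape inference pins it down (the 'Tensor shape was inferred as ...' lines in pass 1 below), with seeds 1-19 fixed up front for reproducibility. A minimal sketch of such deferred initialization (illustrative only; reading range=0.05 as a fill in [-0.05, 0.05] is an assumption):

    import numpy as np

    class LazyParam:
        """Parameter whose column count is unknown until shape inference runs."""
        def __init__(self, rows, seed, scale=0.05):
            self.rows, self.seed, self.scale = rows, seed, scale
            self.value = None                        # not materialized yet: [rows x 0]
        def materialize(self, cols):
            rng = np.random.RandomState(self.seed)   # fixed seed -> reproducible init
            self.value = rng.uniform(-self.scale, self.scale, size=(self.rows, cols))

    wx = LazyParam(4096, seed=1)    # registered as [4096 x 0]
    wx.materialize(363)             # later: shape inferred from the 363-dim input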
Post-processing network...
6 roots:
ce = CrossEntropyWithSoftmax()
err = ErrorPrediction()
featNorm.xMean = Mean()
featNorm.xStdDev = InvStdDev()
logPrior.prior = Mean()
scaledLogLikelihood = Minus()
Loop[0] --> Loop_LSTMoutput1.output -> 24 nodes
LSTMoutput1.dh LSTMoutput1.whh LSTMoutput1.wxxpbpwhh
LSTMoutput1.G4 LSTMoutput1.G3 LSTMoutput1.dc
LSTMoutput1.Wcfdc LSTMoutput1.unnamed165 LSTMoutput1.ft
LSTMoutput1.bft LSTMoutput1.G1 LSTMoutput1.Wcidc
LSTMoutput1.unnamed163 LSTMoutput1.it LSTMoutput1.G2
LSTMoutput1.unnamed164 LSTMoutput1.bit LSTMoutput1.ct
LSTMoutput1.Wcoct LSTMoutput1.unnamed166 LSTMoutput1.ot
LSTMoutput1.unnamed167 LSTMoutput1.mt LSTMoutput1.output
Loop[1] --> Loop_LSTMoutput2.output -> 24 nodes
LSTMoutput2.dh LSTMoutput2.whh LSTMoutput2.wxxpbpwhh
LSTMoutput2.G4 LSTMoutput2.G3 LSTMoutput2.dc
LSTMoutput2.Wcfdc LSTMoutput2.unnamed175 LSTMoutput2.ft
LSTMoutput2.bft LSTMoutput2.G1 LSTMoutput2.Wcidc
LSTMoutput2.unnamed173 LSTMoutput2.it LSTMoutput2.G2
LSTMoutput2.unnamed174 LSTMoutput2.bit LSTMoutput2.ct
LSTMoutput2.Wcoct LSTMoutput2.unnamed176 LSTMoutput2.ot
LSTMoutput2.unnamed177 LSTMoutput2.mt LSTMoutput2.output
Loop[2] --> Loop_LSTMoutput3.output -> 24 nodes
LSTMoutput3.dh LSTMoutput3.whh LSTMoutput3.wxxpbpwhh
LSTMoutput3.G4 LSTMoutput3.G3 LSTMoutput3.dc
LSTMoutput3.Wcfdc LSTMoutput3.unnamed185 LSTMoutput3.ft
LSTMoutput3.bft LSTMoutput3.G1 LSTMoutput3.Wcidc
LSTMoutput3.unnamed183 LSTMoutput3.it LSTMoutput3.G2
LSTMoutput3.unnamed184 LSTMoutput3.bit LSTMoutput3.ct
LSTMoutput3.Wcoct LSTMoutput3.unnamed186 LSTMoutput3.ot
LSTMoutput3.unnamed187 LSTMoutput3.mt LSTMoutput3.output
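Each Loop[i] is the strongly connected component created by that layer's PastValue nodes: every node listed must be evaluated one sequence position at a time, because the step-t values feed on step t-1 via

dh_t = \mathrm{output}_{t-1}, \qquad dc_t = ct_{t-1}

(with the delayed values reset at utterance boundaries), while everything outside the three loops can be computed over the whole minibatch at once.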
Validating network. 113 nodes to process in pass 1.
Validating --> labels = InputValue() : -> [132 x *]
Validating --> W = LearnableParameter() : -> [132 x 0]
Validating --> LSTMoutput3.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput3.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput2.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput1.wx = LearnableParameter() : -> [4096 x 0]
Validating --> features = InputValue() : -> [363 x *]
Validating --> featNorm.xMean = Mean (features) : [363 x *] -> [363]
Validating --> featNorm.xStdDev = InvStdDev (features) : [363 x *] -> [363]
Validating --> featNorm.xNorm = PerDimMeanVarNormalization (features, featNorm.xMean, featNorm.xStdDev) : [363 x *], [363], [363] -> [363 x *]
Node 'LSTMoutput1.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 363].
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.wxx = Times (LSTMoutput1.wx, featNorm.xNorm) : [4096 x 363], [363 x *] -> [4096 x *]
Validating --> LSTMoutput1.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput1.wxxpb = Plus (LSTMoutput1.wxx, LSTMoutput1.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput1.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput1.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput1.wxxpbpwhh = Plus (LSTMoutput1.wxxpb, LSTMoutput1.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput1.G4 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G3 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed165 = Plus (LSTMoutput1.G3, LSTMoutput1.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ft = Sigmoid (LSTMoutput1.unnamed165) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bft = ElementTimes (LSTMoutput1.ft, LSTMoutput1.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G1 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed163 = Plus (LSTMoutput1.G1, LSTMoutput1.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.it = Sigmoid (LSTMoutput1.unnamed163) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G2 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed164 = Tanh (LSTMoutput1.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bit = ElementTimes (LSTMoutput1.it, LSTMoutput1.unnamed164) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ct = Plus (LSTMoutput1.bft, LSTMoutput1.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcoct = DiagTimes (LSTMoutput1.Wco, LSTMoutput1.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed166 = Plus (LSTMoutput1.G4, LSTMoutput1.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ot = Sigmoid (LSTMoutput1.unnamed166) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed167 = Tanh (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.mt = ElementTimes (LSTMoutput1.ot, LSTMoutput1.unnamed167) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.output = Times (LSTMoutput1.Wmr, LSTMoutput1.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'LSTMoutput2.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=7, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.wxx = Times (LSTMoutput2.wx, LSTMoutput1.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput2.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput2.wxxpb = Plus (LSTMoutput2.wxx, LSTMoutput2.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput2.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput2.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=8, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput2.wxxpbpwhh = Plus (LSTMoutput2.wxxpb, LSTMoutput2.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput2.G4 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G3 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed175 = Plus (LSTMoutput2.G3, LSTMoutput2.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ft = Sigmoid (LSTMoutput2.unnamed175) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bft = ElementTimes (LSTMoutput2.ft, LSTMoutput2.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G1 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed173 = Plus (LSTMoutput2.G1, LSTMoutput2.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.it = Sigmoid (LSTMoutput2.unnamed173) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G2 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed174 = Tanh (LSTMoutput2.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bit = ElementTimes (LSTMoutput2.it, LSTMoutput2.unnamed174) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ct = Plus (LSTMoutput2.bft, LSTMoutput2.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcoct = DiagTimes (LSTMoutput2.Wco, LSTMoutput2.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed176 = Plus (LSTMoutput2.G4, LSTMoutput2.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ot = Sigmoid (LSTMoutput2.unnamed176) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed177 = Tanh (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.mt = ElementTimes (LSTMoutput2.ot, LSTMoutput2.unnamed177) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.output = Times (LSTMoutput2.Wmr, LSTMoutput2.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'LSTMoutput3.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=13, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.wxx = Times (LSTMoutput3.wx, LSTMoutput2.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput3.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput3.wxxpb = Plus (LSTMoutput3.wxx, LSTMoutput3.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput3.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput3.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput3.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=14, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput3.wxxpbpwhh = Plus (LSTMoutput3.wxxpb, LSTMoutput3.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput3.G4 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G3 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed185 = Plus (LSTMoutput3.G3, LSTMoutput3.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ft = Sigmoid (LSTMoutput3.unnamed185) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bft = ElementTimes (LSTMoutput3.ft, LSTMoutput3.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G1 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed183 = Plus (LSTMoutput3.G1, LSTMoutput3.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.it = Sigmoid (LSTMoutput3.unnamed183) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G2 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed184 = Tanh (LSTMoutput3.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bit = ElementTimes (LSTMoutput3.it, LSTMoutput3.unnamed184) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ct = Plus (LSTMoutput3.bft, LSTMoutput3.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcoct = DiagTimes (LSTMoutput3.Wco, LSTMoutput3.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed186 = Plus (LSTMoutput3.G4, LSTMoutput3.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ot = Sigmoid (LSTMoutput3.unnamed186) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed187 = Tanh (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.mt = ElementTimes (LSTMoutput3.ot, LSTMoutput3.unnamed187) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.output = Times (LSTMoutput3.Wmr, LSTMoutput3.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'W' (LearnableParameter operation) operation: Tensor shape was inferred as [132 x 512 x 1].
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 512 x 1] <- uniform(seed=19, range=0.050000*1.000000, onCPU=false).
Validating --> unnamed193 = Times (W, LSTMoutput3.output) : [132 x 512 x 1], [512 x 1 x *] -> [132 x *]
Validating --> b = LearnableParameter() : -> [132 x 1]
Validating --> LSTMoutputW = Plus (unnamed193, b) : [132 x *], [132 x 1] -> [132 x 1 x *]
Validating --> ce = CrossEntropyWithSoftmax (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> err = ErrorPrediction (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> logPrior.prior = Mean (labels) : [132 x *] -> [132]
Validating --> logPrior.logPrior = Log (logPrior.prior) : [132] -> [132]
Validating --> scaledLogLikelihood = Minus (LSTMoutputW, logPrior.logPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
Validating network. 88 nodes to process in pass 2.
Validating --> LSTMoutput1.dh = PastValue (LSTMoutput1.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput1.dc = PastValue (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.dh = PastValue (LSTMoutput2.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput2.dc = PastValue (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.dh = PastValue (LSTMoutput3.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput3.dc = PastValue (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating network. 15 nodes to process in pass 3.
Validating network, final pass.
29 out of 113 nodes do not share the minibatch layout with the input data.
Post-processing network complete.
08/16/2016 10:02:01: Created model with 113 nodes on GPU 0.
08/16/2016 10:02:01: Training criterion node(s):
08/16/2016 10:02:01: ce = CrossEntropyWithSoftmax
08/16/2016 10:02:01: Evaluation criterion node(s):
08/16/2016 10:02:01: err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing: Out of 217 matrices, 125 are shared as 56, and 92 are not shared.
{ LSTMoutput2.mt : [1024 x 1 x *] (gradient)
LSTMoutput3.dh : [512 x 1 x *]
LSTMoutput3.wxx : [4096 x *] (gradient) }
{ LSTMoutput2.Wco : [1024] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] }
{ LSTMoutput1.wx : [4096 x 363] (gradient)
LSTMoutput1.wxxpb : [4096 x 1 x *] }
{ LSTMoutput1.Wmr : [512 x 1024] (gradient)
LSTMoutput2.wxx : [4096 x *] }
{ LSTMoutput2.wx : [4096 x 512 x 1] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] }
{ LSTMoutput1.ot : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] }
{ LSTMoutput1.ct : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput1.G4 : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] }
{ LSTMoutput1.unnamed164 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput1.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] }
{ LSTMoutput1.G1 : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] }
{ LSTMoutput1.Wci : [1024] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] }
{ LSTMoutput1.G3 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] }
{ LSTMoutput1.Wcf : [1024] (gradient)
LSTMoutput2.it : [1024 x 1 x *] }
{ LSTMoutput1.whh : [4096 x 1 x *] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] }
{ LSTMoutput1.b : [4096 x 1] (gradient)
LSTMoutput1.dh : [512 x 1 x *] (gradient)
LSTMoutput2.unnamed174 : [1024 x 1 x *] }
{ LSTMoutput2.Wmr : [512 x 1024] (gradient)
LSTMoutput3.wxx : [4096 x *] }
{ LSTMoutput3.wx : [4096 x 512 x 1] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] }
{ LSTMoutput2.ot : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] }
{ LSTMoutput2.ct : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput1.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] }
{ LSTMoutput2.unnamed174 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput1.unnamed166 : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] }
{ LSTMoutput1.dc : [1024 x 1 x *] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] }
{ LSTMoutput1.unnamed165 : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] }
{ LSTMoutput2.Wci : [1024] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] }
{ LSTMoutput2.G3 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] }
{ LSTMoutput1.it : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] }
{ LSTMoutput2.Wcf : [1024] (gradient)
LSTMoutput3.it : [1024 x 1 x *] }
{ LSTMoutput1.unnamed167 : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] }
{ LSTMoutput2.b : [4096 x 1] (gradient)
LSTMoutput2.dh : [512 x 1 x *] (gradient)
LSTMoutput3.unnamed184 : [1024 x 1 x *] }
{ LSTMoutput3.Wmr : [512 x 1024] (gradient)
unnamed193 : [132 x *] }
{ LSTMoutputW : [132 x 1 x *]
W : [132 x 512 x 1] (gradient) }
{ LSTMoutput3.output : [512 x 1 x *] (gradient)
LSTMoutputW : [132 x 1 x *] (gradient) }
{ LSTMoutput3.mt : [1024 x 1 x *] (gradient)
unnamed193 : [132 x *] (gradient) }
{ LSTMoutput2.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.ft : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.output : [512 x 1 x *] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput3.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.Wh : [4096 x 512] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed176 : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] (gradient) }
{ LSTMoutput1.bit : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.bft : [1024 x 1 x *] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.G2 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.unnamed163 : [1024 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.bft : [1024 x 1 x *] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed173 : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wh : [4096 x 512] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput2.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.output : [512 x 1 x *] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed177 : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] (gradient) }
{ LSTMoutput3.b : [4096 x 1] (gradient)
LSTMoutput3.dh : [512 x 1 x *] (gradient) }
{ LSTMoutput1.dh : [512 x 1 x *]
LSTMoutput1.wxx : [4096 x *] (gradient) }
{ LSTMoutput1.mt : [1024 x 1 x *] (gradient)
LSTMoutput2.dh : [512 x 1 x *]
LSTMoutput2.wxx : [4096 x *] (gradient) }
{ LSTMoutput1.Wco : [1024] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] }
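The brace-grouped listing above reports sets of value and gradient matrices that can be backed by a single allocation because their lifetimes in the forward/backward schedule never overlap. As a toy illustration of that idea (not CNTK's actual allocator; `share_buffers` and its demo inputs are made up here), a greedy interval-based assignment looks like this:

```python
def share_buffers(lifetimes):
    """lifetimes: {name: (first_use, last_use)} -> {name: buffer_id}."""
    assignment, buffer_free_at = {}, []
    for name, (start, end) in sorted(lifetimes.items(), key=lambda kv: kv[1][0]):
        for i, free_at in enumerate(buffer_free_at):
            if free_at < start:                  # previous tenant is dead: reuse
                assignment[name] = i
                buffer_free_at[i] = end
                break
        else:                                    # no reusable buffer: allocate new
            assignment[name] = len(buffer_free_at)
            buffer_free_at.append(end)
    return assignment

demo = {"LSTMoutput1.dh": (0, 3), "LSTMoutput2.dh": (2, 5),
        "LSTMoutput1.wxx(grad)": (4, 6)}
print(share_buffers(demo))
# {'LSTMoutput1.dh': 0, 'LSTMoutput2.dh': 1, 'LSTMoutput1.wxx(grad)': 0}
```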
08/16/2016 10:02:01: Training 13634692 parameters in 23 out of 23 parameter tensors and 104 nodes with gradient:
08/16/2016 10:02:01: Node 'LSTMoutput1.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput1.Wci' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput1.Wco' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput1.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 10:02:01: Node 'LSTMoutput1.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 10:02:01: Node 'LSTMoutput1.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 10:02:01: Node 'LSTMoutput1.wx' (LearnableParameter operation) : [4096 x 363]
08/16/2016 10:02:01: Node 'LSTMoutput2.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput2.Wci' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput2.Wco' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput2.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 10:02:01: Node 'LSTMoutput2.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 10:02:01: Node 'LSTMoutput2.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 10:02:01: Node 'LSTMoutput2.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 10:02:01: Node 'LSTMoutput3.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput3.Wci' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput3.Wco' (LearnableParameter operation) : [1024]
08/16/2016 10:02:01: Node 'LSTMoutput3.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 10:02:01: Node 'LSTMoutput3.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 10:02:01: Node 'LSTMoutput3.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 10:02:01: Node 'LSTMoutput3.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 10:02:01: Node 'W' (LearnableParameter operation) : [132 x 512 x 1]
08/16/2016 10:02:01: Node 'b' (LearnableParameter operation) : [132 x 1]
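The parameter total in the line above can be reproduced from the tensor shapes printed for the 23 LearnableParameter nodes. A quick self-contained check (shapes copied from the listing; the script itself is ours, not part of the log):

```python
from functools import reduce
from operator import mul

# Per LSTM layer: Wcf, Wci, Wco, Wh, Wmr, b, wx.
layer1 = [(1024,), (1024,), (1024,), (4096, 512), (512, 1024), (4096, 1), (4096, 363)]
layer23 = [(1024,), (1024,), (1024,), (4096, 512), (512, 1024), (4096, 1), (4096, 512, 1)]
shapes = layer1 + layer23 + layer23 + [(132, 512, 1), (132, 1)]  # + output W, b

total = sum(reduce(mul, s, 1) for s in shapes)
assert len(shapes) == 23
print(total)  # 13634692, matching the log line above
```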
08/16/2016 10:02:01: Precomputing --> 3 PreCompute nodes found.
08/16/2016 10:02:01: featNorm.xMean = Mean()
08/16/2016 10:02:01: featNorm.xStdDev = InvStdDev()
08/16/2016 10:02:01: logPrior.prior = Mean()
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
08/16/2016 10:02:02: Precomputing --> Completed.
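The three PreCompute nodes above are evaluated once over the corpus before SGD starts: featNorm.xMean and featNorm.xStdDev feed the PerDimMeanVarNormalization node, and logPrior.prior estimates state priors from the labels. Restated in numpy for the feature-normalization pair (illustrative data; `frames` is a stand-in for the real 252734-frame corpus):

```python
import numpy as np

frames = np.random.default_rng(2).normal(loc=3.0, scale=2.0, size=(1000, 363))
mean = frames.mean(axis=0)          # featNorm.xMean: per-dimension corpus mean
inv_std = 1.0 / frames.std(axis=0)  # featNorm.xStdDev: inverse std deviation
x_norm = (frames - mean) * inv_std  # featNorm.xNorm = PerDimMeanVarNormalization
print(x_norm.mean(), x_norm.std())  # ~0 and ~1
```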
08/16/2016 10:02:02: Starting Epoch 1: learning rate per sample = 0.001953 effective momentum = 0.000000 momentum as time constant = 0.0 samples
minibatchiterator: epoch 0: frames [0..64] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
08/16/2016 10:02:03: Starting minibatch loop.
08/16/2016 10:02:03: Epoch[ 1 of 1]-Minibatch[ 1- 10, 250.00%]: ce = 4.87453079 * 160; err = 0.90625000 * 160; time = 0.5069s; samplesPerSecond = 315.6
08/16/2016 10:02:03: Epoch[ 1 of 1]-Minibatch[ 11- 20, 500.00%]: ce = 4.84628143 * 160; err = 0.69375000 * 160; time = 0.4852s; samplesPerSecond = 329.8
08/16/2016 10:02:04: Finished Epoch[ 1 of 1]: [Training] ce = 4.85708837 * 418; err = 0.80382775 * 418; totalSamplesSeen = 418; learningRatePerSample = 0.001953125; epochTime=1.33633s
08/16/2016 10:02:04: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn'
08/16/2016 10:02:05: CNTKCommandTrainEnd: speechTrain
08/16/2016 10:02:05: Action "train" complete.
08/16/2016 10:02:05: __COMPLETED__
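For reference, the epoch summary line ce = 4.85708837 * 418 is the sample-weighted mean of the per-minibatch criteria; the two logged minibatch groups cover 320 of the 418 samples, and the remainder's average can be back-solved purely as an illustration of the bookkeeping:

```python
mb_groups = [(4.87453079, 160), (4.84628143, 160)]  # logged "ce * samples" pairs
epoch_ce, total_n = 4.85708837, 418
known = sum(ce * n for ce, n in mb_groups)
rest_n = total_n - sum(n for _, n in mb_groups)     # 98 samples not shown above
rest_ce = (epoch_ce * total_n - known) / rest_n
print(rest_n, round(rest_ce, 4))  # 98 4.8463 (implied average ce of the tail)
```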

View file

@ -1 +0,0 @@
__COMPLETED__

View file

@ -1 +0,0 @@
__COMPLETED__

View file

@ -0,0 +1,681 @@
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 268381192 kB
-------------------------------------------------------------------
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/LSTM-NDL.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
-------------------------------------------------------------------
Build info:
Built time: Aug 16 2016 03:09:16
Last modified date: Fri Aug 12 05:28:23 2016
Build type: Release
Build target: GPU
With 1bit-SGD: yes
Math lib: mkl
CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
CUB_PATH: c:\src\cub-1.4.1
CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
Build Branch: HEAD
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by svcphil on Philly-Pool1
Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
-------------------------------------------------------------------
Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
08/16/2016 03:20:22: -------------------------------------------------------------------
08/16/2016 03:20:22: Build info:
08/16/2016 03:20:22: Built time: Aug 16 2016 03:09:16
08/16/2016 03:20:22: Last modified date: Fri Aug 12 05:28:23 2016
08/16/2016 03:20:22: Build type: Release
08/16/2016 03:20:22: Build target: GPU
08/16/2016 03:20:22: With 1bit-SGD: yes
08/16/2016 03:20:22: Math lib: mkl
08/16/2016 03:20:22: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
08/16/2016 03:20:22: CUB_PATH: c:\src\cub-1.4.1
08/16/2016 03:20:22: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
08/16/2016 03:20:22: Build Branch: HEAD
08/16/2016 03:20:22: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
08/16/2016 03:20:22: Built by svcphil on Philly-Pool1
08/16/2016 03:20:22: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
08/16/2016 03:20:22: -------------------------------------------------------------------
08/16/2016 03:20:23: -------------------------------------------------------------------
08/16/2016 03:20:23: GPU info:
08/16/2016 03:20:23: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:23: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:23: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:23: -------------------------------------------------------------------
08/16/2016 03:20:23: Running on DPHAIM-25 at 2016/08/16 03:20:23
08/16/2016 03:20:23: Command line:
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/LSTM-NDL.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
08/16/2016 03:20:23: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
08/16/2016 03:20:23: RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "$ModelDir$/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "$ConfigDir$/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "$DataDir$/glob_0000.scp"
]
labels = [
mlfFile = "$DataDir$/glob_0000.mlf"
labelMappingFile = "$DataDir$/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
DeviceId=-1
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 03:20:23: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:23: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 03:20:23: RootDir = ".."
ConfigDir = "../Config"
DataDir = "../Data"
OutputDir = "../Output"
ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp"
]
labels = [
mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf"
labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
DeviceId=-1
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 03:20:23: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:23: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: LSTM-NDL.cntk:command=speechTrain
configparameters: LSTM-NDL.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
configparameters: LSTM-NDL.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
configparameters: LSTM-NDL.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
configparameters: LSTM-NDL.cntk:deviceId=-1
configparameters: LSTM-NDL.cntk:frameMode=false
configparameters: LSTM-NDL.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models
configparameters: LSTM-NDL.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn
configparameters: LSTM-NDL.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
configparameters: LSTM-NDL.cntk:parallelTrain=false
configparameters: LSTM-NDL.cntk:precision=float
configparameters: LSTM-NDL.cntk:RootDir=..
configparameters: LSTM-NDL.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
configparameters: LSTM-NDL.cntk:speechTrain=[
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp"
]
labels = [
mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf"
labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list"
labelDim = 132
labelType = "category"
]
]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=64]] [reader=[useMersenneTwisterRand=true]]
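The bracketed tail appended to the speechTrain block above shows how the command-line overrides layer on top of the file config: later assignments win, which is why this run trains maxEpochs=1 with epochSize=64 even though the file says maxEpochs=4 and epochSize=0. A minimal dict-merge sketch of that precedence (illustration only, not CNTK's config parser):

```python
def deep_merge(base: dict, override: dict) -> dict:
    out = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(out.get(key), dict):
            out[key] = deep_merge(out[key], value)   # recurse into sub-blocks
        else:
            out[key] = value                         # later assignment wins
    return out

sgd = {"epochSize": 0, "minibatchSize": 16, "maxEpochs": 4}
for override in ({"maxEpochs": 1}, {"epochSize": 64}):
    sgd = deep_merge(sgd, override)
print(sgd)  # {'epochSize': 64, 'minibatchSize': 16, 'maxEpochs': 1}
```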
configparameters: LSTM-NDL.cntk:timestamping=true
configparameters: LSTM-NDL.cntk:traceLevel=1
configparameters: LSTM-NDL.cntk:truncated=true
08/16/2016 03:20:23: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:23: Commands: speechTrain
08/16/2016 03:20:23: Precision = "float"
08/16/2016 03:20:23: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn
08/16/2016 03:20:23: CNTKCommandTrainInfo: speechTrain : 1
08/16/2016 03:20:23: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
08/16/2016 03:20:23: ##############################################################################
08/16/2016 03:20:23: # #
08/16/2016 03:20:23: # Action "train" #
08/16/2016 03:20:23: # #
08/16/2016 03:20:23: ##############################################################################
08/16/2016 03:20:23: CNTKCommandTrainBegin: speechTrain
NDLBuilder Using CPU
reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp ... 948 entries
total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list
htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf ... total 948 entries
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
label set 0: 129 classes
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
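A trivial cross-check of the chunking line above, just to make the averages concrete (948 utterances and 252734 frames over 3 chunks):

```python
utterances, frames, chunks = 948, 252734, 3
print(utterances / chunks)        # 316.0 utterances per chunk, as logged
print(round(frames / chunks, 1))  # 84244.7 frames per chunk, as logged
```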
useParallelTrain option is not enabled. ParallelTrain config will be ignored.
08/16/2016 03:20:24: Creating virgin network.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=4, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=5, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=6, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=9, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=10, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=11, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=12, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=15, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=16, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=17, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=18, range=0.050000*1.000000, onCPU=false).
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 0] as uniform later when dimensions are fully known.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Post-processing network...
6 roots:
ce = CrossEntropyWithSoftmax()
err = ErrorPrediction()
featNorm.xMean = Mean()
featNorm.xStdDev = InvStdDev()
logPrior.prior = Mean()
scaledLogLikelihood = Minus()
Loop[0] --> Loop_LSTMoutput1.output -> 24 nodes
LSTMoutput1.dh LSTMoutput1.whh LSTMoutput1.wxxpbpwhh
LSTMoutput1.G4 LSTMoutput1.G3 LSTMoutput1.dc
LSTMoutput1.Wcfdc LSTMoutput1.unnamed165 LSTMoutput1.ft
LSTMoutput1.bft LSTMoutput1.G1 LSTMoutput1.Wcidc
LSTMoutput1.unnamed163 LSTMoutput1.it LSTMoutput1.G2
LSTMoutput1.unnamed164 LSTMoutput1.bit LSTMoutput1.ct
LSTMoutput1.Wcoct LSTMoutput1.unnamed166 LSTMoutput1.ot
LSTMoutput1.unnamed167 LSTMoutput1.mt LSTMoutput1.output
Loop[1] --> Loop_LSTMoutput2.output -> 24 nodes
LSTMoutput2.dh LSTMoutput2.whh LSTMoutput2.wxxpbpwhh
LSTMoutput2.G4 LSTMoutput2.G3 LSTMoutput2.dc
LSTMoutput2.Wcfdc LSTMoutput2.unnamed175 LSTMoutput2.ft
LSTMoutput2.bft LSTMoutput2.G1 LSTMoutput2.Wcidc
LSTMoutput2.unnamed173 LSTMoutput2.it LSTMoutput2.G2
LSTMoutput2.unnamed174 LSTMoutput2.bit LSTMoutput2.ct
LSTMoutput2.Wcoct LSTMoutput2.unnamed176 LSTMoutput2.ot
LSTMoutput2.unnamed177 LSTMoutput2.mt LSTMoutput2.output
Loop[2] --> Loop_LSTMoutput3.output -> 24 nodes
LSTMoutput3.dh LSTMoutput3.whh LSTMoutput3.wxxpbpwhh
LSTMoutput3.G4 LSTMoutput3.G3 LSTMoutput3.dc
LSTMoutput3.Wcfdc LSTMoutput3.unnamed185 LSTMoutput3.ft
LSTMoutput3.bft LSTMoutput3.G1 LSTMoutput3.Wcidc
LSTMoutput3.unnamed183 LSTMoutput3.it LSTMoutput3.G2
LSTMoutput3.unnamed184 LSTMoutput3.bit LSTMoutput3.ct
LSTMoutput3.Wcoct LSTMoutput3.unnamed186 LSTMoutput3.ot
LSTMoutput3.unnamed187 LSTMoutput3.mt LSTMoutput3.output
Validating network. 113 nodes to process in pass 1.
Validating --> labels = InputValue() : -> [132 x *]
Validating --> W = LearnableParameter() : -> [132 x 0]
Validating --> LSTMoutput3.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput3.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput2.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput1.wx = LearnableParameter() : -> [4096 x 0]
Validating --> features = InputValue() : -> [363 x *]
Validating --> featNorm.xMean = Mean (features) : [363 x *] -> [363]
Validating --> featNorm.xStdDev = InvStdDev (features) : [363 x *] -> [363]
Validating --> featNorm.xNorm = PerDimMeanVarNormalization (features, featNorm.xMean, featNorm.xStdDev) : [363 x *], [363], [363] -> [363 x *]
Node 'LSTMoutput1.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 363].
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.wxx = Times (LSTMoutput1.wx, featNorm.xNorm) : [4096 x 363], [363 x *] -> [4096 x *]
Validating --> LSTMoutput1.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput1.wxxpb = Plus (LSTMoutput1.wxx, LSTMoutput1.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput1.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput1.wxxpbpwhh = Plus (LSTMoutput1.wxxpb, LSTMoutput1.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput1.G4 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G3 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed165 = Plus (LSTMoutput1.G3, LSTMoutput1.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ft = Sigmoid (LSTMoutput1.unnamed165) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bft = ElementTimes (LSTMoutput1.ft, LSTMoutput1.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G1 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed163 = Plus (LSTMoutput1.G1, LSTMoutput1.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.it = Sigmoid (LSTMoutput1.unnamed163) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G2 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed164 = Tanh (LSTMoutput1.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bit = ElementTimes (LSTMoutput1.it, LSTMoutput1.unnamed164) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ct = Plus (LSTMoutput1.bft, LSTMoutput1.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcoct = DiagTimes (LSTMoutput1.Wco, LSTMoutput1.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed166 = Plus (LSTMoutput1.G4, LSTMoutput1.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ot = Sigmoid (LSTMoutput1.unnamed166) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed167 = Tanh (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.mt = ElementTimes (LSTMoutput1.ot, LSTMoutput1.unnamed167) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.output = Times (LSTMoutput1.Wmr, LSTMoutput1.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
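The validation trace above spells out one step of the peephole LSTM with a projection layer (LSTMP) that lstmp-3layer-opt.ndl builds: a fused [4096] pre-activation is sliced into four [1024] gate inputs, the input and forget gates peek at the previous cell state dc, the output gate peeks at the new cell state, and Wmr projects the [1024] cell output down to [512]. A numpy restatement for a single frame (no batch axis; which quarter of the fused vector feeds which gate follows the NDL's slice offsets, so the split order below is illustrative):

```python
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstmp_step(x, dh, dc, wx, b, Wh, Wci, Wcf, Wco, Wmr):
    z = wx @ x + b + Wh @ dh            # wxxpbpwhh: fused pre-activation [4096]
    G1, G2, G3, G4 = np.split(z, 4)     # four [1024] slices
    it = sigmoid(G1 + Wci * dc)         # input gate, peephole on old cell state
    ft = sigmoid(G3 + Wcf * dc)         # forget gate, peephole on old cell state
    ct = ft * dc + it * np.tanh(G2)     # bft + bit -> new cell state
    ot = sigmoid(G4 + Wco * ct)         # output gate, peephole on new cell state
    mt = ot * np.tanh(ct)               # gated cell output
    return Wmr @ mt, ct                 # projected output [512], cell state [1024]

rng = np.random.default_rng(0)
x, dh, dc = rng.normal(size=363), np.zeros(512), np.zeros(1024)
wx, b, Wh = rng.normal(size=(4096, 363)), np.zeros(4096), rng.normal(size=(4096, 512))
Wmr = rng.normal(size=(512, 1024))
Wci, Wcf, Wco = (rng.normal(size=1024) for _ in range(3))
out, ct = lstmp_step(x, dh, dc, wx, b, Wh, Wci, Wcf, Wco, Wmr)
print(out.shape, ct.shape)  # (512,) (1024,)
```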
Node 'LSTMoutput2.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=7, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.wxx = Times (LSTMoutput2.wx, LSTMoutput1.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput2.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput2.wxxpb = Plus (LSTMoutput2.wxx, LSTMoutput2.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput2.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=8, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput2.wxxpbpwhh = Plus (LSTMoutput2.wxxpb, LSTMoutput2.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput2.G4 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G3 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed175 = Plus (LSTMoutput2.G3, LSTMoutput2.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ft = Sigmoid (LSTMoutput2.unnamed175) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bft = ElementTimes (LSTMoutput2.ft, LSTMoutput2.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G1 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed173 = Plus (LSTMoutput2.G1, LSTMoutput2.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.it = Sigmoid (LSTMoutput2.unnamed173) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G2 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed174 = Tanh (LSTMoutput2.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bit = ElementTimes (LSTMoutput2.it, LSTMoutput2.unnamed174) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ct = Plus (LSTMoutput2.bft, LSTMoutput2.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcoct = DiagTimes (LSTMoutput2.Wco, LSTMoutput2.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed176 = Plus (LSTMoutput2.G4, LSTMoutput2.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ot = Sigmoid (LSTMoutput2.unnamed176) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed177 = Tanh (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.mt = ElementTimes (LSTMoutput2.ot, LSTMoutput2.unnamed177) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.output = Times (LSTMoutput2.Wmr, LSTMoutput2.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'LSTMoutput3.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=13, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.wxx = Times (LSTMoutput3.wx, LSTMoutput2.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput3.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput3.wxxpb = Plus (LSTMoutput3.wxx, LSTMoutput3.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput3.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput3.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=14, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput3.wxxpbpwhh = Plus (LSTMoutput3.wxxpb, LSTMoutput3.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput3.G4 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G3 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed185 = Plus (LSTMoutput3.G3, LSTMoutput3.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ft = Sigmoid (LSTMoutput3.unnamed185) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bft = ElementTimes (LSTMoutput3.ft, LSTMoutput3.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G1 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed183 = Plus (LSTMoutput3.G1, LSTMoutput3.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.it = Sigmoid (LSTMoutput3.unnamed183) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G2 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed184 = Tanh (LSTMoutput3.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bit = ElementTimes (LSTMoutput3.it, LSTMoutput3.unnamed184) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ct = Plus (LSTMoutput3.bft, LSTMoutput3.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcoct = DiagTimes (LSTMoutput3.Wco, LSTMoutput3.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed186 = Plus (LSTMoutput3.G4, LSTMoutput3.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ot = Sigmoid (LSTMoutput3.unnamed186) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed187 = Tanh (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.mt = ElementTimes (LSTMoutput3.ot, LSTMoutput3.unnamed187) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.output = Times (LSTMoutput3.Wmr, LSTMoutput3.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'W' (LearnableParameter operation): Tensor shape was inferred as [132 x 512 x 1].
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 512 x 1] <- uniform(seed=19, range=0.050000*1.000000, onCPU=false).
Validating --> unnamed193 = Times (W, LSTMoutput3.output) : [132 x 512 x 1], [512 x 1 x *] -> [132 x *]
Validating --> b = LearnableParameter() : -> [132 x 1]
Validating --> LSTMoutputW = Plus (unnamed193, b) : [132 x *], [132 x 1] -> [132 x 1 x *]
Validating --> ce = CrossEntropyWithSoftmax (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> err = ErrorPrediction (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> logPrior.prior = Mean (labels) : [132 x *] -> [132]
Validating --> logPrior.logPrior = Log (logPrior.prior) : [132] -> [132]
Validating --> scaledLogLikelihood = Minus (LSTMoutputW, logPrior.logPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
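The scaledLogLikelihood node above is the standard hybrid-ASR conversion from state posteriors to scaled likelihoods, log p(x|s) = log p(s|x) - log p(s) + const: the graph simply subtracts Log(Mean(labels)) from the network output. A minimal numpy sketch (toy prior and logits; the per-frame softmax normalizer is a constant that decoding can ignore, which is why raw LSTMoutputW suffices):

```python
import numpy as np

def scaled_log_likelihood(logits, prior):
    # Mirrors Minus(LSTMoutputW, Log(logPrior.prior)) from the graph above.
    return logits - np.log(prior)

prior = np.full(132, 1.0 / 132)                     # toy stand-in for Mean(labels)
logits = np.random.default_rng(1).normal(size=132)  # toy LSTMoutputW frame
print(scaled_log_likelihood(logits, prior).shape)   # (132,)
```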
Validating network. 88 nodes to process in pass 2.
Validating --> LSTMoutput1.dh = PastValue (LSTMoutput1.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput1.dc = PastValue (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.dh = PastValue (LSTMoutput2.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput2.dc = PastValue (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.dh = PastValue (LSTMoutput3.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput3.dc = PastValue (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
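The PastValue nodes resolved in this second pass are one-step delays along the sequence axis: dh and dc pick up the previous time step's projected output and cell state, which is what turns the per-frame cell into a recurrence. A sketch of the semantics (the initial value is left as a parameter here; CNTK supplies its own default):

```python
import numpy as np

def past_value(seq, init=0.0):
    # seq: [T, dim] -> delayed: [T, dim], with delayed[t] = seq[t-1]
    delayed = np.empty_like(seq)
    delayed[0] = init
    delayed[1:] = seq[:-1]
    return delayed

out = np.arange(6.0).reshape(3, 2)  # toy [T=3, dim=2] sequence
print(past_value(out))
# [[0. 0.]
#  [0. 1.]
#  [2. 3.]]
```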
Validating network. 15 nodes to process in pass 3.
Validating network, final pass.
29 out of 113 nodes do not share the minibatch layout with the input data.
Post-processing network complete.
08/16/2016 03:20:24: Created model with 113 nodes on CPU.
08/16/2016 03:20:24: Training criterion node(s):
08/16/2016 03:20:24: ce = CrossEntropyWithSoftmax
08/16/2016 03:20:24: Evaluation criterion node(s):
08/16/2016 03:20:24: err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing: Out of 217 matrices, 125 are shared as 56, and 92 are not shared.
{ LSTMoutput1.dh : [512 x 1 x *]
LSTMoutput1.wxx : [4096 x *] (gradient) }
{ LSTMoutput2.mt : [1024 x 1 x *] (gradient)
LSTMoutput3.dh : [512 x 1 x *]
LSTMoutput3.wxx : [4096 x *] (gradient) }
{ LSTMoutput2.Wco : [1024] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] }
{ LSTMoutput1.mt : [1024 x 1 x *] (gradient)
LSTMoutput2.dh : [512 x 1 x *]
LSTMoutput2.wxx : [4096 x *] (gradient) }
{ LSTMoutput1.Wco : [1024] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] }
{ LSTMoutput1.G3 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] }
{ LSTMoutput1.unnamed164 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput1.Wci : [1024] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] }
{ LSTMoutput1.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] }
{ LSTMoutput2.Wcf : [1024] (gradient)
LSTMoutput3.it : [1024 x 1 x *] }
{ LSTMoutput1.ct : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput3.wx : [4096 x 512 x 1] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] }
{ LSTMoutput1.Wmr : [512 x 1024] (gradient)
LSTMoutput2.wxx : [4096 x *] }
{ LSTMoutput1.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] }
{ LSTMoutput1.Wcf : [1024] (gradient)
LSTMoutput2.it : [1024 x 1 x *] }
{ LSTMoutput2.unnamed174 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput1.G1 : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] }
{ LSTMoutput1.dc : [1024 x 1 x *] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] }
{ LSTMoutput1.unnamed165 : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] }
{ LSTMoutput2.G3 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] }
{ LSTMoutput1.ot : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] }
{ LSTMoutput2.ot : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] }
{ LSTMoutput2.ct : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput1.whh : [4096 x 1 x *] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] }
{ LSTMoutput2.wx : [4096 x 512 x 1] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] }
{ LSTMoutput1.b : [4096 x 1] (gradient)
LSTMoutput1.dh : [512 x 1 x *] (gradient)
LSTMoutput2.unnamed174 : [1024 x 1 x *] }
{ LSTMoutput1.unnamed166 : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] }
{ LSTMoutput2.Wci : [1024] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] }
{ LSTMoutput1.it : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] }
{ LSTMoutput1.unnamed167 : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] }
{ LSTMoutput2.Wmr : [512 x 1024] (gradient)
LSTMoutput3.wxx : [4096 x *] }
{ LSTMoutput2.b : [4096 x 1] (gradient)
LSTMoutput2.dh : [512 x 1 x *] (gradient)
LSTMoutput3.unnamed184 : [1024 x 1 x *] }
{ LSTMoutput1.G4 : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] }
{ LSTMoutput2.unnamed176 : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] (gradient) }
{ LSTMoutput1.bit : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wh : [4096 x 512] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput2.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed177 : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] (gradient) }
{ LSTMoutput3.output : [512 x 1 x *] (gradient)
LSTMoutputW : [132 x 1 x *] (gradient) }
{ LSTMoutput2.bft : [1024 x 1 x *] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.Wh : [4096 x 512] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.bft : [1024 x 1 x *] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.unnamed163 : [1024 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed173 : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.output : [512 x 1 x *] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] (gradient) }
{ LSTMoutput3.b : [4096 x 1] (gradient)
LSTMoutput3.dh : [512 x 1 x *] (gradient) }
{ LSTMoutput2.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.G2 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] (gradient) }
{ LSTMoutput3.Wmr : [512 x 1024] (gradient)
unnamed193 : [132 x *] }
{ LSTMoutput1.output : [512 x 1 x *] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput3.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput3.mt : [1024 x 1 x *] (gradient)
unnamed193 : [132 x *] (gradient) }
{ LSTMoutput1.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutputW : [132 x 1 x *]
W : [132 x 512 x 1] (gradient) }
{ LSTMoutput1.ft : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.wx : [4096 x 363] (gradient)
LSTMoutput1.wxxpb : [4096 x 1 x *] }
08/16/2016 03:20:24: Training 13634692 parameters in 23 out of 23 parameter tensors and 104 nodes with gradient:
08/16/2016 03:20:24: Node 'LSTMoutput1.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput1.Wci' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput1.Wco' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput1.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 03:20:24: Node 'LSTMoutput1.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 03:20:24: Node 'LSTMoutput1.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 03:20:24: Node 'LSTMoutput1.wx' (LearnableParameter operation) : [4096 x 363]
08/16/2016 03:20:24: Node 'LSTMoutput2.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput2.Wci' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput2.Wco' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput2.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 03:20:24: Node 'LSTMoutput2.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 03:20:24: Node 'LSTMoutput2.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 03:20:24: Node 'LSTMoutput2.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 03:20:24: Node 'LSTMoutput3.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput3.Wci' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput3.Wco' (LearnableParameter operation) : [1024]
08/16/2016 03:20:24: Node 'LSTMoutput3.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 03:20:24: Node 'LSTMoutput3.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 03:20:24: Node 'LSTMoutput3.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 03:20:24: Node 'LSTMoutput3.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 03:20:24: Node 'W' (LearnableParameter operation) : [132 x 512 x 1]
08/16/2016 03:20:24: Node 'b' (LearnableParameter operation) : [132 x 1]
08/16/2016 03:20:24: Precomputing --> 3 PreCompute nodes found.
08/16/2016 03:20:24: featNorm.xMean = Mean()
08/16/2016 03:20:24: featNorm.xStdDev = InvStdDev()
08/16/2016 03:20:24: logPrior.prior = Mean()
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
08/16/2016 03:20:27: Precomputing --> Completed.
08/16/2016 03:20:28: Starting Epoch 1: learning rate per sample = 0.001953 effective momentum = 0.000000 momentum as time constant = 0.0 samples
minibatchiterator: epoch 0: frames [0..64] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
08/16/2016 03:20:28: Starting minibatch loop.
08/16/2016 03:20:31: Epoch[ 1 of 1]-Minibatch[ 1- 10, 250.00%]: ce = 4.87950134 * 160; err = 0.90625000 * 160; time = 3.6415s; samplesPerSecond = 43.9
08/16/2016 03:20:35: Epoch[ 1 of 1]-Minibatch[ 11- 20, 500.00%]: ce = 4.84555817 * 160; err = 0.69375000 * 160; time = 3.6742s; samplesPerSecond = 43.5
08/16/2016 03:20:38: Finished Epoch[ 1 of 1]: [Training] ce = 4.85900003 * 418; err = 0.80382775 * 418; totalSamplesSeen = 418; learningRatePerSample = 0.001953125; epochTime=9.76851s
08/16/2016 03:20:38: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn'
08/16/2016 03:20:39: CNTKCommandTrainEnd: speechTrain
08/16/2016 03:20:39: Action "train" complete.
08/16/2016 03:20:39: __COMPLETED__
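A few of the numbers in the run above can be cross-checked by hand (an informal reading of the log, not CNTK's exact accounting): the per-sample learning rate matches the per-MB rate of 0.5 spread over 16 truncation frames x 16 parallel utterances, the 250.00% progress figure is samples seen relative to the overridden epochSize=64, and samplesPerSecond is just samples over wall time:

```python
lr_per_mb, trunc_len, parallel_seqs = 0.5, 16, 16
print(lr_per_mb / (trunc_len * parallel_seqs))  # 0.001953125, as logged

epoch_size, samples_after_10_mbs = 64, 160
print(100 * samples_after_10_mbs / epoch_size)  # 250.0 -> "250.00%"

print(round(160 / 3.6415, 1))                   # 43.9 samplesPerSecond (CPU run)
```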

View file

@ -0,0 +1,682 @@
CPU info:
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
Hardware threads: 24
Total Memory: 268381192 kB
-------------------------------------------------------------------
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/LSTM-NDL.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
-------------------------------------------------------------------
Build info:
Built time: Aug 16 2016 03:09:16
Last modified date: Fri Aug 12 05:28:23 2016
Build type: Release
Build target: GPU
With 1bit-SGD: yes
Math lib: mkl
CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
CUB_PATH: c:\src\cub-1.4.1
CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
Build Branch: HEAD
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
Built by svcphil on Philly-Pool1
Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
-------------------------------------------------------------------
Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
08/16/2016 03:20:41: -------------------------------------------------------------------
08/16/2016 03:20:41: Build info:
08/16/2016 03:20:41: Built time: Aug 16 2016 03:09:16
08/16/2016 03:20:41: Last modified date: Fri Aug 12 05:28:23 2016
08/16/2016 03:20:41: Build type: Release
08/16/2016 03:20:41: Build target: GPU
08/16/2016 03:20:41: With 1bit-SGD: yes
08/16/2016 03:20:41: Math lib: mkl
08/16/2016 03:20:41: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
08/16/2016 03:20:41: CUB_PATH: c:\src\cub-1.4.1
08/16/2016 03:20:41: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
08/16/2016 03:20:41: Build Branch: HEAD
08/16/2016 03:20:41: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
08/16/2016 03:20:41: Built by svcphil on Philly-Pool1
08/16/2016 03:20:41: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
08/16/2016 03:20:41: -------------------------------------------------------------------
08/16/2016 03:20:43: -------------------------------------------------------------------
08/16/2016 03:20:43: GPU info:
08/16/2016 03:20:43: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:43: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:43: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
08/16/2016 03:20:43: -------------------------------------------------------------------
08/16/2016 03:20:43: Running on DPHAIM-25 at 2016/08/16 03:20:43
08/16/2016 03:20:43: Command line:
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/LSTM-NDL.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
08/16/2016 03:20:43: RootDir = ".."
ConfigDir = "$RootDir$/Config"
DataDir = "$RootDir$/Data"
OutputDir = "$RootDir$/Output"
ModelDir = "$OutputDir$/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "$ModelDir$/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "$ConfigDir$/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "$DataDir$/glob_0000.scp"
]
labels = [
mlfFile = "$DataDir$/glob_0000.mlf"
labelMappingFile = "$DataDir$/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
DeviceId=0
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
08/16/2016 03:20:43: RootDir = ".."
ConfigDir = "../Config"
DataDir = "../Data"
OutputDir = "../Output"
ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models"
deviceId = -1
command = speechTrain
precision = "float"
traceLevel = 1
modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn"
parallelTrain = true
frameMode = false
truncated = true
speechTrain = [
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp"
]
labels = [
mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf"
labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list"
labelDim = 132
labelType = "category"
]
]
]
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
DeviceId=0
timestamping=true
speechTrain=[SGD=[maxEpochs=1]]
speechTrain=[SGD=[epochSize=64]]
speechTrain=[reader=[useMersenneTwisterRand=true]]
parallelTrain=false
08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
configparameters: LSTM-NDL.cntk:command=speechTrain
configparameters: LSTM-NDL.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
configparameters: LSTM-NDL.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
configparameters: LSTM-NDL.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
configparameters: LSTM-NDL.cntk:deviceId=0
configparameters: LSTM-NDL.cntk:frameMode=false
configparameters: LSTM-NDL.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models
configparameters: LSTM-NDL.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn
configparameters: LSTM-NDL.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
configparameters: LSTM-NDL.cntk:parallelTrain=false
configparameters: LSTM-NDL.cntk:precision=float
configparameters: LSTM-NDL.cntk:RootDir=..
configparameters: LSTM-NDL.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
configparameters: LSTM-NDL.cntk:speechTrain=[
action = "train"
nbrUttsIneachRecurrentIter = 16
NDLNetworkBuilder = [
networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/lstmp-3layer-opt.ndl"
]
SGD = [
epochSize = 0
minibatchSize = 16
learningRatesPerMB = 0.5
numMBsToShowResult = 10
momentumPerMB = 0:0.9
maxEpochs = 4
keepCheckPointFiles = true
]
reader = [
readerType = "HTKMLFReader"
readMethod = "blockRandomize"
miniBatchMode = "partial"
randomize = "auto"
verbosity = 0
features = [
dim = 363
type = "real"
scpFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp"
]
labels = [
mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf"
labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list"
labelDim = 132
labelType = "category"
]
]
] [SGD=[maxEpochs=1]] [SGD=[epochSize=64]] [reader=[useMersenneTwisterRand=true]]
configparameters: LSTM-NDL.cntk:timestamping=true
configparameters: LSTM-NDL.cntk:traceLevel=1
configparameters: LSTM-NDL.cntk:truncated=true
08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
08/16/2016 03:20:43: Commands: speechTrain
08/16/2016 03:20:43: Precision = "float"
08/16/2016 03:20:43: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn
08/16/2016 03:20:43: CNTKCommandTrainInfo: speechTrain : 1
08/16/2016 03:20:43: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
08/16/2016 03:20:43: ##############################################################################
08/16/2016 03:20:43: # #
08/16/2016 03:20:43: # Action "train" #
08/16/2016 03:20:43: # #
08/16/2016 03:20:43: ##############################################################################
08/16/2016 03:20:43: CNTKCommandTrainBegin: speechTrain
NDLBuilder Using GPU 0
reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp ... 948 entries
total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list
htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf ... total 948 entries
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
label set 0: 129 classes
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
useParallelTrain option is not enabled. ParallelTrain config will be ignored.
08/16/2016 03:20:43: Creating virgin network.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
Microsoft::MSR::CNTK::GPUMatrix<ElemType>::SetUniformRandomValue (GPU): creating curand object with seed 3, sizeof(ElemType)==4
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=4, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=5, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=6, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=9, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=10, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=11, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=12, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 0] as uniform later when dimensions are fully known.
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=15, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=16, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=17, range=0.050000*1.000000, onCPU=false).
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=18, range=0.050000*1.000000, onCPU=false).
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 0] as uniform later when dimensions are fully known.
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
Post-processing network...
6 roots:
ce = CrossEntropyWithSoftmax()
err = ErrorPrediction()
featNorm.xMean = Mean()
featNorm.xStdDev = InvStdDev()
logPrior.prior = Mean()
scaledLogLikelihood = Minus()
Loop[0] --> Loop_LSTMoutput1.output -> 24 nodes
LSTMoutput1.dh LSTMoutput1.whh LSTMoutput1.wxxpbpwhh
LSTMoutput1.G4 LSTMoutput1.G3 LSTMoutput1.dc
LSTMoutput1.Wcfdc LSTMoutput1.unnamed165 LSTMoutput1.ft
LSTMoutput1.bft LSTMoutput1.G1 LSTMoutput1.Wcidc
LSTMoutput1.unnamed163 LSTMoutput1.it LSTMoutput1.G2
LSTMoutput1.unnamed164 LSTMoutput1.bit LSTMoutput1.ct
LSTMoutput1.Wcoct LSTMoutput1.unnamed166 LSTMoutput1.ot
LSTMoutput1.unnamed167 LSTMoutput1.mt LSTMoutput1.output
Loop[1] --> Loop_LSTMoutput2.output -> 24 nodes
LSTMoutput2.dh LSTMoutput2.whh LSTMoutput2.wxxpbpwhh
LSTMoutput2.G4 LSTMoutput2.G3 LSTMoutput2.dc
LSTMoutput2.Wcfdc LSTMoutput2.unnamed175 LSTMoutput2.ft
LSTMoutput2.bft LSTMoutput2.G1 LSTMoutput2.Wcidc
LSTMoutput2.unnamed173 LSTMoutput2.it LSTMoutput2.G2
LSTMoutput2.unnamed174 LSTMoutput2.bit LSTMoutput2.ct
LSTMoutput2.Wcoct LSTMoutput2.unnamed176 LSTMoutput2.ot
LSTMoutput2.unnamed177 LSTMoutput2.mt LSTMoutput2.output
Loop[2] --> Loop_LSTMoutput3.output -> 24 nodes
LSTMoutput3.dh LSTMoutput3.whh LSTMoutput3.wxxpbpwhh
LSTMoutput3.G4 LSTMoutput3.G3 LSTMoutput3.dc
LSTMoutput3.Wcfdc LSTMoutput3.unnamed185 LSTMoutput3.ft
LSTMoutput3.bft LSTMoutput3.G1 LSTMoutput3.Wcidc
LSTMoutput3.unnamed183 LSTMoutput3.it LSTMoutput3.G2
LSTMoutput3.unnamed184 LSTMoutput3.bit LSTMoutput3.ct
LSTMoutput3.Wcoct LSTMoutput3.unnamed186 LSTMoutput3.ot
LSTMoutput3.unnamed187 LSTMoutput3.mt LSTMoutput3.output
Validating network. 113 nodes to process in pass 1.
Validating --> labels = InputValue() : -> [132 x *]
Validating --> W = LearnableParameter() : -> [132 x 0]
Validating --> LSTMoutput3.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput3.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput2.wx = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wmr = LearnableParameter() : -> [512 x 1024]
Validating --> LSTMoutput1.wx = LearnableParameter() : -> [4096 x 0]
Validating --> features = InputValue() : -> [363 x *]
Validating --> featNorm.xMean = Mean (features) : [363 x *] -> [363]
Validating --> featNorm.xStdDev = InvStdDev (features) : [363 x *] -> [363]
Validating --> featNorm.xNorm = PerDimMeanVarNormalization (features, featNorm.xMean, featNorm.xStdDev) : [363 x *], [363], [363] -> [363 x *]
Node 'LSTMoutput1.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 363].
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.wxx = Times (LSTMoutput1.wx, featNorm.xNorm) : [4096 x 363], [363 x *] -> [4096 x *]
Validating --> LSTMoutput1.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput1.wxxpb = Plus (LSTMoutput1.wxx, LSTMoutput1.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput1.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput1.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput1.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput1.wxxpbpwhh = Plus (LSTMoutput1.wxxpb, LSTMoutput1.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput1.G4 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G3 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed165 = Plus (LSTMoutput1.G3, LSTMoutput1.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ft = Sigmoid (LSTMoutput1.unnamed165) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bft = ElementTimes (LSTMoutput1.ft, LSTMoutput1.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G1 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput1.unnamed163 = Plus (LSTMoutput1.G1, LSTMoutput1.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput1.it = Sigmoid (LSTMoutput1.unnamed163) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.G2 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed164 = Tanh (LSTMoutput1.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.bit = ElementTimes (LSTMoutput1.it, LSTMoutput1.unnamed164) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ct = Plus (LSTMoutput1.bft, LSTMoutput1.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcoct = DiagTimes (LSTMoutput1.Wco, LSTMoutput1.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed166 = Plus (LSTMoutput1.G4, LSTMoutput1.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.ot = Sigmoid (LSTMoutput1.unnamed166) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.unnamed167 = Tanh (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.mt = ElementTimes (LSTMoutput1.ot, LSTMoutput1.unnamed167) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.output = Times (LSTMoutput1.Wmr, LSTMoutput1.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
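The chain of nodes above (repeated verbatim for LSTMoutput2 and LSTMoutput3) implements a peephole LSTM with a recurrent projection (LSTMP): wxxpbpwhh stacks the four gate pre-activations, G1..G4 are its four 1024-row slices (input gate, cell candidate, forget gate, output gate), Wci/Wcf/Wco are the diagonal peephole weights applied via DiagTimes, dh/dc are the PastValue delays of the output and the cell, and Wmr projects the 1024-dim cell output down to 512. Assuming the standard LSTMP formulation, the recurrence being validated is:

    [G_1; G_2; G_3; G_4] = W_x x_t + W_h h_{t-1} + b
    i_t = \sigma(G_1 + w_{ci} \odot c_{t-1})
    f_t = \sigma(G_3 + w_{cf} \odot c_{t-1})
    c_t = f_t \odot c_{t-1} + i_t \odot \tanh(G_2)
    o_t = \sigma(G_4 + w_{co} \odot c_t)
    h_t = W_{mr} \, (o_t \odot \tanh(c_t))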
Node 'LSTMoutput2.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=7, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.wxx = Times (LSTMoutput2.wx, LSTMoutput1.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput2.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput2.wxxpb = Plus (LSTMoutput2.wxx, LSTMoutput2.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput2.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput2.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput2.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=8, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput2.wxxpbpwhh = Plus (LSTMoutput2.wxxpb, LSTMoutput2.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput2.G4 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G3 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed175 = Plus (LSTMoutput2.G3, LSTMoutput2.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ft = Sigmoid (LSTMoutput2.unnamed175) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bft = ElementTimes (LSTMoutput2.ft, LSTMoutput2.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G1 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput2.unnamed173 = Plus (LSTMoutput2.G1, LSTMoutput2.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput2.it = Sigmoid (LSTMoutput2.unnamed173) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.G2 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed174 = Tanh (LSTMoutput2.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.bit = ElementTimes (LSTMoutput2.it, LSTMoutput2.unnamed174) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ct = Plus (LSTMoutput2.bft, LSTMoutput2.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcoct = DiagTimes (LSTMoutput2.Wco, LSTMoutput2.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed176 = Plus (LSTMoutput2.G4, LSTMoutput2.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.ot = Sigmoid (LSTMoutput2.unnamed176) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.unnamed177 = Tanh (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.mt = ElementTimes (LSTMoutput2.ot, LSTMoutput2.unnamed177) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.output = Times (LSTMoutput2.Wmr, LSTMoutput2.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'LSTMoutput3.wx' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512 x 1].
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=13, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.wxx = Times (LSTMoutput3.wx, LSTMoutput2.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
Validating --> LSTMoutput3.b = LearnableParameter() : -> [4096 x 1]
Validating --> LSTMoutput3.wxxpb = Plus (LSTMoutput3.wxx, LSTMoutput3.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
Validating --> LSTMoutput3.Wh = LearnableParameter() : -> [4096 x 0]
Validating --> LSTMoutput3.Wco = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wcf = LearnableParameter() : -> [1024]
Validating --> LSTMoutput3.Wci = LearnableParameter() : -> [1024]
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Tensor shape was inferred as [4096 x 512].
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=14, range=0.050000*1.000000, onCPU=false).
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512] -> [4096]
Validating --> LSTMoutput3.wxxpbpwhh = Plus (LSTMoutput3.wxxpb, LSTMoutput3.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
Validating --> LSTMoutput3.G4 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G3 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed185 = Plus (LSTMoutput3.G3, LSTMoutput3.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ft = Sigmoid (LSTMoutput3.unnamed185) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bft = ElementTimes (LSTMoutput3.ft, LSTMoutput3.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G1 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024] -> [1024]
Validating --> LSTMoutput3.unnamed183 = Plus (LSTMoutput3.G1, LSTMoutput3.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
Validating --> LSTMoutput3.it = Sigmoid (LSTMoutput3.unnamed183) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.G2 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed184 = Tanh (LSTMoutput3.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.bit = ElementTimes (LSTMoutput3.it, LSTMoutput3.unnamed184) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ct = Plus (LSTMoutput3.bft, LSTMoutput3.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcoct = DiagTimes (LSTMoutput3.Wco, LSTMoutput3.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed186 = Plus (LSTMoutput3.G4, LSTMoutput3.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.ot = Sigmoid (LSTMoutput3.unnamed186) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.unnamed187 = Tanh (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.mt = ElementTimes (LSTMoutput3.ot, LSTMoutput3.unnamed187) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.output = Times (LSTMoutput3.Wmr, LSTMoutput3.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
Node 'W' (LearnableParameter operation): Tensor shape was inferred as [132 x 512 x 1].
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 512 x 1] <- uniform(seed=19, range=0.050000*1.000000, onCPU=false).
Validating --> unnamed193 = Times (W, LSTMoutput3.output) : [132 x 512 x 1], [512 x 1 x *] -> [132 x *]
Validating --> b = LearnableParameter() : -> [132 x 1]
Validating --> LSTMoutputW = Plus (unnamed193, b) : [132 x *], [132 x 1] -> [132 x 1 x *]
Validating --> ce = CrossEntropyWithSoftmax (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> err = ErrorPrediction (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
Validating --> logPrior.prior = Mean (labels) : [132 x *] -> [132]
Validating --> logPrior.logPrior = Log (logPrior.prior) : [132] -> [132]
Validating --> scaledLogLikelihood = Minus (LSTMoutputW, logPrior.logPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
Validating network. 88 nodes to process in pass 2.
Validating --> LSTMoutput1.dh = PastValue (LSTMoutput1.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput1.dc = PastValue (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.dh = PastValue (LSTMoutput2.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput2.dc = PastValue (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.dh = PastValue (LSTMoutput3.output) : [512 x 1 x *] -> [512 x 1 x *]
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
Validating --> LSTMoutput3.dc = PastValue (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
Validating network. 15 nodes to process in pass 3.
Validating network, final pass.
29 out of 113 nodes do not share the minibatch layout with the input data.
Post-processing network complete.
08/16/2016 03:20:44: Created model with 113 nodes on GPU 0.
08/16/2016 03:20:44: Training criterion node(s):
08/16/2016 03:20:44: ce = CrossEntropyWithSoftmax
08/16/2016 03:20:44: Evaluation criterion node(s):
08/16/2016 03:20:44: err = ErrorPrediction
Allocating matrices for forward and/or backward propagation.
Memory Sharing: Out of 217 matrices, 125 are shared (packed into 56 shared buffers), and 92 are not shared.
{ LSTMoutput2.mt : [1024 x 1 x *] (gradient)
LSTMoutput3.dh : [512 x 1 x *]
LSTMoutput3.wxx : [4096 x *] (gradient) }
{ LSTMoutput2.Wco : [1024] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] }
{ LSTMoutput1.dh : [512 x 1 x *]
LSTMoutput1.wxx : [4096 x *] (gradient) }
{ LSTMoutput1.mt : [1024 x 1 x *] (gradient)
LSTMoutput2.dh : [512 x 1 x *]
LSTMoutput2.wxx : [4096 x *] (gradient) }
{ LSTMoutput1.Wco : [1024] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] }
{ LSTMoutput3.b : [4096 x 1] (gradient)
LSTMoutput3.dh : [512 x 1 x *] (gradient) }
{ LSTMoutput1.bft : [1024 x 1 x *] (gradient)
LSTMoutput2.dc : [1024 x 1 x *] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.G2 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.unnamed163 : [1024 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wh : [4096 x 512] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.bft : [1024 x 1 x *] (gradient)
LSTMoutput3.dc : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed173 : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.unnamed177 : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] (gradient) }
{ LSTMoutput1.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.Wcfdc : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput2.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.output : [512 x 1 x *] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] (gradient) }
{ LSTMoutput2.wx : [4096 x 512 x 1] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] }
{ LSTMoutput1.ct : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput1.unnamed164 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput1.G1 : [1024 x 1 x *] (gradient)
LSTMoutput2.ft : [1024 x 1 x *] }
{ LSTMoutput1.Wci : [1024] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] }
{ LSTMoutput1.Wcf : [1024] (gradient)
LSTMoutput2.it : [1024 x 1 x *] }
{ LSTMoutput1.ot : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] }
{ LSTMoutput1.G4 : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] }
{ LSTMoutput1.Wmr : [512 x 1024] (gradient)
LSTMoutput2.wxx : [4096 x *] }
{ LSTMoutput1.G3 : [1024 x 1 x *] (gradient)
LSTMoutput2.Wcidc : [1024 x 1 x *] }
{ LSTMoutput1.whh : [4096 x 1 x *] (gradient)
LSTMoutput2.G2 : [1024 x 1 x *] }
{ LSTMoutput1.b : [4096 x 1] (gradient)
LSTMoutput1.dh : [512 x 1 x *] (gradient)
LSTMoutput2.unnamed174 : [1024 x 1 x *] }
{ LSTMoutput2.Wmr : [512 x 1024] (gradient)
LSTMoutput3.wxx : [4096 x *] }
{ LSTMoutput1.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput2.unnamed175 : [1024 x 1 x *] }
{ LSTMoutput1.wx : [4096 x 363] (gradient)
LSTMoutput1.wxxpb : [4096 x 1 x *] }
{ LSTMoutput2.unnamed174 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcfdc : [1024 x 1 x *] }
{ LSTMoutput2.G3 : [1024 x 1 x *] (gradient)
LSTMoutput3.Wcidc : [1024 x 1 x *] }
{ LSTMoutput2.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] (gradient) }
{ LSTMoutput2.b : [4096 x 1] (gradient)
LSTMoutput2.dh : [512 x 1 x *] (gradient)
LSTMoutput3.unnamed184 : [1024 x 1 x *] }
{ LSTMoutput3.output : [512 x 1 x *] (gradient)
LSTMoutputW : [132 x 1 x *] (gradient) }
{ LSTMoutput1.ft : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.output : [512 x 1 x *] (gradient)
LSTMoutput2.wxxpb : [4096 x 1 x *] (gradient)
LSTMoutput3.it : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.unnamed167 : [1024 x 1 x *] (gradient)
LSTMoutput2.whh : [4096 x 1 x *] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] }
{ LSTMoutput1.unnamed166 : [1024 x 1 x *] (gradient)
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] (gradient)
LSTMoutput3.unnamed185 : [1024 x 1 x *] }
{ LSTMoutput2.unnamed176 : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] (gradient) }
{ LSTMoutput3.wx : [4096 x 512 x 1] (gradient)
LSTMoutput3.wxxpb : [4096 x 1 x *] }
{ LSTMoutput2.ct : [1024 x 1 x *] (gradient)
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] }
{ LSTMoutput2.ot : [1024 x 1 x *] (gradient)
LSTMoutput3.whh : [4096 x 1 x *] }
{ LSTMoutput3.mt : [1024 x 1 x *] (gradient)
unnamed193 : [132 x *] (gradient) }
{ LSTMoutput2.Wh : [4096 x 512] (gradient)
LSTMoutput3.G2 : [1024 x 1 x *] (gradient) }
{ LSTMoutput1.bit : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] (gradient) }
{ LSTMoutput3.Wmr : [512 x 1024] (gradient)
unnamed193 : [132 x *] }
{ LSTMoutput1.unnamed165 : [1024 x 1 x *] (gradient)
LSTMoutput3.bft : [1024 x 1 x *] }
{ LSTMoutputW : [132 x 1 x *]
W : [132 x 512 x 1] (gradient) }
{ LSTMoutput2.Wci : [1024] (gradient)
LSTMoutput3.G1 : [1024 x 1 x *] }
{ LSTMoutput1.dc : [1024 x 1 x *] (gradient)
LSTMoutput2.G1 : [1024 x 1 x *] (gradient)
LSTMoutput3.ft : [1024 x 1 x *] }
{ LSTMoutput2.Wcf : [1024] (gradient)
LSTMoutput3.it : [1024 x 1 x *] }
{ LSTMoutput1.it : [1024 x 1 x *] (gradient)
LSTMoutput3.unnamed183 : [1024 x 1 x *] }
{ LSTMoutput1.Wcoct : [1024 x 1 x *] (gradient)
LSTMoutput2.G4 : [1024 x 1 x *] (gradient)
LSTMoutput3.G4 : [1024 x 1 x *] }
08/16/2016 03:20:44: Training 13634692 parameters in 23 out of 23 parameter tensors and 104 nodes with gradient:
08/16/2016 03:20:44: Node 'LSTMoutput1.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput1.Wci' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput1.Wco' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput1.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 03:20:44: Node 'LSTMoutput1.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 03:20:44: Node 'LSTMoutput1.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 03:20:44: Node 'LSTMoutput1.wx' (LearnableParameter operation) : [4096 x 363]
08/16/2016 03:20:44: Node 'LSTMoutput2.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput2.Wci' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput2.Wco' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput2.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 03:20:44: Node 'LSTMoutput2.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 03:20:44: Node 'LSTMoutput2.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 03:20:44: Node 'LSTMoutput2.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 03:20:44: Node 'LSTMoutput3.Wcf' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput3.Wci' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput3.Wco' (LearnableParameter operation) : [1024]
08/16/2016 03:20:44: Node 'LSTMoutput3.Wh' (LearnableParameter operation) : [4096 x 512]
08/16/2016 03:20:44: Node 'LSTMoutput3.Wmr' (LearnableParameter operation) : [512 x 1024]
08/16/2016 03:20:44: Node 'LSTMoutput3.b' (LearnableParameter operation) : [4096 x 1]
08/16/2016 03:20:44: Node 'LSTMoutput3.wx' (LearnableParameter operation) : [4096 x 512 x 1]
08/16/2016 03:20:44: Node 'W' (LearnableParameter operation) : [132 x 512 x 1]
08/16/2016 03:20:44: Node 'b' (LearnableParameter operation) : [132 x 1]
08/16/2016 03:20:44: Precomputing --> 3 PreCompute nodes found.
08/16/2016 03:20:44: featNorm.xMean = Mean()
08/16/2016 03:20:44: featNorm.xStdDev = InvStdDev()
08/16/2016 03:20:44: logPrior.prior = Mean()
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
08/16/2016 03:20:45: Precomputing --> Completed.
08/16/2016 03:20:46: Starting Epoch 1: learning rate per sample = 0.001953 effective momentum = 0.000000 momentum as time constant = 0.0 samples
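The quoted per-sample rate can be derived from the config (an inference from the numbers above, not something the log states): learningRatesPerMB = 0.5 is divided by the number of samples per minibatch, which under truncated BPTT is the truncation length minibatchSize = 16 times the nbrUttsIneachRecurrentIter = 16 parallel utterances:

    \text{lr}_{\text{per-sample}} = \frac{0.5}{16 \times 16} = 0.001953125

which matches the learningRatePerSample = 0.001953125 reported at the end of the epoch below.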
minibatchiterator: epoch 0: frames [0..64] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
08/16/2016 03:20:46: Starting minibatch loop.
08/16/2016 03:20:47: Epoch[ 1 of 1]-Minibatch[ 1- 10, 250.00%]: ce = 4.87453079 * 160; err = 0.90625000 * 160; time = 1.1338s; samplesPerSecond = 141.1
08/16/2016 03:20:48: Epoch[ 1 of 1]-Minibatch[ 11- 20, 500.00%]: ce = 4.84628143 * 160; err = 0.69375000 * 160; time = 1.0409s; samplesPerSecond = 153.7
08/16/2016 03:20:49: Finished Epoch[ 1 of 1]: [Training] ce = 4.85708837 * 418; err = 0.80382775 * 418; totalSamplesSeen = 418; learningRatePerSample = 0.001953125; epochTime=2.90303s
08/16/2016 03:20:50: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn'
08/16/2016 03:20:51: CNTKCommandTrainEnd: speechTrain
08/16/2016 03:20:51: Action "train" complete.
08/16/2016 03:20:51: __COMPLETED__

View File

@@ -5,5 +5,5 @@
ConfigDir=$TEST_DIR/../../../../../../Examples/Speech/AN4/Config
# cntkrun <CNTK config file name> <additional CNTK args>
cntkrun LSTM-NDL.cntk "speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] parallelTrain=false" || exit $?
cntkrun LSTM-NDL.cntk "speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false" || exit $?
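This hunk, and the TIMIT hunks below, each add the same reader=[useMersenneTwisterRand=true] override, switching the reader's randomization to the Mersenne Twister generator; presumably this makes shuffling, and therefore these baselines, reproducible across platforms, though the diff itself does not state the motivation. The shared pattern, with <config> and <command> as hypothetical placeholders:

    # <config> = CNTK config file, <command> = the command section whose reader is overridden
    cntkrun <config>.cntk "$CntkArguments <command>=[reader=[useMersenneTwisterRand=true]]" || exit $?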

File diff not shown because it is too large.

View File

@@ -6,4 +6,4 @@
. $TEST_DIR/../run-timit-test-common
# cntkrun <CNTK config file name> <additional CNTK arg>
cntkrun TIMIT_AdaptLearnRate.cntk "$CntkArguments" || exit $?
cntkrun TIMIT_AdaptLearnRate.cntk "$CntkArguments TIMIT_TrainAdaptLR=[reader=[useMersenneTwisterRand=true]] TIMIT_TrainAdaptLR=[cvReader=[useMersenneTwisterRand=true]]" || exit $?

File diff not shown because it is too large.

View File

@@ -6,7 +6,7 @@
. $TEST_DIR/../run-timit-test-common
# Train:
cntkrun TIMIT_TrainSimpleNetwork.cntk "$CntkArguments" || exit $?
cntkrun TIMIT_TrainSimpleNetwork.cntk "$CntkArguments TIMIT_TrainSimple=[reader=[useMersenneTwisterRand=true]]" || exit $?
# Validate:
cntkrun TIMIT_CrossValidateSimpleNetwork.cntk "$CntkArguments" || exit $?
cntkrun TIMIT_CrossValidateSimpleNetwork.cntk "$CntkArguments" || exit $?

Some files were not shown because too many files changed in this diff.