merged from master. Undid the ClassificationError baseline updates due to merge conflicts
This commit is contained in:
Коммит
5b969bac70
|
@ -34,48 +34,29 @@
|
|||
<UseZip Condition="Exists('$(ZLIB_PATH)')">true</UseZip>
|
||||
</PropertyGroup>
|
||||
|
||||
<Choose>
|
||||
<When Condition="Exists('$(ACML_PATH)')">
|
||||
<PropertyGroup>
|
||||
<MathLibrary>ACML</MathLibrary>
|
||||
<MathLibraryName>ACML</MathLibraryName>
|
||||
<MathIncludePath>$(ACML_PATH)\include</MathIncludePath>
|
||||
<MathLibraryPath>$(ACML_PATH)\lib</MathLibraryPath>
|
||||
<MathLinkLibrary>libacml_mp_dll.lib</MathLinkLibrary>
|
||||
<MathDelayLoad>libacml_mp_dll.dll</MathDelayLoad>
|
||||
<MathPostBuildCopyPattern>$(ACML_PATH)\lib\*.dll</MathPostBuildCopyPattern>
|
||||
<UnitTestDlls>$(OutDir)libacml_mp_dll.dll;$(OutDir)libifcoremd.dll;$(OutDir)libifportmd.dll;$(OutDir)libiomp*.dll;$(OutDir)libmmd.dll;$(OutDir)svml_dispmd.dll;</UnitTestDlls>
|
||||
<MathDefine>USE_ACML</MathDefine>
|
||||
</PropertyGroup>
|
||||
</When>
|
||||
|
||||
<!-- See https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#optional-mkl on how to configure to build CNTK with MKL -->
|
||||
<When Condition="'$(CNTK_MKL)' == '1'">
|
||||
<PropertyGroup>
|
||||
<MathLibrary>MKL</MathLibrary>
|
||||
<CNTKCustomMKLVersion>1</CNTKCustomMKLVersion>
|
||||
<CNTKCustomMKLPath>$(CNTK_MKL_PATH)\$(CNTKCustomMKLVersion)</CNTKCustomMKLPath>
|
||||
<MathIncludePath>$(CNTKCustomMKLPath)\include</MathIncludePath>
|
||||
<MathDefine>USE_MKL</MathDefine>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(CNTK_MKL_SEQUENTIAL)' != '1'">
|
||||
<MathLibraryName>CNTK custom MKL Parallel (Version: $(CNTKCustomMKLVersion))</MathLibraryName>
|
||||
<MathLibraryPath>$(CNTKCustomMKLPath)\x64\parallel</MathLibraryPath>
|
||||
<MathLinkLibrary>mkl_cntk_p.lib</MathLinkLibrary>
|
||||
<MathDelayLoad>mkl_cntk_p.dll</MathDelayLoad>
|
||||
<MathPostBuildCopyPattern>$(MathLibraryPath)\*.dll</MathPostBuildCopyPattern>
|
||||
<UnitTestDlls>$(OutDir)mkl_cntk_p.dll;$(OutDir)libiomp5md.dll;</UnitTestDlls>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(CNTK_MKL_SEQUENTIAL)' == '1'">
|
||||
<MathLibraryName>CNTK custom MKL Sequential (Version: $(CNTKCustomMKLVersion))</MathLibraryName>
|
||||
<MathLibraryPath>$(CNTKCustomMKLPath)\x64\sequential</MathLibraryPath>
|
||||
<MathLinkLibrary>mkl_cntk_s.lib</MathLinkLibrary>
|
||||
<MathDelayLoad>mkl_cntk_s.dll</MathDelayLoad>
|
||||
<MathPostBuildCopyPattern>$(MathLibraryPath)\*.dll</MathPostBuildCopyPattern>
|
||||
<UnitTestDlls>$(OutDir)mkl_cntk_s.dll;</UnitTestDlls>
|
||||
</PropertyGroup>
|
||||
</When>
|
||||
</Choose>
|
||||
<PropertyGroup>
|
||||
<MathLibrary>MKL</MathLibrary>
|
||||
<CNTKCustomMKLVersion>1</CNTKCustomMKLVersion>
|
||||
<CNTKCustomMKLPath>$(CNTK_MKL_PATH)\$(CNTKCustomMKLVersion)</CNTKCustomMKLPath>
|
||||
<MathIncludePath>$(CNTKCustomMKLPath)\include</MathIncludePath>
|
||||
<MathDefine>USE_MKL</MathDefine>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(CNTK_MKL_SEQUENTIAL)' != '1'">
|
||||
<MathLibraryName>CNTK custom MKL Parallel (Version: $(CNTKCustomMKLVersion))</MathLibraryName>
|
||||
<MathLibraryPath>$(CNTKCustomMKLPath)\x64\parallel</MathLibraryPath>
|
||||
<MathLinkLibrary>mkl_cntk_p.lib</MathLinkLibrary>
|
||||
<MathDelayLoad>mkl_cntk_p.dll</MathDelayLoad>
|
||||
<MathPostBuildCopyPattern>$(MathLibraryPath)\*.dll</MathPostBuildCopyPattern>
|
||||
<UnitTestDlls>$(OutDir)mkl_cntk_p.dll;$(OutDir)libiomp5md.dll;</UnitTestDlls>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(CNTK_MKL_SEQUENTIAL)' == '1'">
|
||||
<MathLibraryName>CNTK custom MKL Sequential (Version: $(CNTKCustomMKLVersion))</MathLibraryName>
|
||||
<MathLibraryPath>$(CNTKCustomMKLPath)\x64\sequential</MathLibraryPath>
|
||||
<MathLinkLibrary>mkl_cntk_s.lib</MathLinkLibrary>
|
||||
<MathDelayLoad>mkl_cntk_s.dll</MathDelayLoad>
|
||||
<MathPostBuildCopyPattern>$(MathLibraryPath)\*.dll</MathPostBuildCopyPattern>
|
||||
<UnitTestDlls>$(OutDir)mkl_cntk_s.dll;</UnitTestDlls>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="$(UseZip)">
|
||||
<ZipInclude>$(ZLIB_PATH)\include;$(ZLIB_PATH)\lib\libzip\include;</ZipInclude>
|
||||
|
|
3
CNTK.sln
3
CNTK.sln
|
@ -1150,6 +1150,9 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CPPEvalClient", "Examples\E
|
|||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "BrainScriptTests", "Tests\UnitTests\BrainScriptTests\BrainScriptTests.vcxproj", "{9F999212-AFC5-4EAC-AA78-F7247D46C456}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{60BDB847-D0C4-4FD3-A947-0C15C08BCDB5} = {60BDB847-D0C4-4FD3-A947-0C15C08BCDB5}
|
||||
{928ABD1B-4D3B-4017-AEF1-0FA1B4467513} = {928ABD1B-4D3B-4017-AEF1-0FA1B4467513}
|
||||
{EAD17188-072C-4726-B840-A769C36DAD1B} = {EAD17188-072C-4726-B840-A769C36DAD1B}
|
||||
{86883653-8A61-4038-81A0-2379FAE4200A} = {86883653-8A61-4038-81A0-2379FAE4200A}
|
||||
EndProjectSection
|
||||
EndProject
|
||||
|
|
31
Makefile
31
Makefile
|
@ -9,8 +9,6 @@
|
|||
# that provides
|
||||
# BUILDTYPE= One of release or debug
|
||||
# defaults to release
|
||||
# ACML_PATH= path to ACML library installation
|
||||
# only needed if MATHLIB=acml
|
||||
# MKL_PATH= path to CNTK custom MKL installation
|
||||
# only needed if MATHLIB=mkl
|
||||
# CNTK_CUSTOM_MKL_VERSION=2
|
||||
|
@ -21,8 +19,8 @@
|
|||
# defaults to /usr/include/nvidia/gdk
|
||||
# GDK_NVML_LIB_PATH= path to CUDA GDK (stub) library path, so $(GDK_NVML_LIB_PATH)/libnvidia-ml.so exists
|
||||
# defaults to /usr/src/gdk/nvml/lib
|
||||
# MATHLIB= One of acml or mkl
|
||||
# defaults to acml
|
||||
# MATHLIB= mkl
|
||||
# defaults to mkl
|
||||
# CUDA_PATH= Path to CUDA
|
||||
# If not specified, GPU will not be enabled
|
||||
# CUB_PATH= path to NVIDIA CUB installation, so $(CUB_PATH)/cub/cub.cuh exists
|
||||
|
@ -60,8 +58,8 @@ BUILDTYPE=release
|
|||
endif
|
||||
|
||||
ifndef MATHLIB
|
||||
$(info DEFAULTING MATHLIB=acml)
|
||||
MATHLIB = acml
|
||||
$(info DEFAULTING MATHLIB=mkl)
|
||||
MATHLIB = mkl
|
||||
endif
|
||||
|
||||
#### Configure based on options above
|
||||
|
@ -137,13 +135,6 @@ else
|
|||
COMMON_FLAGS +=-DCPUONLY
|
||||
endif
|
||||
|
||||
ifeq ("$(MATHLIB)","acml")
|
||||
INCLUDEPATH += $(ACML_PATH)/include
|
||||
LIBPATH += $(ACML_PATH)/lib
|
||||
LIBS += -lacml_mp -liomp5 -lm -lpthread
|
||||
COMMON_FLAGS += -DUSE_ACML
|
||||
endif
|
||||
|
||||
ifeq ("$(MATHLIB)","mkl")
|
||||
INCLUDEPATH += $(MKL_PATH)/$(CNTK_CUSTOM_MKL_VERSION)/include
|
||||
LIBS += -lm
|
||||
|
@ -418,6 +409,10 @@ CNTKLIBRARY_TESTS_SRC =\
|
|||
Tests/UnitTests/V2LibraryTests/TensorTests.cpp \
|
||||
Tests/UnitTests/V2LibraryTests/TrainerTests.cpp \
|
||||
Tests/UnitTests/V2LibraryTests/CifarResNet.cpp \
|
||||
Tests/UnitTests/V2LibraryTests/SerializationTests.cpp \
|
||||
Tests/UnitTests/V2LibraryTests/LearnerTests.cpp \
|
||||
Tests/UnitTests/V2LibraryTests/FunctionTests.cpp \
|
||||
Tests/UnitTests/V2LibraryTests/SequenceClassification.cpp \
|
||||
|
||||
CNTKLIBRARY_TESTS:=$(BINDIR)/v2librarytests
|
||||
CNTKLIBRARY_TESTS_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(CNTKLIBRARY_TESTS_SRC)))
|
||||
|
@ -933,22 +928,24 @@ UNITTEST_BRAINSCRIPT_SRC = \
|
|||
$(SOURCEDIR)/CNTK/BrainScript/BrainScriptEvaluator.cpp \
|
||||
$(SOURCEDIR)/CNTK/BrainScript/BrainScriptParser.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/BrainScriptTests/ParserTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/BrainScriptTests/ComputationNetworkTests.cpp \
|
||||
$(SOURCEDIR)/../Tests/UnitTests/BrainScriptTests/stdafx.cpp
|
||||
|
||||
UNITTEST_BRAINSCRIPT_SRC+=$(COMMON_SRC)
|
||||
UNITTEST_BRAINSCRIPT_SRC += $(COMPUTATION_NETWORK_LIB_SRC)
|
||||
UNITTEST_BRAINSCRIPT_SRC += $(SEQUENCE_TRAINING_LIB_SRC)
|
||||
|
||||
UNITTEST_BRAINSCRIPT_OBJ := $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_BRAINSCRIPT_SRC))
|
||||
UNITTEST_BRAINSCRIPT_OBJ := $(patsubst %.cu, $(OBJDIR)/%.o, $(patsubst %.cpp, $(OBJDIR)/%.o, $(UNITTEST_BRAINSCRIPT_SRC)))
|
||||
|
||||
UNITTEST_BRAINSCRIPT := $(BINDIR)/brainscripttests
|
||||
|
||||
ALL += $(UNITTEST_BRAINSCRIPT)
|
||||
SRC += $(UNITTEST_BRAINSCRIPT_SRC)
|
||||
|
||||
$(UNITTEST_BRAINSCRIPT): $(UNITTEST_BRAINSCRIPT_OBJ)
|
||||
$(UNITTEST_BRAINSCRIPT): $(UNITTEST_BRAINSCRIPT_OBJ) | $(CNTKMATH_LIB)
|
||||
@echo $(SEPARATOR)
|
||||
@mkdir -p $(dir $@)
|
||||
@echo building $@ for $(ARCH) with build type $(BUILDTYPE)
|
||||
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) -ldl
|
||||
$(CXX) $(LDFLAGS) $(patsubst %,-L%, $(LIBDIR) $(LIBPATH) $(GDK_NVML_LIB_PATH) $(BOOSTLIB_PATH)) $(patsubst %, $(RPATH)%, $(ORIGINLIBDIR) $(LIBPATH) $(BOOSTLIB_PATH)) -o $@ $^ $(BOOSTLIBS) $(LIBS) -ldl -l$(CNTKMATH)
|
||||
|
||||
unittests: $(UNITTEST_EVAL) $(UNITTEST_READER) $(UNITTEST_NETWORK) $(UNITTEST_MATH) $(UNITTEST_BRAINSCRIPT)
|
||||
|
||||
|
|
|
@ -72,18 +72,6 @@ void DoTrain(const ConfigRecordType& config)
|
|||
bool makeMode = config(L"makeMode", true);
|
||||
DEVICEID_TYPE deviceId = DeviceFromConfig(config);
|
||||
|
||||
// determine the network-creation function
|
||||
// We have several ways to create that network.
|
||||
function<ComputationNetworkPtr(DEVICEID_TYPE)> createNetworkFn;
|
||||
|
||||
createNetworkFn = GetNetworkFactory<ConfigRecordType, ElemType>(config);
|
||||
|
||||
auto dataReader = CreateObject<DataReader>(config, L"reader");
|
||||
|
||||
shared_ptr<DataReader> cvDataReader;
|
||||
if (config.Exists(L"cvReader"))
|
||||
cvDataReader = CreateObject<DataReader>(config, L"cvReader");
|
||||
|
||||
shared_ptr<SGD<ElemType>> optimizer;
|
||||
if (config.Exists(L"optimizer"))
|
||||
{
|
||||
|
@ -95,8 +83,39 @@ void DoTrain(const ConfigRecordType& config)
|
|||
optimizer = make_shared<SGD<ElemType>>(configSGD);
|
||||
}
|
||||
|
||||
// determine which epoch to start with, including recovering a checkpoint if any and 'makeMode' enabled
|
||||
int startEpoch = optimizer->DetermineStartEpoch(makeMode);
|
||||
if (startEpoch == optimizer->GetMaxEpochs())
|
||||
{
|
||||
LOGPRINTF(stderr, "No further training is necessary.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
wstring modelFileName = optimizer->GetModelNameForEpoch(int(startEpoch) - 1);
|
||||
bool loadNetworkFromCheckpoint = startEpoch >= 0;
|
||||
fprintf(stderr, "\n");
|
||||
if (loadNetworkFromCheckpoint)
|
||||
LOGPRINTF(stderr, "Starting from checkpoint. Loading network from '%ls'.\n", modelFileName.c_str());
|
||||
else
|
||||
LOGPRINTF(stderr, "Creating virgin network.\n");
|
||||
|
||||
// determine the network-creation function
|
||||
// We have several ways to create that network.
|
||||
function<ComputationNetworkPtr(DEVICEID_TYPE)> createNetworkFn;
|
||||
|
||||
createNetworkFn = GetNetworkFactory<ConfigRecordType, ElemType>(config);
|
||||
|
||||
// create or load from checkpoint
|
||||
shared_ptr<ComputationNetwork> net = !loadNetworkFromCheckpoint ? createNetworkFn(deviceId) : ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName);
|
||||
|
||||
auto dataReader = CreateObject<DataReader>(config, L"reader");
|
||||
|
||||
shared_ptr<DataReader> cvDataReader;
|
||||
if (config.Exists(L"cvReader"))
|
||||
cvDataReader = CreateObject<DataReader>(config, L"cvReader");
|
||||
|
||||
optimizer->InitMPI(MPIWrapper::GetInstance());
|
||||
optimizer->Train(createNetworkFn, deviceId, dataReader.get(), cvDataReader.get(), makeMode);
|
||||
optimizer->Train(net, deviceId, dataReader.get(), cvDataReader.get(), startEpoch, loadNetworkFromCheckpoint);
|
||||
}
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace ScriptableObjects {
|
||||
|
@ -189,9 +208,8 @@ void DoDumpNodes(const ConfigParameters& config)
|
|||
if (!printValues && !printMetadata)
|
||||
InvalidArgument("printValues and printMetadata: Since both are set to false, there will be nothing to dump");
|
||||
|
||||
ComputationNetwork net(CPUDEVICE); // always use CPU
|
||||
net.Load<ElemType>(modelPath); // TODO: we have a function now to combine this and the previous line
|
||||
net.DumpNodeInfoToFile(nodeName, printValues, printMetadata, outputFile, nodeNameRegexStr);
|
||||
ComputationNetworkPtr net = ComputationNetwork::CreateFromFile<ElemType>(CPUDEVICE, modelPath);
|
||||
net->DumpNodeInfoToFile(nodeName, printValues, printMetadata, outputFile, nodeNameRegexStr);
|
||||
}
|
||||
|
||||
template void DoDumpNodes<float>(const ConfigParameters& config);
|
||||
|
|
|
@ -10,7 +10,7 @@
|
|||
#include "stdafx.h"
|
||||
#ifdef _WIN32
|
||||
#include <crtdbg.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#include "Basics.h"
|
||||
#include "Actions.h"
|
||||
|
|
|
@ -53,8 +53,6 @@ if "%p_CNTK_MKL%" == "1" (
|
|||
) else (
|
||||
echo #define _MATHLIB_ "mkl">> buildinfo.h$$
|
||||
)
|
||||
) else (
|
||||
echo #define _MATHLIB_ "acml">> buildinfo.h$$
|
||||
)
|
||||
|
||||
echo #define _BUILDER_ "%USERNAME%" >> buildinfo.h$$
|
||||
|
|
|
@ -7,6 +7,12 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#ifdef SWIG
|
||||
#define final
|
||||
#define explicit
|
||||
#define static_assert(condition, message)
|
||||
#endif
|
||||
|
||||
#include "CNTKLibraryInternals.h"
|
||||
|
||||
#include <memory>
|
||||
|
@ -14,10 +20,12 @@
|
|||
#include <array>
|
||||
#include <stdarg.h>
|
||||
#include <assert.h>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <iosfwd>
|
||||
#include<algorithm>
|
||||
|
||||
namespace CNTK
|
||||
|
@ -236,7 +244,7 @@ namespace CNTK
|
|||
}
|
||||
|
||||
///
|
||||
/// Creates and returns a new shape contructed by appending the dimensions of the specified 'shape' to 'this' shape's dimensions.
|
||||
/// Creates and returns a new shape constructed by appending the dimensions of the specified 'shape' to 'this' shape's dimensions.
|
||||
///
|
||||
NDShape AppendShape(const NDShape& shape) const
|
||||
{
|
||||
|
@ -665,35 +673,52 @@ namespace CNTK
|
|||
|
||||
///
|
||||
/// Denotes an Axis of a Variable and is used for specifying the axes parameters of certain Functions such as reductions.
|
||||
/// Besides the static axes corresponding to each of the axes of the Variable's shape, Input and Output Variables
|
||||
/// also have one or more dynamic axes (corresponding to the sequence dimensions) and one implicit batch axis denoting the axes
|
||||
/// along which multiple sequences are batched in the Values corresponding to the variable when performing computations.
|
||||
/// Besides the static axes corresponding to each of the axes of the Variable's shape, Variables of kind 'Input' and any
|
||||
/// 'Output' Variables dependent on an 'Input' Variable also have 2 additional dynamic axes whose dimensions are known only
|
||||
/// when the Variable is bound to actual data during compute (viz. sequence axis and batch axis denoting the axis along which
|
||||
/// multiple sequences are batched)
|
||||
///
|
||||
class Axis final
|
||||
{
|
||||
CNTK_API static const std::wstring s_staticAxisNamePrefix;
|
||||
public:
|
||||
///
|
||||
/// Construct an Axis object denoting a static axis with the specified index.
|
||||
///
|
||||
Axis(size_t staticAxisIdx)
|
||||
explicit Axis(size_t staticAxisIdx)
|
||||
: m_staticAxisIdx(staticAxisIdx)
|
||||
{
|
||||
const wchar_t* staticAxisNamePrefix = L"staticAxis_";
|
||||
m_name = staticAxisNamePrefix + std::to_wstring(staticAxisIdx);
|
||||
m_name = s_staticAxisNamePrefix + std::to_wstring(staticAxisIdx);
|
||||
}
|
||||
|
||||
///
|
||||
/// Construct a dynamic axis with the specified name.
|
||||
///
|
||||
Axis(const std::wstring& name)
|
||||
explicit Axis(const std::wstring& name)
|
||||
: m_staticAxisIdx(SIZE_MAX), m_name(name)
|
||||
{
|
||||
if (m_name.length() > s_staticAxisNamePrefix.length())
|
||||
{
|
||||
auto prefix = m_name.substr(0, s_staticAxisNamePrefix.length());
|
||||
auto suffix = m_name.substr(s_staticAxisNamePrefix.length(), m_name.length() - s_staticAxisNamePrefix.length());
|
||||
if (prefix == s_staticAxisNamePrefix)
|
||||
{
|
||||
if (suffix == L"0")
|
||||
*this = Axis(0);
|
||||
else
|
||||
{
|
||||
auto suffixVal = std::stoul(suffix);
|
||||
if (suffixVal != 0)
|
||||
*this = Axis(suffixVal);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Returns a boolean indicating if 'this' Axis corresponds to a static axis
|
||||
///
|
||||
bool IsStaticAxis() const { return m_staticAxisIdx == SIZE_MAX; }
|
||||
bool IsStaticAxis() const { return m_staticAxisIdx != SIZE_MAX; }
|
||||
|
||||
///
|
||||
/// Returns the axis index if 'this' Axis is a static axis. Throws an exception otherwise.
|
||||
|
@ -714,12 +739,7 @@ namespace CNTK
|
|||
///
|
||||
/// Static Axis object representing the batch axis.
|
||||
///
|
||||
CNTK_API static const Axis& BatchAxis();
|
||||
|
||||
///
|
||||
/// Special Axis object denoting all the axes of the Value object in whose context it is used.
|
||||
///
|
||||
CNTK_API static const Axis& AllAxes();
|
||||
CNTK_API static const Axis& DefaultBatchAxis();
|
||||
|
||||
///
|
||||
/// Name of 'this' axis
|
||||
|
@ -753,7 +773,20 @@ namespace CNTK
|
|||
{
|
||||
return !(first == second);
|
||||
}
|
||||
}
|
||||
|
||||
namespace std {
|
||||
template <> struct hash<CNTK::Axis>
|
||||
{
|
||||
size_t operator()(const CNTK::Axis& x) const
|
||||
{
|
||||
return std::hash<std::wstring>()(x.Name());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
namespace CNTK
|
||||
{
|
||||
///
|
||||
/// Enumeration type denoting the kind of a symbolic Variable object
|
||||
///
|
||||
|
@ -780,47 +813,76 @@ namespace CNTK
|
|||
template <typename T>
|
||||
friend struct std::hash;
|
||||
|
||||
CNTK_API static const std::vector<Axis> s_defaultInputVariableDynamicAxes;
|
||||
public:
|
||||
///
|
||||
/// Create an 'Input' Variable.
|
||||
///
|
||||
Variable(const NDShape& shape, CNTK::DataType dataType)
|
||||
: Variable(shape, dataType, L"")
|
||||
Variable(const NDShape& shape, CNTK::DataType dataType, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
|
||||
: Variable(shape, dataType, L"", dynamicAxes)
|
||||
{}
|
||||
|
||||
///
|
||||
/// Create an 'Input' Variable.
|
||||
///
|
||||
Variable(const NDShape& shape, CNTK::DataType dataType, const wchar_t* name)
|
||||
: Variable(shape, dataType, std::wstring(name))
|
||||
Variable(const NDShape& shape, CNTK::DataType dataType, const wchar_t* name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
|
||||
: Variable(shape, dataType, std::wstring(name), dynamicAxes)
|
||||
{}
|
||||
|
||||
///
|
||||
/// Create an 'Input' Variable.
|
||||
///
|
||||
Variable(const NDShape& shape, CNTK::DataType dataType, const std::wstring& name)
|
||||
: Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, false, { Axis::DefaultDynamicAxis() }, false, name)
|
||||
Variable(const NDShape& shape, CNTK::DataType dataType, const std::wstring& name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
|
||||
: Variable(shape, false, dataType, name, dynamicAxes)
|
||||
{}
|
||||
|
||||
///
|
||||
/// Create an 'Input' Variable denoting sparse data.
|
||||
///
|
||||
Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, const std::wstring& name = L"")
|
||||
: Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, false, { Axis::DefaultDynamicAxis() }, isSparse, name)
|
||||
Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
|
||||
: Variable(shape, isSparse, dataType, false, L"", dynamicAxes)
|
||||
{}
|
||||
|
||||
///
|
||||
/// Create an 'Input' Variable denoting sparse data.
|
||||
///
|
||||
Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, const wchar_t* name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
|
||||
: Variable(shape, isSparse, dataType, std::wstring(name), dynamicAxes)
|
||||
{}
|
||||
|
||||
///
|
||||
/// Create an 'Input' Variable denoting sparse data.
|
||||
///
|
||||
Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, const std::wstring& name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
|
||||
: Variable(shape, isSparse, dataType, false, name, dynamicAxes)
|
||||
{}
|
||||
|
||||
///
|
||||
/// Create an 'Input' Variable and specify if gradients are to be computed for this input
|
||||
///
|
||||
Variable(const NDShape& shape, CNTK::DataType dataType, bool needsGradient, const std::wstring& name = L"")
|
||||
: Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, needsGradient, { Axis::DefaultDynamicAxis() }, false, name)
|
||||
Variable(const NDShape& shape, CNTK::DataType dataType, bool needsGradient, const wchar_t* name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
|
||||
: Variable(shape, dataType, needsGradient, std::wstring(name), dynamicAxes)
|
||||
{}
|
||||
|
||||
///
|
||||
/// Create an 'Input' Variable and specify if gradients are to be computed for this input
|
||||
///
|
||||
Variable(const NDShape& shape, CNTK::DataType dataType, bool needsGradient, const std::wstring& name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
|
||||
: Variable(shape, false, dataType, needsGradient, name, dynamicAxes)
|
||||
{}
|
||||
|
||||
///
|
||||
/// Create an 'Input' Variable denoting sparse data and specify if gradients are to be computed for this input
|
||||
///
|
||||
Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, bool needsGradient, const std::wstring& name = L"")
|
||||
: Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, needsGradient, { Axis::DefaultDynamicAxis() }, isSparse, name)
|
||||
Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, bool needsGradient, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
|
||||
: Variable(shape, isSparse, dataType, needsGradient, L"", dynamicAxes)
|
||||
{}
|
||||
|
||||
///
|
||||
/// Create an 'Input' Variable denoting sparse data and specify if gradients are to be computed for this input
|
||||
///
|
||||
Variable(const NDShape& shape, bool isSparse, CNTK::DataType dataType, bool needsGradient, const std::wstring& name, const std::vector<Axis>& dynamicAxes = s_defaultInputVariableDynamicAxes)
|
||||
: Variable(shape, VariableKind::Input, dataType, nullptr, nullptr, needsGradient, dynamicAxes, isSparse, name)
|
||||
{}
|
||||
|
||||
///
|
||||
|
@ -860,7 +922,7 @@ namespace CNTK
|
|||
///
|
||||
/// Returns a boolean value indicating if 'this' variable denotes sparse data
|
||||
///
|
||||
bool IsSparse() const { return (m_dataFields->m_isSparse); }
|
||||
bool IsSparse() const { return m_dataFields->m_isSparse; }
|
||||
|
||||
///
|
||||
/// Returns a boolean value indicating if 'this' variable is an Input
|
||||
|
@ -941,6 +1003,14 @@ namespace CNTK
|
|||
VariableFields(const NDShape& shape, VariableKind varType, CNTK::DataType type, Function* ownerFunction, const NDArrayViewPtr& value, bool needsGradient, const std::vector<Axis>& dynamicAxes, bool isSparse, const std::wstring& name)
|
||||
: m_shape(shape), m_varKind(varType), m_dataType(type), m_ownerFunction(ownerFunction), m_value(value), m_needsGradient(needsGradient), m_dynamicAxes(dynamicAxes), m_isSparse(isSparse), m_name(name)
|
||||
{
|
||||
// Validate that each of the dynamic axes is unique
|
||||
std::unordered_set<Axis> uniqueDynamicAxis;
|
||||
for (auto& currentDynamicAxis : dynamicAxes)
|
||||
{
|
||||
auto retVal = uniqueDynamicAxis.insert(currentDynamicAxis);
|
||||
if (!retVal.second)
|
||||
InvalidArgument("Dynamic axis named %S is specified more than once for Variable object", currentDynamicAxis.Name().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
@ -1079,7 +1149,7 @@ namespace CNTK
|
|||
/// Construct a Placeholder with the specified NDShape
|
||||
///
|
||||
explicit Placeholder(const NDShape& shape, const std::wstring& name = L"")
|
||||
: Variable(shape, VariableKind::Placeholder, DataType::Unknown, nullptr, false, {Axis::DefaultDynamicAxis()}, name)
|
||||
: Variable(shape, VariableKind::Placeholder, DataType::Unknown, nullptr, false, { Axis::DefaultDynamicAxis(), Axis::DefaultBatchAxis() }, name)
|
||||
{}
|
||||
|
||||
///
|
||||
|
@ -1097,13 +1167,15 @@ namespace CNTK
|
|||
}
|
||||
|
||||
namespace std {
|
||||
template <> struct hash<CNTK::Axis>
|
||||
|
||||
template <> struct hash<CNTK::NDShape>
|
||||
{
|
||||
size_t operator()(const CNTK::Axis& x) const
|
||||
size_t operator()(const CNTK::NDShape& x) const
|
||||
{
|
||||
return std::hash<std::wstring>()(x.Name());
|
||||
return std::hash<std::wstring>()(x.AsString());
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
template <> struct hash<CNTK::Variable>
|
||||
{
|
||||
|
@ -1441,6 +1513,21 @@ namespace CNTK
|
|||
///
|
||||
CNTK_API FunctionPtr Softmax(const Variable& operand, const std::wstring& name = L"");
|
||||
|
||||
///
|
||||
/// Create an instance of the CNTK built-in hardmax operation on specified tensor input operand
|
||||
///
|
||||
CNTK_API FunctionPtr Hardmax(const Variable& operand, const std::wstring& name = L"");
|
||||
|
||||
///
|
||||
/// Create an instance of the CNTK built-in transpose dimensions operation on specified tensor input operand
|
||||
///
|
||||
CNTK_API FunctionPtr TransposeAxes(const Variable& operand, const Axis& axis1, const Axis& axis2, const std::wstring& name = L"");
|
||||
|
||||
///
|
||||
/// Create an instance of the slice operation on specified tensor input operand
|
||||
///
|
||||
CNTK_API FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name = L"");
|
||||
|
||||
///
|
||||
/// Create an instance of the CNTK built-in elementwise tensor addition operation with the specified input operands.
|
||||
///
|
||||
|
@ -1497,6 +1584,13 @@ namespace CNTK
|
|||
///
|
||||
CNTK_API FunctionPtr Times(const Variable& leftOperand, const Variable& rightOperand, size_t numOutputAxes = 1, const std::wstring& name = L"");
|
||||
|
||||
///
|
||||
/// Create an instance of the CNTK built-in matrix multiplication operation with the transpose of the left input operand
|
||||
/// and the specified right operand. Only accepts left operands of ranks 1 or 2.
|
||||
/// TODO: Specify the constraints on the shapes of the operands.
|
||||
///
|
||||
CNTK_API FunctionPtr TransposeTimes(const Variable& leftOperand, const Variable& rightOperand, size_t numOutputAxes = 1, const std::wstring& name = L"");
|
||||
|
||||
///
|
||||
/// Create an instance of the CNTK built-in operation to compute squared-error for specified input operands.
|
||||
///
|
||||
|
@ -1518,7 +1612,6 @@ namespace CNTK
|
|||
///
|
||||
CNTK_API FunctionPtr PastValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name = L"");
|
||||
|
||||
//CNTK_API FunctionPtr PastValue(const Variable& initialState, const Variable& operand, Axis axis, const std::wstring& name = L"");
|
||||
|
||||
///
|
||||
/// Create an instance of the CNTK built-in operation for getting the future value along the lone dynamic axis of the specified operand.
|
||||
|
@ -1532,6 +1625,16 @@ namespace CNTK
|
|||
///
|
||||
CNTK_API FunctionPtr ReduceSum(const Variable& operand, const std::wstring& name = L"");
|
||||
|
||||
///
|
||||
/// Create an instance of the CNTK built-in sum reduction operation on specified tensor input operand along the specified axis
|
||||
///
|
||||
CNTK_API FunctionPtr ReduceSum(const Variable& operand, const Axis& axis, const std::wstring& name = L"");
|
||||
|
||||
///
|
||||
/// Create an instance of the CNTK built-in LogSum reduction operation on specified tensor input operand along the specified axis
|
||||
///
|
||||
CNTK_API FunctionPtr ReduceLogSum(const Variable& operand, const Axis& axis, const std::wstring& name = L"");
|
||||
|
||||
///
|
||||
/// Per dimension mean-variance normalization of the specified input operand.
|
||||
///
|
||||
|
@ -1630,6 +1733,7 @@ namespace CNTK
|
|||
NDShape,
|
||||
Vector,
|
||||
Dictionary,
|
||||
NDArrayView,
|
||||
};
|
||||
|
||||
static const char* TypeName(Type type)
|
||||
|
@ -1654,6 +1758,8 @@ namespace CNTK
|
|||
return "Vector";
|
||||
case Type::Dictionary:
|
||||
return "Dictionary";
|
||||
case Type::NDArrayView:
|
||||
return "NDArrayView";
|
||||
default:
|
||||
LogicError("Unknown DictionaryValue::Type");
|
||||
}
|
||||
|
@ -1687,13 +1793,21 @@ namespace CNTK
|
|||
DictionaryValue(const wchar_t* value)
|
||||
: DictionaryValue(std::wstring(value))
|
||||
{}
|
||||
|
||||
// Due to SWIG we had to flatten this template for vector<DictionaryValue>
|
||||
DictionaryValue(const std::vector<CNTK::DictionaryValue>& value) : m_valueType(GetValueType<std::vector<CNTK::DictionaryValue>>())
|
||||
{
|
||||
AllocateDataPtr(value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
DictionaryValue(const T& value) : m_valueType(GetValueType<T>())
|
||||
{
|
||||
static_assert(std::is_same<T, NDShape>::value ||
|
||||
std::is_same<T, std::wstring>::value ||
|
||||
std::is_same<T, std::vector<DictionaryValue>>::value ||
|
||||
std::is_same<T, Dictionary>::value,
|
||||
static_assert((std::is_same<T, NDShape>::value ||
|
||||
std::is_same<T, std::wstring>::value ||
|
||||
std::is_same<T, std::vector<DictionaryValue>>::value ||
|
||||
std::is_same<T, Dictionary>::value ||
|
||||
std::is_same<T, NDArrayView>::value),
|
||||
"Unsupported ValueType");
|
||||
|
||||
AllocateDataPtr(value);
|
||||
|
@ -1706,6 +1820,12 @@ namespace CNTK
|
|||
*this = other;
|
||||
}
|
||||
|
||||
DictionaryValue(DictionaryValue&& other) : m_valueType(Type::Bool)
|
||||
{
|
||||
// The m_valueType must have been set to a non-ptr type to prevent an attempt to interpret
|
||||
// the underlying uninitialized value as a ptr and free it.
|
||||
*this = std::move(other);
|
||||
}
|
||||
DictionaryValue& operator=(const DictionaryValue& other)
|
||||
{
|
||||
if (this != &other)
|
||||
|
@ -1723,11 +1843,33 @@ namespace CNTK
|
|||
AllocateDataPtr(other.GetValue<std::vector<DictionaryValue>>());
|
||||
else if (other.m_valueType == Type::Dictionary)
|
||||
AllocateDataPtr(other.GetValue<Dictionary>());
|
||||
else if (other.m_valueType == Type::NDArrayView)
|
||||
AllocateDataPtr(other.GetValue<NDArrayView>());
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
DictionaryValue& operator=(DictionaryValue&& other)
|
||||
{
|
||||
FreeDataPtr();
|
||||
|
||||
m_valueType = other.m_valueType;
|
||||
m_data = other.m_data;
|
||||
|
||||
if (other.m_valueType == Type::String ||
|
||||
other.m_valueType == Type::NDShape ||
|
||||
other.m_valueType == Type::Vector ||
|
||||
other.m_valueType == Type::Dictionary ||
|
||||
other.m_valueType == Type::NDArrayView)
|
||||
{
|
||||
other.m_data.m_ptr = nullptr;
|
||||
}
|
||||
|
||||
other.m_valueType = Type::None;
|
||||
|
||||
return *this;
|
||||
}
|
||||
~DictionaryValue()
|
||||
{
|
||||
FreeDataPtr();
|
||||
|
@ -1764,7 +1906,8 @@ namespace CNTK
|
|||
template <typename T, typename std::enable_if<std::is_same<T, NDShape>::value ||
|
||||
std::is_same<T, std::wstring>::value ||
|
||||
std::is_same<T, std::vector<DictionaryValue>>::value ||
|
||||
std::is_same<T, Dictionary>::value>::type* = nullptr>
|
||||
std::is_same<T, Dictionary>::value ||
|
||||
std::is_same<T, NDArrayView>::value>::type* = nullptr>
|
||||
const T& GetValue() const
|
||||
{
|
||||
VerifyType<T>();
|
||||
|
@ -1781,21 +1924,25 @@ namespace CNTK
|
|||
return m_valueType;
|
||||
}
|
||||
|
||||
friend CNTK_API Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, DictionaryValue& us);
|
||||
friend CNTK_API Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const DictionaryValue& us);
|
||||
CNTK_API bool operator==(const DictionaryValue& other) const;
|
||||
CNTK_API bool operator!=(const DictionaryValue& other) const;
|
||||
|
||||
friend CNTK_API std::istream& operator>>(std::istream& stream, DictionaryValue& us);
|
||||
friend CNTK_API std::ostream& operator<<(std::ostream& stream, const DictionaryValue& us);
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
static Type GetValueType()
|
||||
{
|
||||
static_assert(std::is_same<T, bool>::value ||
|
||||
static_assert((std::is_same<T, bool>::value ||
|
||||
std::is_same<T, size_t>::value ||
|
||||
std::is_same<T, float>::value ||
|
||||
std::is_same<T, double>::value ||
|
||||
std::is_same<T, std::wstring>::value ||
|
||||
std::is_same<T, NDShape>::value ||
|
||||
std::is_same<T, std::vector<DictionaryValue>>::value ||
|
||||
std::is_same<T, Dictionary>::value,
|
||||
std::is_same<T, std::vector<DictionaryValue>>::value ||
|
||||
std::is_same<T, Dictionary>::value ||
|
||||
std::is_same<T, NDArrayView>::value),
|
||||
"Unsupported ValueType");
|
||||
|
||||
if (std::is_same<T, bool>::value) return Type::Bool;
|
||||
|
@ -1806,6 +1953,7 @@ namespace CNTK
|
|||
if (std::is_same<T, NDShape>::value) return Type::NDShape;
|
||||
if (std::is_same<T, std::vector<DictionaryValue>>::value) return Type::Vector;
|
||||
if (std::is_same<T, Dictionary>::value) return Type::Dictionary;
|
||||
if (std::is_same<T, NDArrayView>::value) return Type::NDArrayView;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -1831,6 +1979,8 @@ namespace CNTK
|
|||
FreePtrAsType<std::vector<DictionaryValue>>();
|
||||
else if (m_valueType == Type::Dictionary)
    FreePtrAsType<Dictionary>();
// An NDArrayView payload must be matched on Type::NDArrayView. Testing Type::Dictionary a second
// time made this branch unreachable, so NDArrayView payloads were never released.
else if (m_valueType == Type::NDArrayView)
    FreePtrAsType<NDArrayView>();
|
||||
}
|
||||
|
||||
Type m_valueType;
|
||||
|
@ -1884,9 +2034,11 @@ namespace CNTK
|
|||
return Contains(key.c_str());
|
||||
}
|
||||
|
||||
CNTK_API bool operator==(const Dictionary& other) const;
|
||||
CNTK_API bool operator!=(const Dictionary& other) const;
|
||||
|
||||
friend CNTK_API Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, Dictionary& us);
|
||||
friend CNTK_API Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const Dictionary& us);
|
||||
friend CNTK_API std::istream& operator>>(std::istream& stream, Dictionary& us);
|
||||
friend CNTK_API std::ostream& operator<<(std::ostream& stream, const Dictionary& us);
|
||||
|
||||
private:
|
||||
std::shared_ptr<std::unordered_map<std::wstring, DictionaryValue>> m_dictionaryData;
|
||||
|
@ -1924,6 +2076,9 @@ namespace CNTK
|
|||
///
|
||||
CNTK_API virtual void RestoreFromCheckpoint(const Dictionary& /*checkpoint*/) {}
|
||||
|
||||
///
|
||||
/// Destruct this Learner.
|
||||
///
|
||||
virtual ~Learner() {}
|
||||
|
||||
protected:
|
||||
|
@ -1935,37 +2090,127 @@ namespace CNTK
|
|||
|
||||
};
|
||||
|
||||
///
|
||||
/// A collection of key-value pairs that represents training parameter schedule in
|
||||
/// terms of the number of processed samples.
|
||||
/// This class provides a number of convenience constructors to allow easy conversion
|
||||
/// from a single value, a vector of values and a list of pairs to the training schedule.
|
||||
///
|
||||
template <typename T>
|
||||
class TrainingParameterSchedule
|
||||
{
|
||||
public:
|
||||
///
|
||||
/// Create a schedule with a constant parameter value.
|
||||
///
|
||||
TrainingParameterSchedule(T value)
|
||||
: m_schedule({ std::make_pair(0, value) }), m_unit(1)
|
||||
{}
|
||||
|
||||
///
|
||||
/// Create a schedule where the parameter changes its value every 'unit' samples:
|
||||
/// schedule[0] is used for the first 'unit' samples, schedule[1] -- for the second,
|
||||
/// and so on. The last value is then used repeatedly until the end of training.
|
||||
///
|
||||
TrainingParameterSchedule(const std::vector<T>& schedule, size_t unit = 1)
|
||||
: m_unit(unit)
|
||||
{
|
||||
// TODO: 0 will be used to mean "the entire sweep"
|
||||
if (unit == 0)
|
||||
RuntimeError("TrainingParameterSchedule::constructor : 'unit' cannot be 0.");
|
||||
|
||||
if (schedule.size() == 0)
|
||||
RuntimeError("TrainingParameterSchedule::constructor : schedule is empty.");
|
||||
|
||||
size_t i = 1;
|
||||
for (const auto& value : schedule)
|
||||
{
|
||||
m_schedule[m_unit * i++] = value;
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Create a schedule using the list of key-value pairs, where the key specifies
|
||||
/// the number of 'units' the parameter should maintain the corresponding value.
|
||||
/// The value from the last pair is used repeatedly until the end of training.
|
||||
/// For example, {{1, 0.05}, {2, 0.1}, {1, 0.005}} and unit = 100, corresponds to
|
||||
/// a schedule where the value of '0.05' is used for the first 100 samples, then
|
||||
/// '0.1' is used for the second 200 samples, after which the values is switched
|
||||
/// to '0.005'.
|
||||
///
|
||||
TrainingParameterSchedule(const std::initializer_list<std::pair<const size_t, T>>& schedule, size_t unit = 1)
|
||||
: m_unit(unit)
|
||||
{
|
||||
// TODO: 0 will be used to mean "the entire sweep"
|
||||
if (unit == 0)
|
||||
RuntimeError("TrainingParameterSchedule::constructor : 'unit' cannot be 0.");
|
||||
|
||||
if (schedule.size() == 0)
|
||||
RuntimeError("TrainingParameterSchedule::constructor : schedule is empty.");
|
||||
|
||||
size_t i = 0;
|
||||
for (const auto& it : schedule)
|
||||
{
|
||||
if (it.first == 0)
|
||||
RuntimeError("TrainingParameterSchedule::constructor : unit count cannot be 0.");
|
||||
|
||||
i += it.first;
|
||||
m_schedule[m_unit * i] = it.second;
|
||||
}
|
||||
}
|
||||
|
||||
///
|
||||
/// Returns a value corresponding to the absolute sample count from the beginning of training.
|
||||
///
|
||||
CNTK_API const T& operator[](size_t samleCount) const;
|
||||
|
||||
private:
|
||||
std::map<size_t, T> m_schedule;
|
||||
size_t m_unit;
|
||||
};
|
||||
|
||||
typedef TrainingParameterSchedule<double> LearningRatesPerSample;
|
||||
typedef TrainingParameterSchedule<double> MomentumsPerSample;
|
||||
|
||||
///
|
||||
/// Create an instance of the CNTK built-in SGD learner.
|
||||
///
|
||||
/// TODO: add additional SGD parameters here (a collection of learning rate values)
|
||||
CNTK_API LearnerPtr SGDLearner(const std::unordered_set<Parameter>& parameters, double learningRatePerSample);
|
||||
CNTK_API LearnerPtr SGDLearner(const std::unordered_set<Parameter>& parameters,
|
||||
const LearningRatesPerSample& learningRates);
|
||||
|
||||
///
|
||||
/// Create an instance of the CNTK built-in Momentum SGD learner.
|
||||
///
|
||||
/// TODO: add additional Momentum parameters here (a collection of momentum rate values)
|
||||
CNTK_API LearnerPtr MomentumSGDLearner(const std::unordered_set<Parameter>& parameters);
|
||||
CNTK_API LearnerPtr MomentumSGDLearner(const std::unordered_set<Parameter>& parameters,
|
||||
const LearningRatesPerSample& learningRates,
|
||||
const MomentumsPerSample& momentums);
|
||||
|
||||
///
|
||||
/// Create an instance of the CNTK built-in Nesterov's accelerated SGD learner.
|
||||
///
|
||||
CNTK_API LearnerPtr NesterovLearner(const std::unordered_set<Parameter>& parameters);
|
||||
CNTK_API LearnerPtr NesterovLearner(const std::unordered_set<Parameter>& parameters,
|
||||
const LearningRatesPerSample& learningRates,
|
||||
const MomentumsPerSample& momentums);
|
||||
|
||||
///
|
||||
/// Create an instance of the CNTK built-in AdaGrad learner.
|
||||
///
|
||||
CNTK_API LearnerPtr AdaGradLearner(const std::unordered_set<Parameter>& parameters, bool needAveMultiplier = true);
|
||||
CNTK_API LearnerPtr AdaGradLearner(const std::unordered_set<Parameter>& parameters,
|
||||
const LearningRatesPerSample& learningRates,
|
||||
bool needAveMultiplier = true);
|
||||
|
||||
///
|
||||
/// Create an instance of the CNTK built-in FSAdaGrad (improved AdaGrad) learner.
|
||||
///
|
||||
CNTK_API LearnerPtr FSAdaGradLearner(const std::unordered_set<Parameter>& parameters);
|
||||
CNTK_API LearnerPtr FSAdaGradLearner(const std::unordered_set<Parameter>& parameters,
|
||||
const LearningRatesPerSample& learningRates,
|
||||
const MomentumsPerSample& momentums);
|
||||
|
||||
///
|
||||
/// Create an instance of the CNTK built-in RMSProp learner.
|
||||
///
|
||||
CNTK_API LearnerPtr RMSPropLearner(const std::unordered_set<Parameter>& parameters,
|
||||
const LearningRatesPerSample& learningRates,
|
||||
double gamma,
|
||||
double inc,
|
||||
double dec,
|
||||
|
@ -1975,7 +2220,7 @@ namespace CNTK
|
|||
|
||||
///
|
||||
/// Trainer is the top-level abstraction responsible for the orchestration of the training of a model
|
||||
/// using the specified learners and training data either explicilty supplied as Value objects or from
|
||||
/// using the specified learners and training data either explicitly supplied as Value objects or from
|
||||
/// a MinibatchSource object.
|
||||
///
|
||||
class Trainer
|
||||
|
@ -2063,7 +2308,7 @@ namespace CNTK
|
|||
};
|
||||
|
||||
///
|
||||
/// Abstraction for generating minbatches of samples for training/evaluation.
|
||||
/// Abstraction for generating minibatches of samples for training/evaluation.
|
||||
///
|
||||
class MinibatchSource : public std::enable_shared_from_this<MinibatchSource>
|
||||
{
|
||||
|
@ -2079,10 +2324,14 @@ namespace CNTK
|
|||
/// #samples or both. In case the size is specified in terms of both #sequences and #samples, the smaller of the 2 is taken. The actual
|
||||
/// returned size of the minibatch is the min across all streams. Also the requested MB size fields in the maps are updated by the
|
||||
/// MinibatchSource to contain the actual #sequences and #samples in the returned minibatch for the corresponding stream.
|
||||
/// The return value indciates if the MinibatchSource will return any further data in subsequent calls of this function.
|
||||
/// The return value indicates if the MinibatchSource will return any further data in subsequent calls of this function.
|
||||
///
|
||||
virtual std::unordered_map<StreamInfo, MinibatchData> GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
|
||||
virtual const std::unordered_map<StreamInfo, MinibatchData>& GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
|
||||
const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()) = 0;
|
||||
///
|
||||
/// Destruct this MinibatchSource.
|
||||
///
|
||||
virtual ~MinibatchSource() {}
|
||||
|
||||
// TODO: Methods to save and restore from checkpoints
|
||||
|
||||
|
|
|
@ -7,6 +7,12 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#ifdef SWIG
|
||||
#define final
|
||||
#define explicit
|
||||
#define static_assert(condition, message)
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifdef CNTKV2LIBRARYDLL
|
||||
#define CNTK_API __declspec(dllexport)
|
||||
|
@ -47,8 +53,6 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
|
||||
template <typename ElementType>
|
||||
class ComputationNode;
|
||||
|
||||
class File;
|
||||
}}}
|
||||
|
||||
// TODO: The following should be reconciled with the equivalent code in the CNTK implementation
|
||||
|
@ -133,7 +137,7 @@ namespace CNTK
|
|||
#define NOT_IMPLEMENTED \
|
||||
{ \
|
||||
fprintf(stderr, "Inside File: %s Line: %d Function: %s -> Feature Not Implemented.\n", __FILE__, __LINE__, __FUNCTION__); \
|
||||
LogicError("Inside File: %s Line: %d Function: %s -> Feature Not Implemented.\n", __FILE__, __LINE__, __FUNCTION__); \
|
||||
CNTK::LogicError("Inside File: %s Line: %d Function: %s -> Feature Not Implemented.\n", __FILE__, __LINE__, __FUNCTION__); \
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -144,6 +148,7 @@ namespace CNTK
|
|||
class CompositeFunction;
|
||||
class Function;
|
||||
class Variable;
|
||||
class Axis;
|
||||
|
||||
// Similar to make_shared except that it associates a custom deleter with the shared_ptr to ensure
|
||||
// that objects are deleted on the same side of the library DLL where they are allocated
|
||||
|
@ -174,4 +179,15 @@ namespace CNTK
|
|||
|
||||
class MinibatchSource;
|
||||
typedef std::shared_ptr<MinibatchSource> MinibatchSourcePtr;
|
||||
|
||||
// Exported (CNTK_API) Function factories grouped in the nested 'Internal' namespace.
// Each one takes one or more operand Variables plus an optional name (defaulting to the
// empty string) and returns a FunctionPtr.
// NOTE(review): the namespace name suggests these are not meant as user-facing API -- confirm.
namespace Internal
|
||||
{
|
||||
CNTK_API FunctionPtr PackedIndex(const Variable& operand, const Variable& index, const std::wstring& name = L"");
|
||||
CNTK_API FunctionPtr GatherPacked(const Variable& operand, const Variable& packedIndex, const std::wstring& name = L"");
|
||||
CNTK_API FunctionPtr IsWithin(const Variable& operand, int offset, const std::wstring& name = L"");
|
||||
CNTK_API FunctionPtr Where(const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name = L"");
|
||||
CNTK_API FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name = L"");
|
||||
CNTK_API FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name = L"");
|
||||
CNTK_API FunctionPtr ReduceElements(const Variable& operand, const std::wstring& reductionOpName, const Axis& axis, const std::wstring& name = L"");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
#include "RecurrentNodes.h"
|
||||
#include "EvaluationNodes.h"
|
||||
#include "TrainingNodes.h"
|
||||
#include "ReshapingNodes.h"
|
||||
|
||||
using namespace Microsoft::MSR::CNTK;
|
||||
|
||||
|
@ -32,6 +33,7 @@ namespace CNTK
|
|||
|
||||
Variable var;
|
||||
NDShape varShape = AsNDShape(node->GetSampleLayout());
|
||||
|
||||
// The CNTK sample layouts may have trailing axes with dimension size of 1 which are automatically
|
||||
// added when converting from NDShape to CNTK internal TensorShapes and are not present in the original
|
||||
// shapes specified by the user. These should be truncated.
|
||||
|
@ -57,11 +59,10 @@ namespace CNTK
|
|||
if (node->HasMBLayout())
|
||||
{
|
||||
// TODO: Currently only default dynamic axis is supported
|
||||
const std::wstring defaultCNTKDynamicAxisName = L"";
|
||||
if (inputNode->GetRequestedDynamicAxis() != defaultCNTKDynamicAxisName)
|
||||
LogicError("Found dynamic axis named '%S' while currently only default dynamic axis named '%S' is supported!", node->GetMBLayout()->GetAxisName(), defaultCNTKDynamicAxisName.c_str());
|
||||
auto inputNodeInternalDynamicAxisName = inputNode->GetRequestedDynamicAxis();
|
||||
std::vector<Axis> inputVarDynamicAxes = DynamicAxesFromInternalDynamicAxisName(inputNodeInternalDynamicAxisName);
|
||||
|
||||
var = Variable(varShape, isSparse, AsDataType<ElementType>(), node->GetLearningRateMultiplier() != 0, node->GetName());
|
||||
var = Variable(varShape, isSparse, AsDataType<ElementType>(), node->GetLearningRateMultiplier() != 0, node->GetName(), inputVarDynamicAxes);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -121,6 +122,40 @@ namespace CNTK
|
|||
opType = PrimitiveOpType::Reciprocal;
|
||||
else if (node->OperationName() == OperationNameOf(SoftmaxNode))
|
||||
opType = PrimitiveOpType::Softmax;
|
||||
else if (node->OperationName() == OperationNameOf(HardmaxNode))
|
||||
opType = PrimitiveOpType::Hardmax;
|
||||
else if (node->OperationName() == OperationNameOf(TransposeDimensionsNode))
|
||||
{
|
||||
auto transposeDimensionsNode = node->As<TransposeDimensionsNode<ElementType>>();
|
||||
primitiveFunctionConfigParameters[L"axis1"] = (size_t)transposeDimensionsNode->Axis1();
|
||||
primitiveFunctionConfigParameters[L"axis2"] = (size_t)transposeDimensionsNode->Axis2();
|
||||
|
||||
opType = PrimitiveOpType::TransposeAxes;
|
||||
}
|
||||
else if (node->OperationName() == OperationNameOf(WhereNode))
|
||||
{
|
||||
auto whereNode = node->As<WhereNode<ElementType>>();
|
||||
auto internalDynamicAxisName = whereNode->DynamicAxisName();
|
||||
std::vector<Axis> dynamicAxes = DynamicAxesFromInternalDynamicAxisName(internalDynamicAxisName);
|
||||
std::vector<std::wstring> dynamicAxesNames;
|
||||
for (auto axis : dynamicAxes)
|
||||
dynamicAxesNames.push_back(axis.Name());
|
||||
|
||||
primitiveFunctionConfigParameters[L"newDynamicAxes"] = AsDictionaryValueVector(dynamicAxesNames);
|
||||
|
||||
opType = PrimitiveOpType::Where;
|
||||
}
|
||||
else if (node->OperationName() == OperationNameOf(SliceNode))
|
||||
{
|
||||
auto sliceNode = node->As<SliceNode<ElementType>>();
|
||||
primitiveFunctionConfigParameters[L"axis"] = Axis(sliceNode->Axis() - 1).Name();
|
||||
primitiveFunctionConfigParameters[L"beginIndex"] = sliceNode->BeginIndex();
|
||||
primitiveFunctionConfigParameters[L"endIndex"] = sliceNode->EndIndex();
|
||||
|
||||
opType = PrimitiveOpType::Slice;
|
||||
}
|
||||
else if (node->OperationName() == OperationNameOf(SumElementsNode))
|
||||
opType = PrimitiveOpType::SumAll;
|
||||
else if (node->OperationName() == OperationNameOf(PlusNode))
|
||||
opType = PrimitiveOpType::Plus;
|
||||
else if (node->OperationName() == OperationNameOf(MinusNode))
|
||||
|
@ -139,11 +174,23 @@ namespace CNTK
|
|||
opType = PrimitiveOpType::Greater;
|
||||
else if (node->OperationName() == OperationNameOf(GreaterEqualNode))
|
||||
opType = PrimitiveOpType::GreaterEqual;
|
||||
else if (node->OperationName() == OperationNameOf(PackedIndexNode))
|
||||
opType = PrimitiveOpType::PackedIndex;
|
||||
else if (node->OperationName() == OperationNameOf(GatherPackedNode))
|
||||
{
|
||||
std::swap(inputVars[0], inputVars[1]);
|
||||
opType = PrimitiveOpType::GatherPacked;
|
||||
}
|
||||
else if (node->OperationName() == OperationNameOf(TimesNode))
|
||||
{
|
||||
primitiveFunctionConfigParameters[L"numOutputAxes"] = DictionaryValue((size_t)node->As<TimesNode<ElementType>>()->OutputRank());
|
||||
primitiveFunctionConfigParameters[L"numOutputAxes"] = (size_t)node->As<TimesNode<ElementType>>()->OutputRank();
|
||||
opType = PrimitiveOpType::Times;
|
||||
}
|
||||
else if (node->OperationName() == OperationNameOf(TransposeTimesNode))
|
||||
{
|
||||
primitiveFunctionConfigParameters[L"numOutputAxes"] = (size_t)node->As<TransposeTimesNode<ElementType>>()->OutputRank();
|
||||
opType = PrimitiveOpType::TransposeTimes;
|
||||
}
|
||||
else if (node->OperationName() == OperationNameOf(PastValueNode))
|
||||
{
|
||||
if (inputVars.size() == 1)
|
||||
|
@ -151,7 +198,7 @@ namespace CNTK
|
|||
auto initialStateVar = Constant({}, node->As<PastValueNode<ElementType>>()->InitialActivationValue(), AsDeviceDescriptor(node->GetDeviceId()));
|
||||
inputVars.insert(inputVars.begin(), initialStateVar);
|
||||
}
|
||||
primitiveFunctionConfigParameters[L"stepSize"] = DictionaryValue((size_t)node->As<PastValueNode<ElementType>>()->TimeStep());
|
||||
primitiveFunctionConfigParameters[L"stepSize"] = (size_t)node->As<PastValueNode<ElementType>>()->TimeStep();
|
||||
opType = PrimitiveOpType::PastValue;
|
||||
}
|
||||
else if (node->OperationName() == OperationNameOf(FutureValueNode))
|
||||
|
@ -161,7 +208,7 @@ namespace CNTK
|
|||
auto initialStateVar = Constant({}, node->As<FutureValueNode<ElementType>>()->InitialActivationValue(), AsDeviceDescriptor(node->GetDeviceId()));
|
||||
inputVars.insert(inputVars.begin(), initialStateVar);
|
||||
}
|
||||
primitiveFunctionConfigParameters[L"stepSize"] = DictionaryValue((size_t)node->As<FutureValueNode<ElementType>>()->TimeStep());
|
||||
primitiveFunctionConfigParameters[L"stepSize"] = (size_t)node->As<FutureValueNode<ElementType>>()->TimeStep();
|
||||
opType = PrimitiveOpType::FutureValue;
|
||||
}
|
||||
else if (node->OperationName() == OperationNameOf(SquareErrorNode))
|
||||
|
@ -176,8 +223,14 @@ namespace CNTK
|
|||
std::swap(inputVars[0], inputVars[1]);
|
||||
opType = PrimitiveOpType::ClassificationError;
|
||||
}
|
||||
else if (node->OperationName() == OperationNameOf(SumElementsNode))
|
||||
opType = PrimitiveOpType::ReduceSum;
|
||||
else if (node->OperationName() == OperationNameOf(ReduceElementsNode))
|
||||
{
|
||||
auto reduceElementsNode = node->As<ReduceElementsNode<ElementType>>();
|
||||
primitiveFunctionConfigParameters[L"CNTKInternalReductionAxisIndex"] = (size_t)reduceElementsNode->ReductionAxis();
|
||||
primitiveFunctionConfigParameters[L"ReductionOpName"] = reduceElementsNode->ReductionOpName();
|
||||
|
||||
opType = PrimitiveOpType::ReduceElements;
|
||||
}
|
||||
else if (node->OperationName() == OperationNameOf(ConvolutionNode))
|
||||
{
|
||||
auto convolutionNode = node->As<ConvolutionNode<ElementType>>();
|
||||
|
|
|
@ -14,21 +14,17 @@ namespace CNTK
|
|||
return GPUDevice(0);
|
||||
}
|
||||
|
||||
/*static*/ const std::wstring Axis::s_staticAxisNamePrefix = L"staticAxis_";
|
||||
|
||||
// Returns a reference to the single Axis instance named "defaultDynamicAxis".
// The instance is a function-local static, constructed on the first call.
/*static*/ const Axis& Axis::DefaultDynamicAxis()
{
    static Axis defaultDynamicAxisInstance(L"defaultDynamicAxis");

    return defaultDynamicAxisInstance;
}
|
||||
|
||||
/*static*/ const Axis& Axis::BatchAxis()
|
||||
/*static*/ const Axis& Axis::DefaultBatchAxis()
|
||||
{
|
||||
static Axis s_batchAxis(L"batchAxis");
|
||||
static Axis s_batchAxis(L"defaultBatchAxis");
|
||||
return s_batchAxis;
|
||||
}
|
||||
|
||||
// Returns a reference to the single Axis instance named "allAxes".
// The instance is a function-local static, constructed on the first call.
/*static*/ const Axis& Axis::AllAxes()
{
    static Axis allAxesInstance(L"allAxes");

    return allAxesInstance;
}
|
||||
}
|
||||
|
|
|
@ -10,6 +10,10 @@
|
|||
#include "Utils.h"
|
||||
#include "ComputationNode.h"
|
||||
#include "ReshapingNodes.h"
|
||||
#include "EvaluationNodes.h"
|
||||
#include "TrainingNodes.h"
|
||||
#include "LinearAlgebraNodes.h"
|
||||
#include "InputAndParamNodes.h"
|
||||
|
||||
using namespace Microsoft::MSR::CNTK;
|
||||
|
||||
|
@ -72,6 +76,17 @@ namespace CNTK
|
|||
}
|
||||
}
|
||||
|
||||
/*static*/ const std::wstring PrimitiveFunction::InternalSumReductionOpName = L"Sum";
|
||||
/*static*/ const std::wstring PrimitiveFunction::InternalLogSumReductionOpName = L"LogSum";
|
||||
/*static*/ const std::wstring PrimitiveFunction::InternalMeanReductionOpName = L"Mean";
|
||||
/*static*/ const std::wstring PrimitiveFunction::InternalMaxReductionOpName = L"Max";
|
||||
/*static*/ const std::wstring PrimitiveFunction::InternalMinReductionOpName = L"Min";
|
||||
/*static*/ const std::wstring PrimitiveFunction::InternalAllReductionOpName = L"All";
|
||||
/*static*/ const std::wstring PrimitiveFunction::InternalAnyReductionOpName = L"Any";
|
||||
|
||||
/*static*/ std::wstring CompositeFunction::s_internalDefaultDynamicAxisName = L"";
|
||||
/*static*/ std::wstring CompositeFunction::s_internalNoSequenceAxisName = L"noSequenceAxis";
|
||||
|
||||
// Replace any PlaceHolder Variables in the graph of Functions underlying 'this' CompositeFunction. All PlaceHolder variables
|
||||
// should have been replaced before performing any Forward compute of 'this' Function.
|
||||
/*virtual*/ void CompositeFunction::ReplacePlaceholders(const std::unordered_map<Placeholder, Variable>& placeholderReplacements,
|
||||
|
@ -122,22 +137,46 @@ namespace CNTK
|
|||
computationNodePtr->SetLearningRateMultiplier(0.0);
|
||||
|
||||
NDArrayViewPtr value = variable.IsConstant() ? Constant(variable).Value() : Parameter(variable).Value();
|
||||
auto matrix = variable.IsConstant() ? value->GetMatrix<ElementType>()->AsReference() : value->GetWritableMatrix<ElementType>()->AsReference();
|
||||
computationNodePtr->Value() = std::move(matrix);
|
||||
std::shared_ptr<const Matrix<ElementType>> valueMatrix = variable.IsConstant() ? value->GetMatrix<ElementType>() : value->GetWritableMatrix<ElementType>();
|
||||
if (variable.IsParameter() || (valueMatrix->GetDeviceId() == network->GetDeviceId()))
|
||||
computationNodePtr->Value() = valueMatrix->AsReference();
|
||||
else
|
||||
{
|
||||
Matrix<ElementType> clonedMatrix(valueMatrix->GetNumRows(), valueMatrix->GetNumCols(), network->GetDeviceId(), valueMatrix->GetMatrixType(), valueMatrix->GetFormat());
|
||||
clonedMatrix.AssignValuesOf(*valueMatrix);
|
||||
computationNodePtr->Value() = std::move(clonedMatrix);
|
||||
}
|
||||
}
|
||||
else if (variable.IsInput())
|
||||
{
|
||||
// TODO: Support inputs with > 1 dynamic axes
|
||||
if (variable.DynamicAxes().size() != 1)
|
||||
LogicError("Currently only Input variables with one dynamic axis are supported");
|
||||
// TODO: Input variables currently are required to have the default batch axis
|
||||
auto dynamicAxes = variable.DynamicAxes();
|
||||
auto foundDefaultBatchAxis = std::find(dynamicAxes.begin(), dynamicAxes.end(), Axis::DefaultBatchAxis());
|
||||
if (foundDefaultBatchAxis == dynamicAxes.end())
|
||||
LogicError("Currently Input Variables are required to have the DefaultBatchAxis as one of their dynamic axes");
|
||||
|
||||
auto dynamicAxis = variable.DynamicAxes()[0];
|
||||
if (dynamicAxis != Axis::DefaultDynamicAxis())
|
||||
LogicError("Currently only Input variables with DefaultDynamicAxis are supported");
|
||||
if (IsSparseInput(variable))
|
||||
computationNodePtr = builder.CreateSparseInputNode(variable.Name(), AsTensorShape(variable.Shape()));
|
||||
if (dynamicAxes.back() != Axis::DefaultBatchAxis())
|
||||
LogicError("Currently Input Variables are required to have the DefaultBatchAxis as their last dynamic axes");
|
||||
|
||||
// TODO: Support inputs with > 1 dynamic axes
|
||||
if ((dynamicAxes.size() < 1) || (dynamicAxes.size() > 2))
|
||||
LogicError("Currently only Input variables with 1 or 2 dynamic axis are supported");
|
||||
|
||||
std::wstring internalDynamicAxisName;
|
||||
if (dynamicAxes.size() == 1)
|
||||
internalDynamicAxisName = s_internalNoSequenceAxisName;
|
||||
else if (dynamicAxes[0] == Axis::DefaultDynamicAxis())
|
||||
internalDynamicAxisName = s_internalDefaultDynamicAxisName;
|
||||
else
|
||||
computationNodePtr = builder.CreateInputNode(variable.Name(), AsTensorShape(variable.Shape()));
|
||||
internalDynamicAxisName = dynamicAxes[0].Name();
|
||||
|
||||
if (!internalDynamicAxisName.empty())
|
||||
network->AddNodeToNetAndAttachInputs(New<DynamicAxisNode<ElementType>>(network->GetDeviceId(), internalDynamicAxisName), {});
|
||||
|
||||
if (IsSparseInput(variable))
|
||||
computationNodePtr = builder.CreateSparseInputNode(variable.Name(), AsTensorShape(variable.Shape()), internalDynamicAxisName);
|
||||
else
|
||||
computationNodePtr = builder.CreateInputNode(variable.Name(), AsTensorShape(variable.Shape()), internalDynamicAxisName);
|
||||
|
||||
if (variable.NeedsGradient())
|
||||
{
|
||||
|
@ -219,11 +258,29 @@ namespace CNTK
|
|||
computationNodePtr = builder.Reciprocal(input0Node, function->Name());
|
||||
break;
|
||||
case PrimitiveOpType::Softmax:
|
||||
if (functionInputs[0].Shape().NumAxes() > 1)
|
||||
InvalidArgument("Softmax operation can only be applied to a 1D input");
|
||||
|
||||
computationNodePtr = builder.Softmax(input0Node, function->Name());
|
||||
break;
|
||||
case PrimitiveOpType::Hardmax:
|
||||
computationNodePtr = builder.Hardmax(input0Node, function->Name());
|
||||
break;
|
||||
case PrimitiveOpType::TransposeAxes:
|
||||
{
|
||||
auto axis1 = Axis(functionConfig[L"axis1"].GetValue<std::wstring>());
|
||||
auto axis2 = Axis(functionConfig[L"axis2"].GetValue<std::wstring>());
|
||||
|
||||
// The axis ids passed to the internal CNTK TransposeDimensionsNode are 1 based instead of 0 based
|
||||
computationNodePtr = New<TransposeDimensionsNode<ElementType>>(network->GetDeviceId(), function->Name(), (int)(axis1.StaticAxisIndex() + 1), (int)(axis2.StaticAxisIndex() + 1));
|
||||
network->AddNodeToNetAndAttachInputs(computationNodePtr, { input0Node });
|
||||
break;
|
||||
}
|
||||
case PrimitiveOpType::Where:
|
||||
{
|
||||
auto dynamicAxes = variable.DynamicAxes();
|
||||
auto internalCNTKWhereNodeDynamicAxisName = (dynamicAxes == std::vector<Axis>({ Axis::DefaultBatchAxis() })) ? CompositeFunction::s_internalNoSequenceAxisName : dynamicAxes[0].Name();
|
||||
computationNodePtr = New<WhereNode<ElementType>>(network->GetDeviceId(), function->Name(), internalCNTKWhereNodeDynamicAxisName);
|
||||
network->AddNodeToNetAndAttachInputs(computationNodePtr, { input0Node });
|
||||
break;
|
||||
}
|
||||
case PrimitiveOpType::Pooling:
|
||||
{
|
||||
PoolingType poolingType = (PoolingType)(functionConfig[L"poolingType"].GetValue<size_t>());
|
||||
|
@ -235,6 +292,9 @@ namespace CNTK
|
|||
computationNodePtr = builder.Pooling(input0Node, AsCNTKPoolKind(poolingType), AsTensorShape(poolingWindowsShape, true), AsTensorShape(strides, true), autoPadding, AsTensorShape(lowerPad, true), AsTensorShape(upperPad, true), ImageLayoutKind::CHW, function->Name());
|
||||
break;
|
||||
}
|
||||
case PrimitiveOpType::SumAll:
|
||||
computationNodePtr = builder.Sum(input0Node, function->Name());
|
||||
break;
|
||||
case PrimitiveOpType::Plus:
|
||||
computationNodePtr = builder.Plus(input0Node, input1Node, function->Name());
|
||||
break;
|
||||
|
@ -268,6 +328,12 @@ namespace CNTK
|
|||
computationNodePtr = builder.Times(input0Node, input1Node, numOutputAxes, function->Name());
|
||||
break;
|
||||
}
|
||||
case PrimitiveOpType::TransposeTimes:
|
||||
{
|
||||
size_t numOutputAxes = functionConfig[L"numOutputAxes"].GetValue<size_t>();
|
||||
computationNodePtr = network->AddNodeToNetAndAttachInputs(New<TransposeTimesNode<ElementType>>(network->GetDeviceId(), function->Name(), numOutputAxes), { input0Node, input1Node });
|
||||
break;
|
||||
}
|
||||
case PrimitiveOpType::Convolution:
|
||||
{
|
||||
NDShape outputMapCount, kernelShape;
|
||||
|
@ -296,35 +362,25 @@ namespace CNTK
|
|||
{
|
||||
Variable initialStateVar = functionInputs[0];
|
||||
Variable inputOperandVar = functionInputs[1];
|
||||
// TODO: Currently we only support a scalar initial state
|
||||
if (!initialStateVar.IsConstant() || (initialStateVar.Shape().NumAxes() > 0))
|
||||
LogicError("Currently PastValue/FutureValue Function only supports scalar initial state");
|
||||
|
||||
// TODO: We currently only support input operand with 1 static axis for PastValue/FutureValue
|
||||
if (inputOperandVar.Shape().NumAxes() != 1)
|
||||
LogicError("Currently PastValue/FutureValue Function only supports input operand with 1 static axis");
|
||||
|
||||
// TODO: We currently only support input operand with 1 dynamic axis for PastValue/FutureValue
|
||||
if (inputOperandVar.DynamicAxes().size() != 1)
|
||||
LogicError("Currently PastValue/FutureValue Function only supports input operand with 1 dynamic axis");
|
||||
|
||||
// Get the initial state of the PastValue/FutureValue operation
|
||||
ElementType initStateValue;
|
||||
NDArrayView tempView({}, &initStateValue, 1, DeviceDescriptor::CPUDevice());
|
||||
tempView.CopyFrom(*Constant(initialStateVar).Value());
|
||||
|
||||
size_t stepSize = primitiveFunction->FunctionConfig()[L"stepSize"].GetValue<size_t>();
|
||||
if (op == PrimitiveOpType::PastValue)
|
||||
computationNodePtr = builder.PastValue(input1Node, (float)initStateValue, inputOperandVar.Shape()[0], primitiveFunction->FunctionConfig()[L"stepSize"].GetValue<size_t>(), function->Name());
|
||||
computationNodePtr = builder.PastValue(input1Node, (float)initStateValue, inputOperandVar.Shape().TotalSize(), stepSize, function->Name());
|
||||
else
|
||||
computationNodePtr = builder.FutureValue(input1Node, (float)initStateValue, inputOperandVar.Shape()[0], primitiveFunction->FunctionConfig()[L"stepSize"].GetValue<size_t>(), function->Name());
|
||||
computationNodePtr = builder.FutureValue(input1Node, (float)initStateValue, inputOperandVar.Shape().TotalSize(), stepSize, function->Name());
|
||||
|
||||
break;
|
||||
}
|
||||
case PrimitiveOpType::ReduceSum:
|
||||
case PrimitiveOpType::ReduceElements:
|
||||
{
|
||||
// TODO: Use the new ReduceElements node instead of the legacy SumElements node for reduction. Currently ReduceElements has incorrect MBLayout inference.
|
||||
//computationNodePtr = network->AddNodeToNetAndAttachInputs(New<ReduceElementsNode<ElementType>>(network->GetDeviceId(), function->Name(), L"Sum", 0), { input0Node });
|
||||
computationNodePtr = builder.Sum(input0Node, function->Name());
|
||||
auto CNTKInternalReductionAxisIndex = (int)functionConfig[L"CNTKInternalReductionAxisIndex"].GetValue<size_t>();
|
||||
auto reductionOpName = functionConfig[L"ReductionOpName"].GetValue<std::wstring>();
|
||||
computationNodePtr = network->AddNodeToNetAndAttachInputs(New<ReduceElementsNode<ElementType>>(network->GetDeviceId(), function->Name(), reductionOpName, CNTKInternalReductionAxisIndex), { input0Node });
|
||||
break;
|
||||
}
|
||||
case PrimitiveOpType::BatchNormalization:
|
||||
|
@ -353,6 +409,25 @@ namespace CNTK
|
|||
computationNodePtr = variableToNodeMap[variable];
|
||||
|
||||
break;
|
||||
case PrimitiveOpType::PackedIndex:
|
||||
computationNodePtr = New<PackedIndexNode<ElementType>>(network->GetDeviceId(), function->Name());
|
||||
network->AddNodeToNetAndAttachInputs(computationNodePtr, { input0Node, input1Node });
|
||||
break;
|
||||
case PrimitiveOpType::GatherPacked:
|
||||
computationNodePtr = New<GatherPackedNode<ElementType>>(network->GetDeviceId(), function->Name());
|
||||
network->AddNodeToNetAndAttachInputs(computationNodePtr, { input1Node, input0Node });
|
||||
break;
|
||||
case PrimitiveOpType::Slice:
|
||||
{
|
||||
auto axis = Axis(functionConfig[L"axis"].GetValue<std::wstring>());
|
||||
int beginIndex = functionConfig[L"beginIndex"].GetValue<size_t>();
|
||||
int endIndex = functionConfig[L"endIndex"].GetValue<size_t>();
|
||||
|
||||
// Internal CNTK SliceNode takes 1 based axis indices instead of 0 based
|
||||
computationNodePtr = New<SliceNode<ElementType>>(network->GetDeviceId(), function->Name(), beginIndex, endIndex, (int)(axis.StaticAxisIndex() + 1));
|
||||
network->AddNodeToNetAndAttachInputs(computationNodePtr, { input0Node });
|
||||
break;
|
||||
}
|
||||
default:
|
||||
LogicError("Specified op %s not yet supported", PrimitiveOpTypeName(op));
|
||||
break;
|
||||
|
@ -486,11 +561,11 @@ namespace CNTK
|
|||
if (value->Data()->Shape().NumAxes() == var.Shape().NumAxes())
|
||||
return{ value->Data()->GetMatrix<ElementType>(), nullptr };
|
||||
|
||||
if (value->Data()->Shape().NumAxes() != (var.Shape().NumAxes() + var.DynamicAxes().size() + 1))
|
||||
InvalidArgument("Value's number of axes should be larger than the Variable's number of axes by 1 + number of dynamic axes");
|
||||
if (value->Data()->Shape().NumAxes() < (var.Shape().NumAxes() + var.DynamicAxes().size()))
|
||||
InvalidArgument("Value's number of axes should be larger than the Variable's number of axes by number of dynamic axes");
|
||||
|
||||
if (var.DynamicAxes().size() > 1)
|
||||
LogicError("More than one dynamic axis for a variable is currently unsupported");
|
||||
if (var.DynamicAxes().size() > 2)
|
||||
LogicError("More than 2 dynamic axis for a variable is currently unsupported");
|
||||
|
||||
size_t maxNumTimeSteps = value->Data()->Shape()[var.Shape().NumAxes()];
|
||||
size_t numSequences = value->Data()->Shape()[var.Shape().NumAxes() + 1];
|
||||
|
@ -618,9 +693,9 @@ namespace CNTK
|
|||
sequenceLengths.push_back(sequenceInfo.GetNumTimeSteps());
|
||||
}
|
||||
|
||||
// Reshuffle to data to unpack and uninterleave the CNTK form data
|
||||
// Now generate the gather indices
|
||||
auto shuffledMatrixData = std::make_shared<Matrix<ElementType>>(matrix.GetNumRows(), maxNumTimeSteps * numSequences, matrix.GetDeviceId());
|
||||
// Reshuffle to data to unpack and uninterleave the CNTK form packed data
|
||||
// Now generate the scatter indices
|
||||
auto shuffledMatrixData = std::make_shared<Matrix<ElementType>>(matrix.GetNumRows(), maxNumTimeSteps * numSequences, matrix.GetDeviceId(), matrix.GetMatrixType(), matrix.GetFormat());
|
||||
|
||||
std::vector<size_t> sequencesShorterThanLongestSequence;
|
||||
for (size_t i = 0; i < numSequences; ++i)
|
||||
|
@ -659,15 +734,15 @@ namespace CNTK
|
|||
}
|
||||
|
||||
auto tensorView = new TensorView<ElementType>(shuffledMatrixData, AsTensorShape(valueDataShape));
|
||||
auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), StorageFormat::Dense, valueDataShape, readOnly, tensorView);
|
||||
auto data = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), AsDeviceDescriptor(matrix.GetDeviceId()), AsStorageFormat(shuffledMatrixData->GetFormat()), valueDataShape, readOnly, tensorView);
|
||||
return MakeSharedObject<Value>(data, mask);
|
||||
}
|
||||
|
||||
template <typename ElementType>
|
||||
/*static*/ ValuePtr CompositeFunction::GetValueObjectFromCNTKImplMatrixAndMBLayout(Variable var, const Matrix<ElementType>& matrix, const MBLayoutPtr& layout, bool readOnly /*= true*/)
|
||||
{
|
||||
if (var.DynamicAxes().size() > 1)
|
||||
LogicError("More than one dynamic axis for a variable is currently unsupported");
|
||||
if (var.DynamicAxes().size() > 2)
|
||||
LogicError("More than 2 dynamic axis for a variable is currently unsupported");
|
||||
|
||||
if (AsDataType<ElementType>() != var.GetDataType())
|
||||
LogicError("The specified ElementType %s does not match the DataType %s", typeid(ElementType).name(), DataTypeName(var.GetDataType()));
|
||||
|
@ -732,7 +807,7 @@ namespace CNTK
|
|||
MBLayoutPtr layout = CNTKMatrixAndMBLayout.second;
|
||||
auto nodeLayout = computationNode->GetMBLayout();
|
||||
if (((layout == nullptr) != (nodeLayout == nullptr)) || ((layout != nullptr) && (*layout != *nodeLayout)))
|
||||
InvalidArgument("The layout of the specified gradient Value in incompatible with the layout of the corresponding Variable computed during Forward call");
|
||||
InvalidArgument("The layout of the specified gradient Value is incompatible with the layout of the corresponding Variable computed during Forward call");
|
||||
computationNode->As<ComputationNode<ElementType>>()->AssignGradient(*CNTKMatrixAndMBLayout.first);
|
||||
}
|
||||
|
||||
|
@ -814,12 +889,9 @@ namespace CNTK
|
|||
}
|
||||
|
||||
if (varValue == nullptr)
|
||||
{
|
||||
auto data = MakeSharedObject<NDArrayView>(var.GetDataType(), valueShape, AsDeviceDescriptor(computationNode->ValuePtr()->GetDeviceId()));
|
||||
auto mask = (nodeValue->Mask() != nullptr) ? MakeSharedObject<NDMask>(nodeValue->Mask()->Shape(), nodeValue->Mask()->Device()) : nullptr;
|
||||
varValue = MakeSharedObject<Value>(data, mask);
|
||||
}
|
||||
varValue->CopyFrom(*nodeValue);
|
||||
varValue = nodeValue->DeepClone();
|
||||
else
|
||||
varValue->CopyFrom(*nodeValue);
|
||||
}
|
||||
|
||||
void CompositeFunction::GetNetworkOutputs(std::unordered_map<Variable, ValuePtr>& outputs)
|
||||
|
@ -984,7 +1056,7 @@ namespace CNTK
|
|||
|
||||
// Builds a Function computing Floor(operand + 0.5) for 'operand'.
//   operand - the input Variable.
//   name    - optional name given to the resulting (outer Floor) Function.
FunctionPtr Round(const Variable& operand, const std::wstring& name/* = L""*/)
{
    // The 0.5 offset is created with the operand's own data type rather than as a
    // hard-coded float constant, so the Plus operands agree in data type.
    return Floor(Plus(operand, ScalarConstant(operand.GetDataType(), 0.5f)), name);
}
|
||||
|
||||
FunctionPtr Floor(const Variable& operand, const std::wstring& name/* = L""*/)
|
||||
|
@ -1012,6 +1084,71 @@ namespace CNTK
|
|||
return UnaryOp(PrimitiveOpType::Softmax, operand, Dictionary(), name);
|
||||
}
|
||||
|
||||
// Builds a Function that applies the Hardmax primitive op to 'operand'.
//   operand - the input Variable.
//   name    - optional name for the resulting Function.
FunctionPtr Hardmax(const Variable& operand, const std::wstring& name/* = L""*/)
{
    // Hardmax takes no configuration attributes, so an empty Dictionary is passed.
    Dictionary noAttributes;
    return UnaryOp(PrimitiveOpType::Hardmax, operand, std::move(noAttributes), name);
}
|
||||
|
||||
// Builds a Function that swaps two static axes of 'operand'.
//   operand      - the input Variable.
//   axis1, axis2 - the axes to exchange; both must be static axes (LogicError otherwise).
//   name         - optional name for the resulting Function.
FunctionPtr TransposeAxes(const Variable& operand, const Axis& axis1, const Axis& axis2, const std::wstring& name /*= L""*/)
{
    const bool bothAxesStatic = axis1.IsStaticAxis() && axis2.IsStaticAxis();
    if (!bothAxesStatic)
        LogicError("TransposeAxes currently does not support transposing dynamic axes");

    // The axes are recorded by name in the op's configuration.
    Dictionary axesConfig;
    axesConfig[L"axis1"] = axis1.Name();
    axesConfig[L"axis2"] = axis2.Name();

    return UnaryOp(PrimitiveOpType::TransposeAxes, operand, std::move(axesConfig), name);
}
|
||||
|
||||
// Slices 'operand' along 'axis' using the index pair (beginIndex, endIndex).
//   operand    - the input Variable.
//   axis       - the axis to slice along. A static axis is delegated to the built-in Slice op
//                (Internal::Slice). A dynamic axis must be one of the operand's dynamic axes and
//                must not be the default batch axis; it is implemented by gathering the steps
//                selected by flags built from Internal::IsWithin.
//   beginIndex - begin index; values <= 0 are treated differently from positive ones below.
//   endIndex   - end index; values <= 0 are treated differently from positive ones below.
//   name       - optional name for the resulting Function.
// Raises InvalidArgument if (endIndex - beginIndex) <= 0 or if a dynamic 'axis' is not an axis
// of the operand, and LogicError when asked to slice along the batch axis.
FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name /*= L""*/)
{
    // NOTE(review): this compares the raw indices, so mixed-sign pairs such as (1, 0) are
    // rejected here -- confirm that this is intended.
    if ((endIndex - beginIndex) <= 0)
        InvalidArgument("CNTK::Slice: endIndex (%d) - beginIndex (%d) must be a positive number", endIndex, beginIndex);

    if (axis == Axis::DefaultBatchAxis())
        LogicError("Slice is currently unsupported along the batch axis");

    if (axis.IsStaticAxis())
        return Internal::Slice(operand, axis, beginIndex, endIndex, name);

    auto operandAxes = operand.DynamicAxes();
    auto findAxis = std::find(operandAxes.begin(), operandAxes.end(), axis);
    if (findAxis == operandAxes.end())
        InvalidArgument("The specified dynamic axis named %S does not match any of the dynamic axes of the operand", axis.Name().c_str());

    // Both lambdas are invoked synchronously within this function, so 'operand' is captured
    // by reference instead of being copied.
    auto beginFlagsLambda = [beginIndex, &operand]() {
        return (beginIndex > 0) ? Minus(ScalarConstant(operand.GetDataType(), 1.0f), Internal::IsWithin(operand, beginIndex)) : Internal::IsWithin(operand, beginIndex);
    };

    auto endFlagsLambda = [endIndex, &operand]() {
        return (endIndex > 0) ? Internal::IsWithin(operand, endIndex) : Minus(ScalarConstant(operand.GetDataType(), 1.0f), Internal::IsWithin(operand, endIndex));
    };

    // An index of 0 needs no flags of its own (Internal::IsWithin rejects offset 0),
    // so only the other bound contributes in that case.
    FunctionPtr flags;
    if (beginIndex == 0)
        flags = endFlagsLambda();
    else if (endIndex == 0)
        flags = beginFlagsLambda();
    else
        flags = ElementTimes(beginFlagsLambda(), endFlagsLambda());

    // Since we are slicing along a dynamic axis, the output variable's dynamic axes will be different than the operand
    std::vector<Axis> newDynamicAxes;
    for (const auto& operandAxis : operandAxes)
    {
        if (operandAxis == axis)
        {
            // If we are selecting just one frame from the dynamic axis, we can remove that axis
            if ((endIndex - beginIndex) > 1)
                newDynamicAxes.push_back(CompositeFunction::NextAutoGeneratedDynamicAxis());
        }
        else
            newDynamicAxes.push_back(operandAxis);
    }

    // Forward the caller-supplied name; it was previously dropped on this path.
    return Internal::Gather(operand, flags, newDynamicAxes, name);
}
|
||||
|
||||
FunctionPtr BinaryOp(PrimitiveOpType op, const Variable& leftOperand, const Variable& rightOperand, Dictionary&& opConfig, const std::wstring& name)
|
||||
{
|
||||
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(op, std::vector<Variable>({ leftOperand, rightOperand }), std::move(opConfig), name), name);
|
||||
|
@ -1074,6 +1211,13 @@ namespace CNTK
|
|||
return BinaryOp(PrimitiveOpType::Times, leftOperand, rightOperand, std::move(additionalProperties), name);
|
||||
}
|
||||
|
||||
// Builds a Function applying the TransposeTimes primitive op to the two operands.
//   leftOperand, rightOperand - the inputs of the product.
//   numOutputAxes             - stored in the op's configuration under "numOutputAxes".
//   name                      - optional name for the resulting Function.
FunctionPtr TransposeTimes(const Variable& leftOperand, const Variable& rightOperand, size_t numOutputAxes /*= 1*/, const std::wstring& name/* = L""*/)
{
    Dictionary timesConfig;
    timesConfig[L"numOutputAxes"] = numOutputAxes;

    return BinaryOp(PrimitiveOpType::TransposeTimes, leftOperand, rightOperand, std::move(timesConfig), name);
}
|
||||
|
||||
FunctionPtr SquaredError(const Variable& prediction, const Variable& targets, const std::wstring& name/* = L""*/)
|
||||
{
|
||||
return BinaryOp(PrimitiveOpType::SquaredError, prediction, targets, Dictionary(), name);
|
||||
|
@ -1081,18 +1225,20 @@ namespace CNTK
|
|||
|
||||
// Builds the cross-entropy-with-softmax criterion as a composition of other functions:
//   ReduceSum( ReduceLogSum(prediction, axis 0) - TransposeTimes(labels, prediction) ).
//   prediction - the Variable holding the (unnormalized) predictions.
//   labels     - the Variable holding the labels.
//   name       - optional name given to the outer ReduceSum Function.
// This replaces a direct use of the PrimitiveOpType::CrossEntropyWithSoftmax op.
FunctionPtr CrossEntropyWithSoftmax(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/)
{
    return ReduceSum(Minus(ReduceLogSum(prediction, Axis(0)), TransposeTimes(labels, prediction)), name);
}
|
||||
|
||||
// Builds the classification-error criterion as a composition of other functions:
//   ReduceSum( 1 - TransposeTimes(labels, Hardmax(prediction)) ).
//   prediction - the Variable holding the predictions.
//   labels     - the Variable holding the labels.
//   name       - optional name given to the outer ReduceSum Function.
// This replaces a direct use of the PrimitiveOpType::ClassificationError op.
FunctionPtr ClassificationError(const Variable& prediction, const Variable& labels, const std::wstring& name/* = L""*/)
{
    // The constant 1 is created with the prediction's data type so both Minus operands agree.
    return ReduceSum(Minus(ScalarConstant(prediction.GetDataType(), 1.0f), TransposeTimes(labels, Hardmax(prediction))), name);
}
|
||||
|
||||
FunctionPtr PastValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name/* = L""*/)
|
||||
{
|
||||
if (operand.DynamicAxes().size() != 1)
|
||||
InvalidArgument("PastValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic axis");
|
||||
if (operand.DynamicAxes().size() != 2)
|
||||
InvalidArgument("PastValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic sequence-axis");
|
||||
|
||||
auto additionalProperties = Dictionary();
|
||||
additionalProperties[L"stepSize"] = DictionaryValue(stepSize);
|
||||
|
@ -1101,8 +1247,8 @@ namespace CNTK
|
|||
|
||||
FunctionPtr FutureValue(const Variable& initialState, const Variable& operand, size_t stepSize, const std::wstring& name/* = L""*/)
|
||||
{
|
||||
if (operand.DynamicAxes().size() != 1)
|
||||
InvalidArgument("FutureValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic axis");
|
||||
if (operand.DynamicAxes().size() != 2)
|
||||
InvalidArgument("FutureValue overload that does not explicitly specify a dynamic axis can only be used for operands with exactly one dynamic sequence-axis");
|
||||
|
||||
auto additionalProperties = Dictionary();
|
||||
additionalProperties[L"stepSize"] = DictionaryValue(stepSize);
|
||||
|
@ -1111,7 +1257,17 @@ namespace CNTK
|
|||
|
||||
// Builds a Function that applies the SumAll primitive op to 'operand'.
//   operand - the input Variable.
//   name    - optional name for the resulting Function.
// Uses PrimitiveOpType::SumAll; the same change set replaces the former ReduceSum
// enumerator with ReduceElements.
FunctionPtr ReduceSum(const Variable& operand, const std::wstring& name/* = L""*/)
{
    return UnaryOp(PrimitiveOpType::SumAll, operand, Dictionary(), name);
}
|
||||
|
||||
// Builds a Function that sum-reduces 'operand' along 'axis'.
//   operand - the input Variable.
//   axis    - the axis to reduce over.
//   name    - optional name for the resulting Function.
FunctionPtr ReduceSum(const Variable& operand, const Axis& axis, const std::wstring& name/* = L""*/)
{
    const std::wstring& sumOpName = PrimitiveFunction::InternalSumReductionOpName;
    return Internal::ReduceElements(operand, sumOpName, axis, name);
}
|
||||
|
||||
// Builds a Function that log-sum-reduces 'operand' along 'axis'.
//   operand - the input Variable.
//   axis    - the axis to reduce over.
//   name    - optional name for the resulting Function.
FunctionPtr ReduceLogSum(const Variable& operand, const Axis& axis, const std::wstring& name/* = L""*/)
{
    const std::wstring& logSumOpName = PrimitiveFunction::InternalLogSumReductionOpName;
    return Internal::ReduceElements(operand, logSumOpName, axis, name);
}
|
||||
|
||||
FunctionPtr PerDimMeanVarianceNormalize(const Variable& operand, const NDArrayViewPtr& mean, const NDArrayViewPtr& invStdDev, const std::wstring& name /*= L""*/)
|
||||
|
@ -1207,4 +1363,94 @@ namespace CNTK
|
|||
|
||||
return CompositeFunction::Create(MakeSharedObject<PrimitiveFunction>(PrimitiveOpType::Combine, inputs, Dictionary(), name), name);
|
||||
}
|
||||
|
||||
namespace Internal
|
||||
{
|
||||
FunctionPtr PackedIndex(const Variable& operand, const Variable& index, const std::wstring& name /*= L""*/)
|
||||
{
|
||||
return BinaryOp(PrimitiveOpType::PackedIndex, operand, index, Dictionary(), name);
|
||||
}
|
||||
|
||||
FunctionPtr GatherPacked(const Variable& operand, const Variable& packedIndex, const std::wstring& name /*= L""*/)
|
||||
{
|
||||
return BinaryOp(PrimitiveOpType::GatherPacked, operand, packedIndex, Dictionary(), name);
|
||||
}
|
||||
|
||||
FunctionPtr ZeroesLike(const Variable& operand)
|
||||
{
|
||||
if (operand.Shape().NumAxes() > 1)
|
||||
LogicError("ZerosLike currently does not support operands with more than 1 static axes");
|
||||
|
||||
auto rowSliceFunc = Internal::Slice(operand, Axis(0), 0, 1);
|
||||
return Minus(rowSliceFunc, rowSliceFunc);
|
||||
}
|
||||
|
||||
FunctionPtr IsWithin(const Variable& operand, int offset, const std::wstring& name /*= L""*/)
|
||||
{
|
||||
if (offset == 0)
|
||||
InvalidArgument("Internal::CNTK::IsWithin: The offset must be positive");
|
||||
|
||||
if (offset > 0)
|
||||
return PastValue(ScalarConstant(operand.GetDataType(), 1.0f), ZeroesLike(operand), offset, name);
|
||||
else
|
||||
return FutureValue(ScalarConstant(operand.GetDataType(), 1.0f), ZeroesLike(operand), -offset, name);
|
||||
}
|
||||
|
||||
FunctionPtr Where(const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name /*= L""*/)
|
||||
{
|
||||
auto additionalProperties = Dictionary();
|
||||
std::vector<std::wstring> newDynamicAxesNames;
|
||||
for (auto axis : newDynamicAxes)
|
||||
newDynamicAxesNames.push_back(axis.Name());
|
||||
|
||||
additionalProperties[L"newDynamicAxes"] = AsDictionaryValueVector(newDynamicAxesNames);
|
||||
return UnaryOp(PrimitiveOpType::Where, condition, std::move(additionalProperties), name);
|
||||
}
|
||||
|
||||
FunctionPtr Gather(const Variable& operand, const Variable& condition, const std::vector<Axis>& newDynamicAxes, const std::wstring& name /*= L""*/)
|
||||
{
|
||||
return Internal::GatherPacked(operand, Internal::PackedIndex(operand, Where(condition, newDynamicAxes)));
|
||||
}
|
||||
|
||||
FunctionPtr Slice(const Variable& operand, const Axis& axis, int beginIndex, int endIndex, const std::wstring& name /*= L""*/)
|
||||
{
|
||||
auto additionalProperties = Dictionary();
|
||||
additionalProperties[L"axis"] = axis.Name();
|
||||
additionalProperties[L"beginIndex"] = (size_t)beginIndex;
|
||||
additionalProperties[L"endIndex"] = (size_t)endIndex;
|
||||
|
||||
return UnaryOp(PrimitiveOpType::Slice, operand, std::move(additionalProperties), name);
|
||||
}
|
||||
|
||||
FunctionPtr ReduceElements(const Variable& operand, const std::wstring& reductionOpName, const Axis& axis, const std::wstring& name /*= L""*/)
|
||||
{
|
||||
using namespace std::placeholders;
|
||||
|
||||
if (axis.IsStaticAxis())
|
||||
{
|
||||
auto additionalProperties = Dictionary();
|
||||
additionalProperties[L"CNTKInternalReductionAxisIndex"] = (size_t)(axis.StaticAxisIndex() + 1);
|
||||
additionalProperties[L"ReductionOpName"] = reductionOpName;
|
||||
return UnaryOp(PrimitiveOpType::ReduceElements, operand, std::move(additionalProperties), name);
|
||||
}
|
||||
|
||||
if (axis == Axis::DefaultBatchAxis())
|
||||
LogicError("Reduction is currently unsupported along the batch axis");
|
||||
|
||||
if (reductionOpName != PrimitiveFunction::InternalSumReductionOpName)
|
||||
LogicError("%S reduction along dynamic axis is currently unsupported", reductionOpName.c_str());
|
||||
|
||||
std::function<FunctionPtr(const Variable& leftOperand, const Variable& rightOperand)> reductionFunctor;
|
||||
if (reductionOpName == PrimitiveFunction::InternalSumReductionOpName)
|
||||
reductionFunctor = std::bind(Plus, _1, _2, L"");
|
||||
|
||||
// We are reducing over a dynamic axis which is currently implemented using recurrence
|
||||
auto cumulativeSumFunctionPlaceholder = Placeholder(operand.Shape());
|
||||
auto prevAccumulatedValuesFunction = PastValue(ScalarConstant(operand.GetDataType(), 0.0f), cumulativeSumFunctionPlaceholder, 1);
|
||||
auto cumulativeSumFunction = reductionFunctor(prevAccumulatedValuesFunction, operand);
|
||||
cumulativeSumFunction->ReplacePlaceholders({ { cumulativeSumFunctionPlaceholder, cumulativeSumFunction } });
|
||||
|
||||
return CNTK::Slice(cumulativeSumFunction, axis, -1, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,7 +27,12 @@ namespace CNTK
|
|||
Abs,
|
||||
Reciprocal,
|
||||
Softmax,
|
||||
Hardmax,
|
||||
TransposeAxes,
|
||||
Where,
|
||||
Slice,
|
||||
Pooling,
|
||||
SumAll,
|
||||
Plus,
|
||||
Minus,
|
||||
ElementTimes,
|
||||
|
@ -37,14 +42,17 @@ namespace CNTK
|
|||
LessEqual,
|
||||
Greater,
|
||||
GreaterEqual,
|
||||
PackedIndex,
|
||||
GatherPacked,
|
||||
Times,
|
||||
TransposeTimes,
|
||||
Convolution,
|
||||
SquaredError,
|
||||
CrossEntropyWithSoftmax,
|
||||
ClassificationError,
|
||||
PastValue,
|
||||
FutureValue,
|
||||
ReduceSum,
|
||||
ReduceElements,
|
||||
BatchNormalization,
|
||||
Combine,
|
||||
};
|
||||
|
@ -77,7 +85,12 @@ namespace CNTK
|
|||
{ PrimitiveOpType::Abs, "Abs" },
|
||||
{ PrimitiveOpType::Reciprocal, "Reciprocal" },
|
||||
{ PrimitiveOpType::Softmax, "Softmax" },
|
||||
{ PrimitiveOpType::Hardmax, "Hardmax" },
|
||||
{ PrimitiveOpType::TransposeAxes, "TransposeAxes" },
|
||||
{ PrimitiveOpType::Where, "Where" },
|
||||
{ PrimitiveOpType::Slice, "Slice" },
|
||||
{ PrimitiveOpType::Pooling, "Pooling" },
|
||||
{ PrimitiveOpType::SumAll, "SumAll" },
|
||||
{ PrimitiveOpType::Plus, "Plus" },
|
||||
{ PrimitiveOpType::Minus, "Minus" },
|
||||
{ PrimitiveOpType::ElementTimes, "ElementTimes" },
|
||||
|
@ -87,14 +100,17 @@ namespace CNTK
|
|||
{ PrimitiveOpType::LessEqual, "LessEqual" },
|
||||
{ PrimitiveOpType::Greater, "Greater" },
|
||||
{ PrimitiveOpType::GreaterEqual, "GreaterEqual" },
|
||||
{ PrimitiveOpType::PackedIndex, "PackedIndex" },
|
||||
{ PrimitiveOpType::GatherPacked, "GatherPacked" },
|
||||
{ PrimitiveOpType::Times, "Times" },
|
||||
{ PrimitiveOpType::TransposeTimes, "TransposeTimes" },
|
||||
{ PrimitiveOpType::Convolution, "Convolution" },
|
||||
{ PrimitiveOpType::SquaredError, "SquaredError" },
|
||||
{ PrimitiveOpType::CrossEntropyWithSoftmax, "CrossEntropyWithSoftmax" },
|
||||
{ PrimitiveOpType::ClassificationError, "ClassificationError" },
|
||||
{ PrimitiveOpType::PastValue, "PastValue" },
|
||||
{ PrimitiveOpType::FutureValue, "FutureValue" },
|
||||
{ PrimitiveOpType::ReduceSum, "ReduceSum" },
|
||||
{ PrimitiveOpType::ReduceElements, "ReduceElements" },
|
||||
{ PrimitiveOpType::BatchNormalization, "BatchNormalization" },
|
||||
{ PrimitiveOpType::Combine, "Combine" }
|
||||
};
|
||||
|
@ -107,6 +123,15 @@ namespace CNTK
|
|||
|
||||
class PrimitiveFunction final : public Function
|
||||
{
|
||||
public:
|
||||
static const std::wstring InternalSumReductionOpName;
|
||||
static const std::wstring InternalLogSumReductionOpName;
|
||||
static const std::wstring InternalMeanReductionOpName;
|
||||
static const std::wstring InternalMaxReductionOpName;
|
||||
static const std::wstring InternalMinReductionOpName;
|
||||
static const std::wstring InternalAllReductionOpName;
|
||||
static const std::wstring InternalAnyReductionOpName;
|
||||
|
||||
public:
|
||||
PrimitiveFunction(PrimitiveOpType op, const std::vector<Variable>& inputs, Dictionary&& functionConfig, const std::wstring& functionName = L"")
|
||||
: Function(inputs, GetOutputVariables(op, inputs, this, functionConfig), nullptr, functionName), m_op(op), m_functionConfig(std::move(functionConfig))
|
||||
|
@ -242,16 +267,26 @@ namespace CNTK
|
|||
DataType outputDataType = inputs[0].GetDataType();
|
||||
|
||||
// We currently require that the inputs' dynamic axes if any match
|
||||
std::vector<Axis> outputDynamicAxes = inputs[0].DynamicAxes();
|
||||
for (auto inputVar : inputs)
|
||||
std::vector<Axis> outputDynamicAxes;
|
||||
if (op == PrimitiveOpType::Where)
|
||||
;
|
||||
else if ((op == PrimitiveOpType::PackedIndex) || (op == PrimitiveOpType::GatherPacked))
|
||||
{
|
||||
auto currentInputDynamicAxes = inputVar.DynamicAxes();
|
||||
if (outputDynamicAxes.empty())
|
||||
outputDynamicAxes = currentInputDynamicAxes;
|
||||
else
|
||||
outputDynamicAxes = inputs[1].DynamicAxes();
|
||||
}
|
||||
else
|
||||
{
|
||||
outputDynamicAxes = inputs[0].DynamicAxes();
|
||||
for (auto inputVar : inputs)
|
||||
{
|
||||
if (!currentInputDynamicAxes.empty() && (currentInputDynamicAxes != outputDynamicAxes))
|
||||
LogicError("Currently if an operand of a binary elementwise operation has any dynamic axes, those must match the dynamic axes of the other operand");
|
||||
auto currentInputDynamicAxes = inputVar.DynamicAxes();
|
||||
if (outputDynamicAxes.empty())
|
||||
outputDynamicAxes = currentInputDynamicAxes;
|
||||
else
|
||||
{
|
||||
if (!currentInputDynamicAxes.empty() && (currentInputDynamicAxes != outputDynamicAxes))
|
||||
LogicError("Currently if an operand of a binary elementwise operation has any dynamic axes, those must match the dynamic axes of the other operand");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -268,9 +303,38 @@ namespace CNTK
|
|||
case PrimitiveOpType::Abs:
|
||||
case PrimitiveOpType::Reciprocal:
|
||||
case PrimitiveOpType::Softmax:
|
||||
case PrimitiveOpType::Hardmax:
|
||||
assert(inputs.size() == 1);
|
||||
if (((op == PrimitiveOpType::Softmax) || (op == PrimitiveOpType::Hardmax)) && (inputs[0].Shape().NumAxes() > 1))
|
||||
InvalidArgument("Softmax/Hardmax operation can only be applied to a 1D input");
|
||||
|
||||
outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[0].Shape()), outputDataType, owner, outputDynamicAxes));
|
||||
break;
|
||||
case PrimitiveOpType::TransposeAxes:
|
||||
{
|
||||
assert(inputs.size() == 1);
|
||||
auto axis1 = Axis(functionConfig[L"axis1"].GetValue<std::wstring>());
|
||||
auto axis2 = Axis(functionConfig[L"axis2"].GetValue<std::wstring>());
|
||||
|
||||
if (!axis1.IsStaticAxis() || !axis2.IsStaticAxis())
|
||||
LogicError("TransposeAxes operation currently does not support transposing dynamic axes");
|
||||
|
||||
auto transposedTensorShape = AsTensorShape(inputs[0].Shape(), true);
|
||||
transposedTensorShape.SwapDimsInPlace(axis1.StaticAxisIndex(), axis2.StaticAxisIndex());
|
||||
outputs.push_back(Variable(AsNDShape(transposedTensorShape), outputDataType, owner, outputDynamicAxes));
|
||||
break;
|
||||
}
|
||||
case PrimitiveOpType::Where:
|
||||
{
|
||||
assert(inputs.size() == 1);
|
||||
std::vector<Axis> newDynamicAxes;
|
||||
auto newDynamicAxesNames = AsBasicElementTypeVector<std::wstring>(functionConfig[L"newDynamicAxes"].GetValue<std::vector<DictionaryValue>>());
|
||||
for (auto axisName : newDynamicAxesNames)
|
||||
newDynamicAxes.push_back(Axis(axisName));
|
||||
|
||||
outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[0].Shape()), outputDataType, owner, newDynamicAxes));
|
||||
break;
|
||||
}
|
||||
case PrimitiveOpType::Pooling:
|
||||
{
|
||||
assert(inputs.size() == 1);
|
||||
|
@ -282,6 +346,10 @@ namespace CNTK
|
|||
outputs.push_back(Variable(ConvolutionOpOutputShape(inputs[0].Shape(), poolingWindowsShape, { 1 }, strides, { true }, autoPadding, lowerPad, upperPad, false), outputDataType, owner, outputDynamicAxes));
|
||||
break;
|
||||
}
|
||||
case PrimitiveOpType::SumAll:
|
||||
assert(inputs.size() == 1);
|
||||
outputs.push_back(Variable({}, outputDataType, owner, std::vector<Axis>({})));
|
||||
break;
|
||||
case PrimitiveOpType::Plus:
|
||||
case PrimitiveOpType::Minus:
|
||||
case PrimitiveOpType::ElementTimes:
|
||||
|
@ -297,15 +365,26 @@ namespace CNTK
|
|||
case PrimitiveOpType::Times:
|
||||
{
|
||||
assert(inputs.size() == 2);
|
||||
|
||||
// TODO: Support dynamic axes on the left operand
|
||||
if (!inputs[0].DynamicAxes().empty())
|
||||
LogicError("Dynamic axes are currently unsupported for left operand of a Times operation");
|
||||
|
||||
size_t numOutputAxes = functionConfig[L"numOutputAxes"].GetValue<size_t>();
|
||||
outputs.push_back(Variable(TimesOpOutputShape(inputs[0].Shape(), inputs[1].Shape(), numOutputAxes), outputDataType, owner, outputDynamicAxes));
|
||||
break;
|
||||
}
|
||||
case PrimitiveOpType::TransposeTimes:
|
||||
{
|
||||
assert(inputs.size() == 2);
|
||||
|
||||
auto numLeftOperandAxes = inputs[0].Shape().NumAxes();
|
||||
if (numLeftOperandAxes > 2)
|
||||
InvalidArgument("TransposeTimes operation only supports left operands of rank 1 or 2");
|
||||
|
||||
NDShape transposedLeftOperandShape(2, 1);
|
||||
for (size_t i = 0; i < numLeftOperandAxes; ++i)
|
||||
transposedLeftOperandShape[transposedLeftOperandShape.NumAxes() - i - 1] = inputs[0].Shape()[i];
|
||||
|
||||
size_t numOutputAxes = functionConfig[L"numOutputAxes"].GetValue<size_t>();
|
||||
outputs.push_back(Variable(TimesOpOutputShape(transposedLeftOperandShape, inputs[1].Shape(), numOutputAxes), outputDataType, owner, outputDynamicAxes));
|
||||
break;
|
||||
}
|
||||
case PrimitiveOpType::Convolution:
|
||||
{
|
||||
assert(inputs.size() == 2);
|
||||
|
@ -341,26 +420,45 @@ namespace CNTK
|
|||
for (size_t i = 0; i < inputs[0].Shape().NumAxes(); ++i)
|
||||
reductionAxes.push_back(i);
|
||||
|
||||
outputs.push_back(Variable(ReductionOpOutputShape(op, predictionShape, reductionAxes), outputDataType, owner, {}));
|
||||
outputs.push_back(Variable(ReductionOpOutputShape(op, predictionShape, reductionAxes), outputDataType, owner, std::vector<Axis>({})));
|
||||
break;
|
||||
}
|
||||
case PrimitiveOpType::PastValue:
|
||||
case PrimitiveOpType::FutureValue:
|
||||
{
|
||||
assert(inputs.size() == 2);
|
||||
Variable initialStateVar = inputs[0];
|
||||
Variable inputOperandVar = inputs[1];
|
||||
// TODO: Currently we only support a scalar initial state
|
||||
if (!initialStateVar.IsConstant() || (initialStateVar.Shape().NumAxes() > 0))
|
||||
LogicError("Currently PastValue/FutureValue Function only supports scalar initial state");
|
||||
|
||||
// TODO: We currently only support input operand with 1 static axis for PastValue/FutureValue
|
||||
if (inputOperandVar.Shape().NumAxes() > 1)
|
||||
LogicError("Currently PastValue/FutureValue Function only supports input operand with <= 1 static axis");
|
||||
|
||||
// TODO: We currently only support input operand with 1 dynamic axis for PastValue/FutureValue
|
||||
if (inputOperandVar.DynamicAxes().size() != 2)
|
||||
LogicError("Currently PastValue/FutureValue Function only supports input operand with with 2 dynamic axis (1 sequence-axis and 1 batch-axis)");
|
||||
|
||||
outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[1].Shape()), outputDataType, owner, outputDynamicAxes));
|
||||
break;
|
||||
case PrimitiveOpType::ReduceSum:
|
||||
}
|
||||
case PrimitiveOpType::ReduceElements:
|
||||
{
|
||||
assert(inputs.size() == 1);
|
||||
|
||||
// TODO: For reductions, we should remove any of the dynamic axes from 'outputDynamicAxes' that are being reduced over.
|
||||
// Currently we only support reductions that reduce over all axes
|
||||
std::vector<Axis> reductionOutputDynamicAxes = {};
|
||||
auto CNTKInternalReductionAxisIndex = functionConfig[L"CNTKInternalReductionAxisIndex"].GetValue<size_t>();
|
||||
std::vector<size_t> reductionAxes;
|
||||
for (size_t i = 0; i < inputs[0].Shape().NumAxes(); ++i)
|
||||
reductionAxes.push_back(i);
|
||||
// TODO: Do not use a integer literal for the special value of axis id that indicates all static axes
|
||||
if (CNTKInternalReductionAxisIndex == 0)
|
||||
{
|
||||
for (size_t i = 0; i < inputs[0].Shape().NumAxes(); ++i)
|
||||
reductionAxes.push_back(i);
|
||||
}
|
||||
else
|
||||
reductionAxes.push_back(CNTKInternalReductionAxisIndex - 1);
|
||||
|
||||
outputs.push_back(Variable(ReductionOpOutputShape(op, inputs[0].Shape(), reductionAxes), outputDataType, owner, reductionOutputDynamicAxes));
|
||||
outputs.push_back(Variable(ReductionOpOutputShape(op, inputs[0].Shape(), reductionAxes), outputDataType, owner, inputs[0].DynamicAxes()));
|
||||
break;
|
||||
}
|
||||
case PrimitiveOpType::BatchNormalization:
|
||||
|
@ -369,6 +467,60 @@ namespace CNTK
|
|||
case PrimitiveOpType::Combine:
|
||||
outputs = inputs;
|
||||
break;
|
||||
case PrimitiveOpType::PackedIndex:
|
||||
outputs.push_back(Variable(UnaryElementwiseOpOutputShape(inputs[1].Shape()), outputDataType, owner, outputDynamicAxes));
|
||||
break;
|
||||
case PrimitiveOpType::GatherPacked:
{
    // The output shape is derived from the source data operand (inputs[0]).
    const NDShape& sourceShape = inputs[0].Shape();
    bool sourceHasDynamicAxis = !inputs[0].DynamicAxes().empty();
    NDShape outputShape;

    // inherit tensor dimension from sourceData, minus the last (column or time) dimension. TODO this needs to become simpler...
    if (sourceHasDynamicAxis)
        outputShape = sourceShape;
    else
    {
        // Drop the trailing axis of the *source* shape. The sub-shape must be taken from
        // sourceShape, not from the still-empty outputShape (whose NumAxes() - 1 would underflow).
        if (sourceShape.NumAxes() > 1)
            outputShape = sourceShape.SubShape(0, sourceShape.NumAxes() - 1);
        else
            outputShape = {};
    }

    outputs.push_back(Variable(outputShape, outputDataType, owner, outputDynamicAxes));
    break;
}
|
||||
case PrimitiveOpType::Slice:
{
    // Output shape inference for Slice: narrows one static axis of inputs[0] to the
    // half-open range [beginIndex, endIndex). A negative beginIndex and a non-positive
    // endIndex are offset by the dimension of the sliced axis.
    auto sliceAxis = Axis(functionConfig[L"axis"].GetValue<std::wstring>());
    int beginIndex = functionConfig[L"beginIndex"].GetValue<size_t>();
    int endIndex = functionConfig[L"endIndex"].GetValue<size_t>();

    if (!sliceAxis.IsStaticAxis())
        LogicError("Built-in Slice operation currently does not support slicing along dynamic axis");

    // Only query the static index once the axis is known to be static.
    const auto sliceAxisIndex = sliceAxis.StaticAxisIndex();
    if (sliceAxisIndex >= inputs[0].Shape().NumAxes())
        InvalidArgument("The specified axis index (%d) for the Slice operation is outside the bounds of the available axes of the input", (int)sliceAxisIndex);

    size_t axisDim = inputs[0].Shape()[sliceAxisIndex];
    int resolvedBegin = (beginIndex >= 0) ? beginIndex : beginIndex + axisDim;
    int resolvedEnd = (endIndex > 0) ? endIndex : endIndex + axisDim;

    const bool rangeIsInvalid = (axisDim < resolvedEnd) || (resolvedEnd < resolvedBegin) || (resolvedBegin < 0);
    if (rangeIsInvalid)
    {
        RuntimeError("Slice operation: Index range [%d,%d), interpreted as [%d,%d), is invalid for input ([%S]).",
                     beginIndex,
                     endIndex,
                     resolvedBegin,
                     resolvedEnd,
                     inputs[0].Shape().AsString().c_str());
    }

    auto slicedTensorShape = AsTensorShape(inputs[0].Shape(), true);

    // propagate as much as we can
    const bool axisWithinRank = (sliceAxisIndex < slicedTensorShape.GetRank());
    const bool rangeIsOrdered = (0 <= resolvedBegin) && (resolvedBegin <= resolvedEnd) && (resolvedEnd <= axisDim);
    if (axisWithinRank && rangeIsOrdered)
        slicedTensorShape.NarrowTo(sliceAxisIndex, resolvedBegin, resolvedEnd);

    outputs.push_back(Variable(AsNDShape(slicedTensorShape), outputDataType, owner, outputDynamicAxes));
    break;
}
|
||||
default:
|
||||
LogicError("Specified op %s not yet supported", PrimitiveOpTypeName(op));
|
||||
break;
|
||||
|
@ -417,6 +569,17 @@ namespace CNTK
|
|||
std::unordered_map<StreamInfo, std::pair<NDArrayViewPtr, NDArrayViewPtr>>& computedMeanAndInvStdDevs,
|
||||
const DeviceDescriptor& device /*= DeviceDescriptor::CPUDevice()*/);
|
||||
|
||||
public:
|
||||
static std::wstring s_internalDefaultDynamicAxisName;
|
||||
static std::wstring s_internalNoSequenceAxisName;
|
||||
|
||||
// Returns an Axis named "autoGeneratedDynamicAxis_<k>", where <k> is the current value of a
// function-local atomic counter that starts at 0 and is incremented on every call.
static Axis NextAutoGeneratedDynamicAxis()
{
    static const std::wstring namePrefix = L"autoGeneratedDynamicAxis_";
    static std::atomic<unsigned int> axisCounter(0);

    // fetch_add yields the value held before the increment, same as post-increment.
    const unsigned int axisId = axisCounter.fetch_add(1);
    return Axis(namePrefix + std::to_wstring(axisId));
}
|
||||
|
||||
public:
|
||||
static CompositeFunctionPtr Create(const FunctionPtr& rootFunction, const std::wstring& name = L"")
|
||||
{
|
||||
|
@ -524,4 +687,17 @@ namespace CNTK
|
|||
// the next 'Backward' call.
|
||||
std::unordered_set<Variable> m_currentBackpropRoots;
|
||||
};
|
||||
|
||||
inline std::vector<CNTK::Axis> DynamicAxesFromInternalDynamicAxisName(const std::wstring& internalDynamicAxisName)
|
||||
{
|
||||
std::vector<CNTK::Axis> inputVarDynamicAxes;
|
||||
if (internalDynamicAxisName == CNTK::CompositeFunction::s_internalDefaultDynamicAxisName)
|
||||
inputVarDynamicAxes = { CNTK::Axis::DefaultDynamicAxis(), CNTK::Axis::DefaultBatchAxis() };
|
||||
else if (internalDynamicAxisName == CNTK::CompositeFunction::s_internalNoSequenceAxisName)
|
||||
inputVarDynamicAxes = { CNTK::Axis::DefaultBatchAxis() };
|
||||
else
|
||||
inputVarDynamicAxes = { CNTK::Axis(internalDynamicAxisName), CNTK::Axis::DefaultBatchAxis() };
|
||||
|
||||
return inputVarDynamicAxes;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -8,19 +8,18 @@
|
|||
#include "Utils.h"
|
||||
|
||||
#define UPDATE_FUNCTION \
|
||||
switch (smoothedGradientValue->GetDataType()) \
|
||||
switch (smoothedGradientValue->GetDataType()) \
|
||||
{ \
|
||||
case DataType::Float: \
|
||||
Update<float>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
|
||||
Update<float>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
|
||||
break; \
|
||||
case DataType::Double: \
|
||||
Update<double>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
|
||||
Update<double>(parameter, gradientValue, smoothedGradientValue, trainingSampleCount); \
|
||||
break; \
|
||||
default: \
|
||||
NOT_IMPLEMENTED; \
|
||||
}
|
||||
|
||||
|
||||
using namespace Microsoft::MSR::CNTK;
|
||||
using namespace std;
|
||||
|
||||
|
@ -141,7 +140,7 @@ namespace CNTK
|
|||
// L1 regularizer with proximal gradient descent method
|
||||
if (m_additionalOptions.l1RegularizationWeight > 0)
|
||||
{
|
||||
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
|
||||
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
|
||||
// multiply by actualMBSize so that it's invariant to minibatch size since learning rate is per sample
|
||||
auto weight = ElementType(learningRate * m_additionalOptions.l1RegularizationWeight * actualMBSize);
|
||||
parameterValue->GetWritableMatrix<ElementType>()->InplaceSoftThreshold(weight);
|
||||
|
@ -154,48 +153,49 @@ namespace CNTK
|
|||
return arrayView->GetWritableTensorView<ElementType>();
|
||||
}
|
||||
|
||||
LearnerBase::LearnerBase(const unordered_set<Parameter>& parameters)
|
||||
LearnerBase::LearnerBase(const unordered_set<Parameter>& parameters,
|
||||
const LearningRatesPerSample& learningRates,
|
||||
bool allocateSmoothGradients /* = true */)
|
||||
: Learner(parameters),
|
||||
m_learningRatePerSample(0.0),
|
||||
m_sampleCount(0)
|
||||
m_learningRates(learningRates),
|
||||
m_sampleCount(0),
|
||||
m_minibatchCount(0)
|
||||
{
|
||||
const unordered_set<Parameter>& parameterSet = parameters;
|
||||
for (const auto& parameter : parameterSet)
|
||||
for (const auto& parameter : parameters)
|
||||
{
|
||||
// TODO: using the same device to allocate data for all smoothed gradients. Is this correct?
|
||||
// Should the device be specified on the per-parameter basis?
|
||||
NDArrayViewPtr view;
|
||||
if (parameter.GetDataType() == DataType::Float)
|
||||
if (!allocateSmoothGradients)
|
||||
{
|
||||
view = MakeSharedObject<NDArrayView>(0.0f, parameter.Shape(), parameter.Value()->Device());
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
view = MakeSharedObject<NDArrayView>(0.0, parameter.Shape(), parameter.Value()->Device());
|
||||
}
|
||||
|
||||
|
||||
NDArrayViewPtr view = AllocateNDArrayView(parameter, parameter.Shape());
|
||||
m_smoothedGradientValues.insert(make_pair(parameter, view));
|
||||
m_additionalOptions.learningRateMultipliers.insert(make_pair(parameter, 1.0));
|
||||
}
|
||||
}
|
||||
|
||||
void LearnerBase::ResetSmoothedGradients()
|
||||
/*static*/ NDArrayViewPtr LearnerBase::AllocateNDArrayView(const Parameter& parameter, const NDShape& shape)
|
||||
{
|
||||
for (const auto& parameter : Parameters())
|
||||
if (parameter.GetDataType() == DataType::Float)
|
||||
{
|
||||
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
|
||||
const auto& data = smoothedGradientValue;
|
||||
switch (data->GetDataType())
|
||||
{
|
||||
case DataType::Float:
|
||||
data->SetValue(0.0f);
|
||||
break;
|
||||
case DataType::Double:
|
||||
data->SetValue(0.0);
|
||||
break;
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", ::CNTK::DataTypeName(data->GetDataType()));
|
||||
}
|
||||
return MakeSharedObject<NDArrayView>(float(0.0), shape, parameter.Value()->Device());
|
||||
}
|
||||
else
|
||||
{
|
||||
return MakeSharedObject<NDArrayView>(0.0, shape, parameter.Value()->Device());
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a two-axis shape { rows, columns } of the matrix obtained from the parameter's value.
// The matrix is fetched as float when the parameter's data type is Float, and as double otherwise.
/*static*/ NDShape LearnerBase::GetMatrixShape(const Parameter& parameter)
{
    size_t numRows = 0;
    size_t numCols = 0;

    const bool isFloat = (parameter.GetDataType() == DataType::Float);
    if (isFloat)
    {
        auto floatMatrix = GetMatrix<float>(parameter.Value());
        numRows = floatMatrix->GetNumRows();
        numCols = floatMatrix->GetNumCols();
    }
    else
    {
        auto doubleMatrix = GetMatrix<double>(parameter.Value());
        numRows = doubleMatrix->GetNumRows();
        numCols = doubleMatrix->GetNumCols();
    }

    return { numRows, numCols };
}
|
||||
|
||||
|
@ -219,17 +219,19 @@ namespace CNTK
|
|||
#endif
|
||||
|
||||
#if DUMPOUTPUT
|
||||
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
|
||||
auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
|
||||
LOGPRINTF(stderr, "learnRatePerSample=%0.8f, momentum=%0.8f, actualMBSize=%ld\n",
|
||||
m_learningRatePerSample, m_momentumPerSample, trainingSampleCount);
|
||||
learningRate, momentum, trainingSampleCount);
|
||||
LOGPRINTF(stderr, "GradUpdateType()=%s, GradientUpdateNoiseStd()=%0.8f\n",
|
||||
LearnerType().c_str(), m_GaussianNoiseInjectStd);
|
||||
LearnerType().c_str(), m_additionalOptions.gaussianNoiseInjectionStdDev);
|
||||
Print(gradientValue, "Gradient Update");
|
||||
Print(smoothedGradientValue, "Smoothed Gradient Input");
|
||||
#endif
|
||||
UPDATE_FUNCTION;
|
||||
|
||||
#if DUMPOUTPUT
|
||||
Print(parameterValue, "Parameter Update");
|
||||
Print(parameter.Value(), "Parameter Update");
|
||||
#endif
|
||||
|
||||
#ifdef _DEBUG
|
||||
|
@ -239,6 +241,7 @@ namespace CNTK
|
|||
#endif
|
||||
}
|
||||
m_sampleCount += trainingSampleCount;
|
||||
m_minibatchCount++;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -265,9 +268,16 @@ namespace CNTK
|
|||
|
||||
/*virtual*/ Dictionary LearnerBase::GetCheckpointState() const /*override*/
|
||||
{
|
||||
NOT_IMPLEMENTED; // Until the new checkpointing is fully fleshed out, nobody should be calling this.
|
||||
Dictionary checkpoint;
|
||||
|
||||
checkpoint[L"checkpointVersion"] = checkpointVersion;
|
||||
checkpoint[L"sampleCount"] = m_sampleCount;
|
||||
checkpoint[L"minibatchCount"] = m_minibatchCount;
|
||||
|
||||
// TODO: should we also save learning rate schedule into the checkpoint?
|
||||
// If that is the case, need to be able to override this method in subclasses
|
||||
// and save momentum schedule as well.
|
||||
|
||||
for (const auto& parameter : Parameters())
|
||||
{
|
||||
// TODO: parameter name is not guaranteed to be unique. Instead, all serializable objects
|
||||
|
@ -277,31 +287,48 @@ namespace CNTK
|
|||
{
|
||||
LogicError("Parameter names must be unique");
|
||||
}
|
||||
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
|
||||
|
||||
// Potentially, could store things like dimensions, element size, format, etc., but
|
||||
// that seems to be redundant, since all of that is passed in the constructor.
|
||||
checkpoint[parameter.Name()] = SerializeToVector(smoothedGradientValue);
|
||||
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
|
||||
checkpoint[parameter.Name()] = *smoothedGradientValue;
|
||||
}
|
||||
return checkpoint;
|
||||
}
|
||||
|
||||
/*virtual*/ void LearnerBase::RestoreFromCheckpoint(const Dictionary& checkpoint) /*override*/
|
||||
{
|
||||
NOT_IMPLEMENTED; // Until the new checkpointing is fully fleshed out, nobody should be calling this.
|
||||
m_sampleCount = checkpoint[L"sampleCount"].GetValue<size_t>();
|
||||
m_minibatchCount = checkpoint[L"minibatchCount"].GetValue<size_t>();
|
||||
|
||||
size_t version = checkpoint[L"minibatchCount"].GetValue<size_t>();
|
||||
if (checkpointVersion != version)
|
||||
{
|
||||
// At the moment, we only support one version, so this should never happen.
|
||||
LogicError("Unsupported checkpoint version.");
|
||||
}
|
||||
|
||||
for (const auto& parameter : Parameters())
|
||||
{
|
||||
if (!checkpoint.Contains(parameter.Name()))
|
||||
{
|
||||
LogicError("Checkpoint does not contain state for parameter %ls", parameter.Name().c_str());
|
||||
}
|
||||
|
||||
const auto& smoothedGradientValue = m_smoothedGradientValues.at(parameter);
|
||||
const NDArrayView& checkpointedValue = checkpoint[parameter.Name()].GetValue<NDArrayView>();
|
||||
|
||||
if (smoothedGradientValue->GetDataType() != checkpointedValue.GetDataType())
|
||||
{
|
||||
LogicError("A value restored from a checkpoint for the smoothed gradient data type for parameter %ls does not match the expected value",
|
||||
parameter.Name().c_str());
|
||||
}
|
||||
|
||||
const DictionaryValue& state = checkpoint[parameter.Name()];
|
||||
if (smoothedGradientValue->Shape() != checkpointedValue.Shape())
|
||||
{
|
||||
LogicError("A value restored from a checkpoint for the smoothed gradient shape for parameter %ls does not match the expected value",
|
||||
parameter.Name().c_str());
|
||||
}
|
||||
|
||||
const auto& data = smoothedGradientValue;
|
||||
|
||||
DeserializeFromVector(data, state.GetValue<vector<DictionaryValue>>());
|
||||
smoothedGradientValue->CopyFrom(checkpointedValue);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -313,23 +340,25 @@ namespace CNTK
|
|||
template <typename ElementType>
|
||||
void LearnerSGD::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const
|
||||
{
|
||||
UNUSED(trainingSampleCount);
|
||||
|
||||
const auto& parameterValue = parameter.Value();
|
||||
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
|
||||
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
|
||||
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
|
||||
|
||||
const auto& learningRate = ElementType(ParameterDependentLearningRate(parameter));
|
||||
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
|
||||
auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
|
||||
|
||||
// TODO: break up the NormalGrad into 3 different functions, each with its own set of parameters
|
||||
// (one for vanilla SGD, the other for momentum SGD, and the third one for NAG).
|
||||
smoothedGradientMatrix->NormalGrad(*gradientMatrix, *parameterMatrix,
|
||||
learningRate, ElementType(m_momentumPerSample), m_useNesterovAcceleration);
|
||||
learningRate, momentum, m_useNesterovAcceleration);
|
||||
}
|
||||
|
||||
LearnerAdaGrad::LearnerAdaGrad(const unordered_set<Parameter>& parameters, bool needAveMultiplier)
|
||||
: LearnerBase(parameters), m_needAveMultiplier(needAveMultiplier)
|
||||
LearnerAdaGrad::LearnerAdaGrad(const unordered_set<Parameter>& parameters,
|
||||
const LearningRatesPerSample& learningRates,
|
||||
bool needAveMultiplier)
|
||||
: LearnerBase(parameters, learningRates),
|
||||
m_needAveMultiplier(needAveMultiplier)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -348,15 +377,23 @@ namespace CNTK
|
|||
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
|
||||
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
|
||||
|
||||
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
|
||||
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
|
||||
|
||||
auto aveMultiplier = smoothedGradientMatrix->Adagrad(*gradientMatrix, m_needAveMultiplier);
|
||||
Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
|
||||
}
|
||||
|
||||
LearnerFSAdaGrad::LearnerFSAdaGrad(const unordered_set<Parameter>& parameters)
|
||||
: LearnerMomentumSGD(parameters)
|
||||
LearnerFSAdaGrad::LearnerFSAdaGrad(const unordered_set<Parameter>& parameters,
|
||||
const LearningRatesPerSample& learningRates,
|
||||
const MomentumsPerSample& momentums)
|
||||
: LearnerMomentumSGD(parameters, learningRates, momentums, /*allocateSmoothGradients*/ false)
|
||||
{
|
||||
for (const auto& parameter : parameters)
|
||||
{
|
||||
auto shape = GetMatrixShape(parameter);
|
||||
NDArrayViewPtr view = AllocateNDArrayView(parameter, {shape[0], 2 * shape[1]});
|
||||
m_smoothedGradientValues.insert(make_pair(parameter, view));
|
||||
}
|
||||
}
|
||||
|
||||
/*virtual*/ void LearnerFSAdaGrad::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
|
||||
|
@ -373,21 +410,33 @@ namespace CNTK
|
|||
const auto& smoothedGradientMatrix = GetWritableMatrix<ElementType>(smoothedGradientValue);
|
||||
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
|
||||
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
|
||||
|
||||
//const double momentum = MomentumPerMB(m_momentumPerSample, trainingSampleCount);
|
||||
|
||||
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
|
||||
|
||||
smoothedGradientMatrix->FSAdagrad(trainingSampleCount, *gradientMatrix, *parameterMatrix,
|
||||
learningRate, ElementType(m_momentumPerSample));
|
||||
|
||||
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
|
||||
auto momentum = ElementType(MomentumPerMB(m_momentums[m_sampleCount], trainingSampleCount));
|
||||
smoothedGradientMatrix->FSAdagrad(trainingSampleCount, *gradientMatrix, *parameterMatrix, learningRate, momentum);
|
||||
}
|
||||
|
||||
LearnerRMSProp::LearnerRMSProp(const unordered_set<Parameter>& parameters,
|
||||
double gamma, double inc, double dec, double max, double min, bool needAveMultiplier)
|
||||
: LearnerBase(parameters),
|
||||
m_gamma(gamma), m_inc(inc), m_dec(dec), m_max(max), m_min(min),
|
||||
m_needAveMultiplier(needAveMultiplier)
|
||||
LearnerRMSProp::LearnerRMSProp(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates,
|
||||
double gamma, double inc, double dec, double max, double min, bool needAveMultiplier)
|
||||
: LearnerBase(parameters, learningRates, /*allocateSmoothGradients*/ false),
|
||||
m_gamma(gamma), m_inc(inc), m_dec(dec), m_max(max), m_min(min),
|
||||
m_needAveMultiplier(needAveMultiplier)
|
||||
{
|
||||
for (const auto& parameter : parameters)
|
||||
{
|
||||
// When needAveMultiplier == true, CPU and GPU implementations of RMSProp require different number of columns.
|
||||
// TODO: verify that this is correct.
|
||||
size_t factor = 3;
|
||||
if (needAveMultiplier && parameter.Value()->Device().Type() == DeviceKind::GPU)
|
||||
{
|
||||
factor = 4;
|
||||
}
|
||||
|
||||
auto shape = GetMatrixShape(parameter);
|
||||
NDArrayViewPtr view = AllocateNDArrayView(parameter, {shape[0], factor * shape[1]});
|
||||
|
||||
m_smoothedGradientValues.insert(make_pair(parameter, view));
|
||||
}
|
||||
}
|
||||
|
||||
/*virtual*/ void LearnerRMSProp::Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const /*override*/
|
||||
|
@ -405,12 +454,12 @@ namespace CNTK
|
|||
const auto& gradientMatrix = GetWritableMatrix<ElementType>(gradientValue);
|
||||
const auto& parameterMatrix = GetWritableMatrix<ElementType>(parameterValue);
|
||||
|
||||
auto learningRate = ElementType(ParameterDependentLearningRate(parameter));
|
||||
auto learningRate = ElementType(m_learningRates[m_sampleCount]);
|
||||
|
||||
auto aveMultiplier = smoothedGradientMatrix->RmsProp(*gradientMatrix,
|
||||
ElementType(m_gamma), ElementType(m_inc),
|
||||
ElementType(m_max), ElementType(m_dec),
|
||||
ElementType(m_min), m_needAveMultiplier);
|
||||
ElementType(m_gamma), ElementType(m_inc),
|
||||
ElementType(m_max), ElementType(m_dec),
|
||||
ElementType(m_min), m_needAveMultiplier);
|
||||
Matrix<ElementType>::ScaleAndAdd(ElementType(-learningRate / aveMultiplier), *gradientMatrix, *parameterMatrix);
|
||||
}
|
||||
|
||||
|
@ -418,34 +467,35 @@ namespace CNTK
|
|||
template shared_ptr<Matrix<float>> LearnerBase::GetWritableMatrix<float>(const NDArrayViewPtr& arrayView);
|
||||
template shared_ptr<Matrix<double>> LearnerBase::GetWritableMatrix<double>(const NDArrayViewPtr& arrayView);
|
||||
|
||||
LearnerPtr SGDLearner(const unordered_set<Parameter>& parameters, double learningRatePerSample)
|
||||
LearnerPtr SGDLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates)
|
||||
{
|
||||
return MakeSharedObject<LearnerSGD>(parameters, learningRatePerSample);
|
||||
return MakeSharedObject<LearnerSGD>(parameters, learningRates);
|
||||
}
|
||||
|
||||
LearnerPtr MomentumSGDLearner(const unordered_set<Parameter>& parameters)
|
||||
LearnerPtr MomentumSGDLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, const MomentumsPerSample& momentums)
|
||||
{
|
||||
return MakeSharedObject<LearnerMomentumSGD>(parameters);
|
||||
return MakeSharedObject<LearnerMomentumSGD>(parameters, learningRates, momentums);
|
||||
}
|
||||
|
||||
LearnerPtr NesterovLearner(const unordered_set<Parameter>& parameters)
|
||||
LearnerPtr NesterovLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, const MomentumsPerSample& momentums)
|
||||
{
|
||||
return MakeSharedObject<LearnerNesterov>(parameters);
|
||||
return MakeSharedObject<LearnerNesterov>(parameters, learningRates, momentums);
|
||||
}
|
||||
|
||||
LearnerPtr AdaGradLearner(const unordered_set<Parameter>& parameters, bool needAveMultiplier)
|
||||
LearnerPtr AdaGradLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, bool needAveMultiplier)
|
||||
{
|
||||
return MakeSharedObject<LearnerAdaGrad>(parameters, needAveMultiplier);
|
||||
return MakeSharedObject<LearnerAdaGrad>(parameters, learningRates, needAveMultiplier);
|
||||
}
|
||||
|
||||
LearnerPtr FSAdaGradLearner(const unordered_set<Parameter>& parameters)
|
||||
LearnerPtr FSAdaGradLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates, const MomentumsPerSample& momentums)
|
||||
{
|
||||
return MakeSharedObject<LearnerFSAdaGrad>(parameters);
|
||||
return MakeSharedObject<LearnerFSAdaGrad>(parameters, learningRates, momentums);
|
||||
}
|
||||
|
||||
LearnerPtr RMSPropLearner(const unordered_set<Parameter>& parameters,
|
||||
double gamma, double inc, double dec, double max, double min, bool needAveMultiplier)
|
||||
LearnerPtr RMSPropLearner(const unordered_set<Parameter>& parameters, const LearningRatesPerSample& learningRates,
|
||||
double gamma, double inc, double dec, double max, double min,
|
||||
bool needAveMultiplier)
|
||||
{
|
||||
return MakeSharedObject<LearnerRMSProp>(parameters, gamma, inc, dec, max, min, needAveMultiplier);
|
||||
return MakeSharedObject<LearnerRMSProp>(parameters, learningRates, gamma, inc, dec, max, min, needAveMultiplier);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
|
||||
namespace CNTK
|
||||
{
|
||||
// TODO: Move this to Trainer along with Pre-, PostProcess and ClipGradient.
|
||||
// A collection of additional options that are applicable for all standard learners
|
||||
// (after these options are set, they retain their value for the entire lifespan of a learner).
|
||||
struct AdditionalLearningOptions
|
||||
|
@ -18,7 +19,6 @@ namespace CNTK
|
|||
double gaussianNoiseInjectionStdDev = 0.0;
|
||||
bool gradientClippingWithTruncation = true;
|
||||
double gradientClippingThresholdPerSample = std::numeric_limits<double>::infinity();
|
||||
std::unordered_map<Parameter, double> learningRateMultipliers;
|
||||
};
|
||||
|
||||
// An abstract base class at the root of the standard learners hierarchy
|
||||
|
@ -33,32 +33,16 @@ namespace CNTK
|
|||
|
||||
virtual void RestoreFromCheckpoint(const Dictionary& checkpoint) override final;
|
||||
|
||||
void SetAdditionalOptions(const AdditionalLearningOptions& additionalOptions)
|
||||
{
|
||||
m_additionalOptions = additionalOptions;
|
||||
}
|
||||
|
||||
// TODO: should this be called ResetMomentum?
|
||||
// needed for BlockMomemtumSGD to reset SGD momentum after aggregation.
|
||||
void ResetSmoothedGradients();
|
||||
|
||||
// TODO: move learning rate and momentum scheduling and adjustment functionality
|
||||
// inside the learner and drop these setters.
|
||||
void SetLearningRate(double value) { m_learningRatePerSample = value; }
|
||||
|
||||
protected:
|
||||
LearnerBase(const std::unordered_set<Parameter>& parameters);
|
||||
LearnerBase(const std::unordered_set<Parameter>& parameters,
|
||||
const LearningRatesPerSample& learningRates,
|
||||
bool allocateSmoothGradients = true);
|
||||
|
||||
virtual void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const = 0;
|
||||
|
||||
double ParameterDependentLearningRate(const Parameter& parameter) const
|
||||
{
|
||||
return m_learningRatePerSample * m_additionalOptions.learningRateMultipliers.at(parameter);
|
||||
}
|
||||
|
||||
std::string LearnerType() const;
|
||||
|
||||
double m_learningRatePerSample;
|
||||
LearningRatesPerSample m_learningRates;
|
||||
|
||||
AdditionalLearningOptions m_additionalOptions;
|
||||
|
||||
|
@ -91,6 +75,16 @@ namespace CNTK
|
|||
template <typename ElementType>
|
||||
void PostProcess(const Parameter& parameter, const NDArrayViewPtr& gradientValue, size_t actualMBSize) const;
|
||||
|
||||
// Returns an NDArrayView with the required shape, with the same data type as parameter value
|
||||
// and allocated on the same device.
|
||||
static NDArrayViewPtr AllocateNDArrayView(const Parameter& parameter, const NDShape& shape);
|
||||
|
||||
// Retrieves the shape of the matrix corresponding to the parameter value.
|
||||
static NDShape GetMatrixShape(const Parameter& parameter);
|
||||
|
||||
size_t m_sampleCount;
|
||||
size_t m_minibatchCount;
|
||||
|
||||
private:
|
||||
// Templatized update function, it invokes preprocess and postprocess using the provided
|
||||
// template parameter and also invokes virtual Update method implemented in one of the subclasses.
|
||||
|
@ -101,18 +95,20 @@ namespace CNTK
|
|||
static bool HasNan(const NDArrayViewPtr& value, const char* name);
|
||||
static void Print(const NDArrayViewPtr& value, const char* msg);
|
||||
|
||||
size_t m_sampleCount;
|
||||
static const size_t checkpointVersion = 1;
|
||||
};
|
||||
|
||||
// Vanilla gradient descent optimization algorithm.
|
||||
class LearnerSGD : public LearnerBase
|
||||
{
|
||||
public:
|
||||
LearnerSGD(const std::unordered_set<Parameter>& parameters, double learningRatePerSample = 0)
|
||||
: LearnerBase(parameters), m_momentumPerSample(0.0), m_useNesterovAcceleration(false)
|
||||
{
|
||||
SetLearningRate(learningRatePerSample);
|
||||
}
|
||||
LearnerSGD(const std::unordered_set<Parameter>& parameters,
|
||||
const LearningRatesPerSample& learningRates,
|
||||
bool allocateSmoothGradients = true)
|
||||
: LearnerBase(parameters, learningRates, allocateSmoothGradients),
|
||||
m_momentums(0.0),
|
||||
m_useNesterovAcceleration(false)
|
||||
{ }
|
||||
|
||||
protected:
|
||||
|
||||
|
@ -121,7 +117,8 @@ namespace CNTK
|
|||
template <typename ElementType>
|
||||
void Update(const Parameter& parameter, const NDArrayViewPtr& gradientValue, const NDArrayViewPtr& smoothedGradientValue, size_t trainingSampleCount) const;
|
||||
|
||||
double m_momentumPerSample;
|
||||
// TODO: Move m_momentums to LearnerMomentumSGD as soon as NormalGrad is refactored.
|
||||
MomentumsPerSample m_momentums;
|
||||
bool m_useNesterovAcceleration;
|
||||
};
|
||||
|
||||
|
@ -129,20 +126,25 @@ namespace CNTK
|
|||
class LearnerMomentumSGD : public LearnerSGD
|
||||
{
|
||||
public:
|
||||
LearnerMomentumSGD(const std::unordered_set<Parameter>& parameters)
|
||||
: LearnerSGD(parameters)
|
||||
{}
|
||||
|
||||
void SetMomentum(double value) { m_momentumPerSample = value; }
|
||||
LearnerMomentumSGD(const std::unordered_set<Parameter>& parameters,
|
||||
const LearningRatesPerSample& learningRates,
|
||||
const MomentumsPerSample& momentums,
|
||||
bool allocateSmoothGradients = true)
|
||||
: LearnerSGD(parameters, learningRates, allocateSmoothGradients)
|
||||
{
|
||||
m_momentums = momentums;
|
||||
}
|
||||
};
|
||||
|
||||
// Nesterov's accelerated gradient descent.
|
||||
class LearnerNesterov : public LearnerSGD
|
||||
class LearnerNesterov : public LearnerMomentumSGD
|
||||
{
|
||||
public:
|
||||
|
||||
LearnerNesterov(const std::unordered_set<Parameter>& parameters)
|
||||
: LearnerSGD(parameters)
|
||||
LearnerNesterov(const std::unordered_set<Parameter>& parameters,
|
||||
const LearningRatesPerSample& learningRates,
|
||||
const MomentumsPerSample& momentums)
|
||||
: LearnerMomentumSGD(parameters, learningRates, momentums)
|
||||
{
|
||||
m_useNesterovAcceleration = true;
|
||||
}
|
||||
|
@ -152,7 +154,9 @@ namespace CNTK
|
|||
{
|
||||
public:
|
||||
|
||||
LearnerAdaGrad(const std::unordered_set<Parameter>& parameters, bool needAveMultiplier);
|
||||
LearnerAdaGrad(const std::unordered_set<Parameter>& parameters,
|
||||
const LearningRatesPerSample& learningRates,
|
||||
bool needAveMultiplier);
|
||||
|
||||
protected:
|
||||
bool m_needAveMultiplier;
|
||||
|
@ -167,7 +171,9 @@ namespace CNTK
|
|||
{
|
||||
public:
|
||||
|
||||
LearnerFSAdaGrad(const std::unordered_set<Parameter>& parameters);
|
||||
LearnerFSAdaGrad(const std::unordered_set<Parameter>& parameters,
|
||||
const LearningRatesPerSample& learningRates,
|
||||
const MomentumsPerSample& momentums);
|
||||
|
||||
protected:
|
||||
|
||||
|
@ -182,7 +188,9 @@ namespace CNTK
|
|||
public:
|
||||
|
||||
LearnerRMSProp(const std::unordered_set<Parameter>& parameters,
|
||||
double gamma, double inc, double dec, double max, double min, bool needAveMultiplier);
|
||||
const LearningRatesPerSample& learningRates,
|
||||
double gamma, double inc, double dec, double max, double min,
|
||||
bool needAveMultiplier);
|
||||
|
||||
protected:
|
||||
|
||||
|
|
|
@ -49,10 +49,12 @@ namespace CNTK
|
|||
m_streamInfos.insert({ streamDesc->m_name, streamDesc->m_id, AsStorageFormat(streamDesc->m_storageType), AsDataType(streamDesc->m_elementType), AsNDShape(*(streamDesc->m_sampleLayout)) });
|
||||
}
|
||||
|
||||
/*virtual*/ std::unordered_map<StreamInfo, MinibatchData> CompositeMinibatchSource::GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
|
||||
const DeviceDescriptor& device /*= DeviceDescriptor::DefaultDevice()*/) /*override*/
|
||||
/*virtual*/ const std::unordered_map<StreamInfo, MinibatchData>&
|
||||
CompositeMinibatchSource::GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
|
||||
const DeviceDescriptor& device /*= DeviceDescriptor::DefaultDevice()*/) /*override*/
|
||||
{
|
||||
std::unordered_map<StreamInfo, MinibatchData> minibatchData;
|
||||
m_minibatchData.clear();
|
||||
|
||||
if (!m_epochEndReached)
|
||||
{
|
||||
// TODO: Support different minibatch sizes for different streams
|
||||
|
@ -117,7 +119,9 @@ namespace CNTK
|
|||
auto currentStreamMinibatchData = compositeReaderMinibatchData.m_data[i];
|
||||
if (currentStreamDesc->m_elementType == ElementType::tfloat)
|
||||
{
|
||||
auto dataMatrix = std::make_shared<Matrix<float>>(CPUDEVICE);
|
||||
auto CNTKMatrixType = (currentStreamDesc->m_storageType == StorageType::dense) ? DENSE : SPARSE;
|
||||
auto CNTKMatrixFormat = (currentStreamDesc->m_storageType == StorageType::dense) ? matrixFormatDense : matrixFormatSparseCSC;
|
||||
auto dataMatrix = std::make_shared<Matrix<float>>(0, 0, CPUDEVICE, CNTKMatrixType, CNTKMatrixFormat);
|
||||
size_t sampleSize = currentStreamDesc->m_sampleLayout->GetNumElements();
|
||||
|
||||
// TODO: Eliminate the unnecessary CPU to CPU copy
|
||||
|
@ -127,14 +131,14 @@ namespace CNTK
|
|||
size_t numSamples = currentStreamMinibatchData->m_layout->GetActualNumSamples();
|
||||
size_t numSequences = currentStreamMinibatchData->m_layout->GetNumSequences();
|
||||
|
||||
minibatchData[currentStreamInfo] = { numSequences, numSamples, minibatchValuePtr };
|
||||
m_minibatchData[currentStreamInfo] = { numSequences, numSamples, minibatchValuePtr };
|
||||
}
|
||||
else
|
||||
LogicError("Input data of type other than DataType::Float is currently unsupported by the CNTK built-in composite MinibatchSource!");
|
||||
}
|
||||
}
|
||||
|
||||
return minibatchData;
|
||||
return m_minibatchData;
|
||||
}
|
||||
|
||||
void ComputeInputPerDimMeansAndInvStdDevs(const MinibatchSourcePtr& minibatchSource,
|
||||
|
|
|
@ -19,8 +19,8 @@ namespace CNTK
|
|||
|
||||
virtual const std::unordered_set<StreamInfo>& StreamInfos() override { return m_streamInfos; }
|
||||
|
||||
virtual std::unordered_map<StreamInfo, MinibatchData> GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
|
||||
const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()) override;
|
||||
virtual const std::unordered_map<StreamInfo, MinibatchData>& GetNextMinibatch(const std::unordered_map<StreamInfo, std::pair<size_t, size_t>>& perStreamMBSizeLimits,
|
||||
const DeviceDescriptor& device = DeviceDescriptor::DefaultDevice()) override;
|
||||
|
||||
private:
|
||||
std::unordered_set<StreamInfo> m_streamInfos;
|
||||
|
@ -28,5 +28,6 @@ namespace CNTK
|
|||
bool m_epochEndReached;
|
||||
size_t m_prevMinibatchSize;
|
||||
size_t m_epochSize;
|
||||
std::unordered_map<StreamInfo, MinibatchData> m_minibatchData;
|
||||
};
|
||||
}
|
||||
|
|
|
@ -61,11 +61,12 @@ namespace CNTK
|
|||
LogicError("The gradient value for a Parameter cannot have an associated mask!");
|
||||
}
|
||||
|
||||
auto trainingLossArguments = m_trainingLossVar.Owner()->Arguments();
|
||||
auto labelsVar = *(std::find_if(trainingLossArguments.begin(), trainingLossArguments.end(), [](const Variable& var) {
|
||||
return var.IsInput();
|
||||
}));
|
||||
auto argumentValue = arguments.at(labelsVar);
|
||||
auto trainingLossArgument = *(m_trainingLossVar.Owner()->Arguments().begin());
|
||||
|
||||
// Find the argument whose dynamic axes match the criterion operation's dynamic axes (i.e. label dynamic axes)
|
||||
auto argumentValue = std::find_if(arguments.begin(), arguments.end(), [trainingLossArgument](const std::pair<Variable, ValuePtr>& currentPair) {
|
||||
return (currentPair.first.DynamicAxes() == trainingLossArgument.DynamicAxes());
|
||||
})->second;
|
||||
auto argumentData = argumentValue->Data();
|
||||
auto argumentDataShape = argumentData->Shape();
|
||||
auto mask = argumentValue->Mask();
|
||||
|
|
|
@ -6,20 +6,100 @@
|
|||
#include "stdafx.h"
|
||||
#include "CNTKLibrary.h"
|
||||
#include "Utils.h"
|
||||
#include "File.h"
|
||||
#include <istream>
|
||||
#include <ostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace CNTK
|
||||
{
|
||||
// Adapter over an ostream whose operator<< performs unformatted (binary) writes:
// values are emitted as the raw bytes of their in-memory representation.
struct BinaryOStreamWrapper
{
    BinaryOStreamWrapper(ostream& s)
        : m_stream(s)
    {
    }

    // The wrapper only holds a reference to the stream; copying and moving are disabled.
    BinaryOStreamWrapper(const BinaryOStreamWrapper&) = delete;
    BinaryOStreamWrapper(BinaryOStreamWrapper&&) = delete;
    BinaryOStreamWrapper& operator=(const BinaryOStreamWrapper&) = delete;
    BinaryOStreamWrapper& operator=(BinaryOStreamWrapper&&) = delete;

    // POD values: write exactly sizeof(T) bytes taken from the object's storage.
    template <typename T>
    typename std::enable_if<std::is_pod<T>::value, BinaryOStreamWrapper&>::type
    operator<<(const T& value)
    {
        const char* rawBytes = reinterpret_cast<const char*>(&value);
        m_stream.write(rawBytes, sizeof(T));
        return *this;
    }

    // Wide strings: the character count first, then the raw wchar_t payload.
    BinaryOStreamWrapper& operator<<(const wstring& str)
    {
        const auto numChars = str.length();
        *this << numChars;
        m_stream.write(reinterpret_cast<const char*>(str.c_str()), numChars * sizeof(wchar_t));
        return *this;
    }

    // Gives access to the wrapped stream.
    operator ostream&() { return m_stream; }

    ostream& m_stream;
};
|
||||
|
||||
// Adapter over an istream whose operator>> performs unformatted (binary) reads.
// It consumes the byte layout produced by BinaryOStreamWrapper: POD values as their raw
// bytes, wide strings as a size_t character count followed by the raw wchar_t payload.
struct BinaryIStreamWrapper
{
    BinaryIStreamWrapper(istream& s) : m_stream(s) {}

    // Reads sizeof(T) raw bytes directly into 'value'.
    // Reading into the object itself (rather than into a char scratch buffer that is then
    // reinterpreted as T) avoids the alignment and strict-aliasing problems of dereferencing
    // a T* that points at a char array.
    template<typename T>
    typename std::enable_if<std::is_pod<T>::value, BinaryIStreamWrapper&>::type
    operator>>(T& value)
    {
        m_stream.read(reinterpret_cast<char*>(&value), sizeof(T));
        return *this;
    }

    // Reads a length-prefixed wide string.
    BinaryIStreamWrapper& operator>>(wstring& str)
    {
        // Zero-initialized so that a failed read of the prefix does not resize the string
        // to an indeterminate length.
        size_t length = 0;
        *this >> length;
        str.resize(length);
        if (length > 0)
        {
            // The characters are contiguous, so the whole payload is read in one call.
            m_stream.read(reinterpret_cast<char*>(&str[0]), length * sizeof(wchar_t));
        }

        return *this;
    }

    // Gives access to the wrapped stream.
    operator istream& () const { return m_stream; }

    istream& m_stream;
    // No longer used by the readers above; kept so that the set of members is unchanged.
    char buf[sizeof(size_t)];
    BinaryIStreamWrapper(const BinaryIStreamWrapper&) = delete;
    BinaryIStreamWrapper(BinaryIStreamWrapper&&) = delete;
    BinaryIStreamWrapper& operator=(const BinaryIStreamWrapper&) = delete;
    BinaryIStreamWrapper& operator=(BinaryIStreamWrapper&&) = delete;
};
|
||||
|
||||
// Heap-allocates a copy of 'value' and returns the raw pointer to it.
template <typename T>
T* CreateDataPtr(const T& value)
{
    T* heapCopy = new T(value);
    return heapCopy;
}
|
||||
|
||||
template <>
|
||||
NDArrayView* CreateDataPtr<NDArrayView>(const NDArrayView& value)
|
||||
{
|
||||
// TODO: replace this copy with an alias to value.
|
||||
NDArrayView* viewPtr = new NDArrayView(value.GetDataType(), value.Shape(), DeviceDescriptor::CPUDevice());
|
||||
viewPtr->CopyFrom(value);
|
||||
return viewPtr;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void DictionaryValue::AllocateDataPtr(const T& value)
|
||||
{
|
||||
static_assert(is_same<T, NDShape>::value ||
|
||||
is_same<T, wstring>::value ||
|
||||
is_same<T, vector<DictionaryValue>>::value ||
|
||||
is_same<T, Dictionary>::value, "AllocateDataPtr called with invalid type");
|
||||
m_data.m_ptr = new T(value);
|
||||
is_same<T, Dictionary>::value ||
|
||||
is_same<T, NDArrayView>::value,
|
||||
"AllocateDataPtr called with invalid type");
|
||||
m_data.m_ptr = CreateDataPtr<T>(value);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
@ -31,12 +111,163 @@ namespace CNTK
|
|||
m_data.m_ptr = nullptr;
|
||||
}
|
||||
|
||||
Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, DictionaryValue& us)
|
||||
template <typename ElementType>
|
||||
bool AreEqual(NDArrayView& view1, NDArrayView& view2)
|
||||
{
|
||||
if (view1.GetDataType() != view2.GetDataType() ||
|
||||
view1.Shape() != view2.Shape())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
ElementType* data1 = nullptr;
|
||||
ElementType* data2 = nullptr;
|
||||
if (view1.Device().Type() == DeviceKind::CPU)
|
||||
{
|
||||
data1 = view1.WritableDataBuffer<ElementType>();
|
||||
data2 = view2.WritableDataBuffer<ElementType>();
|
||||
}
|
||||
else
|
||||
{
|
||||
NDArrayViewPtr temp1CpuDataView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), view1.Shape(), DeviceDescriptor::CPUDevice());
|
||||
temp1CpuDataView->CopyFrom(view1);
|
||||
data1 = temp1CpuDataView->WritableDataBuffer<ElementType>();
|
||||
|
||||
NDArrayViewPtr temp2CpuDataView = MakeSharedObject<NDArrayView>(AsDataType<ElementType>(), view2.Shape(), DeviceDescriptor::CPUDevice());
|
||||
temp2CpuDataView->CopyFrom(view2);
|
||||
data2 = temp2CpuDataView->WritableDataBuffer<ElementType>();
|
||||
}
|
||||
|
||||
size_t numElements = view1.Shape().TotalSize();
|
||||
|
||||
for (size_t i = 0; i < numElements; ++i)
|
||||
{
|
||||
if (data1[i] != data2[i])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DictionaryValue::operator==(const DictionaryValue& other) const
|
||||
{
|
||||
if (this == &other)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (m_valueType != other.m_valueType)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
switch (m_valueType)
|
||||
{
|
||||
case DictionaryValue::Type::Bool:
|
||||
return (m_data.m_boolean == other.m_data.m_boolean);
|
||||
case DictionaryValue::Type::SizeT:
|
||||
return (m_data.m_sizeT == other.m_data.m_sizeT);
|
||||
case DictionaryValue::Type::Float:
|
||||
return (m_data.m_float == other.m_data.m_float);
|
||||
case DictionaryValue::Type::Double:
|
||||
return (m_data.m_double == other.m_data.m_double);
|
||||
case DictionaryValue::Type::String:
|
||||
{
|
||||
wstring* strPtr1 = reinterpret_cast<wstring*>(m_data.m_ptr);
|
||||
wstring* strPtr2 = reinterpret_cast<wstring*>(other.m_data.m_ptr);
|
||||
return (*strPtr1 == *strPtr2);
|
||||
}
|
||||
case DictionaryValue::Type::NDShape:
|
||||
{
|
||||
NDShape* shapePtr1 = reinterpret_cast<NDShape*>(m_data.m_ptr);
|
||||
NDShape* shapePtr2 = reinterpret_cast<NDShape*>(other.m_data.m_ptr);
|
||||
return (*shapePtr1 == *shapePtr2);
|
||||
}
|
||||
case DictionaryValue::Type::Vector:
|
||||
{
|
||||
vector<DictionaryValue>* vectorPtr1 = reinterpret_cast<vector<DictionaryValue>*>(m_data.m_ptr);
|
||||
vector<DictionaryValue>* vectorPtr2 = reinterpret_cast<vector<DictionaryValue>*>(other.m_data.m_ptr);
|
||||
return (*vectorPtr1 == *vectorPtr2);
|
||||
}
|
||||
case DictionaryValue::Type::Dictionary:
|
||||
{
|
||||
Dictionary* dictPtr1 = reinterpret_cast<Dictionary*>(m_data.m_ptr);
|
||||
Dictionary* dictPtr2 = reinterpret_cast<Dictionary*>(other.m_data.m_ptr);
|
||||
return (*dictPtr1 == *dictPtr2);
|
||||
}
|
||||
case DictionaryValue::Type::NDArrayView:
|
||||
{
|
||||
NDArrayView* viewPtr1 = reinterpret_cast<NDArrayView*>(m_data.m_ptr);
|
||||
NDArrayView* viewPtr2 = reinterpret_cast<NDArrayView*>(other.m_data.m_ptr);
|
||||
|
||||
switch (viewPtr1->GetDataType())
|
||||
{
|
||||
case DataType::Float:
|
||||
return AreEqual<float>(*viewPtr1, *viewPtr2);
|
||||
case DataType::Double:
|
||||
return AreEqual<double>(*viewPtr1, *viewPtr2);
|
||||
default:
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
}
|
||||
default:
|
||||
NOT_IMPLEMENTED;
|
||||
}
|
||||
}
|
||||
|
||||
bool DictionaryValue::operator!=(const DictionaryValue& other) const
|
||||
{
|
||||
return !(*this == other);
|
||||
}
|
||||
|
||||
|
||||
// Writes a shape in binary form: the number of axes followed by each axis dimension.
BinaryOStreamWrapper& operator<<(BinaryOStreamWrapper& stream, const NDShape& us)
{
    auto size = us.NumAxes();
    stream << size;
    // Unsigned index: the original 'auto i = 0' deduced int, which mixes signed and
    // unsigned operands in the loop condition.
    for (size_t i = 0; i < size; i++)
    {
        stream << us[i];
    }
    return stream;
}
|
||||
|
||||
template <typename T>
|
||||
void Write(BinaryOStreamWrapper& stream, const NDArrayView& view)
|
||||
{
|
||||
assert(view.Device().Type() == DeviceKind::CPU);
|
||||
|
||||
auto numElements = view.Shape().TotalSize();
|
||||
const T* buffer = view.DataBuffer<T>();
|
||||
for (auto i = 0; i < numElements; ++i)
|
||||
{
|
||||
stream << buffer[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void Read(BinaryIStreamWrapper& stream, NDArrayView& view)
|
||||
{
|
||||
assert(view.Device().Type() == DeviceKind::CPU);
|
||||
|
||||
auto numElements = view.Shape().TotalSize();
|
||||
T* buffer = view.WritableDataBuffer<T>();
|
||||
for (auto i = 0; i < numElements; ++i)
|
||||
{
|
||||
stream >> buffer[i];
|
||||
}
|
||||
}
|
||||
|
||||
istream& operator>>(istream& stdStream, DictionaryValue& us)
|
||||
{
|
||||
BinaryIStreamWrapper stream(stdStream);
|
||||
size_t version;
|
||||
stream >> version;
|
||||
|
||||
stream >> us.m_valueType;
|
||||
|
||||
unsigned int type;
|
||||
stream >> type;
|
||||
us.m_valueType = static_cast<DictionaryValue::Type>(type);
|
||||
|
||||
switch (us.ValueType())
|
||||
{
|
||||
|
@ -52,28 +283,72 @@ namespace CNTK
|
|||
case DictionaryValue::Type::Double:
|
||||
stream >> us.m_data.m_double;
|
||||
break;
|
||||
case DictionaryValue::Type::String:
|
||||
{
|
||||
wstring* strPtr = new wstring();
|
||||
stream >> *strPtr;
|
||||
us.m_data.m_ptr = strPtr;
|
||||
break;
|
||||
}
|
||||
case DictionaryValue::Type::NDShape:
|
||||
{
|
||||
size_t size;
|
||||
stream >> size;
|
||||
vector<size_t> dims(size);
|
||||
NDShape* shapePtr = new NDShape(size);
|
||||
for (auto i = 0; i < size; i++)
|
||||
{
|
||||
stream >> dims[i];
|
||||
stream >> shapePtr->operator[](i);
|
||||
}
|
||||
us.AllocateDataPtr(NDShape(dims));
|
||||
us.m_data.m_ptr = shapePtr;
|
||||
break;
|
||||
}
|
||||
case DictionaryValue::Type::Vector:
|
||||
{
|
||||
{
|
||||
size_t size;
|
||||
stream >> size;
|
||||
vector<DictionaryValue> values(size);
|
||||
vector<DictionaryValue>* vectorPtr = new vector<DictionaryValue>(size);
|
||||
for (auto i = 0; i < size; i++)
|
||||
{
|
||||
stream >> values[i];
|
||||
stream >> vectorPtr->at(i);
|
||||
}
|
||||
us.AllocateDataPtr(values);
|
||||
us.m_data.m_ptr = vectorPtr;
|
||||
break;
|
||||
}
|
||||
case DictionaryValue::Type::Dictionary:
|
||||
{
|
||||
Dictionary* dictPtr = new Dictionary();
|
||||
stream >> *dictPtr;
|
||||
us.m_data.m_ptr = dictPtr;
|
||||
break;
|
||||
}
|
||||
case DictionaryValue::Type::NDArrayView:
|
||||
{
|
||||
unsigned int type;
|
||||
stream >> type;
|
||||
DataType dtype = static_cast<DataType>(type);
|
||||
|
||||
size_t size;
|
||||
stream >> size;
|
||||
NDShape shape(size);
|
||||
for (auto i = 0; i < size; i++)
|
||||
{
|
||||
stream >> shape[i];
|
||||
}
|
||||
|
||||
NDArrayView* viewPtr = new NDArrayView(dtype, shape, DeviceDescriptor::CPUDevice());
|
||||
switch (dtype)
|
||||
{
|
||||
case DataType::Float:
|
||||
Read<float>(stream, *viewPtr);
|
||||
break;
|
||||
case DataType::Double:
|
||||
Read<double>(stream, *viewPtr);
|
||||
break;
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", DataTypeName(dtype));
|
||||
}
|
||||
|
||||
us.m_data.m_ptr = viewPtr;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
|
@ -82,11 +357,13 @@ namespace CNTK
|
|||
return stream;
|
||||
}
|
||||
|
||||
Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const DictionaryValue& us)
|
||||
ostream& operator<<(ostream& stdStream, const DictionaryValue& us)
|
||||
{
|
||||
BinaryOStreamWrapper stream(stdStream);
|
||||
|
||||
stream << us.version;
|
||||
|
||||
stream << us.ValueType();
|
||||
stream << static_cast<unsigned int>(us.ValueType());
|
||||
|
||||
switch (us.ValueType())
|
||||
{
|
||||
|
@ -102,15 +379,16 @@ namespace CNTK
|
|||
case DictionaryValue::Type::Double:
|
||||
stream << us.m_data.m_double;
|
||||
break;
|
||||
case DictionaryValue::Type::String:
|
||||
{
|
||||
wstring* stringPtr = reinterpret_cast<wstring*>(us.m_data.m_ptr);
|
||||
stream << *stringPtr;
|
||||
break;
|
||||
}
|
||||
case DictionaryValue::Type::NDShape:
|
||||
{
|
||||
NDShape* shapePtr = reinterpret_cast<NDShape*>(us.m_data.m_ptr);
|
||||
auto size = shapePtr->NumAxes();
|
||||
stream << size;
|
||||
for (auto i = 0; i < size; i++)
|
||||
{
|
||||
stream << shapePtr->operator[](i);
|
||||
}
|
||||
stream << *shapePtr;
|
||||
break;
|
||||
}
|
||||
case DictionaryValue::Type::Vector:
|
||||
|
@ -121,7 +399,31 @@ namespace CNTK
|
|||
stream << size;
|
||||
for (auto i = 0; i < size; i++)
|
||||
{
|
||||
stream << vectorPtr->operator[](i);
|
||||
stream << vectorPtr->at(i);
|
||||
}
|
||||
break;
|
||||
}
|
||||
case DictionaryValue::Type::Dictionary:
|
||||
{
|
||||
Dictionary* dictPtr = reinterpret_cast<Dictionary*>(us.m_data.m_ptr);
|
||||
stream << *dictPtr;
|
||||
break;
|
||||
}
|
||||
case DictionaryValue::Type::NDArrayView:
|
||||
{
|
||||
NDArrayView* viewPtr = reinterpret_cast<NDArrayView*>(us.m_data.m_ptr);
|
||||
stream << static_cast<unsigned int>(viewPtr->GetDataType());
|
||||
stream << viewPtr->Shape();
|
||||
switch (viewPtr->GetDataType())
|
||||
{
|
||||
case DataType::Float:
|
||||
Write<float>(stream, *viewPtr);
|
||||
break;
|
||||
case DataType::Double:
|
||||
Write<double>(stream, *viewPtr);
|
||||
break;
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -148,7 +450,7 @@ namespace CNTK
|
|||
Dictionary& Dictionary::operator=(const Dictionary& other)
|
||||
{
|
||||
assert(this != &other);
|
||||
m_dictionaryData.reset(new std::unordered_map<std::wstring, DictionaryValue>(*(other.m_dictionaryData)));
|
||||
m_dictionaryData.reset(new unordered_map<wstring, DictionaryValue>(*(other.m_dictionaryData)));
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -183,20 +485,51 @@ namespace CNTK
|
|||
return (m_dictionaryData->find(key) != m_dictionaryData->end());
|
||||
}
|
||||
|
||||
Microsoft::MSR::CNTK::File& operator<<(Microsoft::MSR::CNTK::File& stream, const Dictionary& us)
|
||||
bool Dictionary::operator==(const Dictionary& other) const
|
||||
{
|
||||
if (this == &other)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
if (m_dictionaryData->size() != other.m_dictionaryData->size())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
for (auto& kv : *m_dictionaryData)
|
||||
{
|
||||
auto result = other.m_dictionaryData->find(kv.first);
|
||||
if (result == other.m_dictionaryData->end() || kv.second != result->second)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Dictionary::operator!=(const Dictionary& other) const
|
||||
{
|
||||
return !(*this == other);
|
||||
}
|
||||
|
||||
ostream& operator<<(ostream& stdStream, const Dictionary& us)
|
||||
{
|
||||
BinaryOStreamWrapper stream(stdStream);
|
||||
stream << us.version;
|
||||
stream << us.m_dictionaryData->size();
|
||||
for (auto it = us.m_dictionaryData->begin(); it != us.m_dictionaryData->end(); ++it)
|
||||
for (auto& kv : *(us.m_dictionaryData))
|
||||
{
|
||||
stream << it->first;
|
||||
stream << it->second;
|
||||
stream << kv.first;
|
||||
stream << kv.second;
|
||||
}
|
||||
return stream;
|
||||
}
|
||||
|
||||
Microsoft::MSR::CNTK::File& operator>>(Microsoft::MSR::CNTK::File& stream, Dictionary& us)
|
||||
istream& operator>>(istream& stdStream, Dictionary& us)
|
||||
{
|
||||
BinaryIStreamWrapper stream(stdStream);
|
||||
size_t version;
|
||||
stream >> version;
|
||||
size_t size;
|
||||
|
@ -206,113 +539,36 @@ namespace CNTK
|
|||
{
|
||||
wstring key;
|
||||
stream >> key;
|
||||
DictionaryValue value;
|
||||
stream >> value;
|
||||
us.m_dictionaryData->insert(make_pair(key, value));
|
||||
stream >> us[key];
|
||||
}
|
||||
return stream;
|
||||
}
|
||||
|
||||
// Returns the element whose key is greater than the required sample count
|
||||
// or the last element if no such key exists.
|
||||
template <typename T>
|
||||
vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr)
|
||||
const T& TrainingParameterSchedule<T>::operator[](size_t sampleCount) const
|
||||
{
|
||||
if (viewPtr->IsSparse())
|
||||
assert(m_schedule.size() > 0);
|
||||
auto it = m_schedule.upper_bound(sampleCount);
|
||||
if (it == m_schedule.end())
|
||||
{
|
||||
LogicError("Sparse NDArrayView cannot be serialized into a vector.");
|
||||
--it;
|
||||
}
|
||||
|
||||
auto numElements = viewPtr->Shape().TotalSize();
|
||||
|
||||
vector<DictionaryValue> values(numElements);
|
||||
|
||||
NDArrayViewPtr cpuDataViewPtr = viewPtr;
|
||||
if ((viewPtr->Device().Type() != DeviceKind::CPU))
|
||||
{
|
||||
cpuDataViewPtr = MakeSharedObject<NDArrayView>(viewPtr->GetDataType(), viewPtr->Shape(), DeviceDescriptor::CPUDevice());
|
||||
cpuDataViewPtr->CopyFrom(*viewPtr);
|
||||
}
|
||||
|
||||
const T* buffer = cpuDataViewPtr->DataBuffer<T>();
|
||||
for (auto i = 0; i < numElements; ++i)
|
||||
{
|
||||
T v = buffer[i];
|
||||
values[i] = DictionaryValue(v);
|
||||
}
|
||||
|
||||
return values;
|
||||
return it->second;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const vector<DictionaryValue>& values)
|
||||
{
|
||||
if (viewPtr->IsSparse())
|
||||
{
|
||||
LogicError("Sparse NDArrayView cannot be deserialized from a vector.");
|
||||
}
|
||||
|
||||
auto numElements = viewPtr->Shape().TotalSize();
|
||||
|
||||
if (values.size() != numElements)
|
||||
{
|
||||
LogicError("Number of elements (%lu) in the deserialized representation does not match the expected value (%lu)",
|
||||
values.size(), numElements);
|
||||
}
|
||||
|
||||
NDArrayViewPtr cpuDataViewPtr = viewPtr;
|
||||
if ((viewPtr->Device().Type() != DeviceKind::CPU))
|
||||
{
|
||||
cpuDataViewPtr = MakeSharedObject<NDArrayView>(viewPtr->GetDataType(), viewPtr->Shape(), DeviceDescriptor::CPUDevice());
|
||||
}
|
||||
|
||||
T* buffer = cpuDataViewPtr->WritableDataBuffer<T>();
|
||||
for (auto i = 0; i < numElements; ++i)
|
||||
{
|
||||
buffer[i] = values[i].GetValue<T>();
|
||||
}
|
||||
|
||||
if ((viewPtr->Device().Type() != DeviceKind::CPU))
|
||||
{
|
||||
viewPtr->CopyFrom(*cpuDataViewPtr);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: we store the type info for every element in the vector, which is extremely redundant.
|
||||
// Instead, it'd be nice to introduce some sort of DictionaryValueVector.
|
||||
vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr)
|
||||
{
|
||||
switch (viewPtr->GetDataType())
|
||||
{
|
||||
case DataType::Float:
|
||||
return SerializeToVector<float>(viewPtr);
|
||||
case DataType::Double:
|
||||
return SerializeToVector<double>(viewPtr);
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
|
||||
}
|
||||
}
|
||||
|
||||
void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const vector<DictionaryValue>& values)
|
||||
{
|
||||
switch (viewPtr->GetDataType())
|
||||
{
|
||||
case DataType::Float:
|
||||
DeserializeFromVector<float>(viewPtr, values);
|
||||
break;
|
||||
case DataType::Double:
|
||||
DeserializeFromVector<double>(viewPtr, values);
|
||||
break;
|
||||
default:
|
||||
LogicError("Unsupported DataType %s", DataTypeName(viewPtr->GetDataType()));
|
||||
}
|
||||
}
|
||||
|
||||
template void DictionaryValue::AllocateDataPtr<NDShape>(const NDShape& value);
|
||||
template void DictionaryValue::AllocateDataPtr<vector<DictionaryValue>>(const vector<DictionaryValue>& value);
|
||||
template void DictionaryValue::AllocateDataPtr<wstring>(const wstring& value);
|
||||
template void DictionaryValue::AllocateDataPtr<Dictionary>(const Dictionary& value);
|
||||
template void DictionaryValue::AllocateDataPtr<NDArrayView>(const NDArrayView& value);
|
||||
|
||||
template void DictionaryValue::FreePtrAsType<NDShape>();
|
||||
template void DictionaryValue::FreePtrAsType<vector<DictionaryValue>>();
|
||||
template void DictionaryValue::FreePtrAsType<wstring>();
|
||||
template void DictionaryValue::FreePtrAsType<Dictionary>();
|
||||
template void DictionaryValue::FreePtrAsType<NDArrayView>();
|
||||
|
||||
template const double& TrainingParameterSchedule<double>::operator[](size_t key) const;
|
||||
}
|
||||
|
|
|
@ -167,9 +167,6 @@ namespace CNTK
|
|||
return var.IsInput() && var.IsSparse();
|
||||
}
|
||||
|
||||
std::vector<DictionaryValue> SerializeToVector(const NDArrayViewPtr& viewPtr);
|
||||
|
||||
void DeserializeFromVector(const NDArrayViewPtr& viewPtr, const std::vector<DictionaryValue>& values);
|
||||
|
||||
inline void AddIndentation(std::wstringstream& s, size_t numIndentationSpaces)
|
||||
{
|
||||
|
@ -250,7 +247,8 @@ namespace CNTK
|
|||
static_assert(std::is_same<T, bool>::value ||
|
||||
std::is_same<T, size_t>::value ||
|
||||
std::is_same<T, float>::value ||
|
||||
std::is_same<T, double>::value, "Unsupported ValueType");
|
||||
std::is_same<T, double>::value ||
|
||||
std::is_same<T, std::wstring>::value, "Unsupported ValueType");
|
||||
|
||||
std::vector<DictionaryValue> dictionaryValueVector;
|
||||
for (auto value : basicElementTypeVector)
|
||||
|
@ -265,7 +263,8 @@ namespace CNTK
|
|||
static_assert(std::is_same<T, bool>::value ||
|
||||
std::is_same<T, size_t>::value ||
|
||||
std::is_same<T, float>::value ||
|
||||
std::is_same<T, double>::value, "Unsupported ValueType");
|
||||
std::is_same<T, double>::value ||
|
||||
std::is_same<T, std::wstring>::value, "Unsupported ValueType");
|
||||
|
||||
std::vector<T> basicElementTypeVector;
|
||||
for (auto value : dictionaryValueVector)
|
||||
|
@ -313,4 +312,19 @@ namespace CNTK
|
|||
|
||||
return{ paddedOutputMapCount, kernelShape };
|
||||
}
|
||||
|
||||
// Creates a Constant holding 'value' on 'device', stored as float or double according to
// 'dataType'. The constant is constructed with an empty '{}' first argument
// (presumably the scalar shape -- confirm against the Constant constructors).
// Any other data type is reported through LogicError.
inline CNTK::Constant ScalarConstant(CNTK::DataType dataType, float value, const CNTK::DeviceDescriptor& device = CNTK::DeviceDescriptor::CPUDevice())
{
    switch (dataType)
    {
    case CNTK::DataType::Float:
        return CNTK::Constant({}, value, device);
    case CNTK::DataType::Double:
        return CNTK::Constant({}, (double)value, device);
    default:
        LogicError("CNTK::ScalarConstant: Unsupported DataType %s", DataTypeName(dataType));
    }
}
|
||||
|
||||
// Converts a per-sample momentum into a per-minibatch momentum by raising it to the
// power of the minibatch size.
inline double MomentumPerMB(double momentumPerSample, size_t minibatchSize)
{
    const double exponent = static_cast<double>(minibatchSize);
    return std::pow(momentumPerSample, exponent);
}
|
||||
}
|
||||
|
|
|
@ -7,6 +7,8 @@
|
|||
|
||||
namespace CNTK
|
||||
{
|
||||
/*static*/ const std::vector<Axis> Variable::s_defaultInputVariableDynamicAxes = { Axis::DefaultDynamicAxis(), Axis::DefaultBatchAxis() };
|
||||
|
||||
Variable::Variable(const FunctionPtr& function)
|
||||
: Variable(function->Output())
|
||||
{
|
||||
|
|
|
@ -11,6 +11,13 @@
|
|||
#define __UNIX__
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
// TODO: thread_local is supported in VS2015. Remove this macro when we upgrade to VS2015
|
||||
#define THREAD_LOCAL __declspec(thread)
|
||||
#else
|
||||
#define THREAD_LOCAL thread_local
|
||||
#endif
|
||||
|
||||
// ===========================================================================
|
||||
// compiler differences
|
||||
// ===========================================================================
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include <stdio.h>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <random>
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
|
@ -24,6 +25,31 @@ static inline size_t rand(const size_t begin, const size_t end)
|
|||
return begin + randno % (end - begin);
|
||||
}
|
||||
|
||||
// Rand based on Mersenne Twister.
|
||||
// We use our own distribution in order to match baselines between different operating systems,
|
||||
// because uniform_distribution is not guaranteed to provide the same numbers on different platforms.
|
||||
// TODO: Switching to Boost would eliminate this problem.
|
||||
static inline size_t RandMT(const size_t begin, const size_t end, std::mt19937_64& rng)
{
    // Take one raw draw from the engine (converted to size_t first) and fold it into
    // [begin, end) with a plain modulo.
    const size_t draw = rng();
    const size_t rangeSize = end - begin;
    const size_t offset = draw % rangeSize;
    return begin + offset;
}
|
||||
|
||||
// Rand based on Mersenne Twister.
|
||||
// We use our own distribution in order to match baselines between different operating systems,
|
||||
// instead of using std::shuffle which uses uniform_distribution internally.
|
||||
// TODO: Switching to Boost would eliminate this problem.
|
||||
template <typename TVector>
|
||||
inline void RandomShuffleMT(TVector& v, std::mt19937_64& rng)
|
||||
{
|
||||
foreach_index(currentLocation, v)
|
||||
{
|
||||
// Pick a random location a location and swap with current
|
||||
const size_t randomLocation = RandMT(0, v.size(), rng);
|
||||
std::swap(v[currentLocation], v[randomLocation]);
|
||||
}
|
||||
}
|
||||
|
||||
class RandomOrdering // note: NOT thread-safe at all
|
||||
{
|
||||
// constants for randomization
|
||||
|
|
|
@ -258,13 +258,20 @@ public:
|
|||
m_evalOrders[rootNode] = nodes;
|
||||
}
|
||||
|
||||
bool EvalOrderExists(const ComputationNodeBasePtr& rootNode) const
|
||||
{
|
||||
return m_evalOrders.find(rootNode) != m_evalOrders.end();
|
||||
}
|
||||
|
||||
// get depth-first traversal order
|
||||
// TODO: This is currently not immutable because it gets patched w.r.t. recurrent loops. Ideally we don't patch. Need to review and verify that it is sufficient.
|
||||
const std::list<ComputationNodeBasePtr>& GetEvalOrder(const ComputationNodeBasePtr& rootNode) const
|
||||
{
|
||||
auto iter = m_evalOrders.find(rootNode);
|
||||
if (iter == m_evalOrders.end())
|
||||
{
|
||||
LogicError("GetEvalOrder: Called without prior call to FormEvalOrder() for %ls %ls operation", rootNode->NodeName().c_str(), rootNode->OperationName().c_str());
|
||||
}
|
||||
return iter->second;
|
||||
}
|
||||
|
||||
|
|
|
@ -76,6 +76,9 @@ void ComputationNetwork::CopySubTree(const ComputationNetwork& fromNet,
|
|||
|
||||
ComputationNodeBasePtr fromRoot = fromNet.GetNodeFromName(fromName);
|
||||
|
||||
if (!fromNet.EvalOrderExists(fromRoot))
|
||||
const_cast<ComputationNetwork&>(fromNet).FormEvalOrder(fromRoot);
|
||||
|
||||
for (const auto& fromNode : fromNet.GetEvalOrder(fromRoot)) // BUGBUG: This probably will fail because the precomputed eval orders are invalid at this point.
|
||||
{
|
||||
wstring fromNodeName = fromNode->NodeName();
|
||||
|
@ -353,6 +356,9 @@ void ComputationNetwork::SetLearnableNodesBelowLearningRateMultiplier(const floa
|
|||
else
|
||||
{
|
||||
// for calculating a specific node
|
||||
if (!EvalOrderExists(rootNode))
|
||||
const_cast<ComputationNetwork&>(*this).FormEvalOrder(rootNode);
|
||||
|
||||
for (const auto& node : GetAllNodesForRoot(rootNode))
|
||||
{
|
||||
if (node->OperationName() == OperationNameOf(LearnableParameter))
|
||||
|
|
|
@ -32,15 +32,16 @@
|
|||
#define CNTK_MODEL_VERSION_1 1
|
||||
#define CNTK_MODEL_VERSION_2 2
|
||||
#define CNTK_MODEL_VERSION_3 3
|
||||
#define CNTK_MODEL_VERSION_4 4 // PastValue
|
||||
#define CNTK_MODEL_VERSION_5 5 // ND convolution and pooling
|
||||
#define CNTK_MODEL_VERSION_6 6 // batch-norm blending
|
||||
#define CNTK_MODEL_VERSION_7 7 // ElemType tag in model file
|
||||
#define CNTK_MODEL_VERSION_8 8 // DynamicAxis for inputs
|
||||
#define CNTK_MODEL_VERSION_9 9 // transpose flag in ConvolutionNode to support deconvolution
|
||||
#define CNTK_MODEL_VERSION_4 4 // PastValue
|
||||
#define CNTK_MODEL_VERSION_5 5 // ND convolution and pooling
|
||||
#define CNTK_MODEL_VERSION_6 6 // batch-norm blending
|
||||
#define CNTK_MODEL_VERSION_7 7 // ElemType tag in model file
|
||||
#define CNTK_MODEL_VERSION_8 8 // DynamicAxis for inputs
|
||||
#define CNTK_MODEL_VERSION_9 9 // transpose flag in ConvolutionNode to support deconvolution
|
||||
#define CNTK_MODEL_VERSION_10 10 // learning-rate multiplier for input nodes
|
||||
#define CNTK_MODEL_VERSION_11 11 // Times() m_inputRank to support parameter-rank inference
|
||||
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_11
|
||||
#define CNTK_MODEL_VERSION_11 11 // dynamic axis name for where nodes
|
||||
#define CNTK_MODEL_VERSION_12 12 // Times() m_inputRank to support parameter-rank inference
|
||||
#define CURRENT_CNTK_MODEL_VERSION CNTK_MODEL_VERSION_12
|
||||
|
||||
extern bool g_shareNodeValueMatrices;
|
||||
|
||||
|
|
|
@ -365,6 +365,7 @@ public:
|
|||
TensorShape outputShape;
|
||||
// If 2D convolution syntax is used then some of the tensor dimensions need to be inferred.
|
||||
if (m_convolution2D)
|
||||
// NOTE: when m_convolution2D is true, it's a legacy branch. Code should not enter here any more.
|
||||
{
|
||||
// Need to update some tensors with correct input dims.
|
||||
auto inDims = ImageDimensions(GetInputSampleLayout(inputIdx), m_imageLayout);
|
||||
|
@ -396,6 +397,8 @@ public:
|
|||
|
||||
outputShape = ConvolveGeometry::ComputeOutputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
|
||||
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
|
||||
// ConvolveGeometry always uses CHW.
|
||||
SetDims(ImageDimensions(outputShape, ImageLayoutKind::CHW).AsTensorShape(m_imageLayout), HasMBLayout());
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -414,9 +417,12 @@ public:
|
|||
outputShape = ConvolveGeometry::ComputeInputShape(inputShape, m_kernelShape, m_mapCount, m_stride,
|
||||
m_sharing, m_autoPad, m_lowerPad, m_upperPad);
|
||||
}
|
||||
|
||||
if (m_imageLayout == ImageLayoutKind::CHW)
|
||||
SetDims(outputShape, HasMBLayout());
|
||||
else // legacy format
|
||||
SetDims(ImageDimensions(outputShape, ImageLayoutKind::CHW).AsTensorShape(m_imageLayout), HasMBLayout());
|
||||
}
|
||||
// ConvolveGeometry always uses CHW.
|
||||
SetDims(ImageDimensions(outputShape, ImageLayoutKind::CHW).AsTensorShape(m_imageLayout), HasMBLayout());
|
||||
|
||||
// update LearnableParameter if it has 0 dimensions (to be inferred)
|
||||
// Typically this would be the #inputChannels (C).
|
||||
|
|
|
@ -450,9 +450,9 @@ public:
|
|||
assert(dimsA.size() == m_outputRank + numReductionDims);
|
||||
while (numReductionDims < inputRank)
|
||||
{
|
||||
dimsA.push_back(0);
|
||||
numReductionDims++;
|
||||
}
|
||||
dimsA.push_back(0);
|
||||
numReductionDims++;
|
||||
}
|
||||
}
|
||||
|
||||
// fill in the missing ones
|
||||
|
@ -561,8 +561,8 @@ class TransposeTimesNode : public TimesNodeBase<ElemType, true>
|
|||
|
||||
public:
|
||||
DeclareConstructorFromConfigWithNumInputs(TransposeTimesNode);
|
||||
TransposeTimesNode(DEVICEID_TYPE deviceId, const wstring& name)
|
||||
: Base(deviceId, name, /*outputRank=*/1, /*inputRank=*/1)
|
||||
TransposeTimesNode(DEVICEID_TYPE deviceId, const wstring& name, size_t outputRank = 1)
|
||||
: Base(deviceId, name, outputRank, /*inputRank=*/1)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
@ -665,6 +665,9 @@ public:
|
|||
m_axis1 = 1, m_axis2 = 2; // default
|
||||
}
|
||||
|
||||
int Axis1() const { return m_axis1; }
|
||||
int Axis2() const { return m_axis2; }
|
||||
|
||||
private:
|
||||
// compute the transposed tensor shape (in-place)
|
||||
void TransposeShape(TensorShape& shape) const
|
||||
|
|
|
@ -300,7 +300,7 @@ template <class ElemType>
|
|||
if (!m_pMBLayout)
|
||||
{
|
||||
m_pMBLayout = make_shared<MBLayout>(); // this generates a new layout
|
||||
m_pMBLayout->SetUniqueAxisName(L"WhereNodeAxis");
|
||||
m_pMBLayout->SetUniqueAxisName(m_dynamicAxisName);
|
||||
}
|
||||
// we map scalars to scalars
|
||||
if (isFinalValidationPass && Input(0)->GetSampleLayout().GetNumElements() != 1)
|
||||
|
|
|
@ -217,6 +217,9 @@ public:
|
|||
virtual bool /*ComputationNodeBase::*/ InputUsedInComputingInputNodesGradients(size_t childIndex) const override;
|
||||
virtual void /*ComputationNodeBase::*/ Validate(bool isFinalValidationPass) override;
|
||||
|
||||
std::wstring ReductionOpName() const { return m_operation; }
|
||||
int ReductionAxis() const { return m_axis; }
|
||||
|
||||
private:
|
||||
// operation attributes
|
||||
int m_axis;
|
||||
|
@ -341,11 +344,12 @@ public:
|
|||
fstream << m_axis;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
// these implement numpy-style negative bound values to index from the end
|
||||
size_t BeginIndex() const { return m_beginIndex >= 0 ? (size_t)m_beginIndex : (size_t)(m_beginIndex + Input(0)->GetSampleLayout()[m_axis - 1]); }
|
||||
size_t EndIndex() const { return m_endIndex > 0 ? (size_t)m_endIndex : (size_t)(m_endIndex + Input(0)->GetSampleLayout()[m_axis - 1]); }
|
||||
size_t EndIndex() const { return m_endIndex > 0 ? (size_t)m_endIndex : (size_t)(m_endIndex + Input(0)->GetSampleLayout()[m_axis - 1]); }
|
||||
int Axis() const { return m_axis; }
|
||||
|
||||
private:
|
||||
|
||||
// determine the tensor shape that represents slice of the input that we are taking
|
||||
TensorShape GetInputSlice(size_t rank, const FrameRange & fr) const
|
||||
|
@ -655,10 +659,11 @@ class WhereNode : public ComputationNodeNonLooping<ElemType>, public NumInputs<1
|
|||
typedef ComputationNodeNonLooping<ElemType> Base; UsingComputationNodeMembersBoilerplate;
|
||||
static const std::wstring TypeName() { return L"Where"; }
|
||||
|
||||
static const std::wstring DefaultWhereNodeDynamicAxisName() { return L"WhereNodeAxis"; }
|
||||
public:
|
||||
DeclareConstructorFromConfigWithNumInputs(WhereNode);
|
||||
WhereNode(DEVICEID_TYPE deviceId, const wstring& name) :
|
||||
Base(deviceId, name)
|
||||
WhereNode(DEVICEID_TYPE deviceId, const wstring& name, const wstring& dynamicAxisName = DefaultWhereNodeDynamicAxisName()) :
|
||||
Base(deviceId, name), m_dynamicAxisName(dynamicAxisName)
|
||||
{
|
||||
MarkValueNonSharable();
|
||||
}
|
||||
|
@ -669,11 +674,29 @@ public:
|
|||
virtual bool InputUsedInComputingInputNodesGradients(size_t /*childIndex*/) const override { return false; }
|
||||
virtual void Validate(bool isFinalValidationPass) override;
|
||||
|
||||
// Restores the node from a model file: base-class state first, then the name of the
// dynamic axis this node creates. Model files older than CNTK_MODEL_VERSION_11 do not
// contain the axis name, so the default name is used for them.
virtual void Load(File& fstream, size_t modelVersion) override
{
    Base::Load(fstream, modelVersion);

    const bool axisNameIsStored = (modelVersion >= CNTK_MODEL_VERSION_11);
    if (!axisNameIsStored)
    {
        // legacy model: nothing to read, fall back to the default axis name
        m_dynamicAxisName = DefaultWhereNodeDynamicAxisName();
        return;
    }

    fstream >> m_dynamicAxisName;
}
|
||||
|
||||
// Writes the node to a model file: base-class state first, then the dynamic axis name.
// The write order here must match the read order in Load(), which reads the axis name
// back right after the base-class state for model versions >= CNTK_MODEL_VERSION_11.
virtual void Save(File& fstream) const override
|
||||
{
|
||||
Base::Save(fstream);
|
||||
fstream << m_dynamicAxisName;
|
||||
}
|
||||
|
||||
std::wstring DynamicAxisName() const { return m_dynamicAxisName; }
|
||||
|
||||
private:
|
||||
// buffers for creating the result sequences (kept as object state to avoid memory allocations)
|
||||
std::vector<std::vector<size_t>> m_indexSequenceBuffer; // [sequenceIndex][t] for creating the result sequences
|
||||
std::vector<size_t> m_rowAllocationsBuffer; // [row] for determining new MBLayout packing
|
||||
std::vector<std::pair<size_t, size_t>> m_placementBuffer; // [sequenceIndex] assigned location for a sequence
|
||||
std::wstring m_dynamicAxisName;
|
||||
};
|
||||
|
||||
// -----------------------------------------------------------------------
|
||||
|
|
|
@ -37,14 +37,8 @@
|
|||
#pragma warning(disable : 4244) // conversion from 'double' to 'float'
|
||||
#pragma warning(disable : 4702) // unreachable code; triggered for unknown reasons
|
||||
|
||||
#ifdef USE_ACML
|
||||
// Download ACML 5.3.1 (e.g., acml5.3.1-ifort64.exe) or above
|
||||
// from http://developer.amd.com/tools/cpu-development/amd-core-math-library-acml/acml-downloads-resources/
|
||||
// Install the ifort64_mp variant (compiled with intel compiler) of the library
|
||||
// Set Environment variable ACML_PATH to C:\AMD\acml5.3.1\ifort64_mp or the folder you installed acml
|
||||
// to point to your folder for the include file and link library
|
||||
#include <acml.h> // requires ACML 5.3.1 and above
|
||||
#elif defined(USE_MKL)
|
||||
|
||||
#ifdef USE_MKL
|
||||
// requires MKL 10.0 and above
|
||||
#include <mkl.h>
|
||||
#else
|
||||
|
@ -57,12 +51,6 @@
|
|||
#include <lapacke.h>
|
||||
#endif
|
||||
|
||||
#ifdef USE_ACML // MKL has one additional parameter for different matrix order
|
||||
#define BLAS_COLMAJOR
|
||||
#else
|
||||
#define BLAS_COLMAJOR (int) MatrixOrder::ColMajor,
|
||||
#endif
|
||||
|
||||
#define SWAP(a, b) \
|
||||
{ \
|
||||
(a) ^= (b); \
|
||||
|
@ -912,11 +900,7 @@ void CPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, E
|
|||
#pragma omp parallel for
|
||||
foreach_column (j, us)
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
dcopy((int) numRows, reinterpret_cast<double*>(pArray + j), (int) numCols, reinterpret_cast<double*>(bufPtr + LocateColumn(j)), 1);
|
||||
#else
|
||||
cblas_dcopy((int) numRows, reinterpret_cast<double*>(pArray + j), (int) numCols, reinterpret_cast<double*>(bufPtr + LocateColumn(j)), 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -926,11 +910,7 @@ void CPUMatrix<ElemType>::SetValue(const size_t numRows, const size_t numCols, E
|
|||
{
|
||||
{
|
||||
#pragma warning(suppress : 4244)
|
||||
#ifdef USE_ACML
|
||||
scopy((int) numRows, reinterpret_cast<float*>(pArray + j), (int) numCols, reinterpret_cast<float*>(bufPtr + LocateColumn(j)), 1);
|
||||
#else
|
||||
cblas_scopy((int) numRows, reinterpret_cast<float*>(pArray + j), (int) numCols, reinterpret_cast<float*>(bufPtr + LocateColumn(j)), 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2844,20 +2824,12 @@ ElemType CPUMatrix<ElemType>::SumOfAbsElements() const
|
|||
|
||||
if (sizeof(ElemType) == sizeof(double))
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
return (ElemType) dasum((int) GetNumElements(), reinterpret_cast<double*>(Data()), 1);
|
||||
#else
|
||||
return (ElemType) cblas_dasum((int) GetNumElements(), reinterpret_cast<double*>(Data()), 1);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#pragma warning(suppress : 4244)
|
||||
#ifdef USE_ACML
|
||||
return sasum((int) GetNumElements(), reinterpret_cast<float*>(Data()), 1);
|
||||
#else
|
||||
return cblas_sasum((int) GetNumElements(), reinterpret_cast<float*>(Data()), 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3028,11 +3000,7 @@ void CPUMatrix<ElemType>::VectorNorm2(CPUMatrix<ElemType>& c, const bool isColWi
|
|||
#pragma omp parallel for
|
||||
foreach_column (j, c)
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
c(0, j) = (ElemType) dnrm2(m, reinterpret_cast<double*>(bufPtr + us.LocateColumn(j)), 1);
|
||||
#else
|
||||
c(0, j) = (ElemType) cblas_dnrm2(m, reinterpret_cast<double*>(bufPtr + us.LocateColumn(j)), 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -3041,11 +3009,7 @@ void CPUMatrix<ElemType>::VectorNorm2(CPUMatrix<ElemType>& c, const bool isColWi
|
|||
foreach_column (j, c)
|
||||
{
|
||||
#pragma warning(suppress : 4244)
|
||||
#ifdef USE_ACML
|
||||
c(0, j) = snrm2(m, reinterpret_cast<float*>(bufPtr + us.LocateColumn(j)), 1);
|
||||
#else
|
||||
c(0, j) = cblas_snrm2(m, reinterpret_cast<float*>(bufPtr + us.LocateColumn(j)), 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -3058,11 +3022,7 @@ void CPUMatrix<ElemType>::VectorNorm2(CPUMatrix<ElemType>& c, const bool isColWi
|
|||
#pragma omp parallel for
|
||||
foreach_row (i, c)
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
c(i, 0) = dnrm2(n, reinterpret_cast<double*>(bufPtr + i), m);
|
||||
#else
|
||||
c(i, 0) = cblas_dnrm2(n, reinterpret_cast<double*>(bufPtr + i), m);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -3071,11 +3031,7 @@ void CPUMatrix<ElemType>::VectorNorm2(CPUMatrix<ElemType>& c, const bool isColWi
|
|||
foreach_row (i, c)
|
||||
{
|
||||
#pragma warning(suppress : 4244)
|
||||
#ifdef USE_ACML
|
||||
c(i, 0) = snrm2(n, reinterpret_cast<float*>(bufPtr + i), m);
|
||||
#else
|
||||
c(i, 0) = cblas_snrm2(n, reinterpret_cast<float*>(bufPtr + i), m);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4486,34 +4442,22 @@ void CPUMatrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix
|
|||
|
||||
int m, n, k, l;
|
||||
int lda, ldb, ldc;
|
||||
#ifdef USE_ACML
|
||||
char transA, transB;
|
||||
#else
|
||||
CBLAS_TRANSPOSE mklTransA;
|
||||
CBLAS_TRANSPOSE mklTransB;
|
||||
#endif
|
||||
|
||||
if (transposeA)
|
||||
{
|
||||
m = (int) a.GetNumCols();
|
||||
k = (int) a.GetNumRows();
|
||||
lda = k;
|
||||
#ifdef USE_ACML
|
||||
transA = (char) MatrixTranspose::Trans;
|
||||
#else
|
||||
mklTransA = CBLAS_TRANSPOSE::CblasTrans;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
m = (int) a.GetNumRows();
|
||||
k = (int) a.GetNumCols();
|
||||
lda = m;
|
||||
#ifdef USE_ACML
|
||||
transA = (char) MatrixTranspose::NoTrans;
|
||||
#else
|
||||
mklTransA = CBLAS_TRANSPOSE::CblasNoTrans;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (transposeB)
|
||||
|
@ -4521,22 +4465,14 @@ void CPUMatrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix
|
|||
l = (int) b.GetNumCols();
|
||||
n = (int) b.GetNumRows();
|
||||
ldb = n;
|
||||
#ifdef USE_ACML
|
||||
transB = (char) MatrixTranspose::Trans;
|
||||
#else
|
||||
mklTransB = CBLAS_TRANSPOSE::CblasTrans;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
l = (int) b.GetNumRows();
|
||||
n = (int) b.GetNumCols();
|
||||
ldb = l;
|
||||
#ifdef USE_ACML
|
||||
transB = (char) MatrixTranspose::NoTrans;
|
||||
#else
|
||||
mklTransB = CBLAS_TRANSPOSE::CblasNoTrans;
|
||||
#endif
|
||||
}
|
||||
|
||||
assert(m > 0 && k > 0 && l > 0 && n > 0); // converting from size_t to int may cause overflow
|
||||
|
@ -4553,20 +4489,12 @@ void CPUMatrix<ElemType>::MultiplyAndWeightedAdd(ElemType alpha, const CPUMatrix
|
|||
|
||||
if (sizeof(ElemType) == sizeof(double))
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
dgemm(transA, transB, m, n, k, alpha, reinterpret_cast<double*>(a.Data()), lda, reinterpret_cast<double*>(b.Data()), ldb, beta, reinterpret_cast<double*>(c.Data()), ldc);
|
||||
#else
|
||||
cblas_dgemm((CBLAS_ORDER) BLAS_COLMAJOR mklTransA, mklTransB, m, n, k, alpha, reinterpret_cast<double*>(a.Data()), lda, reinterpret_cast<double*>(b.Data()), ldb, beta, reinterpret_cast<double*>(c.Data()), ldc);
|
||||
#endif
|
||||
cblas_dgemm((CBLAS_ORDER) (int)MatrixOrder::ColMajor, mklTransA, mklTransB, m, n, k, alpha, reinterpret_cast<double*>(a.Data()), lda, reinterpret_cast<double*>(b.Data()), ldb, beta, reinterpret_cast<double*>(c.Data()), ldc);
|
||||
}
|
||||
else
|
||||
{
|
||||
#pragma warning(suppress : 4244)
|
||||
#ifdef USE_ACML
|
||||
sgemm(BLAS_COLMAJOR transA, transB, m, n, k, alpha, reinterpret_cast<float*>(a.Data()), lda, reinterpret_cast<float*>(b.Data()), ldb, beta, reinterpret_cast<float*>(c.Data()), ldc);
|
||||
#else
|
||||
cblas_sgemm((CBLAS_ORDER) BLAS_COLMAJOR mklTransA, mklTransB, m, n, k, alpha, reinterpret_cast<float*>(a.Data()), lda, reinterpret_cast<float*>(b.Data()), ldb, beta, reinterpret_cast<float*>(c.Data()), ldc);
|
||||
#endif
|
||||
cblas_sgemm((CBLAS_ORDER) (int)MatrixOrder::ColMajor, mklTransA, mklTransB, m, n, k, alpha, reinterpret_cast<float*>(a.Data()), lda, reinterpret_cast<float*>(b.Data()), ldb, beta, reinterpret_cast<float*>(c.Data()), ldc);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4611,9 +4539,7 @@ void CPUMatrix<ElemType>::SVD(const CPUMatrix<ElemType>& A, CPUMatrix<ElemType>&
|
|||
|
||||
if (sizeof(ElemType) == sizeof(double))
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
dgesvd('A', 'A', (int) m, (int) n, reinterpret_cast<double*>(A.Data()), (int) lda, reinterpret_cast<double*>(SIGMA.Data()), reinterpret_cast<double*>(U.Data()), (int) ldu, reinterpret_cast<double*>(VT.Data()), (int) ldvt, &info);
|
||||
#elif defined(USE_MKL)
|
||||
#ifdef USE_MKL
|
||||
double wkopt;
|
||||
int lwork = -1;
|
||||
dgesvd("All", "All", &m, &n, reinterpret_cast<double*>(A.Data()), &lda, reinterpret_cast<double*>(SIGMA.Data()), reinterpret_cast<double*>(U.Data()), &ldu, reinterpret_cast<double*>(VT.Data()), &ldvt, &wkopt, &lwork, &info);
|
||||
|
@ -4622,16 +4548,13 @@ void CPUMatrix<ElemType>::SVD(const CPUMatrix<ElemType>& A, CPUMatrix<ElemType>&
|
|||
dgesvd("All", "All", &m, &n, reinterpret_cast<double*>(A.Data()), &lda, reinterpret_cast<double*>(SIGMA.Data()), reinterpret_cast<double*>(U.Data()), &ldu, reinterpret_cast<double*>(VT.Data()), &ldvt, reinterpret_cast<double*>(W.Data()), &lwork, &info);
|
||||
#else
|
||||
std::vector<double> superb(std::max(std::min(m, n) - 1, 1));
|
||||
info = LAPACKE_dgesvd(BLAS_COLMAJOR 'A', 'A', (int) m, (int) n, reinterpret_cast<double*>(A.Data()), (int) lda, reinterpret_cast<double*>(SIGMA.Data()),
|
||||
info = LAPACKE_dgesvd((int) MatrixOrder::ColMajor, 'A', 'A', (int) m, (int) n, reinterpret_cast<double*>(A.Data()), (int) lda, reinterpret_cast<double*>(SIGMA.Data()),
|
||||
reinterpret_cast<double*>(U.Data()), (int) ldu, reinterpret_cast<double*>(VT.Data()), (int) ldvt, &superb[0]);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
#pragma warning(suppress : 4244)
|
||||
sgesvd('A', 'A', (int) m, (int) n, reinterpret_cast<float*>(A.Data()), (int) lda, reinterpret_cast<float*>(SIGMA.Data()), reinterpret_cast<float*>(U.Data()), (int) ldu, reinterpret_cast<float*>(VT.Data()), (int) ldvt, &info);
|
||||
#elif defined(USE_MKL)
|
||||
#ifdef USE_MKL
|
||||
float wkopt;
|
||||
int lwork = -1;
|
||||
sgesvd("All", "All", &m, &n, reinterpret_cast<float*>(A.Data()), &lda, reinterpret_cast<float*>(SIGMA.Data()), reinterpret_cast<float*>(U.Data()), &ldu, reinterpret_cast<float*>(VT.Data()), &ldvt, &wkopt, &lwork, &info);
|
||||
|
@ -4640,7 +4563,7 @@ void CPUMatrix<ElemType>::SVD(const CPUMatrix<ElemType>& A, CPUMatrix<ElemType>&
|
|||
sgesvd("All", "All", &m, &n, reinterpret_cast<float*>(A.Data()), &lda, reinterpret_cast<float*>(SIGMA.Data()), reinterpret_cast<float*>(U.Data()), &ldu, reinterpret_cast<float*>(VT.Data()), &ldvt, reinterpret_cast<float*>(W.Data()), &lwork, &info);
|
||||
#else
|
||||
std::vector<float> superb(std::max(std::min(m, n) - 1, 1));
|
||||
info = LAPACKE_sgesvd(BLAS_COLMAJOR 'A', 'A', (int) m, (int) n, reinterpret_cast<float*>(A.Data()), (int) lda, reinterpret_cast<float*>(SIGMA.Data()),
|
||||
info = LAPACKE_sgesvd((int) MatrixOrder::ColMajor, 'A', 'A', (int) m, (int) n, reinterpret_cast<float*>(A.Data()), (int) lda, reinterpret_cast<float*>(SIGMA.Data()),
|
||||
reinterpret_cast<float*>(U.Data()), (int) ldu, reinterpret_cast<float*>(VT.Data()), (int) ldvt, &superb[0]);
|
||||
#endif
|
||||
}
|
||||
|
@ -4837,20 +4760,12 @@ void CPUMatrix<ElemType>::ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>&
|
|||
|
||||
if (sizeof(ElemType) == sizeof(double))
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
daxpy(len, alpha, reinterpret_cast<double*>(a.Data()), incx, reinterpret_cast<double*>(c.Data()), incy);
|
||||
#else
|
||||
cblas_daxpy(len, alpha, reinterpret_cast<double*>(a.Data()), incx, reinterpret_cast<double*>(c.Data()), incy);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#pragma warning(suppress : 4244)
|
||||
#ifdef USE_ACML
|
||||
saxpy(len, alpha, reinterpret_cast<float*>(a.Data()), incx, reinterpret_cast<float*>(c.Data()), incy);
|
||||
#else
|
||||
cblas_saxpy(len, alpha, reinterpret_cast<float*>(a.Data()), incx, reinterpret_cast<float*>(c.Data()), incy);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else if (a.GetNumElements() == 1) // scalar, add to all elements
|
||||
|
@ -4889,11 +4804,7 @@ void CPUMatrix<ElemType>::ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>&
|
|||
#pragma omp parallel for
|
||||
foreach_column (j, c)
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
daxpy(m, alpha, reinterpret_cast<double*>(aBufPtr), 1, reinterpret_cast<double*>(cBufPtr + c.LocateColumn(j)), 1);
|
||||
#else
|
||||
cblas_daxpy(m, alpha, reinterpret_cast<double*>(aBufPtr), 1, reinterpret_cast<double*>(cBufPtr + c.LocateColumn(j)), 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -4902,11 +4813,7 @@ void CPUMatrix<ElemType>::ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>&
|
|||
foreach_column (j, c)
|
||||
{
|
||||
#pragma warning(suppress : 4244)
|
||||
#ifdef USE_ACML
|
||||
saxpy(m, alpha, reinterpret_cast<float*>(aBufPtr), 1, reinterpret_cast<float*>(cBufPtr + c.LocateColumn(j)), 1);
|
||||
#else
|
||||
cblas_saxpy(m, alpha, reinterpret_cast<float*>(aBufPtr), 1, reinterpret_cast<float*>(cBufPtr + c.LocateColumn(j)), 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4925,11 +4832,7 @@ void CPUMatrix<ElemType>::ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>&
|
|||
#pragma omp parallel for
|
||||
foreach_row (i, c)
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
daxpy(n, alpha, reinterpret_cast<double*>(aBufPtr), 1, reinterpret_cast<double*>(cBufPtr + i), m);
|
||||
#else
|
||||
cblas_daxpy(n, alpha, reinterpret_cast<double*>(aBufPtr), 1, reinterpret_cast<double*>(cBufPtr + i), m);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -4938,11 +4841,7 @@ void CPUMatrix<ElemType>::ScaleAndAdd(ElemType alpha, const CPUMatrix<ElemType>&
|
|||
foreach_row (i, c)
|
||||
{
|
||||
#pragma warning(suppress : 4244)
|
||||
#ifdef USE_ACML
|
||||
saxpy(n, alpha, reinterpret_cast<float*>(aBufPtr), 1, reinterpret_cast<float*>(cBufPtr + i), m);
|
||||
#else
|
||||
cblas_saxpy(n, alpha, reinterpret_cast<float*>(aBufPtr), 1, reinterpret_cast<float*>(cBufPtr + i), m);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5163,20 +5062,12 @@ template <class ElemType>
|
|||
}
|
||||
else if (sizeof(ElemType) == sizeof(double))
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
dscal(len, alpha, reinterpret_cast<double*>(a.Data()), incx); // TODO: Use overloads.
|
||||
#else
|
||||
cblas_dscal(len, alpha, reinterpret_cast<double*>(a.Data()), incx);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#pragma warning(suppress : 4244)
|
||||
#ifdef USE_ACML
|
||||
sscal(len, alpha, reinterpret_cast<float*>(a.Data()), incx);
|
||||
#else
|
||||
cblas_sscal(len, alpha, reinterpret_cast<float*>(a.Data()), incx);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5224,11 +5115,7 @@ void CPUMatrix<ElemType>::InnerProduct(const CPUMatrix<ElemType>& a, const CPUMa
|
|||
#pragma omp parallel for
|
||||
foreach_column (j, c)
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
c(0, j) = (ElemType) ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn(j)), 1);
|
||||
#else
|
||||
c(0, j) = (ElemType) cblas_ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn(j)), 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -5237,11 +5124,7 @@ void CPUMatrix<ElemType>::InnerProduct(const CPUMatrix<ElemType>& a, const CPUMa
|
|||
foreach_column (j, c)
|
||||
{
|
||||
#pragma warning(suppress : 4244)
|
||||
#ifdef USE_ACML
|
||||
c(0, j) = (ElemType) sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn(j)), 1);
|
||||
#else
|
||||
c(0, j) = (ElemType) cblas_sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn(j)), 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5256,11 +5139,7 @@ void CPUMatrix<ElemType>::InnerProduct(const CPUMatrix<ElemType>& a, const CPUMa
|
|||
#pragma omp parallel for
|
||||
foreach_row (i, c)
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
c(i, 0) = ddot(n, reinterpret_cast<double*>(aBufPtr + i), m, reinterpret_cast<double*>(bBufPtr + i), m);
|
||||
#else
|
||||
c(i, 0) = cblas_ddot(n, reinterpret_cast<double*>(aBufPtr + i), m, reinterpret_cast<double*>(bBufPtr + i), m);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -5269,11 +5148,7 @@ void CPUMatrix<ElemType>::InnerProduct(const CPUMatrix<ElemType>& a, const CPUMa
|
|||
foreach_row (i, c)
|
||||
{
|
||||
#pragma warning(suppress : 4244)
|
||||
#ifdef USE_ACML
|
||||
c(i, 0) = sdot(n, reinterpret_cast<float*>(aBufPtr + i), m, reinterpret_cast<float*>(bBufPtr + i), m);
|
||||
#else
|
||||
c(i, 0) = cblas_sdot(n, reinterpret_cast<float*>(aBufPtr + i), m, reinterpret_cast<float*>(bBufPtr + i), m);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5298,20 +5173,12 @@ ElemType CPUMatrix<ElemType>::InnerProductOfMatrices(const CPUMatrix<ElemType>&
|
|||
|
||||
if (sizeof(ElemType) == sizeof(double))
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
return (ElemType) ddot((int) a.GetNumElements(), reinterpret_cast<double*>(a.Data()), 1, reinterpret_cast<double*>(b.Data()), 1);
|
||||
#else
|
||||
return (ElemType) cblas_ddot((int) a.GetNumElements(), reinterpret_cast<double*>(a.Data()), 1, reinterpret_cast<double*>(b.Data()), 1);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#pragma warning(suppress : 4244)
|
||||
#ifdef USE_ACML
|
||||
return (ElemType) sdot((int) a.GetNumElements(), reinterpret_cast<float*>(a.Data()), 1, reinterpret_cast<float*>(b.Data()), 1);
|
||||
#else
|
||||
return (ElemType) cblas_sdot((int) a.GetNumElements(), reinterpret_cast<float*>(a.Data()), 1, reinterpret_cast<float*>(b.Data()), 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -5539,21 +5406,13 @@ void CPUMatrix<ElemType>::InnerProductWithShiftNeg(const CPUMatrix<ElemType>& a,
|
|||
{
|
||||
for (long j = 0; j < n; j++)
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
c(0, j) = (ElemType) ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn(j)), 1);
|
||||
#else
|
||||
c(0, j) = (ElemType) cblas_ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn(j)), 1);
|
||||
#endif
|
||||
}
|
||||
for (long j = 0; j < n; j++)
|
||||
{
|
||||
for (long i = 1; i < negnumber + 1; i++)
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
c(i, j) = (ElemType) ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn((j + shift + i - 1) % n)), 1);
|
||||
#else
|
||||
c(i, j) = (ElemType) cblas_ddot(m, reinterpret_cast<double*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<double*>(bBufPtr + b.LocateColumn((j + shift + i - 1) % n)), 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5561,21 +5420,13 @@ void CPUMatrix<ElemType>::InnerProductWithShiftNeg(const CPUMatrix<ElemType>& a,
|
|||
{
|
||||
for (long j = 0; j < n; j++)
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
c(0, j) = (ElemType) sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn(j)), 1);
|
||||
#else
|
||||
c(0, j) = (ElemType) cblas_sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn(j)), 1);
|
||||
#endif
|
||||
}
|
||||
for (long j = 0; j < n; j++)
|
||||
{
|
||||
for (long i = 1; i < negnumber + 1; i++)
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
c(i, j) = (ElemType) sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn((j + shift + i - 1) % n)), 1);
|
||||
#else
|
||||
c(i, j) = (ElemType) cblas_sdot(m, reinterpret_cast<float*>(aBufPtr + a.LocateColumn(j)), 1, reinterpret_cast<float*>(bBufPtr + b.LocateColumn((j + shift + i - 1) % n)), 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5593,11 +5444,7 @@ void CPUMatrix<ElemType>::InnerProductWithShiftNeg(const CPUMatrix<ElemType>& a,
|
|||
#pragma omp parallel for
|
||||
foreach_row (i, c)
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
c(i, 0) = (ElemType) ddot(n, reinterpret_cast<double*>(aBufPtr + i), m, reinterpret_cast<double*>(bBufPtr + i), m);
|
||||
#else
|
||||
c(i, 0) = (ElemType) cblas_ddot(n, reinterpret_cast<double*>(aBufPtr + i), m, reinterpret_cast<double*>(bBufPtr + i), m);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -5606,11 +5453,7 @@ void CPUMatrix<ElemType>::InnerProductWithShiftNeg(const CPUMatrix<ElemType>& a,
|
|||
foreach_row (i, c)
|
||||
{
|
||||
#pragma warning(suppress : 4244)
|
||||
#ifdef USE_ACML
|
||||
c(i, 0) = sdot(n, reinterpret_cast<float*>(aBufPtr + i), m, reinterpret_cast<float*>(bBufPtr + i), m);
|
||||
#else
|
||||
c(i, 0) = cblas_sdot(n, reinterpret_cast<float*>(aBufPtr + i), m, reinterpret_cast<float*>(bBufPtr + i), m);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -6025,13 +5868,11 @@ int CPUMatrix<ElemType>::SetNumThreads(int numThreads)
|
|||
omp_set_num_threads(numThreads);
|
||||
numThreads = omp_get_max_threads();
|
||||
|
||||
#ifdef USE_ACML
|
||||
acmlsetnumthreads(numThreads);
|
||||
#elif defined(USE_MKL)
|
||||
mkl_set_num_threads(numThreads);
|
||||
#elif defined(USE_OPENBLAS)
|
||||
openblas_set_num_threads(numThreads);
|
||||
#endif
|
||||
#ifdef USE_MKL
|
||||
mkl_set_num_threads(numThreads);
|
||||
#elif defined(USE_OPENBLAS)
|
||||
openblas_set_num_threads(numThreads);
|
||||
#endif
|
||||
#endif
|
||||
return numThreads;
|
||||
}
|
||||
|
|
|
@ -23,15 +23,7 @@
|
|||
|
||||
#pragma warning(disable : 4127) // conditional expression is constant; "if (sizeof(ElemType)==sizeof(float))" triggers this
|
||||
|
||||
#ifdef USE_ACML
|
||||
// use ACML as default.
|
||||
// Download ACML 5.3.0 (e.g., acml5.3.0-ifort64.exe) or above
|
||||
// from http://developer.amd.com/tools/cpu-development/amd-core-math-library-acml/acml-downloads-resources/
|
||||
// Install the ifort64 variant (compiled with intel compiler) of the library
|
||||
// Set Environment variable ACML_PATH to C:\AMD\acml5.3.0\ifort64_mp or the folder you installed acml
|
||||
// to point to your folder for the include file and link library
|
||||
#include <acml.h> // requires ACML 5.3.0 and above
|
||||
#elif defined(USE_MKL)
|
||||
#ifdef USE_MKL
|
||||
// requires MKL 10.0 and above
|
||||
#include <mkl.h>
|
||||
#else
|
||||
|
@ -53,12 +45,6 @@
|
|||
// return 42;
|
||||
//}
|
||||
|
||||
#ifdef USE_ACML // MKL has one additional parameter for different matrix order
|
||||
#define BLAS_COLMAJOR
|
||||
#else
|
||||
#define BLAS_COLMAJOR (int) MatrixOrder::ColMajor,
|
||||
#endif
|
||||
|
||||
// TODO: Move to CommonMatrix.h
|
||||
#define IDX2C(i, j, ld) (((j) * (ld)) + (i)) // 0 based indexing
|
||||
|
||||
|
@ -261,11 +247,23 @@ void CPUSparseMatrix<ElemType>::SetValue(const CPUSparseMatrix<ElemType>& v)
|
|||
RequireSizeAndAllocate(v.GetNumRows(), v.GetNumCols(), v.NzSize());
|
||||
let nz = v.NzCount();
|
||||
|
||||
auto matrixFormat = v.GetFormat();
|
||||
if (((matrixFormat == matrixFormatSparseBlockCol) || (matrixFormat == matrixFormatSparseBlockRow)) && (v.GetBlockIdShift() > 0))
|
||||
NOT_IMPLEMENTED;
|
||||
|
||||
if (nz > 0)
|
||||
{
|
||||
memcpy(NzValues(), v.NzValues(), v.NzSize());
|
||||
memcpy(RowLocation(), v.RowLocation(), v.RowSize());
|
||||
memcpy(ColLocation(), v.ColLocation(), v.ColSize());
|
||||
|
||||
if ((matrixFormat == matrixFormatSparseCSC) || (matrixFormat == matrixFormatSparseCSR))
|
||||
{
|
||||
memcpy(RowLocation(), v.RowLocation(), v.RowSize());
|
||||
memcpy(ColLocation(), v.ColLocation(), v.ColSize());
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(GetBlockIds(), v.GetBlockIds(), v.GetBlockSize());
|
||||
}
|
||||
}
|
||||
if (v.m_sliceViewOffset > 0)
|
||||
{
|
||||
|
@ -384,6 +382,66 @@ CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::DoGatherColumnsOf(ElemType
|
|||
return *this;
|
||||
}
|
||||
|
||||
// *this[:,idx[j]] = a[:,j] * alpha + *this[:,idx[j]] * beta
|
||||
template <class ElemType>
|
||||
CPUSparseMatrix<ElemType>& CPUSparseMatrix<ElemType>::DoScatterColumnsOf(ElemType beta, const CPUMatrix<ElemType>& idx, const CPUSparseMatrix<ElemType>& a, ElemType alpha)
{
    // Scatters the columns of the sparse CSC matrix 'a' into this sparse CSC matrix:
    // column j of 'a', scaled by 'alpha', is written to column idx(0, j) of this matrix.
    //  - beta:  must be 0; any other value is NOT_IMPLEMENTED.
    //  - idx:   row vector of target column indices; a NaN or negative entry marks a gap,
    //           i.e. the corresponding column of 'a' is not written anywhere.
    //  - a:     source matrix; must be in CSC format, as must this matrix.
    //  - alpha: scale factor applied to every copied value.
    // This matrix must not hold any non-zero values on entry.
    // Returns *this.
    VerifyWritable(__func__);

    if ((a.GetFormat() != matrixFormatSparseCSC) || (GetFormat() != matrixFormatSparseCSC))
        NOT_IMPLEMENTED;

    if (idx.GetNumRows() != 1) // index is 1-dimensional only
        InvalidArgument("DoScatterColumnsOf: Map must be a row vector.");

    if (beta != 0)
        NOT_IMPLEMENTED;

    if (NzCount() != 0)
        InvalidArgument("CPUSparseMatrix::DoScatterColumnsOf: The target matrix cannot have pre-existing non-zero values when being scattered into");

    size_t numNonZeroElements = a.NzCount();

    if (beta == 0)
        RequireSizeAndAllocate(GetNumRows(), GetNumCols(), numNonZeroElements);

    // Setup the Secondary index: first collect the number of elements each target column receives...
    std::vector<int> columnElementCounts(GetNumCols(), 0);
    size_t numColsToWrite = idx.GetNumCols();
    for (size_t j = 0; j < numColsToWrite; j++)
    {
        auto jOutF = idx(0, j); // this is the column we need to write to
        if (::isnan(jOutF) || (jOutF < 0)) // negative index means gap
            continue;
        size_t jOut = (size_t)jOutF;
        // Guard the per-column table (and the index writes below) against an out-of-range target.
        if (jOut >= GetNumCols())
            InvalidArgument("DoScatterColumnsOf: Map out of bounds.");
        columnElementCounts[jOut] = a.SecondaryIndexLocation()[j + 1] - a.SecondaryIndexLocation()[j];
    }

    // ...then turn the counts into column start offsets (running sum).
    // TODO: Replace with std::exclusive_scan when we switch to C++17
    for (size_t i = 1; i <= GetNumCols(); ++i)
        SecondaryIndexLocation()[i] = SecondaryIndexLocation()[i - 1] + columnElementCounts[i - 1];

    // TODO: Does it make sense to parallelize this?
    for (size_t j = 0; j < numColsToWrite; j++)
    {
        auto jOutF = idx(0, j); // this is the column we need to write to
        if (::isnan(jOutF) || (jOutF < 0)) // negative index means gap
            continue;
        size_t jOut = (size_t)jOutF;

        // Take the source position from a's own column index for column j. A single running
        // offset would fall out of step as soon as a skipped (gap) column of 'a' holds elements.
        size_t offset = a.SecondaryIndexLocation()[j];
        auto start = SecondaryIndexLocation()[jOut];
        auto end = SecondaryIndexLocation()[jOut + 1];
        for (auto p = start; p < end; p++, offset++)
        {
            GetUnCompIndex()[p] = a.GetUnCompIndex()[offset];
            Buffer()[p] = a.Buffer()[offset] * alpha;
        }
    }

    return *this;
}
|
||||
|
||||
template <class ElemType>
|
||||
void CPUSparseMatrix<ElemType>::Print(const char* matrixName) const
|
||||
{
|
||||
|
@ -587,13 +645,7 @@ void CPUSparseMatrix<ElemType>::SetMatrixFromCSCFormat(const CPUSPARSE_INDEX_TYP
|
|||
}
|
||||
|
||||
template <class ElemType>
|
||||
ElemType* CPUSparseMatrix<ElemType>::Data() const
|
||||
{
|
||||
return Buffer() + GetCompIndex()[m_sliceViewOffset];
|
||||
}
|
||||
|
||||
template <class ElemType>
|
||||
ElemType* CPUSparseMatrix<ElemType>::Data()
|
||||
ElemType* CPUSparseMatrix<ElemType>::Data() const
|
||||
{
|
||||
return (Buffer() +
|
||||
((GetFormat() == matrixFormatSparseCSC || GetFormat() == matrixFormatSparseCSR) ? GetCompIndex()[m_sliceViewOffset] : 0));
|
||||
|
@ -1340,20 +1392,12 @@ ElemType CPUSparseMatrix<ElemType>::SumOfAbsElements() const
|
|||
|
||||
if (sizeof(ElemType) == sizeof(double))
|
||||
{
|
||||
#ifdef USE_ACML
|
||||
return (ElemType) dasum((int) this->NzCount(), reinterpret_cast<double*>(Data()), 1);
|
||||
#else
|
||||
return (ElemType) cblas_dasum((int) this->NzCount(), reinterpret_cast<double*>(Data()), 1);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
#pragma warning(suppress : 4244)
|
||||
#ifdef USE_ACML
|
||||
return sasum((int) this->NzCount(), reinterpret_cast<float*>(Data()), 1);
|
||||
#else
|
||||
return cblas_sasum((int) this->NzCount(), reinterpret_cast<float*>(Data()), 1);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1495,7 +1539,6 @@ template void CPUSparseMatrix<char>::SetValue(size_t, size_t, char);
|
|||
template void CPUSparseMatrix<char>::SetValue(CPUSparseMatrix<char> const&);
|
||||
//template void CPUSparseMatrix<char>::SetValue(GPUSparseMatrix<char> const&);
|
||||
template char* CPUSparseMatrix<char>::Data() const;
|
||||
template char* CPUSparseMatrix<char>::Data();
|
||||
template void CPUSparseMatrix<char>::Reset(void);
|
||||
template void CPUSparseMatrix<char>::Resize(const size_t, const size_t, const size_t, const bool);
|
||||
template void CPUSparseMatrix<char>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, bool);
|
||||
|
@ -1518,7 +1561,6 @@ template void CPUSparseMatrix<short>::SetValue(size_t, size_t, short);
|
|||
template void CPUSparseMatrix<short>::SetValue(CPUSparseMatrix<short> const&);
|
||||
//template void CPUSparseMatrix<short>::SetValue(GPUSparseMatrix<short> const&);
|
||||
template short* CPUSparseMatrix<short>::Data() const;
|
||||
template short* CPUSparseMatrix<short>::Data();
|
||||
template void CPUSparseMatrix<short>::Reset(void);
|
||||
template void CPUSparseMatrix<short>::Resize(const size_t, const size_t, const size_t, const bool);
|
||||
template void CPUSparseMatrix<short>::RequireSizeAndAllocate(const size_t, const size_t, const size_t, const bool, bool);
|
||||
|
|
|
@ -92,13 +92,13 @@ public:
|
|||
void MaskColumnsValue(const CPUMatrix<char>& columnsMask, ElemType val);
|
||||
|
||||
CPUSparseMatrix<ElemType>& DoGatherColumnsOf(ElemType beta, const CPUMatrix<ElemType>& idx, const CPUSparseMatrix<ElemType>& a, ElemType alpha);
|
||||
CPUSparseMatrix<ElemType>& DoScatterColumnsOf(ElemType beta, const CPUMatrix<ElemType>& idx, const CPUSparseMatrix<ElemType>& a, ElemType alpha);
|
||||
|
||||
size_t BufferSize() const
|
||||
{
|
||||
return GetSizeAllocated() * sizeof(ElemType);
|
||||
}
|
||||
ElemType* Data() const;
|
||||
ElemType* Data();
|
||||
inline size_t GetNumElemAllocated() const
|
||||
{
|
||||
return GetSizeAllocated();
|
||||
|
@ -262,7 +262,8 @@ public:
|
|||
|
||||
// Returns a pointer into the uncompressed index array.
// For CSC/CSR the pointer is advanced by the compressed-index entry at the slice-view offset;
// for every other format the compressed index is not consulted and the array start is returned.
CPUSPARSE_INDEX_TYPE* MajorIndexLocation() const
{
    return (GetUnCompIndex() +
            ((GetFormat() == matrixFormatSparseCSC || GetFormat() == matrixFormatSparseCSR) ? GetCompIndex()[m_sliceViewOffset] : 0));
} // this is the major index, row/col ids in CSC/CSR format
|
||||
|
||||
size_t MajorIndexCount() const
|
||||
|
|
|
@ -237,7 +237,7 @@ std::pair<size_t, size_t> TracingGPUMemoryAllocator::GetFreeAndTotalMemoryInMBs(
|
|||
// deviceId - the device on which the operation will take place
|
||||
void PrepareDevice(DEVICEID_TYPE deviceId)
|
||||
{
|
||||
static DEVICEID_TYPE currentDevice = DEVICEID_NOTYETDETERMINED;
|
||||
THREAD_LOCAL static DEVICEID_TYPE currentDevice = DEVICEID_NOTYETDETERMINED;
|
||||
// and if we last set the device to be this device we are good
|
||||
if (deviceId == currentDevice)
|
||||
return;
|
||||
|
|
|
@ -227,6 +227,5 @@
|
|||
<Target Name="CheckDependencies">
|
||||
<Error Condition="'$(MathLibrary)' == 'MKL' And '$(CNTK_MKL_PATH)' == ''" Text="CNTK custom MKL location not specified, see https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#optional-mkl for instructions." />
|
||||
<Error Condition="'$(MathLibrary)' == 'MKL' And !Exists('$(CNTKCustomMKLPath)')" Text="CNTK custom MKL not found. See https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#optional-mkl for instructions." />
|
||||
<Error Condition="'$(MathLibrary)' == 'ACML' And !Exists('$(ACML_PATH)')" Text="ACML not found. See https://github.com/Microsoft/CNTK/wiki/Setup-CNTK-on-Windows#acml for instructions." />
|
||||
</Target>
|
||||
</Project>
|
||||
|
|
|
@ -1081,7 +1081,7 @@ Matrix<ElemType>& Matrix<ElemType>::DoScatterColumnsOf(ElemType beta, const Matr
|
|||
DISPATCH_MATRIX_ON_FLAG(&a, this,
|
||||
{ m_CPUMatrix->DoScatterColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUMatrix, alpha); },
|
||||
{ m_GPUMatrix->DoScatterColumnsOf(beta, *idx.m_GPUMatrix, *a.m_GPUMatrix, alpha); },
|
||||
{ NOT_IMPLEMENTED; },
|
||||
{ m_CPUSparseMatrix->DoScatterColumnsOf(beta, *idx.m_CPUMatrix, *a.m_CPUSparseMatrix, alpha); },
|
||||
{ NOT_IMPLEMENTED; });
|
||||
|
||||
return *this;
|
||||
|
|
|
@ -377,8 +377,8 @@ void HTKMLFReader<ElemType>::PrepareForTrainingOrTesting(const ConfigRecordType&
|
|||
|
||||
// second, remove trailing slash if there is any
|
||||
// TODO: when gcc -v is 4.9 or greater, this should be: std::regex_replace(rootpath, L"\\/+$", wstring());
|
||||
size_t stringPos = 0;
|
||||
for (stringPos = rootpath.length() - 1; stringPos >= 0; stringPos--)
|
||||
int stringPos = 0;
|
||||
for (stringPos = (int) (rootpath.length() - 1); stringPos >= 0; stringPos--)
|
||||
{
|
||||
if (rootpath[stringPos] != L'/')
|
||||
{
|
||||
|
@ -517,11 +517,11 @@ void HTKMLFReader<ElemType>::PrepareForTrainingOrTesting(const ConfigRecordType&
|
|||
m_lattices->setverbosity(m_verbosity);
|
||||
|
||||
// now get the frame source. This has better randomization and doesn't create temp files
|
||||
bool minimizeReaderMemoryFootprint = readerConfig(L"minimizeReaderMemoryFootprint", true);
|
||||
m_frameSource.reset(new msra::dbn::minibatchutterancesourcemulti(infilesmulti, labelsmulti, m_featDims, m_labelDims,
|
||||
bool useMersenneTwisterRand = readerConfig(L"useMersenneTwisterRand", false);
|
||||
m_frameSource.reset(new msra::dbn::minibatchutterancesourcemulti(useMersenneTwisterRand, infilesmulti, labelsmulti, m_featDims, m_labelDims,
|
||||
numContextLeft, numContextRight, randomize,
|
||||
*m_lattices, m_latticeMap, m_frameMode,
|
||||
minimizeReaderMemoryFootprint, m_expandToUtt));
|
||||
m_expandToUtt));
|
||||
m_frameSource->setverbosity(m_verbosity);
|
||||
}
|
||||
else if (EqualCI(readMethod, L"rollingWindow"))
|
||||
|
|
|
@ -12,7 +12,8 @@
|
|||
#include "latticearchive.h" // for reading HTK phoneme lattices (MMI training)
|
||||
#include "minibatchsourcehelpers.h"
|
||||
#include "minibatchiterator.h"
|
||||
#include "unordered_set"
|
||||
#include <unordered_set>
|
||||
#include <random>
|
||||
|
||||
namespace msra { namespace dbn {
|
||||
|
||||
|
@ -38,6 +39,10 @@ class minibatchutterancesourcemulti : public minibatchsource
|
|||
// const std::vector<unique_ptr<latticesource>> &lattices;
|
||||
const latticesource &lattices;
|
||||
|
||||
// Flag indicating whether to use Mersenne Twister random generator.
|
||||
bool m_useMersenneTwister;
|
||||
std::mt19937_64 m_rng;
|
||||
|
||||
// std::vector<latticesource> lattices;
|
||||
// word-level transcripts (for MMI mode when adding best path to lattices)
|
||||
const map<wstring, msra::lattices::lattice::htkmlfwordsequence> &allwordtranscripts; // (used for getting word-level transcripts)
|
||||
|
@ -413,6 +418,7 @@ class minibatchutterancesourcemulti : public minibatchsource
|
|||
// When true we use a rolling window of randomized framerefs to minimize memory
|
||||
// footprint, instead of using a large vector listing all frames in the training corpus
|
||||
// Functionally, the 2 methods are identical.
|
||||
// When it is true, we also use Mersenne Twister random generator for randomization.
|
||||
const bool m_minimizeMemoryFootprint;
|
||||
|
||||
// [globalt-sweepts] -> (chunk, utt, frame) lookup table for randomized frames --this can be REALLY big!
|
||||
|
@ -429,6 +435,10 @@ class minibatchutterancesourcemulti : public minibatchsource
|
|||
size_t m_currentRangeEndChunkIdx;
|
||||
size_t m_nextFramePosNotYetRandomized;
|
||||
|
||||
// If m_minimizeMemoryFootprint is true, Mersenne Twister is used for randomization
|
||||
// because rand has problems in distributed case.
|
||||
std::mt19937_64 m_rng;
|
||||
|
||||
public:
|
||||
framerandomizer(const std::vector<std::vector<chunk>>& randomizedChunks, bool minimizeMemoryFootprint)
|
||||
: m_randomizedChunks(randomizedChunks), m_minimizeMemoryFootprint(minimizeMemoryFootprint), m_currentRangeBeginChunkIdx(0), m_currentRangeEndChunkIdx(0), m_nextFramePosNotYetRandomized(0)
|
||||
|
@ -496,7 +506,9 @@ class minibatchutterancesourcemulti : public minibatchsource
|
|||
|
||||
for (;;) // (randomization retry loop)
|
||||
{
|
||||
size_t tswap = Microsoft::MSR::CNTK::rand(postbegin, postend); // random frame position within allowed range
|
||||
size_t tswap = m_minimizeMemoryFootprint ?
|
||||
Microsoft::MSR::CNTK::RandMT(postbegin, postend, m_rng) :
|
||||
Microsoft::MSR::CNTK::rand(postbegin, postend); // random frame position within allowed range
|
||||
// We want to swap 't' to 'tswap' and 'tswap' to 't'.
|
||||
// - Both may have been swapped before.
|
||||
// - Both must stay within the randomization window of their respective position.
|
||||
|
@ -542,11 +554,11 @@ class minibatchutterancesourcemulti : public minibatchsource
|
|||
|
||||
void reset(unsigned int randSeed)
|
||||
{
|
||||
srand(randSeed);
|
||||
size_t sweepts = m_randomizedChunks[0][0].globalts;
|
||||
size_t totalFrames = m_randomizedChunks[0].back().globalte() - sweepts;
|
||||
if (m_minimizeMemoryFootprint)
|
||||
{
|
||||
m_rng.seed(randSeed);
|
||||
m_randomizedframerefsWindow.clear();
|
||||
m_currentRangeBeginChunkIdx = m_randomizedChunks[0][0].windowbegin;
|
||||
m_currentRangeEndChunkIdx = m_currentRangeBeginChunkIdx;
|
||||
|
@ -554,6 +566,7 @@ class minibatchutterancesourcemulti : public minibatchsource
|
|||
}
|
||||
else
|
||||
{
|
||||
srand(randSeed + 1);
|
||||
if (m_randomizedframerefs.size() != totalFrames)
|
||||
m_randomizedframerefs.resize(totalFrames);
|
||||
|
||||
|
@ -866,10 +879,11 @@ public:
|
|||
// constructor
|
||||
// Pass empty labels to denote unsupervised training (so getbatch() will not return uids).
|
||||
// This mode requires utterances with time stamps.
|
||||
minibatchutterancesourcemulti(const std::vector<std::vector<wstring>> &infiles, const std::vector<map<wstring, std::vector<msra::asr::htkmlfentry>>> &labels,
|
||||
minibatchutterancesourcemulti(bool useMersenneTwister, const std::vector<std::vector<wstring>> &infiles, const std::vector<map<wstring, std::vector<msra::asr::htkmlfentry>>> &labels,
|
||||
std::vector<size_t> vdim, std::vector<size_t> udim, std::vector<size_t> leftcontext, std::vector<size_t> rightcontext, size_t randomizationrange,
|
||||
const latticesource &lattices, const map<wstring, msra::lattices::lattice::htkmlfwordsequence> &allwordtranscripts, const bool framemode, bool minimizeMemoryFootprint, std::vector<bool> expandToUtt)
|
||||
: vdim(vdim), leftcontext(leftcontext), rightcontext(rightcontext), sampperiod(0), featdim(0), randomizationrange(randomizationrange), currentsweep(SIZE_MAX), lattices(lattices), allwordtranscripts(allwordtranscripts), framemode(framemode), chunksinram(0), timegetbatch(0), verbosity(2), m_generatePhoneBoundaries(!lattices.empty()), m_frameRandomizer(randomizedchunks, minimizeMemoryFootprint), expandToUtt(expandToUtt)
|
||||
const latticesource &lattices, const map<wstring, msra::lattices::lattice::htkmlfwordsequence> &allwordtranscripts, const bool framemode, std::vector<bool> expandToUtt)
|
||||
: vdim(vdim), leftcontext(leftcontext), rightcontext(rightcontext), sampperiod(0), featdim(0), randomizationrange(randomizationrange), currentsweep(SIZE_MAX), lattices(lattices), allwordtranscripts(allwordtranscripts), framemode(framemode), chunksinram(0), timegetbatch(0), verbosity(2), m_generatePhoneBoundaries(!lattices.empty()), m_frameRandomizer(randomizedchunks, useMersenneTwister), expandToUtt(expandToUtt),
|
||||
m_useMersenneTwister(useMersenneTwister)
|
||||
// [v-hansu] change framemode (lattices.empty()) into framemode (false) to run utterance mode without lattice
|
||||
// you also need to change another line, search : [v-hansu] comment out to run utterance mode without lattice
|
||||
{
|
||||
|
@ -1251,8 +1265,16 @@ private:
|
|||
randomizedchunkrefs[i].push_back(allchunks[i].begin() + j);
|
||||
assert(randomizedchunkrefs[i].size() == allchunks[i].size());
|
||||
|
||||
// note that sincew randomshuffle() uses sweep as seed, this will keep the randomization common across all feature streams
|
||||
randomshuffle(randomizedchunkrefs[i], sweep); // bring into random order (with random seed depending on sweep)
|
||||
if (m_useMersenneTwister)
|
||||
{
|
||||
m_rng.seed((unsigned long)sweep);
|
||||
Microsoft::MSR::CNTK::RandomShuffleMT(randomizedchunkrefs[i], m_rng); // bring into random order (with random seed depending on sweep)
|
||||
}
|
||||
else
|
||||
{
|
||||
// note that since randomshuffle() uses sweep as seed, this will keep the randomization common across all feature streams
|
||||
randomshuffle(randomizedchunkrefs[i], sweep); // bring into random order (with random seed depending on sweep)
|
||||
}
|
||||
}
|
||||
|
||||
// place them onto the global timeline -> randomizedchunks[]
|
||||
|
@ -1348,7 +1370,7 @@ private:
|
|||
// check we got those setup right
|
||||
|
||||
// we now randomly shuffle randomizedutterancerefs[pos], while considering the constraints of what chunk range needs to be in memory
|
||||
srand((unsigned int) sweep + 1);
|
||||
m_useMersenneTwister ? m_rng.seed((unsigned long)sweep) : srand((unsigned int)sweep + 1);
|
||||
for (size_t i = 0; i < randomizedutterancerefs.size(); i++)
|
||||
{
|
||||
// get valid randomization range, expressed in chunks
|
||||
|
@ -1364,7 +1386,9 @@ private:
|
|||
for (;;)
|
||||
{
|
||||
// pick a random location
|
||||
const size_t j = Microsoft::MSR::CNTK::rand(posbegin, posend); // a random number within the window
|
||||
const size_t j = m_useMersenneTwister ?
|
||||
Microsoft::MSR::CNTK::RandMT(posbegin, posend, m_rng) :
|
||||
Microsoft::MSR::CNTK::rand(posbegin, posend); // a random number within the window
|
||||
if (i == j)
|
||||
break; // the random gods say "this one points to its original position"... nothing wrong about that, but better not try to swap
|
||||
|
||||
|
@ -1416,7 +1440,7 @@ private:
|
|||
}
|
||||
else // frame mode
|
||||
{
|
||||
m_frameRandomizer.reset((unsigned int)sweep + 1);
|
||||
m_frameRandomizer.reset((unsigned int)sweep);
|
||||
}
|
||||
|
||||
return sweep;
|
||||
|
|
|
@ -21,7 +21,7 @@ public:
|
|||
ByteReader() = default;
|
||||
virtual ~ByteReader() = default;
|
||||
|
||||
virtual void Register(size_t seqId, const std::string& path) = 0;
|
||||
virtual void Register(const std::map<std::string, size_t>& sequences) = 0;
|
||||
virtual cv::Mat Read(size_t seqId, const std::string& path, bool grayscale) = 0;
|
||||
|
||||
DISABLE_COPY_AND_MOVE(ByteReader);
|
||||
|
@ -30,7 +30,7 @@ public:
|
|||
class FileByteReader : public ByteReader
|
||||
{
|
||||
public:
|
||||
void Register(size_t, const std::string&) override {}
|
||||
void Register(const std::map<std::string, size_t>&) override {}
|
||||
cv::Mat Read(size_t seqId, const std::string& path, bool grayscale) override;
|
||||
};
|
||||
|
||||
|
@ -40,7 +40,7 @@ class ZipByteReader : public ByteReader
|
|||
public:
|
||||
ZipByteReader(const std::string& zipPath);
|
||||
|
||||
void Register(size_t seqId, const std::string& path) override;
|
||||
void Register(const std::map<std::string, size_t>& sequences) override;
|
||||
cv::Mat Read(size_t seqId, const std::string& path, bool grayscale) override;
|
||||
|
||||
private:
|
||||
|
|
|
@ -13,6 +13,7 @@
|
|||
#include "ImageConfigHelper.h"
|
||||
#include "StringUtil.h"
|
||||
#include "ConfigUtil.h"
|
||||
#include "TimerUtility.h"
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
|
@ -135,6 +136,7 @@ ImageDataDeserializer::ImageDataDeserializer(CorpusDescriptorPtr corpus, const C
|
|||
}
|
||||
|
||||
string precision = (ConfigValue)config("precision", "float");
|
||||
m_verbosity = config(L"verbosity", 0);
|
||||
|
||||
// Feature stream.
|
||||
ConfigParameters featureSection = inputs(featureNames[0]);
|
||||
|
@ -144,6 +146,7 @@ ImageDataDeserializer::ImageDataDeserializer(CorpusDescriptorPtr corpus, const C
|
|||
features->m_storageType = StorageType::dense;
|
||||
features->m_elementType = AreEqualIgnoreCase(precision, "float") ? ElementType::tfloat : ElementType::tdouble;
|
||||
m_streams.push_back(features);
|
||||
m_featureElementType = features->m_elementType;
|
||||
|
||||
// Label stream.
|
||||
ConfigParameters label = inputs(labelNames[0]);
|
||||
|
@ -179,6 +182,8 @@ ImageDataDeserializer::ImageDataDeserializer(const ConfigParameters& config)
|
|||
const auto& label = m_streams[configHelper.GetLabelStreamId()];
|
||||
const auto& feature = m_streams[configHelper.GetFeatureStreamId()];
|
||||
|
||||
m_verbosity = config(L"verbosity", 0);
|
||||
|
||||
// Expect data in HWC.
|
||||
ImageDimensions dimensions(*feature->m_sampleLayout, configHelper.GetDataFormat());
|
||||
feature->m_sampleLayout = std::make_shared<TensorShape>(dimensions.AsTensorShape(HWC));
|
||||
|
@ -240,9 +245,13 @@ void ImageDataDeserializer::CreateSequenceDescriptions(CorpusDescriptorPtr corpu
|
|||
size_t curId = 0;
|
||||
std::string line;
|
||||
PathReaderMap knownReaders;
|
||||
ReaderSequenceMap readerSequences;
|
||||
ImageSequenceDescription description;
|
||||
description.m_numberOfSamples = 1;
|
||||
|
||||
Timer timer;
|
||||
timer.Start();
|
||||
|
||||
auto& stringRegistry = corpus->GetStringRegistry();
|
||||
for (size_t lineIndex = 0; std::getline(mapFile, line); ++lineIndex)
|
||||
{
|
||||
|
@ -296,9 +305,20 @@ void ImageDataDeserializer::CreateSequenceDescriptions(CorpusDescriptorPtr corpu
|
|||
|
||||
m_keyToSequence[description.m_key.m_sequence] = m_imageSequences.size();
|
||||
m_imageSequences.push_back(description);
|
||||
RegisterByteReader(description.m_id, description.m_path, knownReaders);
|
||||
RegisterByteReader(description.m_id, description.m_path, knownReaders, readerSequences);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& reader : knownReaders)
|
||||
{
|
||||
reader.second->Register(readerSequences[reader.first]);
|
||||
}
|
||||
|
||||
timer.Stop();
|
||||
if (m_verbosity > 1)
|
||||
{
|
||||
fprintf(stderr, "ImageDeserializer: Read information about %d images in %.6g seconds\n", (int)m_imageSequences.size(), timer.ElapsedSeconds());
|
||||
}
|
||||
}
|
||||
|
||||
ChunkPtr ImageDataDeserializer::GetChunk(ChunkIdType chunkId)
|
||||
|
@ -307,7 +327,7 @@ ChunkPtr ImageDataDeserializer::GetChunk(ChunkIdType chunkId)
|
|||
return std::make_shared<ImageChunk>(sequenceDescription, *this);
|
||||
}
|
||||
|
||||
void ImageDataDeserializer::RegisterByteReader(size_t seqId, const std::string& path, PathReaderMap& knownReaders)
|
||||
void ImageDataDeserializer::RegisterByteReader(size_t seqId, const std::string& path, PathReaderMap& knownReaders, ReaderSequenceMap& readerSequences)
|
||||
{
|
||||
assert(!path.empty());
|
||||
|
||||
|
@ -330,16 +350,19 @@ void ImageDataDeserializer::RegisterByteReader(size_t seqId, const std::string&
|
|||
{
|
||||
reader = std::make_shared<ZipByteReader>(containerPath);
|
||||
knownReaders[containerPath] = reader;
|
||||
readerSequences[containerPath] = std::map<std::string, size_t>();
|
||||
}
|
||||
else
|
||||
{
|
||||
reader = (*r).second;
|
||||
}
|
||||
reader->Register(seqId, itemPath);
|
||||
|
||||
readerSequences[containerPath][itemPath] = seqId;
|
||||
m_readers[seqId] = reader;
|
||||
#else
|
||||
UNUSED(seqId);
|
||||
UNUSED(knownReaders);
|
||||
UNUSED(readerSequences);
|
||||
RuntimeError("The code is built without zip container support. Only plain image files are supported.");
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -72,7 +72,8 @@ private:
|
|||
|
||||
// Not using nocase_compare here as it's not correct on Linux.
|
||||
using PathReaderMap = std::unordered_map<std::string, std::shared_ptr<ByteReader>>;
|
||||
void RegisterByteReader(size_t seqId, const std::string& path, PathReaderMap& knownReaders);
|
||||
using ReaderSequenceMap = std::map<std::string, std::map<std::string, size_t>>;
|
||||
void RegisterByteReader(size_t seqId, const std::string& path, PathReaderMap& knownReaders, ReaderSequenceMap& readerSequences);
|
||||
cv::Mat ReadImage(size_t seqId, const std::string& path, bool grayscale);
|
||||
|
||||
// REVIEW alexeyk: can potentially use vector instead of map. Need to handle default reader and resizing though.
|
||||
|
@ -80,6 +81,7 @@ private:
|
|||
SeqReaderMap m_readers;
|
||||
|
||||
FileByteReader m_defaultReader;
|
||||
int m_verbosity;
|
||||
};
|
||||
|
||||
}}}
|
||||
|
|
|
@ -44,16 +44,46 @@ ZipByteReader::ZipPtr ZipByteReader::OpenZip()
|
|||
});
|
||||
}
|
||||
|
||||
void ZipByteReader::Register(size_t seqId, const std::string& path)
|
||||
void ZipByteReader::Register(const std::map<std::string, size_t>& sequences)
|
||||
{
|
||||
auto zipFile = m_zips.pop_or_create([this]() { return OpenZip(); });
|
||||
zip_stat_t stat;
|
||||
zip_stat_init(&stat);
|
||||
int err = zip_stat(zipFile.get(), path.c_str(), 0, &stat);
|
||||
if (ZIP_ER_OK != err)
|
||||
RuntimeError("Failed to get file info of %s, zip library error: %s", path.c_str(), GetZipError(err).c_str());
|
||||
m_seqIdToIndex[seqId] = std::make_pair(stat.index, stat.size);
|
||||
|
||||
size_t numberOfEntries = 0;
|
||||
size_t numEntries = zip_get_num_entries(zipFile.get(), 0);
|
||||
for (size_t i = 0; i < numEntries; ++i) {
|
||||
int err = zip_stat_index(zipFile.get(), i, 0, &stat);
|
||||
if (ZIP_ER_OK != err)
|
||||
RuntimeError("Failed to get file info for index %d, zip library error: %s", (int)i, GetZipError(err).c_str());
|
||||
|
||||
auto sequenceId = sequences.find(std::string(stat.name));
|
||||
if (sequenceId == sequences.end())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_seqIdToIndex[sequenceId->second] = std::make_pair(stat.index, stat.size);
|
||||
numberOfEntries++;
|
||||
}
|
||||
}
|
||||
m_zips.push(std::move(zipFile));
|
||||
|
||||
if (numberOfEntries != sequences.size())
|
||||
{
|
||||
// Not all sequences have been found. Let's print them out and throw.
|
||||
for (const auto& s : sequences)
|
||||
{
|
||||
auto index = m_seqIdToIndex.find(s.second);
|
||||
if (index == m_seqIdToIndex.end())
|
||||
{
|
||||
fprintf(stderr, "Sequence %s is not found in container %s.\n", s.first.c_str(), m_zipPath.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
RuntimeError("Cannot retrieve image data for some sequences. For more detail, please see the log file.");
|
||||
}
|
||||
}
|
||||
|
||||
cv::Mat ZipByteReader::Read(size_t seqId, const std::string& path, bool grayscale)
|
||||
|
|
|
@ -22,17 +22,10 @@
|
|||
== Preliminaries ==
|
||||
|
||||
To build the CPU version, you have to install the Intel MKL BLAS library
|
||||
or ACML library first. Note that ACML is free, whereas MKL may not be.
|
||||
|
||||
for MKL:
|
||||
1. Download from https://software.intel.com/en-us/intel-mkl
|
||||
|
||||
for ACML:
|
||||
1. Download from
|
||||
http://developer.amd.com/tools-and-sdks/archive/amd-core-math-library-acml/acml-downloads-resources/
|
||||
We have seen some problems with some versions of the library on Intel
|
||||
processors, but have had success with acml-5-3-1-ifort-64bit.tgz
|
||||
|
||||
for Kaldi:
|
||||
1. In kaldi-trunk/tools/Makefile, uncomment # OPENFST_VERSION = 1.4.1, and
|
||||
re-install OpenFst using the makefile.
|
||||
|
@ -54,8 +47,7 @@ build in the directory "build" type
|
|||
(For an in source build, just run configure in the $CNTK directory).
|
||||
|
||||
You will see various options for configure, as well as their default
|
||||
values. CNTK needs a CPU math directory, either acml or mkl. If you
|
||||
do not specify one and both are available, acml will be used. For GPU
|
||||
values. CNTK needs a CPU math library (mkl). For GPU
|
||||
use, a cuda and gdk directory are also required. Similarly, to build
|
||||
the kaldi plugin a kaldi directory is required. You may also specify
|
||||
whether you want a debug or release build, as well as add additional
|
||||
|
|
|
@ -171,6 +171,12 @@ public:
|
|||
// setup all the state variables and state tables for state machine
|
||||
void Init();
|
||||
|
||||
// convenience function for setting the flags
|
||||
inline unsigned int SetSequenceFlags()
|
||||
{
|
||||
return (m_beginSequence ? seqFlagStartLabel : 0) | (m_endSequence ? seqFlagStopLabel : 0) | seqFlagLineBreak;
|
||||
}
|
||||
|
||||
// Parser destructor
|
||||
~SequenceParser();
|
||||
|
||||
|
@ -334,8 +340,7 @@ public:
|
|||
case EndOfLine:
|
||||
if (seqPos)
|
||||
{
|
||||
SequencePosition sequencePos(numbers->size(), labels->size(),
|
||||
(m_beginSequence ? seqFlagStartLabel : 0) | (m_endSequence ? seqFlagStopLabel : 0) | seqFlagLineBreak);
|
||||
SequencePosition sequencePos(numbers->size(), labels->size(), SetSequenceFlags());
|
||||
// add a sequence element to the list
|
||||
seqPos->push_back(sequencePos);
|
||||
sequencePositionLast = sequencePos;
|
||||
|
@ -429,8 +434,7 @@ public:
|
|||
// this could probably be fixed by taking another pass through the loop above, but this is easier
|
||||
if (seqPos)
|
||||
{
|
||||
SequencePosition sequencePos(numbers->size(), labels->size(),
|
||||
m_beginSequence ? seqFlagStartLabel : 0 | m_endSequence ? seqFlagStopLabel : 0 | seqFlagLineBreak);
|
||||
SequencePosition sequencePos(numbers->size(), labels->size(), SetSequenceFlags());
|
||||
// add the final sequence element if needed
|
||||
if (!(sequencePos.labelPos == sequencePositionLast.labelPos && sequencePos.numberPos == sequencePositionLast.numberPos))
|
||||
{
|
||||
|
@ -510,6 +514,7 @@ public:
|
|||
using SequenceParser<NumType, LabelType>::m_totalNumbersConverted;
|
||||
using SequenceParser<NumType, LabelType>::m_dimLabelsOut;
|
||||
using SequenceParser<NumType, LabelType>::m_bufferStart;
|
||||
using SequenceParser<NumType, LabelType>::SetSequenceFlags;
|
||||
LMSequenceParser()
|
||||
{
|
||||
mFile = nullptr;
|
||||
|
@ -594,8 +599,7 @@ public:
|
|||
labels->push_back(std::move(vstr[i])); // TODO: is this an entire sequence, or multiple columns describing a single token?
|
||||
|
||||
// add a sequence element to the list
|
||||
SequencePosition sequencePos(numbers->size(), labels->size(),
|
||||
m_beginSequence ? seqFlagStartLabel : 0 | m_endSequence ? seqFlagStopLabel : 0 | seqFlagLineBreak);
|
||||
SequencePosition sequencePos(numbers->size(), labels->size(), SetSequenceFlags());
|
||||
seqPos->push_back(sequencePos);
|
||||
|
||||
lineCount++;
|
||||
|
|
|
@ -80,8 +80,8 @@ void BlockRandomizer::StartEpoch(const EpochConfiguration& config)
|
|||
|
||||
#ifdef _DEBUG
|
||||
size_t epochStartFrame = config.m_epochIndex * m_epochSize;
|
||||
fprintf(stderr, "BlockRandomizer::StartEpoch: epoch %" PRIu64 ": frames [%" PRIu64 "..%" PRIu64 "] (first sequence at sample %" PRIu64 "), data subset %" PRIu64 " of %" PRIu64 "\n",
|
||||
config.m_epochIndex,
|
||||
fprintf(stderr, "BlockRandomizer::StartEpoch: epoch %" PRIu64 ": samples [%" PRIu64 "..%" PRIu64 "] (first sequence at sample %" PRIu64 "), worker rank %" PRIu64 ", total workers %" PRIu64 "\n",
|
||||
config.m_epochIndex + 1,
|
||||
epochStartFrame,
|
||||
epochStartFrame + m_epochSize,
|
||||
m_globalSamplePosition,
|
||||
|
@ -107,7 +107,7 @@ void BlockRandomizer::PrepareNewSweepIfNeeded(size_t samplePosition)
|
|||
m_chunkRandomizer->Randomize((unsigned int)m_sweep);
|
||||
|
||||
// Resetting sequence randomizer.
|
||||
m_sequenceRandomizer->Reset(m_sweep + 1);
|
||||
m_sequenceRandomizer->Reset(m_sweep);
|
||||
m_lastSeenChunkId = CHUNKID_MAX;
|
||||
}
|
||||
}
|
||||
|
@ -138,8 +138,8 @@ Sequences BlockRandomizer::GetNextSequences(size_t sampleCount)
|
|||
|
||||
if (m_verbosity >= Debug)
|
||||
fprintf(stderr, "BlockRandomizer::GetNextSequences(): getting %" PRIu64 " out of %" PRIu64 " sequences for %" PRIu64 " requested samples in sweep %" PRIu64 "\n",
|
||||
sequences.size(),
|
||||
decimated.size(),
|
||||
sequences.size(),
|
||||
sampleCount,
|
||||
m_sweep);
|
||||
|
||||
|
|
|
@ -10,25 +10,6 @@
|
|||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
// NOTE: This is an old code, used for legacy randomization to make sure we preserve the same behavior for the tests.
|
||||
// TODO: Deprecate when the new randomizer is in place.
|
||||
template <typename TVector>
|
||||
void RandomShuffle(TVector& v, size_t randomSeed)
|
||||
{
|
||||
if (v.size() > RAND_MAX * static_cast<size_t>(RAND_MAX))
|
||||
{
|
||||
RuntimeError("RandomShuffle: too large set: need to change to different random generator!");
|
||||
}
|
||||
|
||||
srand(static_cast<unsigned int>(randomSeed));
|
||||
foreach_index(currentLocation, v)
|
||||
{
|
||||
// Pick a random location a location and swap with current
|
||||
const size_t randomLocation = rand(0, v.size());
|
||||
std::swap(v[currentLocation], v[randomLocation]);
|
||||
}
|
||||
}
|
||||
|
||||
ChunkRandomizer::ChunkRandomizer(IDataDeserializerPtr deserializer, size_t randomizationRangeInSamples, bool legacy) :
|
||||
m_deserializer(deserializer), m_legacy(legacy), m_randomizationRangeInSamples(randomizationRangeInSamples)
|
||||
{
|
||||
|
@ -52,15 +33,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
randomizedChunkIndices.push_back(i);
|
||||
}
|
||||
|
||||
if (m_legacy)
|
||||
{
|
||||
RandomShuffle(randomizedChunkIndices, seed);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::mt19937 m_rng(static_cast<int>(seed));
|
||||
std::shuffle(randomizedChunkIndices.begin(), randomizedChunkIndices.end(), m_rng);
|
||||
}
|
||||
m_rng.seed(seed);
|
||||
RandomShuffleMT(randomizedChunkIndices, m_rng);
|
||||
|
||||
// Place randomized chunks on the timeline
|
||||
m_randomizedChunks.clear();
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
|
||||
#include <vector>
|
||||
#include "DataDeserializer.h"
|
||||
#include <random>
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
|
@ -68,6 +69,8 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
bool m_legacy;
|
||||
// Randomization range in samples.
|
||||
size_t m_randomizationRangeInSamples;
|
||||
|
||||
std::mt19937_64 m_rng;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<ChunkRandomizer> ChunkRandomizerPtr;
|
||||
|
|
|
@ -45,7 +45,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
// Resets the current sweep according to the randomization seed provided.
|
||||
void SequenceRandomizer::Reset(size_t randSeed)
|
||||
{
|
||||
srand((unsigned int)randSeed);
|
||||
m_rng.seed((unsigned long)randSeed);
|
||||
|
||||
m_sequenceWindow.clear();
|
||||
m_chunkWindow.clear();
|
||||
|
@ -197,7 +197,7 @@ namespace Microsoft { namespace MSR { namespace CNTK {
|
|||
for (;;)
|
||||
{
|
||||
// Pick a sequence position from [posBegin, posEnd)
|
||||
const size_t j = rand(posBegin, posEnd);
|
||||
const size_t j = RandMT(posBegin, posEnd, m_rng);
|
||||
|
||||
// Pick up j sequence.
|
||||
ChunkIdType jChunkIndex = GetChunkIndexForSequencePosition(j);
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
#include "DataDeserializer.h"
|
||||
#include "ChunkRandomizer.h"
|
||||
#include <deque>
|
||||
#include <random>
|
||||
|
||||
namespace Microsoft { namespace MSR { namespace CNTK {
|
||||
|
||||
|
@ -164,6 +165,8 @@ private:
|
|||
|
||||
// General configuration
|
||||
int m_verbosity;
|
||||
|
||||
std::mt19937_64 m_rng;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<SequenceRandomizer> SequenceRandomizerPtr;
|
||||
|
|
|
@ -40,30 +40,10 @@ template SGD<double>::SGD(const ScriptableObjects::IConfigRecord&);
|
|||
// -----------------------------------------------------------------------
|
||||
|
||||
template <class ElemType>
|
||||
void SGD<ElemType>::Train(function<ComputationNetworkPtr(DEVICEID_TYPE)> createNetworkFn, DEVICEID_TYPE deviceId,
|
||||
void SGD<ElemType>::Train(shared_ptr<ComputationNetwork> net, DEVICEID_TYPE deviceId,
|
||||
IDataReader* trainSetDataReader,
|
||||
IDataReader* validationSetDataReader,
|
||||
const bool makeMode)
|
||||
IDataReader* validationSetDataReader, int startEpoch, bool loadNetworkFromCheckpoint)
|
||||
{
|
||||
// determine which epoch to start with, including recovering a checkpoint if any and 'makeMode' enabled
|
||||
int startEpoch = DetermineStartEpoch(makeMode);
|
||||
if (startEpoch == m_maxEpochs)
|
||||
{
|
||||
LOGPRINTF(stderr, "No further training is necessary.\n");
|
||||
return;
|
||||
}
|
||||
|
||||
wstring modelFileName = GetModelNameForEpoch(int(startEpoch) - 1);
|
||||
bool loadNetworkFromCheckpoint = startEpoch >= 0;
|
||||
fprintf(stderr, "\n");
|
||||
if (loadNetworkFromCheckpoint)
|
||||
LOGPRINTF(stderr, "Starting from checkpoint. Loading network from '%ls'.\n", modelFileName.c_str());
|
||||
else
|
||||
LOGPRINTF(stderr, "Creating virgin network.\n");
|
||||
|
||||
// create or load from checkpoint
|
||||
shared_ptr<ComputationNetwork> net = !loadNetworkFromCheckpoint ? createNetworkFn(deviceId) : ComputationNetwork::CreateFromFile<ElemType>(deviceId, modelFileName);
|
||||
|
||||
// log the device we are computing on
|
||||
LOGPRINTF(stderr, "%s model with %d nodes", loadNetworkFromCheckpoint ? "Loaded" : "Created", (int)net->GetTotalNumberOfNodes());
|
||||
if (net->GetDeviceId() < 0)
|
||||
|
|
|
@ -110,6 +110,8 @@ struct SGDParams : public ScriptableObjects::Object
|
|||
|
||||
// SGDParams(SGDParams&&) = default; // (does not compile in VS 2013; not critical)
|
||||
|
||||
size_t GetMaxEpochs() { return m_maxEpochs; }
|
||||
|
||||
protected:
|
||||
// learning rate per sample provided outside
|
||||
floatargvector m_learningRatesParam;
|
||||
|
@ -342,10 +344,9 @@ public:
|
|||
m_parallelizationMethod = ParallelizationMethod::none;
|
||||
}
|
||||
|
||||
void Train(function<ComputationNetworkPtr(DEVICEID_TYPE)> createNetworkFn, DEVICEID_TYPE deviceId,
|
||||
void Train(shared_ptr<ComputationNetwork> net, DEVICEID_TYPE deviceId,
|
||||
IDataReader* trainSetDataReader,
|
||||
IDataReader* validationSetDataReader,
|
||||
const bool makeMode = true);
|
||||
IDataReader* validationSetDataReader, int startEpoch, bool loadNetworkFromCheckpoint);
|
||||
void Adapt(wstring origModelFileName, wstring refNodeName,
|
||||
IDataReader* trainSetDataReader,
|
||||
IDataReader* validationSetDataReader,
|
||||
|
@ -483,6 +484,10 @@ public:
|
|||
const double L1RegWeight,
|
||||
const bool needAveMultiplier,
|
||||
const bool useNesterovMomentum);
|
||||
// return -1 if nothing exists
|
||||
int DetermineStartEpoch(const bool makeMode);
|
||||
|
||||
wstring GetModelNameForEpoch(const int epoch, bool bLastModel = false);
|
||||
|
||||
protected:
|
||||
// UpdateWeights - update the weights in
|
||||
|
@ -517,10 +522,6 @@ protected:
|
|||
/*out*/ size_t& minibatchSize);
|
||||
|
||||
wstring GetCheckPointFileNameForEpoch(const int epoch);
|
||||
wstring GetModelNameForEpoch(const int epoch, bool bLastModel = false);
|
||||
|
||||
// return -1 if nothing exists
|
||||
int DetermineStartEpoch(const bool makeMode);
|
||||
|
||||
GradientsUpdateType GradUpdateType() const
|
||||
{
|
||||
|
|
|
@ -180,7 +180,7 @@ public:
|
|||
m_gradHeader.reset(DistGradHeader::Create(evalNodes.size()), [](DistGradHeader* ptr) {
|
||||
DistGradHeader::Destroy(ptr);
|
||||
});
|
||||
m_distGradAgg = make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, false, m_traceLevel);
|
||||
m_distGradAgg = make_shared<SimpleDistGradAggregator<ElemType>>(m_mpi, false /*useAsyncAggregation*/, 0 /*syncStatsTrace*/);
|
||||
}
|
||||
|
||||
m_gradHeader->numEvalNode = evalNodes.size();
|
||||
|
|
|
@ -21,6 +21,7 @@ mkdir $DataDir
|
|||
cp -R $DataSourceDir/MNIST/v0/Train-28x28_cntk_text.txt $DataDir || exit $?
|
||||
cp -R $DataSourceDir/CIFAR/v0/cifar-10-batches-py $DataDir || exit $?
|
||||
cp -R $TEST_DIR/../../../../Examples/Other/Simple2d/Data/SimpleDataTrain_cntk_text.txt $DataDir || exit $?
|
||||
cp -R $TEST_DIR/../../Text/SequenceClassification/Data/Train.ctf $DataDir || exit $?
|
||||
|
||||
pushd $DataDir
|
||||
|
||||
|
|
|
@ -272,8 +272,8 @@ Post-processing network...
|
|||
|
||||
4 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ClassificationError()
|
||||
errTop1 = ClassificationError()
|
||||
err = ErrorPrediction()
|
||||
errTop1 = ErrorPrediction()
|
||||
ol.z = Plus()
|
||||
|
||||
Validating network. 17 nodes to process in pass 1.
|
||||
|
@ -292,9 +292,9 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *] -> [10 x 1
|
|||
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
|
||||
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> unnamed81 = LearnableParameter() : -> [1 x 1]
|
||||
Validating --> errTop1 = ClassificationError (labels, ol.z, unnamed81) : [10 x *], [10 x 1 x *], [1 x 1] -> [1]
|
||||
Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *], [10 x 1 x *], [1 x 1] -> [1]
|
||||
|
||||
Validating network. 9 nodes to process in pass 2.
|
||||
|
||||
|
@ -314,8 +314,8 @@ Post-processing network complete.
|
|||
|
||||
05/13/2016 15:10:02: Evaluation criterion node(s):
|
||||
|
||||
05/13/2016 15:10:02: errTop1 = ClassificationError
|
||||
05/13/2016 15:10:02: err = ClassificationError
|
||||
05/13/2016 15:10:02: errTop1 = ErrorPrediction
|
||||
05/13/2016 15:10:02: err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
@ -390,8 +390,8 @@ Post-processing network...
|
|||
|
||||
4 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ClassificationError()
|
||||
errTop1 = ClassificationError()
|
||||
err = ErrorPrediction()
|
||||
errTop1 = ErrorPrediction()
|
||||
ol.z = Plus()
|
||||
|
||||
Validating network. 17 nodes to process in pass 1.
|
||||
|
@ -410,9 +410,9 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *1] -> [10 x 1
|
|||
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
|
||||
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> unnamed81 = LearnableParameter() : -> [1 x 1]
|
||||
Validating --> errTop1 = ClassificationError (labels, ol.z, unnamed81) : [10 x *1], [10 x 1 x *1], [1 x 1] -> [1]
|
||||
Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *1], [10 x 1 x *1], [1 x 1] -> [1]
|
||||
|
||||
Validating network. 9 nodes to process in pass 2.
|
||||
|
||||
|
|
|
@ -270,8 +270,8 @@ Post-processing network...
|
|||
|
||||
4 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ClassificationError()
|
||||
errTop1 = ClassificationError()
|
||||
err = ErrorPrediction()
|
||||
errTop1 = ErrorPrediction()
|
||||
ol.z = Plus()
|
||||
|
||||
Validating network. 17 nodes to process in pass 1.
|
||||
|
@ -290,9 +290,9 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *] -> [10 x 1
|
|||
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
|
||||
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> unnamed81 = LearnableParameter() : -> [1 x 1]
|
||||
Validating --> errTop1 = ClassificationError (labels, ol.z, unnamed81) : [10 x *], [10 x 1 x *], [1 x 1] -> [1]
|
||||
Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *], [10 x 1 x *], [1 x 1] -> [1]
|
||||
|
||||
Validating network. 9 nodes to process in pass 2.
|
||||
|
||||
|
@ -312,8 +312,8 @@ Post-processing network complete.
|
|||
|
||||
05/13/2016 08:15:53: Evaluation criterion node(s):
|
||||
|
||||
05/13/2016 08:15:53: errTop1 = ClassificationError
|
||||
05/13/2016 08:15:53: err = ClassificationError
|
||||
05/13/2016 08:15:53: errTop1 = ErrorPrediction
|
||||
05/13/2016 08:15:53: err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
@ -388,8 +388,8 @@ Post-processing network...
|
|||
|
||||
4 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ClassificationError()
|
||||
errTop1 = ClassificationError()
|
||||
err = ErrorPrediction()
|
||||
errTop1 = ErrorPrediction()
|
||||
ol.z = Plus()
|
||||
|
||||
Validating network. 17 nodes to process in pass 1.
|
||||
|
@ -408,9 +408,9 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 200], [200 x 1 x *1] -> [10 x 1
|
|||
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
|
||||
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> unnamed81 = LearnableParameter() : -> [1 x 1]
|
||||
Validating --> errTop1 = ClassificationError (labels, ol.z, unnamed81) : [10 x *1], [10 x 1 x *1], [1 x 1] -> [1]
|
||||
Validating --> errTop1 = ErrorPrediction (labels, ol.z, unnamed81) : [10 x *1], [10 x 1 x *1], [1 x 1] -> [1]
|
||||
|
||||
Validating network. 9 nodes to process in pass 2.
|
||||
|
||||
|
|
|
@ -284,7 +284,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ClassificationError()
|
||||
err = ErrorPrediction()
|
||||
ol.z = Plus()
|
||||
|
||||
Validating network. 27 nodes to process in pass 1.
|
||||
|
@ -315,7 +315,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *] -> [10 x 1
|
|||
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
|
||||
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
|
||||
Validating network. 16 nodes to process in pass 2.
|
||||
|
||||
|
@ -343,7 +343,7 @@ Post-processing network complete.
|
|||
|
||||
05/13/2016 15:10:11: Evaluation criterion node(s):
|
||||
|
||||
05/13/2016 15:10:11: err = ClassificationError
|
||||
05/13/2016 15:10:11: err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
@ -429,7 +429,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ClassificationError()
|
||||
err = ErrorPrediction()
|
||||
ol.z = Plus()
|
||||
|
||||
Validating network. 27 nodes to process in pass 1.
|
||||
|
@ -460,7 +460,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *1] -> [10 x 1
|
|||
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
|
||||
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
|
||||
Validating network. 16 nodes to process in pass 2.
|
||||
|
||||
|
|
|
@ -282,7 +282,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ClassificationError()
|
||||
err = ErrorPrediction()
|
||||
ol.z = Plus()
|
||||
|
||||
Validating network. 27 nodes to process in pass 1.
|
||||
|
@ -313,7 +313,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *] -> [10 x 1
|
|||
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
|
||||
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *], [10 x 1] -> [10 x 1 x *]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
|
||||
Validating network. 16 nodes to process in pass 2.
|
||||
|
||||
|
@ -341,7 +341,7 @@ Post-processing network complete.
|
|||
|
||||
05/13/2016 08:16:18: Evaluation criterion node(s):
|
||||
|
||||
05/13/2016 08:16:18: err = ClassificationError
|
||||
05/13/2016 08:16:18: err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
@ -427,7 +427,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ClassificationError()
|
||||
err = ErrorPrediction()
|
||||
ol.z = Plus()
|
||||
|
||||
Validating network. 27 nodes to process in pass 1.
|
||||
|
@ -458,7 +458,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x 1 x *1] -> [10 x 1
|
|||
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
|
||||
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x 1 x *1], [10 x 1] -> [10 x 1 x *1]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
|
||||
Validating network. 16 nodes to process in pass 2.
|
||||
|
||||
|
|
|
@ -287,7 +287,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ClassificationError()
|
||||
err = ErrorPrediction()
|
||||
ol.z = Plus()
|
||||
|
||||
Validating network. 36 nodes to process in pass 1.
|
||||
|
@ -329,7 +329,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *] -> [10 x *]
|
|||
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
|
||||
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *], [10 x 1] -> [10 x 1 x *]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
|
||||
Validating network. 16 nodes to process in pass 2.
|
||||
|
||||
|
@ -363,7 +363,7 @@ Post-processing network complete.
|
|||
|
||||
05/13/2016 15:10:29: Evaluation criterion node(s):
|
||||
|
||||
05/13/2016 15:10:29: err = ClassificationError
|
||||
05/13/2016 15:10:29: err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
@ -462,7 +462,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ClassificationError()
|
||||
err = ErrorPrediction()
|
||||
ol.z = Plus()
|
||||
|
||||
Validating network. 36 nodes to process in pass 1.
|
||||
|
@ -502,7 +502,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *1] -> [10 x *1]
|
|||
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
|
||||
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *1], [10 x 1] -> [10 x 1 x *1]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
|
||||
Validating network. 16 nodes to process in pass 2.
|
||||
|
||||
|
|
|
@ -285,7 +285,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ClassificationError()
|
||||
err = ErrorPrediction()
|
||||
ol.z = Plus()
|
||||
|
||||
Validating network. 36 nodes to process in pass 1.
|
||||
|
@ -327,7 +327,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *] -> [10 x *]
|
|||
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
|
||||
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *], [10 x 1] -> [10 x 1 x *]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> err = ClassificationError (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
|
||||
Validating network. 16 nodes to process in pass 2.
|
||||
|
||||
|
@ -361,7 +361,7 @@ Post-processing network complete.
|
|||
|
||||
05/13/2016 08:16:58: Evaluation criterion node(s):
|
||||
|
||||
05/13/2016 08:16:58: err = ClassificationError
|
||||
05/13/2016 08:16:58: err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
@ -460,7 +460,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ClassificationError()
|
||||
err = ErrorPrediction()
|
||||
ol.z = Plus()
|
||||
|
||||
Validating network. 36 nodes to process in pass 1.
|
||||
|
@ -500,7 +500,7 @@ Validating --> ol.t = Times (ol.W, h1.y) : [10 x 128], [128 x *1] -> [10 x *1]
|
|||
Validating --> ol.b = LearnableParameter() : -> [10 x 1]
|
||||
Validating --> ol.z = Plus (ol.t, ol.b) : [10 x *1], [10 x 1] -> [10 x 1 x *1]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> err = ClassificationError (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, ol.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
|
||||
Validating network. 16 nodes to process in pass 2.
|
||||
|
||||
|
|
|
@ -1,49 +1,62 @@
|
|||
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/Config/01_Conv.cntk currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10 OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DeviceId=0 timestamping=true Train=[SGD=[maxEpochs=10]] Train=[SGD=[epochSize=100]] stderr=-
|
||||
CPU info:
|
||||
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
|
||||
Hardware threads: 24
|
||||
Total Memory: 264172964 kB
|
||||
-------------------------------------------------------------------
|
||||
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Conv.cntk currentDirectory=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData RunDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DataDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10 OutputDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DeviceId=0 timestamping=true Train=[SGD=[maxEpochs=10]] Train=[SGD=[epochSize=100]] stderr=-
|
||||
-------------------------------------------------------------------
|
||||
Build info:
|
||||
|
||||
Built time: May 13 2016 14:50:25
|
||||
Last modified date: Thu May 12 14:00:37 2016
|
||||
Built time: Aug 16 2016 09:41:56
|
||||
Last modified date: Fri Aug 12 07:32:43 2016
|
||||
Build type: release
|
||||
Build target: GPU
|
||||
With 1bit-SGD: no
|
||||
Math lib: acml
|
||||
Math lib: mkl
|
||||
CUDA_PATH: /usr/local/cuda-7.5
|
||||
CUB_PATH: /usr/local/cub-1.4.1
|
||||
CUDNN_PATH: /usr/local/cudnn-4.0
|
||||
Build Branch: HEAD
|
||||
Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
|
||||
Built by philly on d8dc82703b0f
|
||||
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
Built by philly on f67b30a647de
|
||||
Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
|
||||
-------------------------------------------------------------------
|
||||
Changed current directory to /tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
|
||||
05/13/2016 15:10:47: Redirecting stderr to file -_Train_Test.log
|
||||
05/13/2016 15:10:47: -------------------------------------------------------------------
|
||||
05/13/2016 15:10:47: Build info:
|
||||
Changed current directory to /tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
|
||||
08/16/2016 10:50:36: Redirecting stderr to file -_Train_Test.log
|
||||
08/16/2016 10:50:36: -------------------------------------------------------------------
|
||||
08/16/2016 10:50:36: Build info:
|
||||
|
||||
05/13/2016 15:10:47: Built time: May 13 2016 14:50:25
|
||||
05/13/2016 15:10:47: Last modified date: Thu May 12 14:00:37 2016
|
||||
05/13/2016 15:10:47: Build type: release
|
||||
05/13/2016 15:10:47: Build target: GPU
|
||||
05/13/2016 15:10:47: With 1bit-SGD: no
|
||||
05/13/2016 15:10:47: Math lib: acml
|
||||
05/13/2016 15:10:47: CUDA_PATH: /usr/local/cuda-7.5
|
||||
05/13/2016 15:10:47: CUB_PATH: /usr/local/cub-1.4.1
|
||||
05/13/2016 15:10:47: CUDNN_PATH: /usr/local/cudnn-4.0
|
||||
05/13/2016 15:10:47: Build Branch: HEAD
|
||||
05/13/2016 15:10:47: Build SHA1: 35fadc316f045d843bbd9b85061250a959268787
|
||||
05/13/2016 15:10:47: Built by philly on d8dc82703b0f
|
||||
05/13/2016 15:10:47: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
|
||||
05/13/2016 15:10:47: -------------------------------------------------------------------
|
||||
08/16/2016 10:50:36: Built time: Aug 16 2016 09:41:56
|
||||
08/16/2016 10:50:36: Last modified date: Fri Aug 12 07:32:43 2016
|
||||
08/16/2016 10:50:36: Build type: release
|
||||
08/16/2016 10:50:36: Build target: GPU
|
||||
08/16/2016 10:50:36: With 1bit-SGD: no
|
||||
08/16/2016 10:50:36: Math lib: mkl
|
||||
08/16/2016 10:50:36: CUDA_PATH: /usr/local/cuda-7.5
|
||||
08/16/2016 10:50:36: CUB_PATH: /usr/local/cub-1.4.1
|
||||
08/16/2016 10:50:36: CUDNN_PATH: /usr/local/cudnn-4.0
|
||||
08/16/2016 10:50:36: Build Branch: HEAD
|
||||
08/16/2016 10:50:36: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
08/16/2016 10:50:36: Built by philly on f67b30a647de
|
||||
08/16/2016 10:50:36: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
|
||||
08/16/2016 10:50:36: -------------------------------------------------------------------
|
||||
08/16/2016 10:50:37: -------------------------------------------------------------------
|
||||
08/16/2016 10:50:37: GPU info:
|
||||
|
||||
05/13/2016 15:10:47: Running on localhost at 2016/05/13 15:10:47
|
||||
05/13/2016 15:10:47: Command line:
|
||||
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/../../../../Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/Config/01_Conv.cntk currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10 OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DeviceId=0 timestamping=true Train=[SGD=[maxEpochs=10]] Train=[SGD=[epochSize=100]] stderr=-
|
||||
08/16/2016 10:50:37: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:50:37: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:50:37: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:50:37: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:50:37: -------------------------------------------------------------------
|
||||
|
||||
08/16/2016 10:50:37: Running on localhost at 2016/08/16 10:50:37
|
||||
08/16/2016 10:50:37: Command line:
|
||||
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/gpu/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Conv.cntk currentDirectory=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData RunDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DataDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10 OutputDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu DeviceId=0 timestamping=true Train=[SGD=[maxEpochs=10]] Train=[SGD=[epochSize=100]] stderr=-
|
||||
|
||||
|
||||
|
||||
05/13/2016 15:10:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
|
||||
05/13/2016 15:10:47: RootDir = "."
|
||||
08/16/2016 10:50:37: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 10:50:37: RootDir = "."
|
||||
ConfigDir = "$RootDir$"
|
||||
DataDir = "$RootDir$"
|
||||
OutputDir = "$RootDir$/Output"
|
||||
|
@ -53,7 +66,6 @@ precision = "float"
|
|||
deviceId = 0
|
||||
imageLayout = "cudnn"
|
||||
initOnCPUOnly=true
|
||||
prefetch = "true"
|
||||
command = Train:Test
|
||||
modelPath = "$ModelDir$/01_Convolution"
|
||||
stderr = "$OutputDir$/01_Conv"
|
||||
|
@ -86,7 +98,7 @@ Train = [
|
|||
format = "dense"
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
||||
Test = [
|
||||
action = "test"
|
||||
|
@ -104,42 +116,41 @@ Test = [
|
|||
format = "dense"
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
||||
currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
|
||||
RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
|
||||
DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
|
||||
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
|
||||
OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
|
||||
currentDirectory=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
|
||||
RunDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
|
||||
DataDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
|
||||
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
|
||||
OutputDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
|
||||
DeviceId=0
|
||||
timestamping=true
|
||||
Train=[SGD=[maxEpochs=10]]
|
||||
Train=[SGD=[epochSize=100]]
|
||||
stderr=-
|
||||
|
||||
05/13/2016 15:10:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
|
||||
08/16/2016 10:50:37: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
05/13/2016 15:10:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
05/13/2016 15:10:47: RootDir = "."
|
||||
08/16/2016 10:50:37: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 10:50:37: RootDir = "."
|
||||
ConfigDir = "."
|
||||
DataDir = "."
|
||||
OutputDir = "./Output"
|
||||
ModelDir = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models"
|
||||
ndlMacros = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl"
|
||||
ModelDir = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models"
|
||||
ndlMacros = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl"
|
||||
precision = "float"
|
||||
deviceId = 0
|
||||
imageLayout = "cudnn"
|
||||
initOnCPUOnly=true
|
||||
prefetch = "true"
|
||||
command = Train:Test
|
||||
modelPath = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution"
|
||||
stderr = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/01_Conv"
|
||||
modelPath = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution"
|
||||
stderr = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/01_Conv"
|
||||
traceLevel = 1
|
||||
numMBsToShowResult = 500
|
||||
Train = [
|
||||
action = "train"
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl"
|
||||
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl"
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 49984
|
||||
|
@ -152,7 +163,7 @@ Train = [
|
|||
]
|
||||
reader = [
|
||||
readerType = "CNTKTextFormatReader"
|
||||
file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Train_cntk_text.txt"
|
||||
file = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Train_cntk_text.txt"
|
||||
input = [
|
||||
features = [
|
||||
dim = 3072
|
||||
|
@ -163,14 +174,14 @@ Train = [
|
|||
format = "dense"
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
||||
Test = [
|
||||
action = "test"
|
||||
minibatchSize = 16
|
||||
reader = [
|
||||
readerType = "CNTKTextFormatReader"
|
||||
file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Test_cntk_text.txt"
|
||||
file = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Test_cntk_text.txt"
|
||||
input = [
|
||||
features = [
|
||||
dim = 3072
|
||||
|
@ -181,45 +192,44 @@ Test = [
|
|||
format = "dense"
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
||||
currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
|
||||
RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
|
||||
DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
|
||||
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
|
||||
OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
|
||||
currentDirectory=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
|
||||
RunDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
|
||||
DataDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
|
||||
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
|
||||
OutputDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
|
||||
DeviceId=0
|
||||
timestamping=true
|
||||
Train=[SGD=[maxEpochs=10]]
|
||||
Train=[SGD=[epochSize=100]]
|
||||
stderr=-
|
||||
|
||||
05/13/2016 15:10:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
08/16/2016 10:50:37: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
05/13/2016 15:10:47: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 10:50:37: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
configparameters: 01_Conv.cntk:command=Train:Test
|
||||
configparameters: 01_Conv.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
|
||||
configparameters: 01_Conv.cntk:currentDirectory=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
|
||||
configparameters: 01_Conv.cntk:DataDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
|
||||
configparameters: 01_Conv.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10
|
||||
configparameters: 01_Conv.cntk:currentDirectory=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
|
||||
configparameters: 01_Conv.cntk:DataDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData
|
||||
configparameters: 01_Conv.cntk:deviceId=0
|
||||
configparameters: 01_Conv.cntk:imageLayout=cudnn
|
||||
configparameters: 01_Conv.cntk:initOnCPUOnly=true
|
||||
configparameters: 01_Conv.cntk:ModelDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models
|
||||
configparameters: 01_Conv.cntk:modelPath=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution
|
||||
configparameters: 01_Conv.cntk:ndlMacros=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl
|
||||
configparameters: 01_Conv.cntk:ModelDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models
|
||||
configparameters: 01_Conv.cntk:modelPath=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution
|
||||
configparameters: 01_Conv.cntk:ndlMacros=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/Macros.ndl
|
||||
configparameters: 01_Conv.cntk:numMBsToShowResult=500
|
||||
configparameters: 01_Conv.cntk:OutputDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
|
||||
configparameters: 01_Conv.cntk:OutputDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
|
||||
configparameters: 01_Conv.cntk:precision=float
|
||||
configparameters: 01_Conv.cntk:prefetch=true
|
||||
configparameters: 01_Conv.cntk:RootDir=.
|
||||
configparameters: 01_Conv.cntk:RunDir=/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
|
||||
configparameters: 01_Conv.cntk:RunDir=/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu
|
||||
configparameters: 01_Conv.cntk:stderr=-
|
||||
configparameters: 01_Conv.cntk:Test=[
|
||||
action = "test"
|
||||
minibatchSize = 16
|
||||
reader = [
|
||||
readerType = "CNTKTextFormatReader"
|
||||
file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Test_cntk_text.txt"
|
||||
file = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Test_cntk_text.txt"
|
||||
input = [
|
||||
features = [
|
||||
dim = 3072
|
||||
|
@ -230,7 +240,7 @@ configparameters: 01_Conv.cntk:Test=[
|
|||
format = "dense"
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
||||
|
||||
configparameters: 01_Conv.cntk:timestamping=true
|
||||
|
@ -238,7 +248,7 @@ configparameters: 01_Conv.cntk:traceLevel=1
|
|||
configparameters: 01_Conv.cntk:Train=[
|
||||
action = "train"
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl"
|
||||
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Image/Miscellaneous/CIFAR-10/01_Convolution/../../../../../../../Examples/Image/Miscellaneous/CIFAR-10/01_Convolution.ndl"
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 49984
|
||||
|
@ -251,7 +261,7 @@ configparameters: 01_Conv.cntk:Train=[
|
|||
]
|
||||
reader = [
|
||||
readerType = "CNTKTextFormatReader"
|
||||
file = "/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Train_cntk_text.txt"
|
||||
file = "/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/TestData/Train_cntk_text.txt"
|
||||
input = [
|
||||
features = [
|
||||
dim = 3072
|
||||
|
@ -262,33 +272,57 @@ configparameters: 01_Conv.cntk:Train=[
|
|||
format = "dense"
|
||||
]
|
||||
]
|
||||
]
|
||||
]
|
||||
] [SGD=[maxEpochs=10]] [SGD=[epochSize=100]]
|
||||
|
||||
05/13/2016 15:10:47: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
05/13/2016 15:10:47: Commands: Train Test
|
||||
05/13/2016 15:10:47: Precision = "float"
|
||||
05/13/2016 15:10:47: CNTKModelPath: /tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution
|
||||
05/13/2016 15:10:47: CNTKCommandTrainInfo: Train : 10
|
||||
05/13/2016 15:10:47: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 10
|
||||
08/16/2016 10:50:37: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
08/16/2016 10:50:37: Commands: Train Test
|
||||
08/16/2016 10:50:37: Precision = "float"
|
||||
08/16/2016 10:50:37: CNTKModelPath: /tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution
|
||||
08/16/2016 10:50:37: CNTKCommandTrainInfo: Train : 10
|
||||
08/16/2016 10:50:37: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 10
|
||||
|
||||
05/13/2016 15:10:47: ##############################################################################
|
||||
05/13/2016 15:10:47: # #
|
||||
05/13/2016 15:10:47: # Action "train" #
|
||||
05/13/2016 15:10:47: # #
|
||||
05/13/2016 15:10:47: ##############################################################################
|
||||
08/16/2016 10:50:37: ##############################################################################
|
||||
08/16/2016 10:50:37: # #
|
||||
08/16/2016 10:50:37: # Action "train" #
|
||||
08/16/2016 10:50:37: # #
|
||||
08/16/2016 10:50:37: ##############################################################################
|
||||
|
||||
05/13/2016 15:10:47: CNTKCommandTrainBegin: Train
|
||||
08/16/2016 10:50:37: CNTKCommandTrainBegin: Train
|
||||
NDLBuilder Using GPU 0
|
||||
|
||||
05/13/2016 15:10:47: Creating virgin network.
|
||||
08/16/2016 10:50:37: Creating virgin network.
|
||||
Node 'featOffs' (LearnableParameter operation): Initializing Parameter[1 x 1] <- 0.000000.
|
||||
Node 'conv1_act.W' (LearnableParameter operation): Initializing Parameter[32 x 75] <- 0.000000.
|
||||
Node 'conv1_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 32] <- 0.000000.
|
||||
Node 'conv2_act.W' (LearnableParameter operation): Initializing Parameter[32 x 800] <- 0.000000.
|
||||
Node 'conv2_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 32] <- 0.000000.
|
||||
Node 'conv3_act.W' (LearnableParameter operation): Initializing Parameter[64 x 800] <- 0.000000.
|
||||
Node 'conv3_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 64] <- 0.000000.
|
||||
Node 'h1.W' (LearnableParameter operation): Initializing Parameter[64 x 3 x 3 x 64] <- 0.000000.
|
||||
Node 'h1.b' (LearnableParameter operation): Initializing Parameter[64 x 1] <- 0.000000.
|
||||
Node 'OutputNodes.W' (LearnableParameter operation): Initializing Parameter[10 x 64] <- 0.000000.
|
||||
Node 'OutputNodes.b' (LearnableParameter operation): Initializing Parameter[10] <- 0.000000.
|
||||
Node 'featOffs' (LearnableParameter operation): Initializing Parameter[1 x 1] <- 128.000000.
|
||||
Node 'featOffs' (LearnableParameter operation): Initializing Parameter[1 x 1] <- 128.000000.
|
||||
Node 'featOffs' (LearnableParameter operation): Initializing Parameter[1 x 1] <- 128.000000.
|
||||
Node 'conv1_act.W' (LearnableParameter operation): Initializing Parameter[32 x 75] <- gaussian(seed=1, range=0.023094*0.004300, onCPU=false).
|
||||
SetGaussianRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4
|
||||
Node 'conv1_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 32] <- 0.000000.
|
||||
Node 'conv2_act.W' (LearnableParameter operation): Initializing Parameter[32 x 800] <- gaussian(seed=2, range=0.007071*1.414000, onCPU=false).
|
||||
Node 'conv2_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 32] <- 0.000000.
|
||||
Node 'conv3_act.W' (LearnableParameter operation): Initializing Parameter[64 x 800] <- gaussian(seed=3, range=0.007071*1.414000, onCPU=false).
|
||||
Node 'conv3_act.b' (LearnableParameter operation): Initializing Parameter[1 x 1 x 64] <- 0.000000.
|
||||
Node 'h1.W' (LearnableParameter operation): Initializing Parameter[64 x 3 x 3 x 64] <- gaussian(seed=4, range=0.008333*12.000000, onCPU=false).
|
||||
Node 'h1.b' (LearnableParameter operation): Initializing Parameter[64 x 1] <- 0.000000.
|
||||
Node 'OutputNodes.W' (LearnableParameter operation): Initializing Parameter[10 x 64] <- gaussian(seed=5, range=0.025000*1.500000, onCPU=false).
|
||||
Node 'OutputNodes.b' (LearnableParameter operation): Initializing Parameter[10] <- 0.000000.
|
||||
|
||||
Post-processing network...
|
||||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 34 nodes to process in pass 1.
|
||||
|
@ -326,7 +360,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1_d) : [10 x 64], [64 x 1
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *], [10] -> [10 x 1 x *]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
|
||||
Validating network. 21 nodes to process in pass 2.
|
||||
|
||||
|
@ -334,165 +368,183 @@ Validating network. 21 nodes to process in pass 2.
|
|||
Validating network, final pass.
|
||||
|
||||
|
||||
Using cuDNN convolution engine for geometry: Input: 32 x 32 x 3, Output: 32 x 32 x 32, Kernel: 5 x 5 x 3, Map: 1 x 1 x 32, Stride: 1 x 1 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
|
||||
conv1_act.c: using cuDNN convolution engine for geometry: Input: 32 x 32 x 3, Output: 32 x 32 x 32, Kernel: 5 x 5 x 3, Map: 1 x 1 x 32, Stride: 1 x 1 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
|
||||
|
||||
Using cuDNN convolution engine for geometry: Input: 32 x 32 x 32, Output: 15 x 15 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
|
||||
pool1: using cuDNN convolution engine for geometry: Input: 32 x 32 x 32, Output: 15 x 15 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
|
||||
|
||||
Using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 15 x 15 x 32, Kernel: 5 x 5 x 32, Map: 1 x 1 x 32, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
|
||||
conv2_act.c: using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 15 x 15 x 32, Kernel: 5 x 5 x 32, Map: 1 x 1 x 32, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
|
||||
|
||||
Using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 7 x 7 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
|
||||
pool2: using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 7 x 7 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
|
||||
|
||||
Using cuDNN convolution engine for geometry: Input: 7 x 7 x 32, Output: 7 x 7 x 64, Kernel: 5 x 5 x 32, Map: 1 x 1 x 64, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
|
||||
conv3_act.c: using cuDNN convolution engine for geometry: Input: 7 x 7 x 32, Output: 7 x 7 x 64, Kernel: 5 x 5 x 32, Map: 1 x 1 x 64, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
|
||||
|
||||
Using cuDNN convolution engine for geometry: Input: 7 x 7 x 64, Output: 3 x 3 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
|
||||
pool3: using cuDNN convolution engine for geometry: Input: 7 x 7 x 64, Output: 3 x 3 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
|
||||
|
||||
|
||||
13 out of 34 nodes do not share the minibatch layout with the input data.
|
||||
|
||||
Post-processing network complete.
|
||||
|
||||
05/13/2016 15:10:48: Created model with 34 nodes on GPU 0.
|
||||
08/16/2016 10:50:38: Created model with 34 nodes on GPU 0.
|
||||
|
||||
05/13/2016 15:10:48: Training criterion node(s):
|
||||
05/13/2016 15:10:48: CE = CrossEntropyWithSoftmax
|
||||
08/16/2016 10:50:38: Training criterion node(s):
|
||||
08/16/2016 10:50:38: CE = CrossEntropyWithSoftmax
|
||||
|
||||
05/13/2016 15:10:48: Evaluation criterion node(s):
|
||||
|
||||
05/13/2016 15:10:48: Err = ClassificationError
|
||||
08/16/2016 10:50:38: Evaluation criterion node(s):
|
||||
08/16/2016 10:50:38: Err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing Structure:
|
||||
Memory Sharing: Out of 63 matrices, 38 are shared as 17, and 25 are not shared.
|
||||
|
||||
(nil): {[Err Gradient[1]] [featOffs Gradient[1 x 1]] [featScaled Gradient[32 x 32 x 3 x *]] [features Gradient[32 x 32 x 3 x *]] [labels Gradient[10 x *]] }
|
||||
0x2485d28: {[OutputNodes.z Value[10 x 1 x *]] }
|
||||
0x2485ee8: {[CE Value[1]] }
|
||||
0x2486168: {[conv1_act.W Gradient[32 x 75]] [conv1_act.p Value[32 x 32 x 32 x *]] }
|
||||
0x2486328: {[conv1_act.c Gradient[32 x 32 x 32 x *]] [conv1_act.y Value[32 x 32 x 32 x *]] }
|
||||
0x24864e8: {[conv1_act.p Gradient[32 x 32 x 32 x *]] [pool1 Value[15 x 15 x 32 x *]] }
|
||||
0x249a638: {[features Value[32 x 32 x 3 x *]] }
|
||||
0x2975298: {[conv1_act.b Value[1 x 1 x 32]] }
|
||||
0x2976b48: {[conv2_act.W Value[32 x 800]] }
|
||||
0x2977ae8: {[conv2_act.b Value[1 x 1 x 32]] }
|
||||
0x2979668: {[conv3_act.W Value[64 x 800]] }
|
||||
0x2979f08: {[conv3_act.b Value[1 x 1 x 64]] }
|
||||
0x297bae8: {[h1.W Value[64 x 3 x 3 x 64]] }
|
||||
0x297c538: {[h1.b Value[64 x 1]] }
|
||||
0x297d5c8: {[OutputNodes.W Value[10 x 64]] }
|
||||
0x297ea98: {[OutputNodes.b Value[10]] }
|
||||
0x2dd1458: {[featOffs Value[1 x 1]] }
|
||||
0x2dd2678: {[labels Value[10 x *]] }
|
||||
0x2dd2eb8: {[conv1_act.W Value[32 x 75]] }
|
||||
0x7a59dd8: {[Err Value[1]] }
|
||||
0x7a5d378: {[featScaled Value[32 x 32 x 3 x *]] }
|
||||
0x7a5d6d8: {[conv1_act.c Value[32 x 32 x 32 x *]] }
|
||||
0x7a5e478: {[conv2_act.c Value[15 x 15 x 32 x *]] }
|
||||
0x7a5e638: {[conv1_act.b Gradient[1 x 1 x 32]] [conv1_act.y Gradient[32 x 32 x 32 x *]] }
|
||||
0x7a5e7f8: {[conv2_act.W Gradient[32 x 800]] [conv2_act.p Value[15 x 15 x 32 x *]] }
|
||||
0x7a7ade8: {[conv2_act.c Gradient[15 x 15 x 32 x *]] [conv2_act.y Value[15 x 15 x 32 x *]] }
|
||||
0x7a7afa8: {[conv2_act.p Gradient[15 x 15 x 32 x *]] [pool1 Gradient[15 x 15 x 32 x *]] [pool2 Value[7 x 7 x 32 x *]] }
|
||||
0x7a7b168: {[conv3_act.c Value[7 x 7 x 64 x *]] }
|
||||
0x7a7b328: {[conv2_act.b Gradient[1 x 1 x 32]] [conv2_act.y Gradient[15 x 15 x 32 x *]] }
|
||||
0x7a7b4e8: {[conv3_act.W Gradient[64 x 800]] [conv3_act.p Value[7 x 7 x 64 x *]] }
|
||||
0x7a7b6a8: {[conv3_act.c Gradient[7 x 7 x 64 x *]] [conv3_act.y Value[7 x 7 x 64 x *]] }
|
||||
0x7a7b868: {[conv3_act.p Gradient[7 x 7 x 64 x *]] [pool2 Gradient[7 x 7 x 32 x *]] [pool3 Value[3 x 3 x 64 x *]] }
|
||||
0x7a7ba28: {[conv3_act.b Gradient[1 x 1 x 64]] [conv3_act.y Gradient[7 x 7 x 64 x *]] [h1.t Value[64 x *]] }
|
||||
0x7a7bbe8: {[h1.W Gradient[64 x 3 x 3 x 64]] [h1.z Value[64 x 1 x *]] }
|
||||
0x7a7bda8: {[h1.t Gradient[64 x *]] [h1.y Value[64 x 1 x *]] }
|
||||
0x7a7bf68: {[h1_d Value[64 x 1 x *]] }
|
||||
0x7a7c128: {[h1.z Gradient[64 x 1 x *]] [pool3 Gradient[3 x 3 x 64 x *]] }
|
||||
0x7a7c2e8: {[OutputNodes.t Value[10 x 1 x *]] [h1.b Gradient[64 x 1]] [h1.y Gradient[64 x 1 x *]] }
|
||||
0x7a7cdc8: {[CE Gradient[1]] }
|
||||
0x7a7cf88: {[OutputNodes.W Gradient[10 x 64]] [OutputNodes.z Gradient[10 x 1 x *]] }
|
||||
0x7a7d148: {[OutputNodes.t Gradient[10 x 1 x *]] }
|
||||
0x7a7d308: {[OutputNodes.b Gradient[10]] }
|
||||
0x7a7d4c8: {[h1_d Gradient[64 x 1 x *]] }
|
||||
{ conv1_act.W : [32 x 75] (gradient)
|
||||
conv1_act.p : [32 x 32 x 32 x *] }
|
||||
{ conv1_act.c : [32 x 32 x 32 x *] (gradient)
|
||||
conv1_act.y : [32 x 32 x 32 x *] }
|
||||
{ conv1_act.p : [32 x 32 x 32 x *] (gradient)
|
||||
pool1 : [15 x 15 x 32 x *] }
|
||||
{ conv1_act.b : [1 x 1 x 32] (gradient)
|
||||
conv1_act.y : [32 x 32 x 32 x *] (gradient) }
|
||||
{ conv2_act.W : [32 x 800] (gradient)
|
||||
conv2_act.p : [15 x 15 x 32 x *] }
|
||||
{ conv2_act.c : [15 x 15 x 32 x *] (gradient)
|
||||
conv2_act.y : [15 x 15 x 32 x *] }
|
||||
{ conv2_act.p : [15 x 15 x 32 x *] (gradient)
|
||||
pool1 : [15 x 15 x 32 x *] (gradient)
|
||||
pool2 : [7 x 7 x 32 x *] }
|
||||
{ conv2_act.b : [1 x 1 x 32] (gradient)
|
||||
conv2_act.y : [15 x 15 x 32 x *] (gradient) }
|
||||
{ conv3_act.W : [64 x 800] (gradient)
|
||||
conv3_act.p : [7 x 7 x 64 x *] }
|
||||
{ conv3_act.c : [7 x 7 x 64 x *] (gradient)
|
||||
conv3_act.y : [7 x 7 x 64 x *] }
|
||||
{ conv3_act.p : [7 x 7 x 64 x *] (gradient)
|
||||
pool2 : [7 x 7 x 32 x *] (gradient)
|
||||
pool3 : [3 x 3 x 64 x *] }
|
||||
{ conv3_act.b : [1 x 1 x 64] (gradient)
|
||||
conv3_act.y : [7 x 7 x 64 x *] (gradient)
|
||||
h1.t : [64 x *] }
|
||||
{ h1.W : [64 x 3 x 3 x 64] (gradient)
|
||||
h1.z : [64 x 1 x *] }
|
||||
{ h1.t : [64 x *] (gradient)
|
||||
h1.y : [64 x 1 x *] }
|
||||
{ h1.z : [64 x 1 x *] (gradient)
|
||||
pool3 : [3 x 3 x 64 x *] (gradient) }
|
||||
{ OutputNodes.t : [10 x 1 x *]
|
||||
h1.b : [64 x 1] (gradient)
|
||||
h1.y : [64 x 1 x *] (gradient) }
|
||||
{ OutputNodes.W : [10 x 64] (gradient)
|
||||
OutputNodes.z : [10 x 1 x *] (gradient) }
|
||||
|
||||
05/13/2016 15:10:48: No PreCompute nodes found, skipping PreCompute step.
|
||||
|
||||
05/13/2016 15:10:48: Starting Epoch 1: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
08/16/2016 10:50:38: Training 116906 parameters in 10 out of 10 parameter tensors and 29 nodes with gradient:
|
||||
|
||||
05/13/2016 15:10:48: Starting minibatch loop.
|
||||
05/13/2016 15:10:51: Finished Epoch[ 1 of 10]: [Training] CE = 2.30242050 * 100; Err = 0.88000000 * 100; totalSamplesSeen = 100; learningRatePerSample = 0.00015625; epochTime=3.55904s
|
||||
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.1'
|
||||
08/16/2016 10:50:38: Node 'OutputNodes.W' (LearnableParameter operation) : [10 x 64]
|
||||
08/16/2016 10:50:38: Node 'OutputNodes.b' (LearnableParameter operation) : [10]
|
||||
08/16/2016 10:50:38: Node 'conv1_act.W' (LearnableParameter operation) : [32 x 75]
|
||||
08/16/2016 10:50:38: Node 'conv1_act.b' (LearnableParameter operation) : [1 x 1 x 32]
|
||||
08/16/2016 10:50:38: Node 'conv2_act.W' (LearnableParameter operation) : [32 x 800]
|
||||
08/16/2016 10:50:38: Node 'conv2_act.b' (LearnableParameter operation) : [1 x 1 x 32]
|
||||
08/16/2016 10:50:38: Node 'conv3_act.W' (LearnableParameter operation) : [64 x 800]
|
||||
08/16/2016 10:50:38: Node 'conv3_act.b' (LearnableParameter operation) : [1 x 1 x 64]
|
||||
08/16/2016 10:50:38: Node 'h1.W' (LearnableParameter operation) : [64 x 3 x 3 x 64]
|
||||
08/16/2016 10:50:38: Node 'h1.b' (LearnableParameter operation) : [64 x 1]
|
||||
|
||||
05/13/2016 15:10:51: Starting Epoch 2: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
08/16/2016 10:50:38: No PreCompute nodes found, or all already computed. Skipping pre-computation step.
|
||||
|
||||
05/13/2016 15:10:51: Starting minibatch loop.
|
||||
05/13/2016 15:10:51: Finished Epoch[ 2 of 10]: [Training] CE = 2.30175842 * 100; Err = 0.94000000 * 100; totalSamplesSeen = 200; learningRatePerSample = 0.00015625; epochTime=0.011903s
|
||||
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.2'
|
||||
08/16/2016 10:50:38: Starting Epoch 1: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
BlockRandomizer::StartEpoch: epoch 0: frames [0..100] (first sequence at sample 0), data subset 0 of 1
|
||||
|
||||
05/13/2016 15:10:51: Starting Epoch 3: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
08/16/2016 10:50:38: Starting minibatch loop.
|
||||
08/16/2016 10:50:41: Finished Epoch[ 1 of 10]: [Training] CE = 2.30223602 * 100; Err = 0.90000000 * 100; totalSamplesSeen = 100; learningRatePerSample = 0.00015625; epochTime=3.51082s
|
||||
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.1'
|
||||
|
||||
05/13/2016 15:10:51: Starting minibatch loop.
|
||||
05/13/2016 15:10:51: Finished Epoch[ 3 of 10]: [Training] CE = 2.30054413 * 100; Err = 0.90000000 * 100; totalSamplesSeen = 300; learningRatePerSample = 0.00015625; epochTime=0.012701s
|
||||
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.3'
|
||||
08/16/2016 10:50:41: Starting Epoch 2: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
BlockRandomizer::StartEpoch: epoch 1: frames [100..200] (first sequence at sample 100), data subset 0 of 1
|
||||
|
||||
05/13/2016 15:10:51: Starting Epoch 4: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
08/16/2016 10:50:41: Starting minibatch loop.
|
||||
08/16/2016 10:50:41: Finished Epoch[ 2 of 10]: [Training] CE = 2.30189240 * 100; Err = 0.87000000 * 100; totalSamplesSeen = 200; learningRatePerSample = 0.00015625; epochTime=0.012555s
|
||||
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.2'
|
||||
|
||||
05/13/2016 15:10:51: Starting minibatch loop.
|
||||
05/13/2016 15:10:51: Finished Epoch[ 4 of 10]: [Training] CE = 2.30022812 * 100; Err = 0.88000000 * 100; totalSamplesSeen = 400; learningRatePerSample = 0.00015625; epochTime=0.01144s
|
||||
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.4'
|
||||
08/16/2016 10:50:41: Starting Epoch 3: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
BlockRandomizer::StartEpoch: epoch 2: frames [200..300] (first sequence at sample 200), data subset 0 of 1
|
||||
|
||||
05/13/2016 15:10:51: Starting Epoch 5: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
08/16/2016 10:50:41: Starting minibatch loop.
|
||||
08/16/2016 10:50:41: Finished Epoch[ 3 of 10]: [Training] CE = 2.29965256 * 100; Err = 0.86000000 * 100; totalSamplesSeen = 300; learningRatePerSample = 0.00015625; epochTime=0.012394s
|
||||
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.3'
|
||||
|
||||
05/13/2016 15:10:51: Starting minibatch loop.
|
||||
05/13/2016 15:10:51: Finished Epoch[ 5 of 10]: [Training] CE = 2.29579636 * 100; Err = 0.87000000 * 100; totalSamplesSeen = 500; learningRatePerSample = 0.00015625; epochTime=0.011529s
|
||||
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.5'
|
||||
08/16/2016 10:50:41: Starting Epoch 4: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
BlockRandomizer::StartEpoch: epoch 3: frames [300..400] (first sequence at sample 300), data subset 0 of 1
|
||||
|
||||
08/16/2016 10:50:41: Starting minibatch loop.
|
||||
08/16/2016 10:50:41: Finished Epoch[ 4 of 10]: [Training] CE = 2.29966064 * 100; Err = 0.91000000 * 100; totalSamplesSeen = 400; learningRatePerSample = 0.00015625; epochTime=0.0124s
|
||||
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.4'
|
||||
|
||||
08/16/2016 10:50:41: Starting Epoch 5: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
BlockRandomizer::StartEpoch: epoch 4: frames [400..500] (first sequence at sample 400), data subset 0 of 1
|
||||
|
||||
08/16/2016 10:50:41: Starting minibatch loop.
|
||||
08/16/2016 10:50:41: Finished Epoch[ 5 of 10]: [Training] CE = 2.30450394 * 100; Err = 0.94000000 * 100; totalSamplesSeen = 500; learningRatePerSample = 0.00015625; epochTime=0.012302s
|
||||
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.5'
|
||||
Setting dropout rate to 0.5.
|
||||
|
||||
05/13/2016 15:10:51: Starting Epoch 6: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
08/16/2016 10:50:41: Starting Epoch 6: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
BlockRandomizer::StartEpoch: epoch 5: frames [500..600] (first sequence at sample 500), data subset 0 of 1
|
||||
|
||||
05/13/2016 15:10:51: Starting minibatch loop.
|
||||
08/16/2016 10:50:41: Starting minibatch loop.
|
||||
(GPU): creating curand object with seed 5
|
||||
05/13/2016 15:10:51: Finished Epoch[ 6 of 10]: [Training] CE = 2.30121231 * 100; Err = 0.84000000 * 100; totalSamplesSeen = 600; learningRatePerSample = 0.00015625; epochTime=0.012276s
|
||||
05/13/2016 15:10:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.6'
|
||||
08/16/2016 10:50:41: Finished Epoch[ 6 of 10]: [Training] CE = 2.29013916 * 100; Err = 0.81000000 * 100; totalSamplesSeen = 600; learningRatePerSample = 0.00015625; epochTime=0.012412s
|
||||
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.6'
|
||||
|
||||
05/13/2016 15:10:51: Starting Epoch 7: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
08/16/2016 10:50:41: Starting Epoch 7: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
BlockRandomizer::StartEpoch: epoch 6: frames [600..700] (first sequence at sample 600), data subset 0 of 1
|
||||
|
||||
05/13/2016 15:10:51: Starting minibatch loop.
|
||||
08/16/2016 10:50:41: Starting minibatch loop.
|
||||
(GPU): creating curand object with seed 6
|
||||
05/13/2016 15:10:52: Finished Epoch[ 7 of 10]: [Training] CE = 2.28975647 * 100; Err = 0.93000000 * 100; totalSamplesSeen = 700; learningRatePerSample = 0.00015625; epochTime=0.011495s
|
||||
05/13/2016 15:10:52: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.7'
|
||||
08/16/2016 10:50:41: Finished Epoch[ 7 of 10]: [Training] CE = 2.29815765 * 100; Err = 0.93000000 * 100; totalSamplesSeen = 700; learningRatePerSample = 0.00015625; epochTime=0.012303s
|
||||
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.7'
|
||||
|
||||
05/13/2016 15:10:52: Starting Epoch 8: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
08/16/2016 10:50:41: Starting Epoch 8: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
BlockRandomizer::StartEpoch: epoch 7: frames [700..800] (first sequence at sample 700), data subset 0 of 1
|
||||
|
||||
05/13/2016 15:10:52: Starting minibatch loop.
|
||||
08/16/2016 10:50:41: Starting minibatch loop.
|
||||
(GPU): creating curand object with seed 7
|
||||
05/13/2016 15:10:52: Finished Epoch[ 8 of 10]: [Training] CE = 2.29035095 * 100; Err = 0.91000000 * 100; totalSamplesSeen = 800; learningRatePerSample = 0.00015625; epochTime=0.012157s
|
||||
05/13/2016 15:10:52: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.8'
|
||||
08/16/2016 10:50:41: Finished Epoch[ 8 of 10]: [Training] CE = 2.28805603 * 100; Err = 0.89000000 * 100; totalSamplesSeen = 800; learningRatePerSample = 0.00015625; epochTime=0.012517s
|
||||
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.8'
|
||||
|
||||
05/13/2016 15:10:52: Starting Epoch 9: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
08/16/2016 10:50:41: Starting Epoch 9: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
BlockRandomizer::StartEpoch: epoch 8: frames [800..900] (first sequence at sample 800), data subset 0 of 1
|
||||
|
||||
05/13/2016 15:10:52: Starting minibatch loop.
|
||||
08/16/2016 10:50:41: Starting minibatch loop.
|
||||
(GPU): creating curand object with seed 8
|
||||
05/13/2016 15:10:52: Finished Epoch[ 9 of 10]: [Training] CE = 2.29797729 * 100; Err = 0.87000000 * 100; totalSamplesSeen = 900; learningRatePerSample = 0.00015625; epochTime=0.011451s
|
||||
05/13/2016 15:10:52: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.9'
|
||||
08/16/2016 10:50:41: Finished Epoch[ 9 of 10]: [Training] CE = 2.29380524 * 100; Err = 0.88000000 * 100; totalSamplesSeen = 900; learningRatePerSample = 0.00015625; epochTime=0.012463s
|
||||
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution.9'
|
||||
|
||||
05/13/2016 15:10:52: Starting Epoch 10: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
08/16/2016 10:50:41: Starting Epoch 10: learning rate per sample = 0.000156 effective momentum = 0.900000 momentum as time constant = 607.4 samples
|
||||
BlockRandomizer::StartEpoch: epoch 9: frames [900..1000] (first sequence at sample 900), data subset 0 of 1
|
||||
|
||||
05/13/2016 15:10:52: Starting minibatch loop.
|
||||
08/16/2016 10:50:41: Starting minibatch loop.
|
||||
(GPU): creating curand object with seed 9
|
||||
05/13/2016 15:10:52: Finished Epoch[10 of 10]: [Training] CE = 2.29764435 * 100; Err = 0.87000000 * 100; totalSamplesSeen = 1000; learningRatePerSample = 0.00015625; epochTime=0.012689s
|
||||
05/13/2016 15:10:52: SGD: Saving checkpoint model '/tmp/cntk-test-20160513145544.775982/CNTKTextFormatReader/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution'
|
||||
05/13/2016 15:10:52: CNTKCommandTrainEnd: Train
|
||||
08/16/2016 10:50:41: Finished Epoch[10 of 10]: [Training] CE = 2.27814423 * 100; Err = 0.87000000 * 100; totalSamplesSeen = 1000; learningRatePerSample = 0.00015625; epochTime=0.012432s
|
||||
08/16/2016 10:50:41: SGD: Saving checkpoint model '/tmp/cntk-test-20160816095502.258817/Examples/Image/Miscellaneous/CIFAR-10_01_Convolution@release_gpu/Models/01_Convolution'
|
||||
08/16/2016 10:50:41: CNTKCommandTrainEnd: Train
|
||||
|
||||
05/13/2016 15:10:52: Action "train" complete.
|
||||
08/16/2016 10:50:41: Action "train" complete.
|
||||
|
||||
|
||||
05/13/2016 15:10:52: ##############################################################################
|
||||
05/13/2016 15:10:52: # #
|
||||
05/13/2016 15:10:52: # Action "test" #
|
||||
05/13/2016 15:10:52: # #
|
||||
05/13/2016 15:10:52: ##############################################################################
|
||||
08/16/2016 10:50:41: ##############################################################################
|
||||
08/16/2016 10:50:41: # #
|
||||
08/16/2016 10:50:41: # Action "test" #
|
||||
08/16/2016 10:50:41: # #
|
||||
08/16/2016 10:50:41: ##############################################################################
|
||||
|
||||
|
||||
Post-processing network...
|
||||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 34 nodes to process in pass 1.
|
||||
|
@ -530,7 +582,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1_d) : [10 x 64], [64 x 1
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *1], [10] -> [10 x 1 x *1]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
|
||||
Validating network. 21 nodes to process in pass 2.
|
||||
|
||||
|
@ -538,17 +590,17 @@ Validating network. 21 nodes to process in pass 2.
|
|||
Validating network, final pass.
|
||||
|
||||
|
||||
Using cuDNN convolution engine for geometry: Input: 32 x 32 x 3, Output: 32 x 32 x 32, Kernel: 5 x 5 x 3, Map: 1 x 1 x 32, Stride: 1 x 1 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
|
||||
conv1_act.c: using cuDNN convolution engine for geometry: Input: 32 x 32 x 3, Output: 32 x 32 x 32, Kernel: 5 x 5 x 3, Map: 1 x 1 x 32, Stride: 1 x 1 x 3, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
|
||||
|
||||
Using cuDNN convolution engine for geometry: Input: 32 x 32 x 32, Output: 15 x 15 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
|
||||
pool1: using cuDNN convolution engine for geometry: Input: 32 x 32 x 32, Output: 15 x 15 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
|
||||
|
||||
Using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 15 x 15 x 32, Kernel: 5 x 5 x 32, Map: 1 x 1 x 32, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
|
||||
conv2_act.c: using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 15 x 15 x 32, Kernel: 5 x 5 x 32, Map: 1 x 1 x 32, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
|
||||
|
||||
Using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 7 x 7 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
|
||||
pool2: using cuDNN convolution engine for geometry: Input: 15 x 15 x 32, Output: 7 x 7 x 32, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
|
||||
|
||||
Using cuDNN convolution engine for geometry: Input: 7 x 7 x 32, Output: 7 x 7 x 64, Kernel: 5 x 5 x 32, Map: 1 x 1 x 64, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
|
||||
conv3_act.c: using cuDNN convolution engine for geometry: Input: 7 x 7 x 32, Output: 7 x 7 x 64, Kernel: 5 x 5 x 32, Map: 1 x 1 x 64, Stride: 1 x 1 x 32, Sharing: (1), AutoPad: (1), LowerPad: 0, UpperPad: 0.
|
||||
|
||||
Using cuDNN convolution engine for geometry: Input: 7 x 7 x 64, Output: 3 x 3 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
|
||||
pool3: using cuDNN convolution engine for geometry: Input: 7 x 7 x 64, Output: 3 x 3 x 64, Kernel: 3 x 3 x 1, Map: 1, Stride: 2 x 2 x 1, Sharing: (1), AutoPad: (0), LowerPad: 0, UpperPad: 0.
|
||||
|
||||
|
||||
13 out of 34 nodes do not share the minibatch layout with the input data.
|
||||
|
@ -560,46 +612,14 @@ evalNodeNames are not specified, using all the default evalnodes and training cr
|
|||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing Structure:
|
||||
Memory Sharing: Out of 34 matrices, 0 are shared as 0, and 34 are not shared.
|
||||
|
||||
(nil): {[CE Gradient[1]] [Err Gradient[1]] [OutputNodes.W Gradient[10 x 64]] [OutputNodes.b Gradient[10]] [OutputNodes.t Gradient[10 x 1 x *1]] [OutputNodes.z Gradient[10 x 1 x *1]] [conv1_act.W Gradient[32 x 75]] [conv1_act.b Gradient[1 x 1 x 32]] [conv1_act.c Gradient[32 x 32 x 32 x *1]] [conv1_act.p Gradient[32 x 32 x 32 x *1]] [conv1_act.y Gradient[32 x 32 x 32 x *1]] [conv2_act.W Gradient[32 x 800]] [conv2_act.b Gradient[1 x 1 x 32]] [conv2_act.c Gradient[15 x 15 x 32 x *1]] [conv2_act.p Gradient[15 x 15 x 32 x *1]] [conv2_act.y Gradient[15 x 15 x 32 x *1]] [conv3_act.W Gradient[64 x 800]] [conv3_act.b Gradient[1 x 1 x 64]] [conv3_act.c Gradient[7 x 7 x 64 x *1]] [conv3_act.p Gradient[7 x 7 x 64 x *1]] [conv3_act.y Gradient[7 x 7 x 64 x *1]] [featOffs Gradient[1 x 1]] [featScaled Gradient[32 x 32 x 3 x *1]] [features Gradient[32 x 32 x 3 x *1]] [h1.W Gradient[64 x 3 x 3 x 64]] [h1.b Gradient[64 x 1]] [h1.t Gradient[64 x *1]] [h1.y Gradient[64 x 1 x *1]] [h1.z Gradient[64 x 1 x *1]] [h1_d Gradient[64 x 1 x *1]] [labels Gradient[10 x *1]] [pool1 Gradient[15 x 15 x 32 x *1]] [pool2 Gradient[7 x 7 x 32 x *1]] [pool3 Gradient[3 x 3 x 64 x *1]] }
|
||||
0x7fc883e04ba8: {[conv1_act.b Value[1 x 1 x 32]] }
|
||||
0x7fc883e05fc8: {[conv1_act.W Value[32 x 75]] }
|
||||
0x7fc883e06768: {[conv2_act.b Value[1 x 1 x 32]] }
|
||||
0x7fc883e06928: {[conv2_act.W Value[32 x 800]] }
|
||||
0x7fc883e085b8: {[conv3_act.b Value[1 x 1 x 64]] }
|
||||
0x7fc883e09528: {[conv3_act.W Value[64 x 800]] }
|
||||
0x7fc883e0b568: {[featOffs Value[1 x 1]] }
|
||||
0x7fc883e0c1e8: {[features Value[32 x 32 x 3 x *1]] }
|
||||
0x7fc883e0cc38: {[h1.b Value[64 x 1]] }
|
||||
0x7fc883e0cf08: {[h1.W Value[64 x 3 x 3 x 64]] }
|
||||
0x7fc883e0eb48: {[labels Value[10 x *1]] }
|
||||
0x7fc883e0f558: {[OutputNodes.b Value[10]] }
|
||||
0x7fc883e10068: {[OutputNodes.W Value[10 x 64]] }
|
||||
0x7fc883e286b8: {[Err Value[1]] }
|
||||
0x7fc883e2bd28: {[CE Value[1]] }
|
||||
0x7fc883e2bfa8: {[conv1_act.y Value[32 x 32 x 32 x *1]] }
|
||||
0x7fc883e54728: {[conv1_act.c Value[32 x 32 x 32 x *1]] }
|
||||
0x7fc883e54a88: {[featScaled Value[32 x 32 x 3 x *1]] }
|
||||
0x7fc883e54c18: {[conv1_act.p Value[32 x 32 x 32 x *1]] }
|
||||
0x7fc883e71a78: {[pool1 Value[15 x 15 x 32 x *1]] }
|
||||
0x7fc883e71c38: {[conv2_act.c Value[15 x 15 x 32 x *1]] }
|
||||
0x7fc883e71fb8: {[conv2_act.p Value[15 x 15 x 32 x *1]] }
|
||||
0x7fc883e72178: {[conv2_act.y Value[15 x 15 x 32 x *1]] }
|
||||
0x7fc883e72338: {[pool2 Value[7 x 7 x 32 x *1]] }
|
||||
0x7fc883e724f8: {[conv3_act.c Value[7 x 7 x 64 x *1]] }
|
||||
0x7fc883e72878: {[conv3_act.p Value[7 x 7 x 64 x *1]] }
|
||||
0x7fc883e72a38: {[conv3_act.y Value[7 x 7 x 64 x *1]] }
|
||||
0x7fc883e72bf8: {[pool3 Value[3 x 3 x 64 x *1]] }
|
||||
0x7fc883e72db8: {[h1.t Value[64 x *1]] }
|
||||
0x7fc883e72f78: {[h1.z Value[64 x 1 x *1]] }
|
||||
0x7fc883e73138: {[h1.y Value[64 x 1 x *1]] }
|
||||
0x7fc883e732f8: {[h1_d Value[64 x 1 x *1]] }
|
||||
0x7fc883e73678: {[OutputNodes.t Value[10 x 1 x *1]] }
|
||||
0x7fc883e73838: {[OutputNodes.z Value[10 x 1 x *1]] }
|
||||
|
||||
05/13/2016 15:10:58: Final Results: Minibatch[1-625]: Err = 0.86430000 * 10000; CE = 2.28476029 * 10000; perplexity = 9.82333117
|
||||
BlockRandomizer::StartEpoch: epoch 0: frames [0..10000] (first sequence at sample 0), data subset 0 of 1
|
||||
08/16/2016 10:50:43: Minibatch[1-500]: Err = 0.86125000 * 8000; CE = 2.28389484 * 8000
|
||||
08/16/2016 10:50:43: Minibatch[501-625]: Err = 0.86350000 * 2000; CE = 2.28027481 * 2000
|
||||
08/16/2016 10:50:43: Final Results: Minibatch[1-625]: Err = 0.86170000 * 10000; CE = 2.28317084 * 10000; perplexity = 9.80772986
|
||||
|
||||
05/13/2016 15:10:58: Action "test" complete.
|
||||
08/16/2016 10:50:43: Action "test" complete.
|
||||
|
||||
05/13/2016 15:10:58: __COMPLETED__
|
||||
08/16/2016 10:50:43: __COMPLETED__
|
|
@ -286,7 +286,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 34 nodes to process in pass 1.
|
||||
|
@ -324,7 +324,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1_d) : [10 x 64], [64 x 1
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *], [10] -> [10 x 1 x *]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x 1 x *] -> [1]
|
||||
|
||||
Validating network. 21 nodes to process in pass 2.
|
||||
|
||||
|
@ -356,7 +356,7 @@ Post-processing network complete.
|
|||
|
||||
05/13/2016 08:17:53: Evaluation criterion node(s):
|
||||
|
||||
05/13/2016 08:17:53: Err = ClassificationError
|
||||
05/13/2016 08:17:53: Err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
@ -490,7 +490,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 34 nodes to process in pass 1.
|
||||
|
@ -528,7 +528,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1_d) : [10 x 64], [64 x 1
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x 1 x *1], [10] -> [10 x 1 x *1]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x 1 x *1] -> [1]
|
||||
|
||||
Validating network. 21 nodes to process in pass 2.
|
||||
|
||||
|
|
|
@ -291,7 +291,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 45 nodes to process in pass 1.
|
||||
|
@ -340,7 +340,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1.y) : [10 x 64], [64 x *]
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
|
||||
Validating network. 20 nodes to process in pass 2.
|
||||
|
||||
|
@ -380,7 +380,7 @@ Post-processing network complete.
|
|||
|
||||
05/13/2016 15:10:59: Evaluation criterion node(s):
|
||||
|
||||
05/13/2016 15:10:59: Err = ClassificationError
|
||||
05/13/2016 15:10:59: Err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
@ -491,7 +491,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 45 nodes to process in pass 1.
|
||||
|
@ -540,7 +540,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1.y) : [10 x 64], [64 x *1
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
|
||||
Validating network. 20 nodes to process in pass 2.
|
||||
|
||||
|
|
|
@ -289,7 +289,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 45 nodes to process in pass 1.
|
||||
|
@ -338,7 +338,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1.y) : [10 x 64], [64 x *]
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
|
||||
Validating network. 20 nodes to process in pass 2.
|
||||
|
||||
|
@ -378,7 +378,7 @@ Post-processing network complete.
|
|||
|
||||
05/13/2016 08:18:26: Evaluation criterion node(s):
|
||||
|
||||
05/13/2016 08:18:26: Err = ClassificationError
|
||||
05/13/2016 08:18:26: Err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
@ -489,7 +489,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 45 nodes to process in pass 1.
|
||||
|
@ -538,7 +538,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, h1.y) : [10 x 64], [64 x *1
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
|
||||
Validating network. 20 nodes to process in pass 2.
|
||||
|
||||
|
|
|
@ -356,7 +356,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 184 nodes to process in pass 1.
|
||||
|
@ -546,7 +546,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
|
||||
Validating network. 75 nodes to process in pass 2.
|
||||
|
||||
|
@ -652,7 +652,7 @@ Post-processing network complete.
|
|||
|
||||
05/03/2016 18:13:08: Evaluation criterion node(s):
|
||||
|
||||
05/03/2016 18:13:08: Err = ClassificationError
|
||||
05/03/2016 18:13:08: Err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
@ -907,7 +907,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 184 nodes to process in pass 1.
|
||||
|
@ -1095,7 +1095,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
|
||||
Validating network. 75 nodes to process in pass 2.
|
||||
|
||||
|
|
|
@ -354,7 +354,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 184 nodes to process in pass 1.
|
||||
|
@ -544,7 +544,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
|
||||
Validating network. 75 nodes to process in pass 2.
|
||||
|
||||
|
@ -650,7 +650,7 @@ Post-processing network complete.
|
|||
|
||||
05/03/2016 14:04:12: Evaluation criterion node(s):
|
||||
|
||||
05/03/2016 14:04:12: Err = ClassificationError
|
||||
05/03/2016 14:04:12: Err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
@ -905,7 +905,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 184 nodes to process in pass 1.
|
||||
|
@ -1093,7 +1093,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
|
||||
Validating network. 75 nodes to process in pass 2.
|
||||
|
||||
|
|
|
@ -356,7 +356,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 949 nodes to process in pass 1.
|
||||
|
@ -1311,7 +1311,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
|
||||
Validating network. 390 nodes to process in pass 2.
|
||||
|
||||
|
@ -1777,7 +1777,7 @@ Post-processing network complete.
|
|||
|
||||
05/03/2016 18:17:55: Evaluation criterion node(s):
|
||||
|
||||
05/03/2016 18:17:55: Err = ClassificationError
|
||||
05/03/2016 18:17:55: Err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
@ -2932,7 +2932,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 949 nodes to process in pass 1.
|
||||
|
@ -3885,7 +3885,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
|
||||
Validating network. 390 nodes to process in pass 2.
|
||||
|
||||
|
|
|
@ -354,7 +354,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 949 nodes to process in pass 1.
|
||||
|
@ -1309,7 +1309,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
|
||||
Validating network. 390 nodes to process in pass 2.
|
||||
|
||||
|
@ -1775,7 +1775,7 @@ Post-processing network complete.
|
|||
|
||||
05/03/2016 14:05:00: Evaluation criterion node(s):
|
||||
|
||||
05/03/2016 14:05:00: Err = ClassificationError
|
||||
05/03/2016 14:05:00: Err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
@ -2930,7 +2930,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 949 nodes to process in pass 1.
|
||||
|
@ -3883,7 +3883,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, pool) : [10 x 1 x 1 x 64],
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
|
||||
Validating network. 390 nodes to process in pass 2.
|
||||
|
||||
|
|
|
@ -282,7 +282,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 32 nodes to process in pass 1.
|
||||
|
@ -318,7 +318,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, conv4.y) : [10 x 7 x 7 x 32
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
|
||||
Validating network. 19 nodes to process in pass 2.
|
||||
|
||||
|
@ -350,7 +350,7 @@ Post-processing network complete.
|
|||
|
||||
05/13/2016 15:11:11: Evaluation criterion node(s):
|
||||
|
||||
05/13/2016 15:11:11: Err = ClassificationError
|
||||
05/13/2016 15:11:11: Err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
@ -446,7 +446,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 32 nodes to process in pass 1.
|
||||
|
@ -482,7 +482,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, conv4.y) : [10 x 7 x 7 x 32
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
|
||||
Validating network. 19 nodes to process in pass 2.
|
||||
|
||||
|
|
|
@ -280,7 +280,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 32 nodes to process in pass 1.
|
||||
|
@ -316,7 +316,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, conv4.y) : [10 x 7 x 7 x 32
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *], [10] -> [10 x *]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *], [10 x *] -> [1]
|
||||
|
||||
Validating network. 19 nodes to process in pass 2.
|
||||
|
||||
|
@ -348,7 +348,7 @@ Post-processing network complete.
|
|||
|
||||
05/13/2016 08:19:02: Evaluation criterion node(s):
|
||||
|
||||
05/13/2016 08:19:02: Err = ClassificationError
|
||||
05/13/2016 08:19:02: Err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
@ -444,7 +444,7 @@ Post-processing network...
|
|||
|
||||
3 roots:
|
||||
CE = CrossEntropyWithSoftmax()
|
||||
Err = ClassificationError()
|
||||
Err = ErrorPrediction()
|
||||
OutputNodes.z = Plus()
|
||||
|
||||
Validating network. 32 nodes to process in pass 1.
|
||||
|
@ -480,7 +480,7 @@ Validating --> OutputNodes.t = Times (OutputNodes.W, conv4.y) : [10 x 7 x 7 x 32
|
|||
Validating --> OutputNodes.b = LearnableParameter() : -> [10]
|
||||
Validating --> OutputNodes.z = Plus (OutputNodes.t, OutputNodes.b) : [10 x *1], [10] -> [10 x *1]
|
||||
Validating --> CE = CrossEntropyWithSoftmax (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ClassificationError (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
Validating --> Err = ErrorPrediction (labels, OutputNodes.z) : [10 x *1], [10 x *1] -> [1]
|
||||
|
||||
Validating network. 19 nodes to process in pass 2.
|
||||
|
||||
|
|
|
@ -68,7 +68,7 @@ Multigpu_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -169,7 +169,7 @@ Multigpu_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -302,7 +302,7 @@ configparameters: Multigpu.cntk:Multigpu_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -370,7 +370,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -399,7 +399,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
|
||||
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -423,14 +423,14 @@ Post-processing network complete.
|
|||
|
||||
05/03/2016 15:21:43: Evaluation criterion node(s):
|
||||
|
||||
05/03/2016 15:21:43: EvalClassificationError = ClassificationError
|
||||
05/03/2016 15:21:43: EvalErrorPrediction = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing Structure:
|
||||
|
||||
(nil): {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
(nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
0x1abc7c8: {[InvStdOfFeatures Value[2]] }
|
||||
0x1b40348: {[features Value[2 x *]] }
|
||||
0x1b408b8: {[MeanOfFeatures Value[2]] }
|
||||
|
@ -443,7 +443,7 @@ Memory Sharing Structure:
|
|||
0x1b46708: {[labels Value[2 x *]] }
|
||||
0x1b473e8: {[Prior Value[2]] }
|
||||
0x1b4b138: {[ScaledLogLikelihood Value[2 x 1 x *]] }
|
||||
0x1b4cc28: {[EvalClassificationError Value[1]] }
|
||||
0x1b4cc28: {[EvalErrorPrediction Value[1]] }
|
||||
0x1b4cea8: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
0x1b4d388: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
|
||||
0x1b4d548: {[W0*features+B0 Gradient[50 x 1 x *]] [W1*H1 Value[50 x 1 x *]] }
|
||||
|
@ -473,139 +473,139 @@ Memory Sharing Structure:
|
|||
05/03/2016 15:21:44: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:21:44: Starting minibatch loop.
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69966235 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0538s; samplesPerSecond = 4647.4
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70639648 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.1073s; samplesPerSecond = 2329.6
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70470264 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0631s; samplesPerSecond = 3961.3
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69813501 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0747s; samplesPerSecond = 3346.9
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73551416 * 250; EvalClassificationError = 0.57600000 * 250; time = 0.0900s; samplesPerSecond = 2778.4
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72432324 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0605s; samplesPerSecond = 4135.0
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73327588 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0619s; samplesPerSecond = 4039.0
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70092627 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0769s; samplesPerSecond = 3249.9
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72354980 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0799s; samplesPerSecond = 3129.0
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72148096 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0620s; samplesPerSecond = 4031.5
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69814941 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.1278s; samplesPerSecond = 1955.9
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70699121 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0821s; samplesPerSecond = 3044.1
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69898437 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0755s; samplesPerSecond = 3312.4
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71712695 * 250; EvalClassificationError = 0.54000000 * 250; time = 0.0657s; samplesPerSecond = 3804.8
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69470703 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.1049s; samplesPerSecond = 2382.9
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71375879 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.1180s; samplesPerSecond = 2117.9
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70381641 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.1065s; samplesPerSecond = 2347.9
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71748633 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.2709s; samplesPerSecond = 922.9
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71863281 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.1375s; samplesPerSecond = 1818.4
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70715234 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.1143s; samplesPerSecond = 2186.6
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70401074 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.1079s; samplesPerSecond = 2317.1
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70599414 * 250; EvalClassificationError = 0.48400000 * 250; time = 0.0917s; samplesPerSecond = 2727.7
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69628711 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0923s; samplesPerSecond = 2707.6
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75920898 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0887s; samplesPerSecond = 2819.0
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70542578 * 250; EvalClassificationError = 0.43600000 * 250; time = 0.0634s; samplesPerSecond = 3945.8
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70643945 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0885s; samplesPerSecond = 2823.7
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72481641 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0601s; samplesPerSecond = 4162.6
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71133594 * 250; EvalClassificationError = 0.55600000 * 250; time = 0.0630s; samplesPerSecond = 3968.1
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68605664 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0849s; samplesPerSecond = 2944.1
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69535352 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0879s; samplesPerSecond = 2844.6
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.68741797 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0752s; samplesPerSecond = 3325.7
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.67916406 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0958s; samplesPerSecond = 2610.3
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.67841992 * 250; EvalClassificationError = 0.44800000 * 250; time = 0.1009s; samplesPerSecond = 2478.7
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68038477 * 250; EvalClassificationError = 0.49200000 * 250; time = 0.1607s; samplesPerSecond = 1555.6
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.61937109 * 250; EvalClassificationError = 0.30400000 * 250; time = 0.1131s; samplesPerSecond = 2211.4
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.57844141 * 250; EvalClassificationError = 0.27200000 * 250; time = 0.1047s; samplesPerSecond = 2388.5
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.49124023 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0896s; samplesPerSecond = 2791.5
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.39071289 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0727s; samplesPerSecond = 3438.8
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.27650586 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.2624s; samplesPerSecond = 952.6
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.26430078 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0842s; samplesPerSecond = 2967.7
|
||||
05/03/2016 15:21:47: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.66664150 * 10000; EvalClassificationError = 0.44430000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=3.93174s
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69966235 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0538s; samplesPerSecond = 4647.4
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70639648 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.1073s; samplesPerSecond = 2329.6
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70470264 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0631s; samplesPerSecond = 3961.3
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69813501 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0747s; samplesPerSecond = 3346.9
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73551416 * 250; EvalErrorPrediction = 0.57600000 * 250; time = 0.0900s; samplesPerSecond = 2778.4
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72432324 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0605s; samplesPerSecond = 4135.0
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73327588 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0619s; samplesPerSecond = 4039.0
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70092627 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0769s; samplesPerSecond = 3249.9
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72354980 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0799s; samplesPerSecond = 3129.0
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72148096 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0620s; samplesPerSecond = 4031.5
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69814941 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.1278s; samplesPerSecond = 1955.9
|
||||
05/03/2016 15:21:44: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70699121 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0821s; samplesPerSecond = 3044.1
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69898437 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0755s; samplesPerSecond = 3312.4
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71712695 * 250; EvalErrorPrediction = 0.54000000 * 250; time = 0.0657s; samplesPerSecond = 3804.8
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69470703 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.1049s; samplesPerSecond = 2382.9
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71375879 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.1180s; samplesPerSecond = 2117.9
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70381641 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.1065s; samplesPerSecond = 2347.9
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71748633 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.2709s; samplesPerSecond = 922.9
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71863281 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.1375s; samplesPerSecond = 1818.4
|
||||
05/03/2016 15:21:45: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70715234 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.1143s; samplesPerSecond = 2186.6
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70401074 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.1079s; samplesPerSecond = 2317.1
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70599414 * 250; EvalErrorPrediction = 0.48400000 * 250; time = 0.0917s; samplesPerSecond = 2727.7
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69628711 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0923s; samplesPerSecond = 2707.6
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75920898 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0887s; samplesPerSecond = 2819.0
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70542578 * 250; EvalErrorPrediction = 0.43600000 * 250; time = 0.0634s; samplesPerSecond = 3945.8
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70643945 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0885s; samplesPerSecond = 2823.7
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72481641 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0601s; samplesPerSecond = 4162.6
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71133594 * 250; EvalErrorPrediction = 0.55600000 * 250; time = 0.0630s; samplesPerSecond = 3968.1
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68605664 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0849s; samplesPerSecond = 2944.1
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69535352 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0879s; samplesPerSecond = 2844.6
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.68741797 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0752s; samplesPerSecond = 3325.7
|
||||
05/03/2016 15:21:46: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.67916406 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0958s; samplesPerSecond = 2610.3
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.67841992 * 250; EvalErrorPrediction = 0.44800000 * 250; time = 0.1009s; samplesPerSecond = 2478.7
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68038477 * 250; EvalErrorPrediction = 0.49200000 * 250; time = 0.1607s; samplesPerSecond = 1555.6
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.61937109 * 250; EvalErrorPrediction = 0.30400000 * 250; time = 0.1131s; samplesPerSecond = 2211.4
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.57844141 * 250; EvalErrorPrediction = 0.27200000 * 250; time = 0.1047s; samplesPerSecond = 2388.5
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.49124023 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0896s; samplesPerSecond = 2791.5
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.39071289 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0727s; samplesPerSecond = 3438.8
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.27650586 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.2624s; samplesPerSecond = 952.6
|
||||
05/03/2016 15:21:47: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.26430078 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0842s; samplesPerSecond = 2967.7
|
||||
05/03/2016 15:21:47: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.66664150 * 10000; EvalErrorPrediction = 0.44430000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=3.93174s
|
||||
05/03/2016 15:21:47: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn.1'
|
||||
|
||||
05/03/2016 15:21:47: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:21:47: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.20720006 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0545s; samplesPerSecond = 4583.4
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.19690290 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0641s; samplesPerSecond = 3899.7
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.16064646 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0770s; samplesPerSecond = 3247.1
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13547171 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0640s; samplesPerSecond = 3904.2
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.18000261 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0732s; samplesPerSecond = 3413.6
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17787841 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0790s; samplesPerSecond = 3164.0
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16821879 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0880s; samplesPerSecond = 2839.4
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16363456 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0854s; samplesPerSecond = 2926.8
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.19533907 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0774s; samplesPerSecond = 3228.6
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19318692 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0820s; samplesPerSecond = 3049.5
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.12726279 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0766s; samplesPerSecond = 3261.6
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.18620067 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0773s; samplesPerSecond = 3235.5
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.11547500 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0797s; samplesPerSecond = 3136.6
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16675950 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0833s; samplesPerSecond = 2999.8
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.15807389 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0822s; samplesPerSecond = 3042.5
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18389093 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0726s; samplesPerSecond = 3443.0
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18269750 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0897s; samplesPerSecond = 2787.7
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18737841 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0963s; samplesPerSecond = 2597.3
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20174757 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0811s; samplesPerSecond = 3081.1
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13336708 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0732s; samplesPerSecond = 3414.6
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13851332 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0879s; samplesPerSecond = 2843.0
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15422288 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0821s; samplesPerSecond = 3044.3
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15478799 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0815s; samplesPerSecond = 3069.2
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14530201 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0810s; samplesPerSecond = 3086.3
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12192809 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.2596s; samplesPerSecond = 962.9
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.13975597 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0569s; samplesPerSecond = 4394.5
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12566363 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0911s; samplesPerSecond = 2744.6
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18963051 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0765s; samplesPerSecond = 3267.2
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17955467 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0914s; samplesPerSecond = 2736.4
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18862103 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0772s; samplesPerSecond = 3236.7
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17503073 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0775s; samplesPerSecond = 3225.8
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14741998 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0774s; samplesPerSecond = 3230.1
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13803981 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0726s; samplesPerSecond = 3443.0
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.14139232 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0820s; samplesPerSecond = 3048.4
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13886877 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0766s; samplesPerSecond = 3264.1
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.15025864 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0852s; samplesPerSecond = 2933.5
|
||||
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14659342 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0903s; samplesPerSecond = 2767.4
|
||||
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13078795 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0784s; samplesPerSecond = 3187.6
|
||||
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19832882 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0772s; samplesPerSecond = 3240.4
|
||||
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15828904 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0721s; samplesPerSecond = 3468.7
|
||||
05/03/2016 15:21:51: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16210811 * 10000; EvalClassificationError = 0.07480000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=3.34279s
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.20720006 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0545s; samplesPerSecond = 4583.4
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.19690290 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0641s; samplesPerSecond = 3899.7
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.16064646 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0770s; samplesPerSecond = 3247.1
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13547171 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0640s; samplesPerSecond = 3904.2
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.18000261 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0732s; samplesPerSecond = 3413.6
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17787841 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0790s; samplesPerSecond = 3164.0
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16821879 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0880s; samplesPerSecond = 2839.4
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16363456 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0854s; samplesPerSecond = 2926.8
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.19533907 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0774s; samplesPerSecond = 3228.6
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19318692 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0820s; samplesPerSecond = 3049.5
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.12726279 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0766s; samplesPerSecond = 3261.6
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.18620067 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0773s; samplesPerSecond = 3235.5
|
||||
05/03/2016 15:21:48: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.11547500 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0797s; samplesPerSecond = 3136.6
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16675950 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0833s; samplesPerSecond = 2999.8
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.15807389 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0822s; samplesPerSecond = 3042.5
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18389093 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0726s; samplesPerSecond = 3443.0
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18269750 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0897s; samplesPerSecond = 2787.7
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18737841 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0963s; samplesPerSecond = 2597.3
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20174757 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0811s; samplesPerSecond = 3081.1
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13336708 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0732s; samplesPerSecond = 3414.6
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13851332 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0879s; samplesPerSecond = 2843.0
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15422288 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0821s; samplesPerSecond = 3044.3
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15478799 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0815s; samplesPerSecond = 3069.2
|
||||
05/03/2016 15:21:49: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14530201 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0810s; samplesPerSecond = 3086.3
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12192809 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.2596s; samplesPerSecond = 962.9
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.13975597 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0569s; samplesPerSecond = 4394.5
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12566363 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0911s; samplesPerSecond = 2744.6
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18963051 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0765s; samplesPerSecond = 3267.2
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17955467 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0914s; samplesPerSecond = 2736.4
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18862103 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0772s; samplesPerSecond = 3236.7
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17503073 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0775s; samplesPerSecond = 3225.8
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14741998 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0774s; samplesPerSecond = 3230.1
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13803981 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0726s; samplesPerSecond = 3443.0
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.14139232 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0820s; samplesPerSecond = 3048.4
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13886877 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0766s; samplesPerSecond = 3264.1
|
||||
05/03/2016 15:21:50: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.15025864 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0852s; samplesPerSecond = 2933.5
|
||||
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14659342 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0903s; samplesPerSecond = 2767.4
|
||||
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13078795 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0784s; samplesPerSecond = 3187.6
|
||||
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19832882 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0772s; samplesPerSecond = 3240.4
|
||||
05/03/2016 15:21:51: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15828904 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0721s; samplesPerSecond = 3468.7
|
||||
05/03/2016 15:21:51: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16210811 * 10000; EvalErrorPrediction = 0.07480000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=3.34279s
|
||||
05/03/2016 15:21:51: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn.2'
|
||||
|
||||
05/03/2016 15:21:51: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:21:51: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.19031988 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0960s; samplesPerSecond = 2604.5
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13920714 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0967s; samplesPerSecond = 2585.3
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14595162 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0869s; samplesPerSecond = 2877.8
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13324012 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0817s; samplesPerSecond = 3060.5
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17358728 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0804s; samplesPerSecond = 3109.2
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17949159 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0660s; samplesPerSecond = 3788.1
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.15009323 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0653s; samplesPerSecond = 3829.5
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17060954 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0660s; samplesPerSecond = 3787.3
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10410764 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0762s; samplesPerSecond = 3280.0
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20572259 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.2571s; samplesPerSecond = 972.5
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16519130 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0640s; samplesPerSecond = 3906.2
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.14908187 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0593s; samplesPerSecond = 4213.2
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19227612 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0688s; samplesPerSecond = 3632.8
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13670934 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0532s; samplesPerSecond = 4700.3
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.21113164 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0693s; samplesPerSecond = 3609.4
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.13129944 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0882s; samplesPerSecond = 2833.6
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17304376 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0840s; samplesPerSecond = 2975.2
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16479250 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0685s; samplesPerSecond = 3648.5
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14591786 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0976s; samplesPerSecond = 2561.0
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12562012 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0969s; samplesPerSecond = 2580.7
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13442773 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0959s; samplesPerSecond = 2607.8
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.17125328 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0754s; samplesPerSecond = 3314.6
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22482522 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.1037s; samplesPerSecond = 2410.8
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18291792 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0650s; samplesPerSecond = 3844.3
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.20296558 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0823s; samplesPerSecond = 3038.9
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22849719 * 250; EvalClassificationError = 0.12400000 * 250; time = 0.0828s; samplesPerSecond = 3020.2
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12500068 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0864s; samplesPerSecond = 2894.1
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15719802 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0840s; samplesPerSecond = 2976.4
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11520810 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0687s; samplesPerSecond = 3636.7
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14159592 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0974s; samplesPerSecond = 2567.1
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18509569 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0721s; samplesPerSecond = 3465.4
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15008345 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0905s; samplesPerSecond = 2763.6
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12866435 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0902s; samplesPerSecond = 2770.5
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17640526 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0896s; samplesPerSecond = 2789.2
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14982110 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.2845s; samplesPerSecond = 878.8
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11472753 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0867s; samplesPerSecond = 2882.5
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16524783 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0755s; samplesPerSecond = 3312.4
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14961037 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0958s; samplesPerSecond = 2608.8
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.15972387 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0972s; samplesPerSecond = 2572.7
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17867958 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0969s; samplesPerSecond = 2581.0
|
||||
05/03/2016 15:21:54: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.16073358 * 10000; EvalClassificationError = 0.07780000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=3.65495s
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.19031988 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0960s; samplesPerSecond = 2604.5
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13920714 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0967s; samplesPerSecond = 2585.3
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14595162 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0869s; samplesPerSecond = 2877.8
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13324012 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0817s; samplesPerSecond = 3060.5
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17358728 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0804s; samplesPerSecond = 3109.2
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17949159 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0660s; samplesPerSecond = 3788.1
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.15009323 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0653s; samplesPerSecond = 3829.5
|
||||
05/03/2016 15:21:51: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17060954 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0660s; samplesPerSecond = 3787.3
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10410764 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0762s; samplesPerSecond = 3280.0
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20572259 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.2571s; samplesPerSecond = 972.5
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16519130 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0640s; samplesPerSecond = 3906.2
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.14908187 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0593s; samplesPerSecond = 4213.2
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19227612 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0688s; samplesPerSecond = 3632.8
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13670934 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0532s; samplesPerSecond = 4700.3
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.21113164 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0693s; samplesPerSecond = 3609.4
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.13129944 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0882s; samplesPerSecond = 2833.6
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17304376 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0840s; samplesPerSecond = 2975.2
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16479250 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0685s; samplesPerSecond = 3648.5
|
||||
05/03/2016 15:21:52: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14591786 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0976s; samplesPerSecond = 2561.0
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12562012 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0969s; samplesPerSecond = 2580.7
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13442773 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0959s; samplesPerSecond = 2607.8
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.17125328 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0754s; samplesPerSecond = 3314.6
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22482522 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.1037s; samplesPerSecond = 2410.8
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18291792 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0650s; samplesPerSecond = 3844.3
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.20296558 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0823s; samplesPerSecond = 3038.9
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22849719 * 250; EvalErrorPrediction = 0.12400000 * 250; time = 0.0828s; samplesPerSecond = 3020.2
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12500068 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0864s; samplesPerSecond = 2894.1
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15719802 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0840s; samplesPerSecond = 2976.4
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11520810 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0687s; samplesPerSecond = 3636.7
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14159592 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0974s; samplesPerSecond = 2567.1
|
||||
05/03/2016 15:21:53: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18509569 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0721s; samplesPerSecond = 3465.4
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15008345 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0905s; samplesPerSecond = 2763.6
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12866435 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0902s; samplesPerSecond = 2770.5
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17640526 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0896s; samplesPerSecond = 2789.2
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14982110 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.2845s; samplesPerSecond = 878.8
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11472753 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0867s; samplesPerSecond = 2882.5
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16524783 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0755s; samplesPerSecond = 3312.4
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14961037 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0958s; samplesPerSecond = 2608.8
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.15972387 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0972s; samplesPerSecond = 2572.7
|
||||
05/03/2016 15:21:54: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17867958 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0969s; samplesPerSecond = 2581.0
|
||||
05/03/2016 15:21:54: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.16073358 * 10000; EvalErrorPrediction = 0.07780000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=3.65495s
|
||||
05/03/2016 15:21:54: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn'
|
||||
05/03/2016 15:21:54: CNTKCommandTrainEnd: Multigpu_Demo_Train
|
||||
|
||||
|
@ -623,7 +623,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -652,7 +652,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
|
||||
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -676,7 +676,7 @@ Allocating matrices for forward and/or backward propagation.
|
|||
|
||||
Memory Sharing Structure:
|
||||
|
||||
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
0x1abbf28: {[B0 Value[50 x 1]] }
|
||||
0x1b47908: {[W1 Value[50 x 50]] }
|
||||
0x1b48278: {[W2 Value[2 x 50]] }
|
||||
|
@ -688,7 +688,7 @@ Memory Sharing Structure:
|
|||
0x1b50cd8: {[Prior Value[2]] }
|
||||
0x1b514f8: {[W0 Value[50 x 2]] }
|
||||
0x1b53938: {[B1 Value[50 x 1]] }
|
||||
0x1c0fd98: {[EvalClassificationError Value[1]] }
|
||||
0x1c0fd98: {[EvalErrorPrediction Value[1]] }
|
||||
0x1c0fef8: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
0x1c10438: {[LogOfPrior Value[2]] }
|
||||
0x1c11f48: {[MVNormalizedFeatures Value[2 x *1]] }
|
||||
|
@ -701,7 +701,7 @@ Memory Sharing Structure:
|
|||
0x1c12d78: {[W2*H1 Value[2 x 1 x *1]] }
|
||||
0x1c12f38: {[HLast Value[2 x 1 x *1]] }
|
||||
|
||||
05/03/2016 15:21:55: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05804312 * 603; CrossEntropyWithSoftmax = 0.12790061 * 603; perplexity = 1.13644005
|
||||
05/03/2016 15:21:55: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05804312 * 603; CrossEntropyWithSoftmax = 0.12790061 * 603; perplexity = 1.13644005
|
||||
|
||||
05/03/2016 15:21:55: Action "test" complete.
|
||||
|
||||
|
|
|
@ -68,7 +68,7 @@ Multigpu_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -169,7 +169,7 @@ Multigpu_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -302,7 +302,7 @@ configparameters: Multigpu.cntk:Multigpu_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -371,7 +371,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -400,7 +400,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
|
||||
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -424,14 +424,14 @@ Post-processing network complete.
|
|||
|
||||
05/03/2016 15:21:55: Evaluation criterion node(s):
|
||||
|
||||
05/03/2016 15:21:55: EvalClassificationError = ClassificationError
|
||||
05/03/2016 15:21:55: EvalErrorPrediction = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing Structure:
|
||||
|
||||
(nil): {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
(nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
0x12a62e8: {[features Value[2 x *]] }
|
||||
0x20202b8: {[MeanOfFeatures Value[2]] }
|
||||
0x20207c8: {[InvStdOfFeatures Value[2]] }
|
||||
|
@ -444,7 +444,7 @@ Memory Sharing Structure:
|
|||
0x278ae18: {[Prior Value[2]] }
|
||||
0x278c158: {[LogOfPrior Value[2]] }
|
||||
0x27908f8: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
|
||||
0x2790a18: {[EvalClassificationError Value[1]] }
|
||||
0x2790a18: {[EvalErrorPrediction Value[1]] }
|
||||
0x2790d18: {[ScaledLogLikelihood Value[2 x 1 x *]] }
|
||||
0x2790e78: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
0x27966e8: {[B0 Value[50 x 1]] }
|
||||
|
@ -474,139 +474,139 @@ Memory Sharing Structure:
|
|||
05/03/2016 15:21:56: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:21:56: Starting minibatch loop.
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70004456 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0059s; samplesPerSecond = 42038.0
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70309900 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0049s; samplesPerSecond = 50525.5
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70606104 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0050s; samplesPerSecond = 50423.6
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69845532 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0049s; samplesPerSecond = 50689.4
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73496533 * 250; EvalClassificationError = 0.57600000 * 250; time = 0.0050s; samplesPerSecond = 50261.4
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72522827 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0050s; samplesPerSecond = 50454.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73287500 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0049s; samplesPerSecond = 50576.6
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70135547 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0049s; samplesPerSecond = 50566.3
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72466504 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0049s; samplesPerSecond = 50515.3
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72187500 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0049s; samplesPerSecond = 50730.5
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69799023 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0049s; samplesPerSecond = 50751.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70696387 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0050s; samplesPerSecond = 50454.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69863965 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0050s; samplesPerSecond = 50393.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71772461 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0048s; samplesPerSecond = 51899.5
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69526270 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0047s; samplesPerSecond = 53544.7
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71436426 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0047s; samplesPerSecond = 53498.8
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70399316 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0047s; samplesPerSecond = 53694.2
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71745508 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 53879.3
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71963184 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0047s; samplesPerSecond = 53521.7
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70689941 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0047s; samplesPerSecond = 53602.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70425098 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 53890.9
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70622754 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0047s; samplesPerSecond = 53728.8
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69729492 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 53786.6
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75974219 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54265.2
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70631250 * 250; EvalClassificationError = 0.43600000 * 250; time = 0.0047s; samplesPerSecond = 53659.6
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70705664 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0047s; samplesPerSecond = 53602.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72660352 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54124.3
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71369727 * 250; EvalClassificationError = 0.55600000 * 250; time = 0.0047s; samplesPerSecond = 53441.6
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68916602 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0047s; samplesPerSecond = 53659.6
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69964844 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0047s; samplesPerSecond = 53339.0
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.69387891 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 53832.9
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.68885742 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0047s; samplesPerSecond = 53350.4
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69388867 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0047s; samplesPerSecond = 53430.2
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.70363867 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 53960.7
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65449219 * 250; EvalClassificationError = 0.44400000 * 250; time = 0.0047s; samplesPerSecond = 53544.7
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64607031 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0047s; samplesPerSecond = 53453.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.59492969 * 250; EvalClassificationError = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 53972.4
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.53965820 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0047s; samplesPerSecond = 53636.6
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.43681445 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0047s; samplesPerSecond = 52854.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37407422 * 250; EvalClassificationError = 0.12000000 * 250; time = 0.0047s; samplesPerSecond = 53521.7
|
||||
05/03/2016 15:21:56: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68409629 * 10000; EvalClassificationError = 0.45780000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.194983s
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70004456 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0059s; samplesPerSecond = 42038.0
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70309900 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0049s; samplesPerSecond = 50525.5
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70606104 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0050s; samplesPerSecond = 50423.6
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69845532 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0049s; samplesPerSecond = 50689.4
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73496533 * 250; EvalErrorPrediction = 0.57600000 * 250; time = 0.0050s; samplesPerSecond = 50261.4
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72522827 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0050s; samplesPerSecond = 50454.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73287500 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0049s; samplesPerSecond = 50576.6
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70135547 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0049s; samplesPerSecond = 50566.3
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72466504 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0049s; samplesPerSecond = 50515.3
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72187500 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0049s; samplesPerSecond = 50730.5
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69799023 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0049s; samplesPerSecond = 50751.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70696387 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0050s; samplesPerSecond = 50454.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69863965 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0050s; samplesPerSecond = 50393.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71772461 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0048s; samplesPerSecond = 51899.5
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69526270 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0047s; samplesPerSecond = 53544.7
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71436426 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0047s; samplesPerSecond = 53498.8
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70399316 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0047s; samplesPerSecond = 53694.2
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71745508 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 53879.3
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71963184 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0047s; samplesPerSecond = 53521.7
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70689941 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0047s; samplesPerSecond = 53602.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70425098 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 53890.9
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70622754 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0047s; samplesPerSecond = 53728.8
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69729492 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 53786.6
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75974219 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54265.2
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70631250 * 250; EvalErrorPrediction = 0.43600000 * 250; time = 0.0047s; samplesPerSecond = 53659.6
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70705664 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0047s; samplesPerSecond = 53602.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72660352 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54124.3
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71369727 * 250; EvalErrorPrediction = 0.55600000 * 250; time = 0.0047s; samplesPerSecond = 53441.6
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68916602 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0047s; samplesPerSecond = 53659.6
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69964844 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0047s; samplesPerSecond = 53339.0
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.69387891 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 53832.9
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.68885742 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0047s; samplesPerSecond = 53350.4
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69388867 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0047s; samplesPerSecond = 53430.2
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.70363867 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 53960.7
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65449219 * 250; EvalErrorPrediction = 0.44400000 * 250; time = 0.0047s; samplesPerSecond = 53544.7
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64607031 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0047s; samplesPerSecond = 53453.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.59492969 * 250; EvalErrorPrediction = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 53972.4
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.53965820 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0047s; samplesPerSecond = 53636.6
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.43681445 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0047s; samplesPerSecond = 52854.1
|
||||
05/03/2016 15:21:56: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37407422 * 250; EvalErrorPrediction = 0.12000000 * 250; time = 0.0047s; samplesPerSecond = 53521.7
|
||||
05/03/2016 15:21:56: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68409629 * 10000; EvalErrorPrediction = 0.45780000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.194983s
|
||||
05/03/2016 15:21:56: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn.1'
|
||||
|
||||
05/03/2016 15:21:56: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:21:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27919647 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0093s; samplesPerSecond = 26818.3
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24468611 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0080s; samplesPerSecond = 31063.6
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.19639892 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30982.8
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16397861 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0080s; samplesPerSecond = 31222.7
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.19745002 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30944.4
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.19548896 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0081s; samplesPerSecond = 30871.8
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.18230148 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30910.0
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17531255 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0080s; samplesPerSecond = 31059.8
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20166559 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0081s; samplesPerSecond = 30944.4
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19749058 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0081s; samplesPerSecond = 31055.9
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.13463336 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0081s; samplesPerSecond = 30963.6
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.19006259 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0080s; samplesPerSecond = 31063.6
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.12234776 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0079s; samplesPerSecond = 31605.6
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16962922 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32649.9
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16091639 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32743.9
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18624030 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32748.2
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18465726 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18514518 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0077s; samplesPerSecond = 32620.0
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20127224 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13418547 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32701.1
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13995001 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15602538 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32907.7
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15448171 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32864.5
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14780067 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32894.7
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12361633 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0077s; samplesPerSecond = 32628.6
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14079766 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32632.8
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12624363 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18913222 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32894.7
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17952681 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0076s; samplesPerSecond = 32786.9
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18825452 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17517656 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32942.4
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14744161 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13888184 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32795.5
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.14156678 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32855.8
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13990591 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0077s; samplesPerSecond = 32607.3
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.15059729 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32855.8
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14720846 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0076s; samplesPerSecond = 32799.8
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13021243 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32912.1
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19704037 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0076s; samplesPerSecond = 33029.5
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15858146 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32860.1
|
||||
05/03/2016 15:21:56: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16938752 * 10000; EvalClassificationError = 0.07430000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.313881s
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27919647 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0093s; samplesPerSecond = 26818.3
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24468611 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0080s; samplesPerSecond = 31063.6
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.19639892 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30982.8
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16397861 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0080s; samplesPerSecond = 31222.7
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.19745002 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30944.4
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.19548896 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0081s; samplesPerSecond = 30871.8
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.18230148 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0081s; samplesPerSecond = 30910.0
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17531255 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0080s; samplesPerSecond = 31059.8
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20166559 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0081s; samplesPerSecond = 30944.4
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19749058 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0081s; samplesPerSecond = 31055.9
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.13463336 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0081s; samplesPerSecond = 30963.6
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.19006259 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0080s; samplesPerSecond = 31063.6
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.12234776 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0079s; samplesPerSecond = 31605.6
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16962922 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32649.9
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16091639 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32743.9
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18624030 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32748.2
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18465726 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18514518 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0077s; samplesPerSecond = 32620.0
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20127224 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13418547 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32701.1
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13995001 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15602538 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32907.7
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15448171 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32864.5
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14780067 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32894.7
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12361633 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0077s; samplesPerSecond = 32628.6
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14079766 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32632.8
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12624363 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18913222 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32894.7
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17952681 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0076s; samplesPerSecond = 32786.9
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18825452 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17517656 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32942.4
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14744161 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13888184 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32795.5
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.14156678 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32855.8
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13990591 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0077s; samplesPerSecond = 32607.3
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.15059729 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32855.8
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14720846 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0076s; samplesPerSecond = 32799.8
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13021243 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32912.1
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19704037 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0076s; samplesPerSecond = 33029.5
|
||||
05/03/2016 15:21:56: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15858146 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32860.1
|
||||
05/03/2016 15:21:56: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16938752 * 10000; EvalErrorPrediction = 0.07430000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.313881s
|
||||
05/03/2016 15:21:56: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn.2'
|
||||
|
||||
05/03/2016 15:21:56: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:21:56: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18888809 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0078s; samplesPerSecond = 32129.5
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.14084978 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32756.8
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14561895 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32666.9
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13238169 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32752.5
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17465335 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32765.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17752616 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32821.3
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.15030556 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0077s; samplesPerSecond = 32645.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17118019 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0077s; samplesPerSecond = 32611.5
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10379908 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0077s; samplesPerSecond = 32637.1
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20636150 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32782.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16606704 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0077s; samplesPerSecond = 32543.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.14937580 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32446.5
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19161901 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32731.1
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13684752 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32696.8
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.21095939 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32688.3
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.13216461 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32769.7
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17341094 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0077s; samplesPerSecond = 32586.0
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16532641 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0076s; samplesPerSecond = 32868.8
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14614740 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32696.8
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12551177 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32705.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13419939 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32782.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.17050096 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22579789 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18219666 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0078s; samplesPerSecond = 32220.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.20347898 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22972656 * 250; EvalClassificationError = 0.12000000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12621914 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0076s; samplesPerSecond = 32890.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15674728 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32808.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11517532 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0077s; samplesPerSecond = 32658.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14187870 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32860.1
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18496784 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32929.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15026403 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32942.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12862609 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32925.1
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17651362 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32778.3
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14975908 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0076s; samplesPerSecond = 32981.5
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11465866 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16513610 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32808.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14972374 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32977.2
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.15995582 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17898927 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32756.8
|
||||
05/03/2016 15:21:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.16083773 * 10000; EvalClassificationError = 0.07760000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.307973s
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18888809 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0078s; samplesPerSecond = 32129.5
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.14084978 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32756.8
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14561895 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32666.9
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13238169 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0076s; samplesPerSecond = 32752.5
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17465335 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32765.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17752616 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32821.3
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.15030556 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0077s; samplesPerSecond = 32645.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17118019 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0077s; samplesPerSecond = 32611.5
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10379908 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0077s; samplesPerSecond = 32637.1
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20636150 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32782.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16606704 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0077s; samplesPerSecond = 32543.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.14937580 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0077s; samplesPerSecond = 32446.5
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19161901 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32731.1
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13684752 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32696.8
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.21095939 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32688.3
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.13216461 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32769.7
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17341094 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0077s; samplesPerSecond = 32586.0
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16532641 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0076s; samplesPerSecond = 32868.8
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14614740 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0076s; samplesPerSecond = 32696.8
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12551177 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32705.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13419939 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32782.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.17050096 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32899.1
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22579789 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18219666 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0078s; samplesPerSecond = 32220.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.20347898 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32791.2
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22972656 * 250; EvalErrorPrediction = 0.12000000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12621914 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0076s; samplesPerSecond = 32890.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15674728 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32808.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11517532 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0077s; samplesPerSecond = 32658.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14187870 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32860.1
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18496784 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0076s; samplesPerSecond = 32929.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15026403 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32942.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12862609 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32925.1
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17651362 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0076s; samplesPerSecond = 32778.3
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14975908 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0076s; samplesPerSecond = 32981.5
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11465866 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0076s; samplesPerSecond = 32838.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16513610 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0076s; samplesPerSecond = 32808.4
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14972374 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0076s; samplesPerSecond = 32977.2
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.15995582 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0076s; samplesPerSecond = 32825.6
|
||||
05/03/2016 15:21:56: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17898927 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0076s; samplesPerSecond = 32756.8
|
||||
05/03/2016 15:21:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.16083773 * 10000; EvalErrorPrediction = 0.07760000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.307973s
|
||||
05/03/2016 15:21:56: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152142.598996/CNTKTextFormatReader/Examples/Other/Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn'
|
||||
05/03/2016 15:21:56: CNTKCommandTrainEnd: Multigpu_Demo_Train
|
||||
|
||||
|
@ -624,7 +624,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -653,7 +653,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
|
||||
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -677,7 +677,7 @@ Allocating matrices for forward and/or backward propagation.
|
|||
|
||||
Memory Sharing Structure:
|
||||
|
||||
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
0x1222268: {[InvStdOfFeatures Value[2]] }
|
||||
0x1223258: {[W2 Value[2 x 50]] }
|
||||
0x12a56c8: {[B0 Value[50 x 1]] }
|
||||
|
@ -697,12 +697,12 @@ Memory Sharing Structure:
|
|||
0x2adcc08: {[W0*features Value[50 x *1]] }
|
||||
0x2add0a8: {[W0 Value[50 x 2]] }
|
||||
0x2ae0518: {[W1 Value[50 x 50]] }
|
||||
0x68bf228: {[EvalClassificationError Value[1]] }
|
||||
0x68bf228: {[EvalErrorPrediction Value[1]] }
|
||||
0x68bf388: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
0x68bf988: {[LogOfPrior Value[2]] }
|
||||
0x68d0438: {[features Value[2 x *1]] }
|
||||
|
||||
05/03/2016 15:21:57: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05804312 * 603; CrossEntropyWithSoftmax = 0.12736577 * 603; perplexity = 1.13583240
|
||||
05/03/2016 15:21:57: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05804312 * 603; CrossEntropyWithSoftmax = 0.12736577 * 603; perplexity = 1.13583240
|
||||
|
||||
05/03/2016 15:21:57: Action "test" complete.
|
||||
|
||||
|
|
|
@ -66,7 +66,7 @@ Multigpu_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -167,7 +167,7 @@ Multigpu_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -300,7 +300,7 @@ configparameters: Multigpu.cntk:Multigpu_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -368,7 +368,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -397,7 +397,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
|
||||
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -421,14 +421,14 @@ Post-processing network complete.
|
|||
|
||||
05/03/2016 15:29:48: Evaluation criterion node(s):
|
||||
|
||||
05/03/2016 15:29:48: EvalClassificationError = ClassificationError
|
||||
05/03/2016 15:29:48: EvalErrorPrediction = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing Structure:
|
||||
|
||||
0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
000000CDDFBEECA0: {[features Value[2 x *]] }
|
||||
000000CDDFC7B170: {[W0*features+B0 Gradient[50 x 1 x *]] [W1*H1 Value[50 x 1 x *]] }
|
||||
000000CDDFC7B490: {[HLast Value[2 x 1 x *]] [W2 Gradient[2 x 50]] }
|
||||
|
@ -438,7 +438,7 @@ Memory Sharing Structure:
|
|||
000000CDDFC7B990: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
|
||||
000000CDDFC7BC10: {[LogOfPrior Value[2]] }
|
||||
000000CDDFC7BCB0: {[MVNormalizedFeatures Value[2 x *]] }
|
||||
000000CDDFC7BD50: {[EvalClassificationError Value[1]] }
|
||||
000000CDDFC7BD50: {[EvalErrorPrediction Value[1]] }
|
||||
000000CDDFC7BDF0: {[W0 Gradient[50 x 2]] [W0*features+B0 Value[50 x 1 x *]] }
|
||||
000000CDDFC7BF30: {[ScaledLogLikelihood Value[2 x 1 x *]] }
|
||||
000000CDDFC7C070: {[H2 Value[50 x 1 x *]] [W1*H1 Gradient[50 x 1 x *]] }
|
||||
|
@ -471,139 +471,139 @@ Memory Sharing Structure:
|
|||
05/03/2016 15:29:48: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:29:48: Starting minibatch loop.
|
||||
05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70511987 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0377s; samplesPerSecond = 6637.8
|
||||
05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69754895 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0300s; samplesPerSecond = 8341.4
|
||||
05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71056921 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0285s; samplesPerSecond = 8758.7
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72951074 * 250; EvalClassificationError = 0.56000000 * 250; time = 0.0290s; samplesPerSecond = 8610.3
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70946655 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0285s; samplesPerSecond = 8776.9
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72656787 * 250; EvalClassificationError = 0.54400000 * 250; time = 0.0289s; samplesPerSecond = 8652.6
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69337402 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0288s; samplesPerSecond = 8670.9
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73605176 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0277s; samplesPerSecond = 9033.4
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71453076 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0271s; samplesPerSecond = 9209.5
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75191992 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0247s; samplesPerSecond = 10134.6
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75975146 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0270s; samplesPerSecond = 9243.5
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73172168 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0268s; samplesPerSecond = 9333.9
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76840820 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0265s; samplesPerSecond = 9435.7
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70464746 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0269s; samplesPerSecond = 9309.3
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70557227 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0253s; samplesPerSecond = 9880.3
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72711816 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0267s; samplesPerSecond = 9357.7
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70076660 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0270s; samplesPerSecond = 9264.1
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69409766 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0257s; samplesPerSecond = 9716.3
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69139941 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0257s; samplesPerSecond = 9742.4
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73361621 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0295s; samplesPerSecond = 8477.4
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72225879 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0273s; samplesPerSecond = 9161.9
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70356348 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0261s; samplesPerSecond = 9562.8
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69928613 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0254s; samplesPerSecond = 9848.7
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72360938 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0252s; samplesPerSecond = 9924.6
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69871875 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0262s; samplesPerSecond = 9530.7
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69114844 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9720.1
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68648047 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0273s; samplesPerSecond = 9161.9
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69657227 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0270s; samplesPerSecond = 9259.9
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71585547 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0264s; samplesPerSecond = 9486.2
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69730664 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0261s; samplesPerSecond = 9595.1
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70432422 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0244s; samplesPerSecond = 10248.8
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.69991797 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0220s; samplesPerSecond = 11388.0
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.68696875 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0222s; samplesPerSecond = 11277.0
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.67331445 * 250; EvalClassificationError = 0.37200000 * 250; time = 0.0245s; samplesPerSecond = 10192.4
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65711328 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0240s; samplesPerSecond = 10429.3
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64534375 * 250; EvalClassificationError = 0.44800000 * 250; time = 0.0243s; samplesPerSecond = 10305.0
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.61021875 * 250; EvalClassificationError = 0.36400000 * 250; time = 0.0236s; samplesPerSecond = 10606.3
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.54191016 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0236s; samplesPerSecond = 10578.4
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.45624414 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0232s; samplesPerSecond = 10762.4
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37636133 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0235s; samplesPerSecond = 10623.8
|
||||
05/03/2016 15:29:49: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68695688 * 10000; EvalClassificationError = 0.45550000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=1.06166s
|
||||
05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70511987 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0377s; samplesPerSecond = 6637.8
|
||||
05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69754895 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0300s; samplesPerSecond = 8341.4
|
||||
05/03/2016 15:29:48: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71056921 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0285s; samplesPerSecond = 8758.7
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72951074 * 250; EvalErrorPrediction = 0.56000000 * 250; time = 0.0290s; samplesPerSecond = 8610.3
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70946655 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0285s; samplesPerSecond = 8776.9
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72656787 * 250; EvalErrorPrediction = 0.54400000 * 250; time = 0.0289s; samplesPerSecond = 8652.6
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69337402 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0288s; samplesPerSecond = 8670.9
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73605176 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0277s; samplesPerSecond = 9033.4
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71453076 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0271s; samplesPerSecond = 9209.5
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75191992 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0247s; samplesPerSecond = 10134.6
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75975146 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0270s; samplesPerSecond = 9243.5
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73172168 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0268s; samplesPerSecond = 9333.9
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76840820 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0265s; samplesPerSecond = 9435.7
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70464746 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0269s; samplesPerSecond = 9309.3
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70557227 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0253s; samplesPerSecond = 9880.3
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72711816 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0267s; samplesPerSecond = 9357.7
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70076660 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0270s; samplesPerSecond = 9264.1
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69409766 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0257s; samplesPerSecond = 9716.3
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69139941 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0257s; samplesPerSecond = 9742.4
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73361621 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0295s; samplesPerSecond = 8477.4
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72225879 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0273s; samplesPerSecond = 9161.9
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70356348 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0261s; samplesPerSecond = 9562.8
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69928613 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0254s; samplesPerSecond = 9848.7
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72360938 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0252s; samplesPerSecond = 9924.6
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69871875 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0262s; samplesPerSecond = 9530.7
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69114844 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9720.1
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68648047 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0273s; samplesPerSecond = 9161.9
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69657227 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0270s; samplesPerSecond = 9259.9
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71585547 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0264s; samplesPerSecond = 9486.2
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69730664 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0261s; samplesPerSecond = 9595.1
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70432422 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0244s; samplesPerSecond = 10248.8
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.69991797 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0220s; samplesPerSecond = 11388.0
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.68696875 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0222s; samplesPerSecond = 11277.0
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.67331445 * 250; EvalErrorPrediction = 0.37200000 * 250; time = 0.0245s; samplesPerSecond = 10192.4
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65711328 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0240s; samplesPerSecond = 10429.3
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64534375 * 250; EvalErrorPrediction = 0.44800000 * 250; time = 0.0243s; samplesPerSecond = 10305.0
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.61021875 * 250; EvalErrorPrediction = 0.36400000 * 250; time = 0.0236s; samplesPerSecond = 10606.3
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.54191016 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0236s; samplesPerSecond = 10578.4
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.45624414 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0232s; samplesPerSecond = 10762.4
|
||||
05/03/2016 15:29:49: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37636133 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0235s; samplesPerSecond = 10623.8
|
||||
05/03/2016 15:29:49: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68695688 * 10000; EvalErrorPrediction = 0.45550000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=1.06166s
|
||||
05/03/2016 15:29:49: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn.1'
|
||||
|
||||
05/03/2016 15:29:49: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:29:49: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
|
||||
05/03/2016 15:29:49: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.28780429 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0246s; samplesPerSecond = 10181.2
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.28222478 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0246s; samplesPerSecond = 10178.3
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.23589864 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0255s; samplesPerSecond = 9796.2
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.21209458 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0242s; samplesPerSecond = 10312.3
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.20285913 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10283.0
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.21300948 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0252s; samplesPerSecond = 9928.5
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.17835594 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0256s; samplesPerSecond = 9753.8
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.18830077 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0257s; samplesPerSecond = 9740.1
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.14198478 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0250s; samplesPerSecond = 10019.2
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.15895022 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0237s; samplesPerSecond = 10566.8
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.21062646 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0238s; samplesPerSecond = 10517.9
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.16081948 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0223s; samplesPerSecond = 11186.7
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15635713 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10700.2
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13008516 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0239s; samplesPerSecond = 10453.7
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16625347 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0234s; samplesPerSecond = 10674.2
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15001793 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0245s; samplesPerSecond = 10223.7
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22343917 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0234s; samplesPerSecond = 10692.4
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18006735 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0245s; samplesPerSecond = 10194.5
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15361620 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0235s; samplesPerSecond = 10636.9
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17039588 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0246s; samplesPerSecond = 10177.1
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15516786 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0237s; samplesPerSecond = 10544.1
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15969617 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0225s; samplesPerSecond = 11102.2
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15939439 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10697.9
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15300194 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0233s; samplesPerSecond = 10729.2
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14902476 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0231s; samplesPerSecond = 10811.7
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15043256 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0231s; samplesPerSecond = 10823.4
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15531360 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0229s; samplesPerSecond = 10936.1
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17990796 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0248s; samplesPerSecond = 10088.4
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22925668 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0229s; samplesPerSecond = 10913.7
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16843626 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0234s; samplesPerSecond = 10682.8
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18045325 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0236s; samplesPerSecond = 10585.6
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13337526 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0221s; samplesPerSecond = 11308.6
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14332977 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0245s; samplesPerSecond = 10219.9
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18749446 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0242s; samplesPerSecond = 10326.7
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15505967 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0236s; samplesPerSecond = 10587.8
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.19616616 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0228s; samplesPerSecond = 10980.3
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17305907 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10610.3
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15197365 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0249s; samplesPerSecond = 10033.3
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12102416 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0238s; samplesPerSecond = 10483.5
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15278496 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0235s; samplesPerSecond = 10646.9
|
||||
05/03/2016 15:29:50: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.17643784 * 10000; EvalClassificationError = 0.07560000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.957696s
|
||||
05/03/2016 15:29:49: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.28780429 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0246s; samplesPerSecond = 10181.2
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.28222478 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0246s; samplesPerSecond = 10178.3
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.23589864 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0255s; samplesPerSecond = 9796.2
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.21209458 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0242s; samplesPerSecond = 10312.3
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.20285913 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10283.0
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.21300948 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0252s; samplesPerSecond = 9928.5
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.17835594 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0256s; samplesPerSecond = 9753.8
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.18830077 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0257s; samplesPerSecond = 9740.1
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.14198478 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0250s; samplesPerSecond = 10019.2
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.15895022 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0237s; samplesPerSecond = 10566.8
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.21062646 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0238s; samplesPerSecond = 10517.9
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.16081948 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0223s; samplesPerSecond = 11186.7
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15635713 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10700.2
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13008516 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0239s; samplesPerSecond = 10453.7
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16625347 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0234s; samplesPerSecond = 10674.2
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15001793 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0245s; samplesPerSecond = 10223.7
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22343917 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0234s; samplesPerSecond = 10692.4
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18006735 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0245s; samplesPerSecond = 10194.5
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15361620 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0235s; samplesPerSecond = 10636.9
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17039588 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0246s; samplesPerSecond = 10177.1
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15516786 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0237s; samplesPerSecond = 10544.1
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15969617 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0225s; samplesPerSecond = 11102.2
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15939439 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10697.9
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15300194 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0233s; samplesPerSecond = 10729.2
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14902476 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0231s; samplesPerSecond = 10811.7
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15043256 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0231s; samplesPerSecond = 10823.4
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15531360 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0229s; samplesPerSecond = 10936.1
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17990796 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0248s; samplesPerSecond = 10088.4
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22925668 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0229s; samplesPerSecond = 10913.7
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16843626 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0234s; samplesPerSecond = 10682.8
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18045325 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0236s; samplesPerSecond = 10585.6
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13337526 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0221s; samplesPerSecond = 11308.6
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14332977 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0245s; samplesPerSecond = 10219.9
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18749446 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0242s; samplesPerSecond = 10326.7
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15505967 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0236s; samplesPerSecond = 10587.8
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.19616616 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0228s; samplesPerSecond = 10980.3
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17305907 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10610.3
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15197365 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0249s; samplesPerSecond = 10033.3
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12102416 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0238s; samplesPerSecond = 10483.5
|
||||
05/03/2016 15:29:50: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15278496 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0235s; samplesPerSecond = 10646.9
|
||||
05/03/2016 15:29:50: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.17643784 * 10000; EvalErrorPrediction = 0.07560000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.957696s
|
||||
05/03/2016 15:29:50: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn.2'
|
||||
|
||||
05/03/2016 15:29:50: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:29:50: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
|
||||
05/03/2016 15:29:50: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10623312 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10637.4
|
||||
05/03/2016 15:29:50: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17519442 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0236s; samplesPerSecond = 10608.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14133983 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0240s; samplesPerSecond = 10404.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16278491 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0233s; samplesPerSecond = 10749.0
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11783558 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0232s; samplesPerSecond = 10780.0
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16342188 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0243s; samplesPerSecond = 10305.9
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16272195 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0239s; samplesPerSecond = 10476.9
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19401477 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0241s; samplesPerSecond = 10370.0
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20186661 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0229s; samplesPerSecond = 10903.2
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13672539 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10631.1
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20069212 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0234s; samplesPerSecond = 10681.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17729039 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9928.1
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15906107 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0251s; samplesPerSecond = 9941.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16281632 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0247s; samplesPerSecond = 10121.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19834981 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0248s; samplesPerSecond = 10067.7
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10217642 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0247s; samplesPerSecond = 10105.1
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17011383 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0258s; samplesPerSecond = 9692.2
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16599137 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9911.6
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12648996 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0254s; samplesPerSecond = 9848.7
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11920298 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0248s; samplesPerSecond = 10091.2
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12883164 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0272s; samplesPerSecond = 9205.1
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18222479 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0250s; samplesPerSecond = 9988.0
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13443351 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0246s; samplesPerSecond = 10149.4
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19720325 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0244s; samplesPerSecond = 10230.8
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15586137 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0254s; samplesPerSecond = 9860.4
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11854887 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0250s; samplesPerSecond = 9991.6
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13705285 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0249s; samplesPerSecond = 10050.7
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20009941 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0240s; samplesPerSecond = 10411.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.19078680 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0233s; samplesPerSecond = 10741.6
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16505705 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0238s; samplesPerSecond = 10507.7
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.12232722 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0239s; samplesPerSecond = 10472.1
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16342047 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0238s; samplesPerSecond = 10514.4
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.15875107 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10688.3
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12248772 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0232s; samplesPerSecond = 10793.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13457009 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0238s; samplesPerSecond = 10521.4
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20976565 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0238s; samplesPerSecond = 10494.9
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16519102 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0230s; samplesPerSecond = 10862.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14971420 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0247s; samplesPerSecond = 10106.3
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16456633 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0230s; samplesPerSecond = 10858.2
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16971407 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0239s; samplesPerSecond = 10473.0
|
||||
05/03/2016 15:29:51: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15787325 * 10000; EvalClassificationError = 0.07430000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.972052s
|
||||
05/03/2016 15:29:50: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10623312 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10637.4
|
||||
05/03/2016 15:29:50: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17519442 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0236s; samplesPerSecond = 10608.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14133983 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0240s; samplesPerSecond = 10404.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16278491 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0233s; samplesPerSecond = 10749.0
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11783558 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0232s; samplesPerSecond = 10780.0
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16342188 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0243s; samplesPerSecond = 10305.9
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16272195 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0239s; samplesPerSecond = 10476.9
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19401477 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0241s; samplesPerSecond = 10370.0
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20186661 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0229s; samplesPerSecond = 10903.2
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13672539 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10631.1
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20069212 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0234s; samplesPerSecond = 10681.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17729039 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9928.1
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15906107 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0251s; samplesPerSecond = 9941.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16281632 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0247s; samplesPerSecond = 10121.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19834981 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0248s; samplesPerSecond = 10067.7
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10217642 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0247s; samplesPerSecond = 10105.1
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17011383 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0258s; samplesPerSecond = 9692.2
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16599137 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9911.6
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12648996 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0254s; samplesPerSecond = 9848.7
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11920298 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0248s; samplesPerSecond = 10091.2
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12883164 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0272s; samplesPerSecond = 9205.1
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18222479 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0250s; samplesPerSecond = 9988.0
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13443351 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0246s; samplesPerSecond = 10149.4
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19720325 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0244s; samplesPerSecond = 10230.8
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15586137 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0254s; samplesPerSecond = 9860.4
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11854887 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0250s; samplesPerSecond = 9991.6
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13705285 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0249s; samplesPerSecond = 10050.7
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20009941 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0240s; samplesPerSecond = 10411.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.19078680 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0233s; samplesPerSecond = 10741.6
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16505705 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0238s; samplesPerSecond = 10507.7
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.12232722 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0239s; samplesPerSecond = 10472.1
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16342047 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0238s; samplesPerSecond = 10514.4
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.15875107 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10688.3
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12248772 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0232s; samplesPerSecond = 10793.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13457009 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0238s; samplesPerSecond = 10521.4
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20976565 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0238s; samplesPerSecond = 10494.9
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16519102 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0230s; samplesPerSecond = 10862.5
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14971420 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0247s; samplesPerSecond = 10106.3
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16456633 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0230s; samplesPerSecond = 10858.2
|
||||
05/03/2016 15:29:51: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16971407 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0239s; samplesPerSecond = 10473.0
|
||||
05/03/2016 15:29:51: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15787325 * 10000; EvalErrorPrediction = 0.07430000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.972052s
|
||||
05/03/2016 15:29:51: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_cpu/Models/multigpu.dnn'
|
||||
05/03/2016 15:29:51: CNTKCommandTrainEnd: Multigpu_Demo_Train
|
||||
|
||||
|
@ -621,7 +621,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -650,7 +650,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
|
||||
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -674,7 +674,7 @@ Allocating matrices for forward and/or backward propagation.
|
|||
|
||||
Memory Sharing Structure:
|
||||
|
||||
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
000000CDDFC7B490: {[W0 Value[50 x 2]] }
|
||||
000000CDDFC7B530: {[features Value[2 x *1]] }
|
||||
000000CDDFC7B710: {[W1 Value[50 x 50]] }
|
||||
|
@ -690,7 +690,7 @@ Memory Sharing Structure:
|
|||
000000CDDFC8C2B0: {[W1*H1+B1 Value[50 x 1 x *1]] }
|
||||
000000CDDFC8C490: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
000000CDDFC8C5D0: {[LogOfPrior Value[2]] }
|
||||
000000CDDFC8C670: {[EvalClassificationError Value[1]] }
|
||||
000000CDDFC8C670: {[EvalErrorPrediction Value[1]] }
|
||||
000000CDDFC8C990: {[MVNormalizedFeatures Value[2 x *1]] }
|
||||
000000CDDFC8CA30: {[H2 Value[50 x 1 x *1]] }
|
||||
000000CDDFC8CC10: {[W1*H1 Value[50 x 1 x *1]] }
|
||||
|
@ -699,7 +699,7 @@ Memory Sharing Structure:
|
|||
000000CDDFC8D610: {[HLast Value[2 x 1 x *1]] }
|
||||
000000CDDFC8D750: {[W0*features+B0 Value[50 x 1 x *1]] }
|
||||
|
||||
05/03/2016 15:29:52: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05306799 * 603; CrossEntropyWithSoftmax = 0.11782631 * 603; perplexity = 1.12504868
|
||||
05/03/2016 15:29:52: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05306799 * 603; CrossEntropyWithSoftmax = 0.11782631 * 603; perplexity = 1.12504868
|
||||
|
||||
05/03/2016 15:29:52: Action "test" complete.
|
||||
|
||||
|
|
|
@ -66,7 +66,7 @@ Multigpu_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -167,7 +167,7 @@ Multigpu_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -300,7 +300,7 @@ configparameters: Multigpu.cntk:Multigpu_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -369,7 +369,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -398,7 +398,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
|
||||
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -422,14 +422,14 @@ Post-processing network complete.
|
|||
|
||||
05/03/2016 15:29:53: Evaluation criterion node(s):
|
||||
|
||||
05/03/2016 15:29:53: EvalClassificationError = ClassificationError
|
||||
05/03/2016 15:29:53: EvalErrorPrediction = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing Structure:
|
||||
|
||||
0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
000000572B66ECA0: {[features Value[2 x *]] }
|
||||
00000057420A1700: {[W1 Value[50 x 50]] }
|
||||
00000057420A1980: {[MeanOfFeatures Value[2]] }
|
||||
|
@ -448,7 +448,7 @@ Memory Sharing Structure:
|
|||
00000057439283E0: {[LogOfPrior Value[2]] }
|
||||
00000057439285C0: {[W0 Gradient[50 x 2]] [W0*features+B0 Value[50 x 1 x *]] }
|
||||
0000005743928660: {[B1 Gradient[50 x 1]] [H2 Gradient[50 x 1 x *]] [HLast Gradient[2 x 1 x *]] }
|
||||
00000057439287A0: {[EvalClassificationError Value[1]] }
|
||||
00000057439287A0: {[EvalErrorPrediction Value[1]] }
|
||||
0000005743928980: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
0000005743928A20: {[B2 Gradient[2 x 1]] }
|
||||
0000005743928E80: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
|
||||
|
@ -472,139 +472,139 @@ Memory Sharing Structure:
|
|||
05/03/2016 15:29:54: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:29:54: Starting minibatch loop.
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70650452 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0115s; samplesPerSecond = 21832.2
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69701831 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0095s; samplesPerSecond = 26326.9
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71089587 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0100s; samplesPerSecond = 25067.7
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72980273 * 250; EvalClassificationError = 0.56000000 * 250; time = 0.0096s; samplesPerSecond = 26079.7
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70902783 * 250; EvalClassificationError = 0.52800000 * 250; time = 0.0115s; samplesPerSecond = 21692.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72657300 * 250; EvalClassificationError = 0.54400000 * 250; time = 0.0124s; samplesPerSecond = 20127.2
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69319678 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0091s; samplesPerSecond = 27439.4
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73563477 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0112s; samplesPerSecond = 22246.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71463281 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0115s; samplesPerSecond = 21739.1
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75213428 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0105s; samplesPerSecond = 23814.1
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75931445 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0115s; samplesPerSecond = 21763.7
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73075293 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0120s; samplesPerSecond = 20835.1
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76701953 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0130s; samplesPerSecond = 19305.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70451270 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0108s; samplesPerSecond = 23184.6
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70539941 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0117s; samplesPerSecond = 21385.8
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72700293 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0120s; samplesPerSecond = 20917.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70096191 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0112s; samplesPerSecond = 22301.5
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69437305 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0113s; samplesPerSecond = 22079.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69161621 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0116s; samplesPerSecond = 21514.6
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73388281 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0107s; samplesPerSecond = 23406.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72255664 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0116s; samplesPerSecond = 21546.2
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70414551 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0115s; samplesPerSecond = 21756.2
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69976758 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0113s; samplesPerSecond = 22065.3
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72419141 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0114s; samplesPerSecond = 22018.7
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69943945 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0111s; samplesPerSecond = 22604.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69206445 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0111s; samplesPerSecond = 22504.3
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68771680 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0113s; samplesPerSecond = 22118.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69878516 * 250; EvalClassificationError = 0.44000000 * 250; time = 0.0130s; samplesPerSecond = 19278.2
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71889844 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0127s; samplesPerSecond = 19632.5
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.70086523 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0095s; samplesPerSecond = 26329.6
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70878320 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0112s; samplesPerSecond = 22361.4
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.70674414 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0130s; samplesPerSecond = 19168.8
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69707422 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0094s; samplesPerSecond = 26729.4
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68588281 * 250; EvalClassificationError = 0.40800000 * 250; time = 0.0112s; samplesPerSecond = 22365.4
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.67734766 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0128s; samplesPerSecond = 19583.3
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.67958008 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0092s; samplesPerSecond = 27144.4
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.66424805 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0114s; samplesPerSecond = 21864.6
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.62412500 * 250; EvalClassificationError = 0.20400000 * 250; time = 0.0116s; samplesPerSecond = 21475.8
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.58007422 * 250; EvalClassificationError = 0.16000000 * 250; time = 0.0094s; samplesPerSecond = 26567.5
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.52764648 * 250; EvalClassificationError = 0.19200000 * 250; time = 0.0132s; samplesPerSecond = 18988.3
|
||||
05/03/2016 15:29:54: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.69975483 * 10000; EvalClassificationError = 0.46850000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.453807s
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70650452 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0115s; samplesPerSecond = 21832.2
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69701831 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0095s; samplesPerSecond = 26326.9
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71089587 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0100s; samplesPerSecond = 25067.7
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72980273 * 250; EvalErrorPrediction = 0.56000000 * 250; time = 0.0096s; samplesPerSecond = 26079.7
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70902783 * 250; EvalErrorPrediction = 0.52800000 * 250; time = 0.0115s; samplesPerSecond = 21692.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72657300 * 250; EvalErrorPrediction = 0.54400000 * 250; time = 0.0124s; samplesPerSecond = 20127.2
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69319678 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0091s; samplesPerSecond = 27439.4
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73563477 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0112s; samplesPerSecond = 22246.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71463281 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0115s; samplesPerSecond = 21739.1
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75213428 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0105s; samplesPerSecond = 23814.1
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75931445 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0115s; samplesPerSecond = 21763.7
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73075293 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0120s; samplesPerSecond = 20835.1
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76701953 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0130s; samplesPerSecond = 19305.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70451270 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0108s; samplesPerSecond = 23184.6
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70539941 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0117s; samplesPerSecond = 21385.8
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72700293 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0120s; samplesPerSecond = 20917.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70096191 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0112s; samplesPerSecond = 22301.5
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69437305 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0113s; samplesPerSecond = 22079.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69161621 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0116s; samplesPerSecond = 21514.6
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73388281 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0107s; samplesPerSecond = 23406.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72255664 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0116s; samplesPerSecond = 21546.2
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70414551 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0115s; samplesPerSecond = 21756.2
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69976758 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0113s; samplesPerSecond = 22065.3
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72419141 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0114s; samplesPerSecond = 22018.7
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69943945 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0111s; samplesPerSecond = 22604.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69206445 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0111s; samplesPerSecond = 22504.3
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68771680 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0113s; samplesPerSecond = 22118.0
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69878516 * 250; EvalErrorPrediction = 0.44000000 * 250; time = 0.0130s; samplesPerSecond = 19278.2
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71889844 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0127s; samplesPerSecond = 19632.5
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.70086523 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0095s; samplesPerSecond = 26329.6
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70878320 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0112s; samplesPerSecond = 22361.4
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.70674414 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0130s; samplesPerSecond = 19168.8
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69707422 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0094s; samplesPerSecond = 26729.4
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68588281 * 250; EvalErrorPrediction = 0.40800000 * 250; time = 0.0112s; samplesPerSecond = 22365.4
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.67734766 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0128s; samplesPerSecond = 19583.3
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.67958008 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0092s; samplesPerSecond = 27144.4
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.66424805 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0114s; samplesPerSecond = 21864.6
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.62412500 * 250; EvalErrorPrediction = 0.20400000 * 250; time = 0.0116s; samplesPerSecond = 21475.8
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.58007422 * 250; EvalErrorPrediction = 0.16000000 * 250; time = 0.0094s; samplesPerSecond = 26567.5
|
||||
05/03/2016 15:29:54: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.52764648 * 250; EvalErrorPrediction = 0.19200000 * 250; time = 0.0132s; samplesPerSecond = 18988.3
|
||||
05/03/2016 15:29:54: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.69975483 * 10000; EvalErrorPrediction = 0.46850000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.453807s
|
||||
05/03/2016 15:29:54: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn.1'
|
||||
|
||||
05/03/2016 15:29:54: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:29:54: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.45075654 * 250; EvalClassificationError = 0.15200000 * 250; time = 0.0250s; samplesPerSecond = 10002.4
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.40775497 * 250; EvalClassificationError = 0.14400000 * 250; time = 0.0219s; samplesPerSecond = 11420.2
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.34165228 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0230s; samplesPerSecond = 10859.6
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.29708900 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0198s; samplesPerSecond = 12604.0
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.26669365 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0211s; samplesPerSecond = 11860.7
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.25328680 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0212s; samplesPerSecond = 11817.0
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.21017820 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0237s; samplesPerSecond = 10540.1
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.21483054 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0214s; samplesPerSecond = 11699.7
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.16626513 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0213s; samplesPerSecond = 11757.5
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.17672434 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0239s; samplesPerSecond = 10454.6
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.22140190 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0208s; samplesPerSecond = 12033.1
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17048554 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0237s; samplesPerSecond = 10553.4
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16438517 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10662.3
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13782141 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0218s; samplesPerSecond = 11449.0
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16909663 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0244s; samplesPerSecond = 10228.7
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15419129 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0229s; samplesPerSecond = 10924.7
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22229924 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0242s; samplesPerSecond = 10340.4
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18134995 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0236s; samplesPerSecond = 10579.3
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15616904 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0236s; samplesPerSecond = 10594.6
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17162733 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0262s; samplesPerSecond = 9530.3
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15676289 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0262s; samplesPerSecond = 9554.4
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16159542 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0262s; samplesPerSecond = 9558.8
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.16102246 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0284s; samplesPerSecond = 8800.3
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15392923 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0248s; samplesPerSecond = 10089.6
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14898334 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0269s; samplesPerSecond = 9279.5
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15087969 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0285s; samplesPerSecond = 8785.2
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15494578 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0247s; samplesPerSecond = 10101.4
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17878713 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0250s; samplesPerSecond = 9986.0
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22845049 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0249s; samplesPerSecond = 10045.4
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16884430 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0241s; samplesPerSecond = 10376.5
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17970282 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0237s; samplesPerSecond = 10533.9
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13292468 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0257s; samplesPerSecond = 9721.6
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14167778 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0226s; samplesPerSecond = 11048.3
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18716852 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0237s; samplesPerSecond = 10534.7
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15480385 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0258s; samplesPerSecond = 9705.0
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.19482328 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0247s; samplesPerSecond = 10115.7
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17488171 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0249s; samplesPerSecond = 10048.2
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15164433 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0281s; samplesPerSecond = 8901.2
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12142463 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0222s; samplesPerSecond = 11279.0
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15287631 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0238s; samplesPerSecond = 10489.7
|
||||
05/03/2016 15:29:55: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.19475469 * 10000; EvalClassificationError = 0.07830000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.964496s
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.45075654 * 250; EvalErrorPrediction = 0.15200000 * 250; time = 0.0250s; samplesPerSecond = 10002.4
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.40775497 * 250; EvalErrorPrediction = 0.14400000 * 250; time = 0.0219s; samplesPerSecond = 11420.2
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.34165228 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0230s; samplesPerSecond = 10859.6
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.29708900 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0198s; samplesPerSecond = 12604.0
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.26669365 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0211s; samplesPerSecond = 11860.7
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.25328680 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0212s; samplesPerSecond = 11817.0
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.21017820 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0237s; samplesPerSecond = 10540.1
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.21483054 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0214s; samplesPerSecond = 11699.7
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.16626513 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0213s; samplesPerSecond = 11757.5
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.17672434 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0239s; samplesPerSecond = 10454.6
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.22140190 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0208s; samplesPerSecond = 12033.1
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17048554 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0237s; samplesPerSecond = 10553.4
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16438517 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10662.3
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13782141 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0218s; samplesPerSecond = 11449.0
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16909663 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0244s; samplesPerSecond = 10228.7
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15419129 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0229s; samplesPerSecond = 10924.7
|
||||
05/03/2016 15:29:54: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22229924 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0242s; samplesPerSecond = 10340.4
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18134995 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0236s; samplesPerSecond = 10579.3
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15616904 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0236s; samplesPerSecond = 10594.6
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17162733 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0262s; samplesPerSecond = 9530.3
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15676289 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0262s; samplesPerSecond = 9554.4
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16159542 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0262s; samplesPerSecond = 9558.8
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.16102246 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0284s; samplesPerSecond = 8800.3
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15392923 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0248s; samplesPerSecond = 10089.6
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14898334 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0269s; samplesPerSecond = 9279.5
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15087969 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0285s; samplesPerSecond = 8785.2
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15494578 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0247s; samplesPerSecond = 10101.4
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17878713 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0250s; samplesPerSecond = 9986.0
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22845049 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0249s; samplesPerSecond = 10045.4
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16884430 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0241s; samplesPerSecond = 10376.5
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17970282 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0237s; samplesPerSecond = 10533.9
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13292468 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0257s; samplesPerSecond = 9721.6
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14167778 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0226s; samplesPerSecond = 11048.3
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18716852 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0237s; samplesPerSecond = 10534.7
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15480385 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0258s; samplesPerSecond = 9705.0
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.19482328 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0247s; samplesPerSecond = 10115.7
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17488171 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0249s; samplesPerSecond = 10048.2
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15164433 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0281s; samplesPerSecond = 8901.2
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12142463 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0222s; samplesPerSecond = 11279.0
|
||||
05/03/2016 15:29:55: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15287631 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0238s; samplesPerSecond = 10489.7
|
||||
05/03/2016 15:29:55: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.19475469 * 10000; EvalErrorPrediction = 0.07830000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.964496s
|
||||
05/03/2016 15:29:55: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn.2'
|
||||
|
||||
05/03/2016 15:29:55: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:29:55: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1).
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10717578 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0253s; samplesPerSecond = 9869.7
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17521929 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0234s; samplesPerSecond = 10701.1
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14088211 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0250s; samplesPerSecond = 9986.8
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16281337 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10287.6
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11778386 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0234s; samplesPerSecond = 10666.9
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16295400 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0266s; samplesPerSecond = 9385.8
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16287201 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0233s; samplesPerSecond = 10746.2
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19482140 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0242s; samplesPerSecond = 10312.3
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20113689 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0235s; samplesPerSecond = 10643.3
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13748570 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0238s; samplesPerSecond = 10484.4
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20080420 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0236s; samplesPerSecond = 10600.9
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17730590 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0268s; samplesPerSecond = 9342.3
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15851029 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0233s; samplesPerSecond = 10743.0
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16257260 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0250s; samplesPerSecond = 10012.8
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19772537 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0224s; samplesPerSecond = 11143.3
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10259204 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0235s; samplesPerSecond = 10626.1
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17093073 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0244s; samplesPerSecond = 10230.0
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16628544 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9936.8
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12690716 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0246s; samplesPerSecond = 10171.7
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11894288 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0233s; samplesPerSecond = 10718.1
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12815907 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0246s; samplesPerSecond = 10151.0
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18265773 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0225s; samplesPerSecond = 11131.9
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13388730 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0231s; samplesPerSecond = 10807.5
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19787903 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0251s; samplesPerSecond = 9951.4
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15563315 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0241s; samplesPerSecond = 10373.0
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11837055 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0240s; samplesPerSecond = 10429.3
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13732942 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10689.7
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20012115 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0253s; samplesPerSecond = 9872.4
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.19086846 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0238s; samplesPerSecond = 10525.4
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16492589 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10272.8
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.12141157 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0238s; samplesPerSecond = 10509.5
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16335481 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10579.3
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.15923900 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0241s; samplesPerSecond = 10358.0
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12315803 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10617.1
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13481532 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0260s; samplesPerSecond = 9612.4
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20958008 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0223s; samplesPerSecond = 11232.4
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16519713 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0255s; samplesPerSecond = 9814.3
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14990733 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0239s; samplesPerSecond = 10481.3
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16508552 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0255s; samplesPerSecond = 9789.3
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16941540 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0240s; samplesPerSecond = 10435.4
|
||||
05/03/2016 15:29:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15791792 * 10000; EvalClassificationError = 0.07460000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.970059s
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10717578 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0253s; samplesPerSecond = 9869.7
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17521929 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0234s; samplesPerSecond = 10701.1
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14088211 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0250s; samplesPerSecond = 9986.8
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16281337 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10287.6
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11778386 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0234s; samplesPerSecond = 10666.9
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16295400 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0266s; samplesPerSecond = 9385.8
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16287201 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0233s; samplesPerSecond = 10746.2
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19482140 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0242s; samplesPerSecond = 10312.3
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20113689 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0235s; samplesPerSecond = 10643.3
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13748570 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0238s; samplesPerSecond = 10484.4
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20080420 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0236s; samplesPerSecond = 10600.9
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17730590 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0268s; samplesPerSecond = 9342.3
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15851029 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0233s; samplesPerSecond = 10743.0
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16257260 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0250s; samplesPerSecond = 10012.8
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19772537 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0224s; samplesPerSecond = 11143.3
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10259204 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0235s; samplesPerSecond = 10626.1
|
||||
05/03/2016 15:29:55: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17093073 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0244s; samplesPerSecond = 10230.0
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16628544 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0252s; samplesPerSecond = 9936.8
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12690716 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0246s; samplesPerSecond = 10171.7
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11894288 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0233s; samplesPerSecond = 10718.1
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12815907 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0246s; samplesPerSecond = 10151.0
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18265773 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0225s; samplesPerSecond = 11131.9
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13388730 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0231s; samplesPerSecond = 10807.5
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19787903 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0251s; samplesPerSecond = 9951.4
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15563315 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0241s; samplesPerSecond = 10373.0
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11837055 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0240s; samplesPerSecond = 10429.3
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13732942 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0234s; samplesPerSecond = 10689.7
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20012115 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0253s; samplesPerSecond = 9872.4
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.19086846 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0238s; samplesPerSecond = 10525.4
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16492589 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0243s; samplesPerSecond = 10272.8
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.12141157 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0238s; samplesPerSecond = 10509.5
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16335481 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0236s; samplesPerSecond = 10579.3
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.15923900 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0241s; samplesPerSecond = 10358.0
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12315803 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0235s; samplesPerSecond = 10617.1
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13481532 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0260s; samplesPerSecond = 9612.4
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20958008 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0223s; samplesPerSecond = 11232.4
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16519713 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0255s; samplesPerSecond = 9814.3
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14990733 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0239s; samplesPerSecond = 10481.3
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16508552 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0255s; samplesPerSecond = 9789.3
|
||||
05/03/2016 15:29:56: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16941540 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0240s; samplesPerSecond = 10435.4
|
||||
05/03/2016 15:29:56: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15791792 * 10000; EvalErrorPrediction = 0.07460000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.970059s
|
||||
05/03/2016 15:29:56: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503162947.903093\CNTKTextFormatReader\Examples\Other\Simple2d_MultiGpu@release_gpu/Models/multigpu.dnn'
|
||||
05/03/2016 15:29:56: CNTKCommandTrainEnd: Multigpu_Demo_Train
|
||||
|
||||
|
@ -622,7 +622,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -651,7 +651,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
|
||||
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -675,7 +675,7 @@ Allocating matrices for forward and/or backward propagation.
|
|||
|
||||
Memory Sharing Structure:
|
||||
|
||||
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
0000005743925BB0: {[HLast Value[2 x 1 x *1]] }
|
||||
0000005743925D90: {[MVNormalizedFeatures Value[2 x *1]] }
|
||||
0000005743925E30: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
|
@ -688,7 +688,7 @@ Memory Sharing Structure:
|
|||
00000057439265B0: {[W0*features+B0 Value[50 x 1 x *1]] }
|
||||
0000005743926650: {[W1*H1 Value[50 x 1 x *1]] }
|
||||
0000005743926970: {[H2 Value[50 x 1 x *1]] }
|
||||
0000005743926AB0: {[EvalClassificationError Value[1]] }
|
||||
0000005743926AB0: {[EvalErrorPrediction Value[1]] }
|
||||
000000574B7FAD10: {[W0 Value[50 x 2]] }
|
||||
000000574B7FB170: {[InvStdOfFeatures Value[2]] }
|
||||
000000574B7FB210: {[MeanOfFeatures Value[2]] }
|
||||
|
@ -700,7 +700,7 @@ Memory Sharing Structure:
|
|||
000000574D960E50: {[B2 Value[2 x 1]] }
|
||||
000000574D9610D0: {[B0 Value[50 x 1]] }
|
||||
|
||||
05/03/2016 15:29:56: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12022919 * 603; perplexity = 1.12775529
|
||||
05/03/2016 15:29:56: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12022919 * 603; perplexity = 1.12775529
|
||||
|
||||
05/03/2016 15:29:56: Action "test" complete.
|
||||
|
||||
|
|
|
@ -58,7 +58,7 @@ Simple_Demo_Train = [
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -157,7 +157,7 @@ Simple_Demo_Train = [
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -300,7 +300,7 @@ configparameters: Simple.cntk:Simple_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -355,7 +355,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -384,7 +384,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
|
||||
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -408,14 +408,14 @@ Post-processing network complete.
|
|||
|
||||
05/03/2016 15:21:15: Evaluation criterion node(s):
|
||||
|
||||
05/03/2016 15:21:15: EvalClassificationError = ClassificationError
|
||||
05/03/2016 15:21:15: EvalErrorPrediction = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing Structure:
|
||||
|
||||
(nil): {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
(nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
0x2e7f338: {[features Value[2 x *]] }
|
||||
0x2e82908: {[MeanOfFeatures Value[2]] }
|
||||
0x2e84f08: {[InvStdOfFeatures Value[2]] }
|
||||
|
@ -427,7 +427,7 @@ Memory Sharing Structure:
|
|||
0x2e8b718: {[B2 Value[2 x 1]] }
|
||||
0x2e8c1e8: {[labels Value[2 x *]] }
|
||||
0x2e8cf38: {[Prior Value[2]] }
|
||||
0x2e926f8: {[EvalClassificationError Value[1]] }
|
||||
0x2e926f8: {[EvalErrorPrediction Value[1]] }
|
||||
0x2e92858: {[ScaledLogLikelihood Value[2 x 1 x *]] }
|
||||
0x2e929b8: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
0x2e93218: {[LogOfPrior Value[2]] }
|
||||
|
@ -458,139 +458,139 @@ Memory Sharing Structure:
|
|||
05/03/2016 15:21:17: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:21:17: Starting minibatch loop.
|
||||
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69966235 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0806s; samplesPerSecond = 3103.4
|
||||
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70639648 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0489s; samplesPerSecond = 5107.5
|
||||
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70470264 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0598s; samplesPerSecond = 4180.0
|
||||
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69813501 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0581s; samplesPerSecond = 4306.3
|
||||
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73551416 * 250; EvalClassificationError = 0.57600000 * 250; time = 0.0618s; samplesPerSecond = 4045.4
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72432324 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0579s; samplesPerSecond = 4314.7
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73327588 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.2699s; samplesPerSecond = 926.3
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70092627 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0620s; samplesPerSecond = 4035.0
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72354980 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0826s; samplesPerSecond = 3027.2
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72148096 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0811s; samplesPerSecond = 3082.2
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69814941 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0895s; samplesPerSecond = 2793.1
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70699121 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0482s; samplesPerSecond = 5187.9
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69898437 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0567s; samplesPerSecond = 4408.3
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71712695 * 250; EvalClassificationError = 0.54000000 * 250; time = 0.0586s; samplesPerSecond = 4266.7
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69470703 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0546s; samplesPerSecond = 4575.3
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71375879 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0640s; samplesPerSecond = 3907.4
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70381641 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0756s; samplesPerSecond = 3307.9
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71748633 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0598s; samplesPerSecond = 4178.1
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71863281 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0813s; samplesPerSecond = 3075.3
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70715234 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0811s; samplesPerSecond = 3082.9
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70401074 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0673s; samplesPerSecond = 3717.1
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70599414 * 250; EvalClassificationError = 0.48400000 * 250; time = 0.0819s; samplesPerSecond = 3052.5
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69628711 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0909s; samplesPerSecond = 2749.3
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75920898 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0752s; samplesPerSecond = 3323.1
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70542578 * 250; EvalClassificationError = 0.43600000 * 250; time = 0.0734s; samplesPerSecond = 3406.2
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70643945 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0869s; samplesPerSecond = 2875.4
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72481641 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0893s; samplesPerSecond = 2798.7
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71133594 * 250; EvalClassificationError = 0.55600000 * 250; time = 0.0814s; samplesPerSecond = 3072.2
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68605664 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0812s; samplesPerSecond = 3077.4
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69535352 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0895s; samplesPerSecond = 2792.1
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.68741797 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0831s; samplesPerSecond = 3008.7
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.67916406 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0818s; samplesPerSecond = 3056.5
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.67841992 * 250; EvalClassificationError = 0.44800000 * 250; time = 0.2681s; samplesPerSecond = 932.5
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68038477 * 250; EvalClassificationError = 0.49200000 * 250; time = 0.0513s; samplesPerSecond = 4869.4
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.61937109 * 250; EvalClassificationError = 0.30400000 * 250; time = 0.0680s; samplesPerSecond = 3678.3
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.57844141 * 250; EvalClassificationError = 0.27200000 * 250; time = 0.0758s; samplesPerSecond = 3296.3
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.49124023 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0664s; samplesPerSecond = 3763.4
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.39071289 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0505s; samplesPerSecond = 4955.3
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.27650586 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0515s; samplesPerSecond = 4855.7
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.26430078 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0517s; samplesPerSecond = 4834.4
|
||||
05/03/2016 15:21:20: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.66664150 * 10000; EvalClassificationError = 0.44430000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=3.21314s
|
||||
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.69966235 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0806s; samplesPerSecond = 3103.4
|
||||
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70639648 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0489s; samplesPerSecond = 5107.5
|
||||
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70470264 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0598s; samplesPerSecond = 4180.0
|
||||
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69813501 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0581s; samplesPerSecond = 4306.3
|
||||
05/03/2016 15:21:17: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73551416 * 250; EvalErrorPrediction = 0.57600000 * 250; time = 0.0618s; samplesPerSecond = 4045.4
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72432324 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0579s; samplesPerSecond = 4314.7
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73327588 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.2699s; samplesPerSecond = 926.3
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70092627 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0620s; samplesPerSecond = 4035.0
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72354980 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0826s; samplesPerSecond = 3027.2
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72148096 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0811s; samplesPerSecond = 3082.2
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69814941 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0895s; samplesPerSecond = 2793.1
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70699121 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0482s; samplesPerSecond = 5187.9
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69898437 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0567s; samplesPerSecond = 4408.3
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71712695 * 250; EvalErrorPrediction = 0.54000000 * 250; time = 0.0586s; samplesPerSecond = 4266.7
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69470703 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0546s; samplesPerSecond = 4575.3
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71375879 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0640s; samplesPerSecond = 3907.4
|
||||
05/03/2016 15:21:18: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70381641 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0756s; samplesPerSecond = 3307.9
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71748633 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0598s; samplesPerSecond = 4178.1
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71863281 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0813s; samplesPerSecond = 3075.3
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70715234 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0811s; samplesPerSecond = 3082.9
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70401074 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0673s; samplesPerSecond = 3717.1
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70599414 * 250; EvalErrorPrediction = 0.48400000 * 250; time = 0.0819s; samplesPerSecond = 3052.5
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69628711 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0909s; samplesPerSecond = 2749.3
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75920898 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0752s; samplesPerSecond = 3323.1
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70542578 * 250; EvalErrorPrediction = 0.43600000 * 250; time = 0.0734s; samplesPerSecond = 3406.2
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70643945 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0869s; samplesPerSecond = 2875.4
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72481641 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0893s; samplesPerSecond = 2798.7
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71133594 * 250; EvalErrorPrediction = 0.55600000 * 250; time = 0.0814s; samplesPerSecond = 3072.2
|
||||
05/03/2016 15:21:19: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68605664 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0812s; samplesPerSecond = 3077.4
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69535352 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0895s; samplesPerSecond = 2792.1
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.68741797 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0831s; samplesPerSecond = 3008.7
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.67916406 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0818s; samplesPerSecond = 3056.5
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.67841992 * 250; EvalErrorPrediction = 0.44800000 * 250; time = 0.2681s; samplesPerSecond = 932.5
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68038477 * 250; EvalErrorPrediction = 0.49200000 * 250; time = 0.0513s; samplesPerSecond = 4869.4
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.61937109 * 250; EvalErrorPrediction = 0.30400000 * 250; time = 0.0680s; samplesPerSecond = 3678.3
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.57844141 * 250; EvalErrorPrediction = 0.27200000 * 250; time = 0.0758s; samplesPerSecond = 3296.3
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.49124023 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0664s; samplesPerSecond = 3763.4
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.39071289 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0505s; samplesPerSecond = 4955.3
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.27650586 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0515s; samplesPerSecond = 4855.7
|
||||
05/03/2016 15:21:20: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.26430078 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0517s; samplesPerSecond = 4834.4
|
||||
05/03/2016 15:21:20: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.66664150 * 10000; EvalErrorPrediction = 0.44430000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=3.21314s
|
||||
05/03/2016 15:21:20: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_cpu/Models/simple.dnn.1'
|
||||
|
||||
05/03/2016 15:21:20: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:21:20: Starting minibatch loop.
|
||||
05/03/2016 15:21:20: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.20732678 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0782s; samplesPerSecond = 3196.0
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.19684015 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0812s; samplesPerSecond = 3079.4
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.16083588 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0796s; samplesPerSecond = 3141.3
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13558752 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0811s; samplesPerSecond = 3083.5
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17992950 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0814s; samplesPerSecond = 3070.9
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17858063 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0812s; samplesPerSecond = 3079.3
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16847546 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0688s; samplesPerSecond = 3631.6
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16359399 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0547s; samplesPerSecond = 4572.7
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.19534705 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0521s; samplesPerSecond = 4796.2
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19363660 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0758s; samplesPerSecond = 3297.5
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.12703638 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0682s; samplesPerSecond = 3667.7
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.18622827 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0576s; samplesPerSecond = 4344.0
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.11595044 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0599s; samplesPerSecond = 4171.2
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16689380 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0650s; samplesPerSecond = 3845.2
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.15822559 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0631s; samplesPerSecond = 3964.2
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18381909 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0638s; samplesPerSecond = 3920.5
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18274048 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0642s; samplesPerSecond = 3893.2
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18638428 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0564s; samplesPerSecond = 4431.5
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20111572 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0528s; samplesPerSecond = 4733.8
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13185034 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0504s; samplesPerSecond = 4962.1
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13692554 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0559s; samplesPerSecond = 4468.8
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15396802 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0672s; samplesPerSecond = 3719.4
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15347241 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0818s; samplesPerSecond = 3057.6
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14583887 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.2662s; samplesPerSecond = 939.1
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12333276 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0738s; samplesPerSecond = 3389.0
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.13958154 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0778s; samplesPerSecond = 3211.3
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12539844 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0772s; samplesPerSecond = 3239.1
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.19014404 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0475s; samplesPerSecond = 5259.1
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17959521 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0780s; samplesPerSecond = 3206.4
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18899121 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0469s; samplesPerSecond = 5333.6
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17525586 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0625s; samplesPerSecond = 4003.1
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14735645 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0940s; samplesPerSecond = 2658.9
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13705518 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0543s; samplesPerSecond = 4600.2
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.13610693 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0752s; samplesPerSecond = 3324.2
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13555811 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0583s; samplesPerSecond = 4291.1
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.14883594 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0598s; samplesPerSecond = 4180.7
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14724707 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0599s; samplesPerSecond = 4172.4
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13130469 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0664s; samplesPerSecond = 3764.2
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19636084 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0644s; samplesPerSecond = 3884.1
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15681836 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0651s; samplesPerSecond = 3841.0
|
||||
05/03/2016 15:21:23: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16173864 * 10000; EvalClassificationError = 0.07520000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=2.87283s
|
||||
05/03/2016 15:21:20: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.20732678 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0782s; samplesPerSecond = 3196.0
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.19684015 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0812s; samplesPerSecond = 3079.4
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.16083588 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0796s; samplesPerSecond = 3141.3
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.13558752 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0811s; samplesPerSecond = 3083.5
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17992950 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0814s; samplesPerSecond = 3070.9
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17858063 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0812s; samplesPerSecond = 3079.3
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16847546 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0688s; samplesPerSecond = 3631.6
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16359399 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0547s; samplesPerSecond = 4572.7
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.19534705 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0521s; samplesPerSecond = 4796.2
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19363660 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0758s; samplesPerSecond = 3297.5
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.12703638 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0682s; samplesPerSecond = 3667.7
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.18622827 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0576s; samplesPerSecond = 4344.0
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.11595044 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0599s; samplesPerSecond = 4171.2
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16689380 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0650s; samplesPerSecond = 3845.2
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.15822559 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0631s; samplesPerSecond = 3964.2
|
||||
05/03/2016 15:21:21: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18381909 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0638s; samplesPerSecond = 3920.5
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18274048 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0642s; samplesPerSecond = 3893.2
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18638428 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0564s; samplesPerSecond = 4431.5
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20111572 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0528s; samplesPerSecond = 4733.8
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13185034 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0504s; samplesPerSecond = 4962.1
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13692554 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0559s; samplesPerSecond = 4468.8
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15396802 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0672s; samplesPerSecond = 3719.4
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15347241 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0818s; samplesPerSecond = 3057.6
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14583887 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.2662s; samplesPerSecond = 939.1
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12333276 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0738s; samplesPerSecond = 3389.0
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.13958154 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0778s; samplesPerSecond = 3211.3
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12539844 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0772s; samplesPerSecond = 3239.1
|
||||
05/03/2016 15:21:22: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.19014404 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0475s; samplesPerSecond = 5259.1
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17959521 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0780s; samplesPerSecond = 3206.4
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18899121 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0469s; samplesPerSecond = 5333.6
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17525586 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0625s; samplesPerSecond = 4003.1
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14735645 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0940s; samplesPerSecond = 2658.9
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13705518 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0543s; samplesPerSecond = 4600.2
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.13610693 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0752s; samplesPerSecond = 3324.2
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13555811 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0583s; samplesPerSecond = 4291.1
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.14883594 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0598s; samplesPerSecond = 4180.7
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14724707 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0599s; samplesPerSecond = 4172.4
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13130469 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0664s; samplesPerSecond = 3764.2
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19636084 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0644s; samplesPerSecond = 3884.1
|
||||
05/03/2016 15:21:23: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15681836 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0651s; samplesPerSecond = 3841.0
|
||||
05/03/2016 15:21:23: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16173864 * 10000; EvalErrorPrediction = 0.07520000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=2.87283s
|
||||
05/03/2016 15:21:23: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_cpu/Models/simple.dnn.2'
|
||||
|
||||
05/03/2016 15:21:23: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:21:23: Starting minibatch loop.
|
||||
05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18214960 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0604s; samplesPerSecond = 4138.7
|
||||
05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13526825 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0622s; samplesPerSecond = 4020.6
|
||||
05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14344995 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0640s; samplesPerSecond = 3906.0
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.12557471 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0628s; samplesPerSecond = 3978.7
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17627924 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0639s; samplesPerSecond = 3914.6
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17585291 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0644s; samplesPerSecond = 3884.2
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.14716791 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0628s; samplesPerSecond = 3979.1
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16757751 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0643s; samplesPerSecond = 3885.5
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10314917 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0642s; samplesPerSecond = 3895.3
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20322217 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0650s; samplesPerSecond = 3848.0
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16604797 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0642s; samplesPerSecond = 3892.3
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.15105725 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0651s; samplesPerSecond = 3839.4
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19206934 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0640s; samplesPerSecond = 3903.9
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13667065 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.2688s; samplesPerSecond = 930.0
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.20713037 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0472s; samplesPerSecond = 5299.3
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.12862158 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0625s; samplesPerSecond = 3998.5
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17174683 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0465s; samplesPerSecond = 5381.7
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16493628 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0526s; samplesPerSecond = 4753.8
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14843726 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0505s; samplesPerSecond = 4952.5
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12574292 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0505s; samplesPerSecond = 4951.4
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13455151 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0614s; samplesPerSecond = 4072.8
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16762988 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0495s; samplesPerSecond = 5055.0
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22347461 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0523s; samplesPerSecond = 4780.1
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18213623 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0542s; samplesPerSecond = 4611.6
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.19970923 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0539s; samplesPerSecond = 4638.8
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22695947 * 250; EvalClassificationError = 0.12800000 * 250; time = 0.0542s; samplesPerSecond = 4609.7
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12664502 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0541s; samplesPerSecond = 4625.3
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15838037 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0538s; samplesPerSecond = 4648.8
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11555566 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0581s; samplesPerSecond = 4305.4
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14157520 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0544s; samplesPerSecond = 4595.2
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18558350 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0541s; samplesPerSecond = 4622.4
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15083594 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0540s; samplesPerSecond = 4632.9
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12831787 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0541s; samplesPerSecond = 4624.1
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17656494 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0545s; samplesPerSecond = 4587.6
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14956396 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0625s; samplesPerSecond = 4000.3
|
||||
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11451660 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0496s; samplesPerSecond = 5040.3
|
||||
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16392383 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0496s; samplesPerSecond = 5036.0
|
||||
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14811230 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0505s; samplesPerSecond = 4955.0
|
||||
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16003760 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0588s; samplesPerSecond = 4255.2
|
||||
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17969775 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0482s; samplesPerSecond = 5185.4
|
||||
05/03/2016 15:21:26: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15964808 * 10000; EvalClassificationError = 0.07750000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=2.49695s
|
||||
05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18214960 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0604s; samplesPerSecond = 4138.7
|
||||
05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13526825 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0622s; samplesPerSecond = 4020.6
|
||||
05/03/2016 15:21:23: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14344995 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0640s; samplesPerSecond = 3906.0
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.12557471 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0628s; samplesPerSecond = 3978.7
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17627924 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0639s; samplesPerSecond = 3914.6
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17585291 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0644s; samplesPerSecond = 3884.2
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.14716791 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0628s; samplesPerSecond = 3979.1
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16757751 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0643s; samplesPerSecond = 3885.5
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10314917 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0642s; samplesPerSecond = 3895.3
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20322217 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0650s; samplesPerSecond = 3848.0
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16604797 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0642s; samplesPerSecond = 3892.3
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.15105725 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0651s; samplesPerSecond = 3839.4
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19206934 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0640s; samplesPerSecond = 3903.9
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13667065 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.2688s; samplesPerSecond = 930.0
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.20713037 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0472s; samplesPerSecond = 5299.3
|
||||
05/03/2016 15:21:24: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.12862158 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0625s; samplesPerSecond = 3998.5
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17174683 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0465s; samplesPerSecond = 5381.7
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16493628 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0526s; samplesPerSecond = 4753.8
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14843726 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0505s; samplesPerSecond = 4952.5
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12574292 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0505s; samplesPerSecond = 4951.4
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13455151 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0614s; samplesPerSecond = 4072.8
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16762988 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0495s; samplesPerSecond = 5055.0
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22347461 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0523s; samplesPerSecond = 4780.1
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18213623 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0542s; samplesPerSecond = 4611.6
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.19970923 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0539s; samplesPerSecond = 4638.8
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22695947 * 250; EvalErrorPrediction = 0.12800000 * 250; time = 0.0542s; samplesPerSecond = 4609.7
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12664502 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0541s; samplesPerSecond = 4625.3
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15838037 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0538s; samplesPerSecond = 4648.8
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11555566 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0581s; samplesPerSecond = 4305.4
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14157520 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0544s; samplesPerSecond = 4595.2
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18558350 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0541s; samplesPerSecond = 4622.4
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15083594 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0540s; samplesPerSecond = 4632.9
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12831787 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0541s; samplesPerSecond = 4624.1
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17656494 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0545s; samplesPerSecond = 4587.6
|
||||
05/03/2016 15:21:25: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14956396 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0625s; samplesPerSecond = 4000.3
|
||||
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11451660 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0496s; samplesPerSecond = 5040.3
|
||||
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16392383 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0496s; samplesPerSecond = 5036.0
|
||||
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14811230 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0505s; samplesPerSecond = 4955.0
|
||||
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16003760 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0588s; samplesPerSecond = 4255.2
|
||||
05/03/2016 15:21:26: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17969775 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0482s; samplesPerSecond = 5185.4
|
||||
05/03/2016 15:21:26: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15964808 * 10000; EvalErrorPrediction = 0.07750000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=2.49695s
|
||||
05/03/2016 15:21:26: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_cpu/Models/simple.dnn'
|
||||
05/03/2016 15:21:26: CNTKCommandTrainEnd: Simple_Demo_Train
|
||||
|
||||
|
@ -608,7 +608,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -637,7 +637,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
|
||||
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -661,7 +661,7 @@ Allocating matrices for forward and/or backward propagation.
|
|||
|
||||
Memory Sharing Structure:
|
||||
|
||||
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
0x2e83eb8: {[W2 Value[2 x 50]] }
|
||||
0x2e87ac8: {[MVNormalizedFeatures Value[2 x *1]] }
|
||||
0x2e87e78: {[W0*features Value[50 x *1]] }
|
||||
|
@ -676,7 +676,7 @@ Memory Sharing Structure:
|
|||
0x2e8d298: {[B2 Value[2 x 1]] }
|
||||
0x2e8f2c8: {[labels Value[2 x *1]] }
|
||||
0x2e8f8e8: {[MeanOfFeatures Value[2]] }
|
||||
0x2e91598: {[EvalClassificationError Value[1]] }
|
||||
0x2e91598: {[EvalErrorPrediction Value[1]] }
|
||||
0x2e916f8: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
0x2e91bb8: {[LogOfPrior Value[2]] }
|
||||
0x2e93758: {[B0 Value[50 x 1]] }
|
||||
|
@ -686,7 +686,7 @@ Memory Sharing Structure:
|
|||
0x2e985f8: {[W1 Value[50 x 50]] }
|
||||
0x2e99178: {[features Value[2 x *1]] }
|
||||
|
||||
05/03/2016 15:21:26: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05970149 * 603; CrossEntropyWithSoftmax = 0.13085309 * 603; perplexity = 1.13980032
|
||||
05/03/2016 15:21:26: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05970149 * 603; CrossEntropyWithSoftmax = 0.13085309 * 603; perplexity = 1.13980032
|
||||
|
||||
05/03/2016 15:21:26: Action "test" complete.
|
||||
|
||||
|
@ -702,7 +702,7 @@ Post-processing network...
|
|||
|
||||
8 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -732,7 +732,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *2] -> [2 x 1 x *2]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *2], [2 x 1] -> [2 x 1 x *2]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *2] -> [2 x 1 x *2]
|
||||
Validating --> Prior = Mean (labels) : [2 x *2] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -755,7 +755,7 @@ Allocating matrices for forward and/or backward propagation.
|
|||
|
||||
Memory Sharing Structure:
|
||||
|
||||
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalClassificationError Gradient[1]] [EvalClassificationError Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
|
||||
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalErrorPrediction Gradient[1]] [EvalErrorPrediction Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
|
||||
0x2e82858: {[PosteriorProb Value[2 x 1 x *2]] }
|
||||
0x2e83b58: {[labels Value[2 x *2]] }
|
||||
0x2e84318: {[MeanOfFeatures Value[2]] }
|
||||
|
|
|
@ -58,7 +58,7 @@ Simple_Demo_Train = [
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -157,7 +157,7 @@ Simple_Demo_Train = [
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -300,7 +300,7 @@ configparameters: Simple.cntk:Simple_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -356,7 +356,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -385,7 +385,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
|
||||
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -409,14 +409,14 @@ Post-processing network complete.
|
|||
|
||||
05/03/2016 15:21:27: Evaluation criterion node(s):
|
||||
|
||||
05/03/2016 15:21:27: EvalClassificationError = ClassificationError
|
||||
05/03/2016 15:21:27: EvalErrorPrediction = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing Structure:
|
||||
|
||||
(nil): {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
(nil): {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
0x1ef9338: {[features Value[2 x *]] }
|
||||
0x2b32ad8: {[MeanOfFeatures Value[2]] }
|
||||
0x2b32fe8: {[InvStdOfFeatures Value[2]] }
|
||||
|
@ -429,7 +429,7 @@ Memory Sharing Structure:
|
|||
0x3185898: {[Prior Value[2]] }
|
||||
0x3186bd8: {[LogOfPrior Value[2]] }
|
||||
0x318b378: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
|
||||
0x318b498: {[EvalClassificationError Value[1]] }
|
||||
0x318b498: {[EvalErrorPrediction Value[1]] }
|
||||
0x318b798: {[ScaledLogLikelihood Value[2 x 1 x *]] }
|
||||
0x318b8f8: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
0x3191148: {[B0 Value[50 x 1]] }
|
||||
|
@ -459,139 +459,139 @@ Memory Sharing Structure:
|
|||
05/03/2016 15:21:28: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:21:28: Starting minibatch loop.
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70004456 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0055s; samplesPerSecond = 45495.9
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70309900 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54347.8
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70606104 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54241.7
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69845532 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54549.4
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73496533 * 250; EvalClassificationError = 0.57600000 * 250; time = 0.0046s; samplesPerSecond = 54136.0
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72522827 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0046s; samplesPerSecond = 54359.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73287500 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 54466.2
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70135547 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54872.7
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72466504 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 54194.7
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72187500 * 250; EvalClassificationError = 0.52000000 * 250; time = 0.0046s; samplesPerSecond = 54501.9
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69799023 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54788.5
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70696387 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0046s; samplesPerSecond = 54371.5
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69863965 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54300.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71772461 * 250; EvalClassificationError = 0.54800000 * 250; time = 0.0046s; samplesPerSecond = 54644.8
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69526270 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54525.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71436426 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70399316 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0046s; samplesPerSecond = 54573.2
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71745508 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 54716.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71963184 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0046s; samplesPerSecond = 54537.5
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70689941 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54336.0
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70425098 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54692.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70622754 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69729492 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54537.5
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75974219 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54680.7
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70631250 * 250; EvalClassificationError = 0.43600000 * 250; time = 0.0046s; samplesPerSecond = 54288.8
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70705664 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72660352 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71369727 * 250; EvalClassificationError = 0.55600000 * 250; time = 0.0046s; samplesPerSecond = 54537.5
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68916602 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0046s; samplesPerSecond = 54371.5
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69964844 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0046s; samplesPerSecond = 54218.2
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.69387891 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.68885742 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0046s; samplesPerSecond = 54573.2
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69388867 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54454.4
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.70363867 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65449219 * 250; EvalClassificationError = 0.44400000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64607031 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 54347.8
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.59492969 * 250; EvalClassificationError = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 54764.5
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.53965820 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54609.0
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.43681445 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0046s; samplesPerSecond = 54525.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37407422 * 250; EvalClassificationError = 0.12000000 * 250; time = 0.0046s; samplesPerSecond = 54466.2
|
||||
05/03/2016 15:21:28: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68409629 * 10000; EvalClassificationError = 0.45780000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.1879s
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70004456 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0055s; samplesPerSecond = 45495.9
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.70309900 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54347.8
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.70606104 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54241.7
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.69845532 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54549.4
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.73496533 * 250; EvalErrorPrediction = 0.57600000 * 250; time = 0.0046s; samplesPerSecond = 54136.0
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72522827 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0046s; samplesPerSecond = 54359.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.73287500 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 54466.2
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.70135547 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54872.7
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.72466504 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 54194.7
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.72187500 * 250; EvalErrorPrediction = 0.52000000 * 250; time = 0.0046s; samplesPerSecond = 54501.9
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.69799023 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54788.5
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.70696387 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0046s; samplesPerSecond = 54371.5
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.69863965 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54300.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.71772461 * 250; EvalErrorPrediction = 0.54800000 * 250; time = 0.0046s; samplesPerSecond = 54644.8
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.69526270 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0046s; samplesPerSecond = 54525.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.71436426 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70399316 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0046s; samplesPerSecond = 54573.2
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.71745508 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0046s; samplesPerSecond = 54716.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.71963184 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0046s; samplesPerSecond = 54537.5
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.70689941 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54336.0
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.70425098 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54692.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70622754 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69729492 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54537.5
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.75974219 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0046s; samplesPerSecond = 54680.7
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.70631250 * 250; EvalErrorPrediction = 0.43600000 * 250; time = 0.0046s; samplesPerSecond = 54288.8
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.70705664 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.72660352 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.71369727 * 250; EvalErrorPrediction = 0.55600000 * 250; time = 0.0046s; samplesPerSecond = 54537.5
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.68916602 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0046s; samplesPerSecond = 54371.5
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69964844 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0046s; samplesPerSecond = 54218.2
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.69387891 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.68885742 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0046s; samplesPerSecond = 54573.2
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69388867 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0046s; samplesPerSecond = 54454.4
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.70363867 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65449219 * 250; EvalErrorPrediction = 0.44400000 * 250; time = 0.0046s; samplesPerSecond = 54561.3
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64607031 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0046s; samplesPerSecond = 54347.8
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.59492969 * 250; EvalErrorPrediction = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 54764.5
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.53965820 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54609.0
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.43681445 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0046s; samplesPerSecond = 54525.6
|
||||
05/03/2016 15:21:28: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37407422 * 250; EvalErrorPrediction = 0.12000000 * 250; time = 0.0046s; samplesPerSecond = 54466.2
|
||||
05/03/2016 15:21:28: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68409629 * 10000; EvalErrorPrediction = 0.45780000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.1879s
|
||||
05/03/2016 15:21:28: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_gpu/Models/simple.dnn.1'
|
||||
|
||||
05/03/2016 15:21:28: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:21:28: Starting minibatch loop.
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27895840 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0046s; samplesPerSecond = 53902.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24395615 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54933.0
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.19587115 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16368213 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 55126.8
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.19700140 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54933.0
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.19580530 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54585.2
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.18257983 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17520911 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54752.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20164514 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0046s; samplesPerSecond = 54752.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19787024 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0046s; samplesPerSecond = 54466.2
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.13437573 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0045s; samplesPerSecond = 55090.3
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.19004956 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54848.6
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.12287280 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 54957.1
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16975903 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55175.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16102686 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54513.7
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18611646 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54800.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18469507 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 55334.2
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18472339 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54908.9
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20064648 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54597.1
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13324683 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13878418 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0045s; samplesPerSecond = 55078.2
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15587354 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54920.9
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15337378 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54812.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14797070 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 55199.8
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12512891 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0046s; samplesPerSecond = 54383.3
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14058545 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 54993.4
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12611963 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18970605 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54884.7
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17965479 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18866455 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0046s; samplesPerSecond = 54836.6
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17539941 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14742432 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54848.6
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13789502 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0046s; samplesPerSecond = 54788.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.13652100 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0045s; samplesPerSecond = 55224.2
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13619336 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54920.9
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.14909424 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54478.1
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14762256 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 55139.0
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13142578 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54860.7
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19570459 * 250; EvalClassificationError = 0.11600000 * 250; time = 0.0046s; samplesPerSecond = 54764.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15718604 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55005.5
|
||||
05/03/2016 15:21:28: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16901047 * 10000; EvalClassificationError = 0.07510000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.184798s
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.27895840 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0046s; samplesPerSecond = 53902.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.24395615 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54933.0
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.19587115 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16368213 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 55126.8
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.19700140 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54933.0
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.19580530 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54585.2
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.18257983 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.17520911 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54752.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20164514 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0046s; samplesPerSecond = 54752.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.19787024 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0046s; samplesPerSecond = 54466.2
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.13437573 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0045s; samplesPerSecond = 55090.3
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.19004956 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54848.6
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.12287280 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 54957.1
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16975903 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55175.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16102686 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54513.7
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.18611646 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54800.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.18469507 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 55334.2
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18472339 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54908.9
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.20064648 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54597.1
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.13324683 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13878418 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0045s; samplesPerSecond = 55078.2
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.15587354 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0046s; samplesPerSecond = 54920.9
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15337378 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54812.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.14797070 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 55199.8
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.12512891 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0046s; samplesPerSecond = 54383.3
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14058545 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 54993.4
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12611963 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.18970605 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54884.7
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.17965479 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.18866455 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0046s; samplesPerSecond = 54836.6
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17539941 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.14742432 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54848.6
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13789502 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0046s; samplesPerSecond = 54788.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.13652100 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0045s; samplesPerSecond = 55224.2
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13619336 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54920.9
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.14909424 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54478.1
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.14762256 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 55139.0
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.13142578 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54860.7
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.19570459 * 250; EvalErrorPrediction = 0.11600000 * 250; time = 0.0046s; samplesPerSecond = 54764.5
|
||||
05/03/2016 15:21:28: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15718604 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55005.5
|
||||
05/03/2016 15:21:28: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.16901047 * 10000; EvalErrorPrediction = 0.07510000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.184798s
|
||||
05/03/2016 15:21:28: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_gpu/Models/simple.dnn.2'
|
||||
|
||||
05/03/2016 15:21:28: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 15:21:28: Starting minibatch loop.
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18133401 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54124.3
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13605756 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0046s; samplesPerSecond = 54884.7
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14345651 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54668.7
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.12512610 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17690991 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54800.5
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17504150 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54740.5
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.14723834 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 55224.2
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16752893 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0045s; samplesPerSecond = 54993.4
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10317773 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0046s; samplesPerSecond = 54800.5
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20306372 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16637036 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 55066.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.15126868 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19167224 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54884.7
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13687085 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 55420.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.20709912 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0046s; samplesPerSecond = 54740.5
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.12918774 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0045s; samplesPerSecond = 54981.3
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17185107 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 55322.0
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16523242 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54908.9
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14880249 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54728.5
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12590967 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 54957.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13443018 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54872.7
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16726147 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54836.6
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22407422 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0045s; samplesPerSecond = 55041.8
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18191553 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0045s; samplesPerSecond = 55078.2
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.19983057 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54680.7
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22728223 * 250; EvalClassificationError = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 54692.6
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12720459 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0045s; samplesPerSecond = 55151.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15842871 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11558691 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14163428 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18560596 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0045s; samplesPerSecond = 54993.4
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15099561 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 55078.2
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12822461 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54395.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17662500 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0045s; samplesPerSecond = 55309.7
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14950781 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0046s; samplesPerSecond = 54945.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11450977 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0046s; samplesPerSecond = 54908.9
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16386768 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0045s; samplesPerSecond = 55260.8
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14811523 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 54981.3
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16021143 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54764.5
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17989551 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0045s; samplesPerSecond = 55151.1
|
||||
05/03/2016 15:21:28: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15971016 * 10000; EvalClassificationError = 0.07740000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.184406s
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.18133401 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54124.3
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.13605756 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0046s; samplesPerSecond = 54884.7
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14345651 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54668.7
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.12512610 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0045s; samplesPerSecond = 54969.2
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.17690991 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54800.5
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.17504150 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0046s; samplesPerSecond = 54740.5
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.14723834 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 55224.2
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.16752893 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0045s; samplesPerSecond = 54993.4
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.10317773 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0046s; samplesPerSecond = 54800.5
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.20306372 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.16637036 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0045s; samplesPerSecond = 55066.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.15126868 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54824.6
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.19167224 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54884.7
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13687085 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 55420.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.20709912 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0046s; samplesPerSecond = 54740.5
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.12918774 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0045s; samplesPerSecond = 54981.3
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17185107 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 55322.0
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16523242 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54908.9
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.14880249 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0046s; samplesPerSecond = 54728.5
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.12590967 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0045s; samplesPerSecond = 54957.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.13443018 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54872.7
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16726147 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54836.6
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.22407422 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0045s; samplesPerSecond = 55041.8
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.18191553 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0045s; samplesPerSecond = 55078.2
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.19983057 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0046s; samplesPerSecond = 54680.7
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.22728223 * 250; EvalErrorPrediction = 0.12400000 * 250; time = 0.0046s; samplesPerSecond = 54692.6
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.12720459 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0045s; samplesPerSecond = 55151.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.15842871 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.11558691 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0045s; samplesPerSecond = 54945.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.14163428 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0045s; samplesPerSecond = 55248.6
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.18560596 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0045s; samplesPerSecond = 54993.4
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.15099561 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0045s; samplesPerSecond = 55078.2
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.12822461 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0046s; samplesPerSecond = 54395.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.17662500 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0045s; samplesPerSecond = 55309.7
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.14950781 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0046s; samplesPerSecond = 54945.1
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.11450977 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0046s; samplesPerSecond = 54908.9
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16386768 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0045s; samplesPerSecond = 55260.8
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14811523 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0045s; samplesPerSecond = 54981.3
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16021143 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0046s; samplesPerSecond = 54764.5
|
||||
05/03/2016 15:21:28: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.17989551 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0045s; samplesPerSecond = 55151.1
|
||||
05/03/2016 15:21:28: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15971016 * 10000; EvalErrorPrediction = 0.07740000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.184406s
|
||||
05/03/2016 15:21:28: SGD: Saving checkpoint model '/tmp/cntk-test-20160503152115.267374/CNTKTextFormatReader/Examples/Other/Simple2d_Simple@release_gpu/Models/simple.dnn'
|
||||
05/03/2016 15:21:29: CNTKCommandTrainEnd: Simple_Demo_Train
|
||||
|
||||
|
@ -609,7 +609,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -638,7 +638,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
|
||||
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -662,11 +662,11 @@ Allocating matrices for forward and/or backward propagation.
|
|||
|
||||
Memory Sharing Structure:
|
||||
|
||||
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
0x1efcc08: {[B2 Value[2 x 1]] }
|
||||
0x1efd8c8: {[W0 Value[50 x 2]] }
|
||||
0x1efee68: {[InvStdOfFeatures Value[2]] }
|
||||
0x2b337e8: {[EvalClassificationError Value[1]] }
|
||||
0x2b337e8: {[EvalErrorPrediction Value[1]] }
|
||||
0x2b33948: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
0x2b33f08: {[LogOfPrior Value[2]] }
|
||||
0x31808e8: {[W2 Value[2 x 50]] }
|
||||
|
@ -687,7 +687,7 @@ Memory Sharing Structure:
|
|||
0x7273058: {[W2*H1 Value[2 x 1 x *1]] }
|
||||
0x7273218: {[HLast Value[2 x 1 x *1]] }
|
||||
|
||||
05/03/2016 15:21:29: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05970149 * 603; CrossEntropyWithSoftmax = 0.13093129 * 603; perplexity = 1.13988946
|
||||
05/03/2016 15:21:29: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05970149 * 603; CrossEntropyWithSoftmax = 0.13093129 * 603; perplexity = 1.13988946
|
||||
|
||||
05/03/2016 15:21:29: Action "test" complete.
|
||||
|
||||
|
@ -703,7 +703,7 @@ Post-processing network...
|
|||
|
||||
8 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -733,7 +733,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *2] -> [2 x 1 x *2]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *2], [2 x 1] -> [2 x 1 x *2]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *2] -> [2 x 1 x *2]
|
||||
Validating --> Prior = Mean (labels) : [2 x *2] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -756,7 +756,7 @@ Allocating matrices for forward and/or backward propagation.
|
|||
|
||||
Memory Sharing Structure:
|
||||
|
||||
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalClassificationError Gradient[1]] [EvalClassificationError Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
|
||||
(nil): {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalErrorPrediction Gradient[1]] [EvalErrorPrediction Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
|
||||
0x1efcef8: {[features Value[2 x *2]] }
|
||||
0x1efe2c8: {[labels Value[2 x *2]] }
|
||||
0x1eff188: {[PosteriorProb Value[2 x 1 x *2]] }
|
||||
|
|
|
@ -56,7 +56,7 @@ Simple_Demo_Train = [
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -155,7 +155,7 @@ Simple_Demo_Train = [
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -298,7 +298,7 @@ configparameters: Simple.cntk:Simple_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -353,7 +353,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -382,7 +382,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
|
||||
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -406,14 +406,14 @@ Post-processing network complete.
|
|||
|
||||
05/03/2016 13:12:46: Evaluation criterion node(s):
|
||||
|
||||
05/03/2016 13:12:46: EvalClassificationError = ClassificationError
|
||||
05/03/2016 13:12:46: EvalErrorPrediction = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing Structure:
|
||||
|
||||
0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
000000702B410E90: {[features Value[2 x *]] }
|
||||
000000702B44E0C0: {[W0 Value[50 x 2]] }
|
||||
000000702B4D76F0: {[H2 Value[50 x 1 x *]] [W1*H1 Gradient[50 x 1 x *]] }
|
||||
|
@ -428,7 +428,7 @@ Memory Sharing Structure:
|
|||
000000702B4D8690: {[B0 Gradient[50 x 1]] [H1 Gradient[50 x 1 x *]] [W1*H1+B1 Gradient[50 x 1 x *]] [W2*H1 Value[2 x 1 x *]] }
|
||||
000000702B4D8730: {[HLast Value[2 x 1 x *]] [W2 Gradient[2 x 50]] }
|
||||
000000702B4D89B0: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
000000702B4D8AF0: {[EvalClassificationError Value[1]] }
|
||||
000000702B4D8AF0: {[EvalErrorPrediction Value[1]] }
|
||||
000000702B4D8B90: {[H1 Value[50 x 1 x *]] [W0*features Gradient[50 x *]] }
|
||||
000000702B4D8F50: {[B2 Gradient[2 x 1]] }
|
||||
000000702B4D91D0: {[ScaledLogLikelihood Value[2 x 1 x *]] }
|
||||
|
@ -456,139 +456,139 @@ Memory Sharing Structure:
|
|||
05/03/2016 13:12:47: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 13:12:47: Starting minibatch loop.
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70511987 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0327s; samplesPerSecond = 7657.0
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69754895 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9726.5
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71056921 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0248s; samplesPerSecond = 10096.1
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72951074 * 250; EvalClassificationError = 0.56000000 * 250; time = 0.0245s; samplesPerSecond = 10210.3
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70946655 * 250; EvalClassificationError = 0.48800000 * 250; time = 0.0249s; samplesPerSecond = 10032.5
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72656787 * 250; EvalClassificationError = 0.54400000 * 250; time = 0.0248s; samplesPerSecond = 10065.2
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69337402 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0256s; samplesPerSecond = 9766.8
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73605176 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0259s; samplesPerSecond = 9662.6
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71453076 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0239s; samplesPerSecond = 10469.0
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75191992 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0255s; samplesPerSecond = 9802.0
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75975146 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0248s; samplesPerSecond = 10100.6
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73172168 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0255s; samplesPerSecond = 9808.5
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76840820 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0261s; samplesPerSecond = 9593.2
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70464746 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0255s; samplesPerSecond = 9807.4
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70557227 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0242s; samplesPerSecond = 10340.4
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72711816 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0249s; samplesPerSecond = 10049.8
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70076660 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0247s; samplesPerSecond = 10117.4
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69409766 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0254s; samplesPerSecond = 9834.0
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69139941 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0243s; samplesPerSecond = 10275.8
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73361621 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0255s; samplesPerSecond = 9802.8
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72225879 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0246s; samplesPerSecond = 10146.5
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70356348 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0243s; samplesPerSecond = 10286.8
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69928613 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0252s; samplesPerSecond = 9909.2
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72360938 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0244s; samplesPerSecond = 10227.0
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69871875 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0244s; samplesPerSecond = 10243.8
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69114844 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0248s; samplesPerSecond = 10081.5
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68648047 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0254s; samplesPerSecond = 9844.5
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69657227 * 250; EvalClassificationError = 0.46400000 * 250; time = 0.0258s; samplesPerSecond = 9679.8
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71585547 * 250; EvalClassificationError = 0.45200000 * 250; time = 0.0255s; samplesPerSecond = 9798.2
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69730664 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0260s; samplesPerSecond = 9609.1
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70432422 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0265s; samplesPerSecond = 9431.1
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.69991797 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9722.7
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.68696875 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0259s; samplesPerSecond = 9647.3
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.67331445 * 250; EvalClassificationError = 0.37200000 * 250; time = 0.0267s; samplesPerSecond = 9364.7
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65711328 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0258s; samplesPerSecond = 9700.1
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64534375 * 250; EvalClassificationError = 0.44800000 * 250; time = 0.0260s; samplesPerSecond = 9608.0
|
||||
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.61021875 * 250; EvalClassificationError = 0.36400000 * 250; time = 0.0263s; samplesPerSecond = 9515.5
|
||||
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.54191016 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0229s; samplesPerSecond = 10907.5
|
||||
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.45624414 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0239s; samplesPerSecond = 10479.5
|
||||
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37636133 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0229s; samplesPerSecond = 10917.0
|
||||
05/03/2016 13:12:48: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68695688 * 10000; EvalClassificationError = 0.45550000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=1.01718s
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70511987 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0327s; samplesPerSecond = 7657.0
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69754895 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9726.5
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71056921 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0248s; samplesPerSecond = 10096.1
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72951074 * 250; EvalErrorPrediction = 0.56000000 * 250; time = 0.0245s; samplesPerSecond = 10210.3
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70946655 * 250; EvalErrorPrediction = 0.48800000 * 250; time = 0.0249s; samplesPerSecond = 10032.5
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72656787 * 250; EvalErrorPrediction = 0.54400000 * 250; time = 0.0248s; samplesPerSecond = 10065.2
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69337402 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0256s; samplesPerSecond = 9766.8
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73605176 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0259s; samplesPerSecond = 9662.6
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71453076 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0239s; samplesPerSecond = 10469.0
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75191992 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0255s; samplesPerSecond = 9802.0
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75975146 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0248s; samplesPerSecond = 10100.6
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73172168 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0255s; samplesPerSecond = 9808.5
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76840820 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0261s; samplesPerSecond = 9593.2
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70464746 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0255s; samplesPerSecond = 9807.4
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70557227 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0242s; samplesPerSecond = 10340.4
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72711816 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0249s; samplesPerSecond = 10049.8
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70076660 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0247s; samplesPerSecond = 10117.4
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69409766 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0254s; samplesPerSecond = 9834.0
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69139941 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0243s; samplesPerSecond = 10275.8
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73361621 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0255s; samplesPerSecond = 9802.8
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72225879 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0246s; samplesPerSecond = 10146.5
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70356348 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0243s; samplesPerSecond = 10286.8
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69928613 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0252s; samplesPerSecond = 9909.2
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72360938 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0244s; samplesPerSecond = 10227.0
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69871875 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0244s; samplesPerSecond = 10243.8
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69114844 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0248s; samplesPerSecond = 10081.5
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68648047 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0254s; samplesPerSecond = 9844.5
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69657227 * 250; EvalErrorPrediction = 0.46400000 * 250; time = 0.0258s; samplesPerSecond = 9679.8
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71585547 * 250; EvalErrorPrediction = 0.45200000 * 250; time = 0.0255s; samplesPerSecond = 9798.2
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.69730664 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0260s; samplesPerSecond = 9609.1
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70432422 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0265s; samplesPerSecond = 9431.1
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.69991797 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0257s; samplesPerSecond = 9722.7
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.68696875 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0259s; samplesPerSecond = 9647.3
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.67331445 * 250; EvalErrorPrediction = 0.37200000 * 250; time = 0.0267s; samplesPerSecond = 9364.7
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.65711328 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0258s; samplesPerSecond = 9700.1
|
||||
05/03/2016 13:12:47: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.64534375 * 250; EvalErrorPrediction = 0.44800000 * 250; time = 0.0260s; samplesPerSecond = 9608.0
|
||||
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.61021875 * 250; EvalErrorPrediction = 0.36400000 * 250; time = 0.0263s; samplesPerSecond = 9515.5
|
||||
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.54191016 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0229s; samplesPerSecond = 10907.5
|
||||
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.45624414 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0239s; samplesPerSecond = 10479.5
|
||||
05/03/2016 13:12:48: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.37636133 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0229s; samplesPerSecond = 10917.0
|
||||
05/03/2016 13:12:48: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.68695688 * 10000; EvalErrorPrediction = 0.45550000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=1.01718s
|
||||
05/03/2016 13:12:48: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503141245.787579\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_cpu/Models/simple.dnn.1'
|
||||
|
||||
05/03/2016 13:12:48: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 13:12:48: Starting minibatch loop.
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.28579105 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0228s; samplesPerSecond = 10943.3
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.27768619 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0230s; samplesPerSecond = 10860.1
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.23309790 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0223s; samplesPerSecond = 11187.2
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.20937585 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0221s; samplesPerSecond = 11327.1
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.20192059 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0225s; samplesPerSecond = 11116.5
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.21303992 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0232s; samplesPerSecond = 10762.9
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.17823340 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0247s; samplesPerSecond = 10120.6
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.18892688 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0231s; samplesPerSecond = 10816.4
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.14161328 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0225s; samplesPerSecond = 11100.8
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.15813574 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0226s; samplesPerSecond = 11077.1
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.21082446 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0233s; samplesPerSecond = 10728.2
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.16117041 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0229s; samplesPerSecond = 10928.0
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15665234 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0223s; samplesPerSecond = 11195.2
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13067773 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0226s; samplesPerSecond = 11047.3
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16602710 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0212s; samplesPerSecond = 11796.9
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.14975708 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0215s; samplesPerSecond = 11641.4
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22351709 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0214s; samplesPerSecond = 11708.5
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18010474 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0207s; samplesPerSecond = 12085.5
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15341577 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0207s; samplesPerSecond = 12072.6
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17195337 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0209s; samplesPerSecond = 11976.6
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15546069 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0217s; samplesPerSecond = 11534.6
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16008325 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0214s; samplesPerSecond = 11689.3
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15944043 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0209s; samplesPerSecond = 11981.2
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15336865 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0207s; samplesPerSecond = 12102.4
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14822266 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0212s; samplesPerSecond = 11766.4
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14999512 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0211s; samplesPerSecond = 11833.2
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15481982 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0208s; samplesPerSecond = 11992.7
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17656738 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0204s; samplesPerSecond = 12229.1
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22373242 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0213s; samplesPerSecond = 11738.7
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16403760 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0211s; samplesPerSecond = 11856.8
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17322168 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0211s; samplesPerSecond = 11868.0
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13165430 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0205s; samplesPerSecond = 12202.3
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14016992 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0208s; samplesPerSecond = 11993.9
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18369678 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0214s; samplesPerSecond = 11657.7
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15161035 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0215s; samplesPerSecond = 11612.8
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.18919824 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0215s; samplesPerSecond = 11632.8
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17373975 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0212s; samplesPerSecond = 11818.1
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15033740 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0208s; samplesPerSecond = 12036.6
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12107568 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0207s; samplesPerSecond = 12075.5
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15386328 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0227s; samplesPerSecond = 10997.7
|
||||
05/03/2016 13:12:48: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.17515541 * 10000; EvalClassificationError = 0.07440000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.87149s
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.28579105 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0228s; samplesPerSecond = 10943.3
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.27768619 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0230s; samplesPerSecond = 10860.1
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.23309790 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0223s; samplesPerSecond = 11187.2
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.20937585 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0221s; samplesPerSecond = 11327.1
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.20192059 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0225s; samplesPerSecond = 11116.5
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.21303992 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0232s; samplesPerSecond = 10762.9
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.17823340 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0247s; samplesPerSecond = 10120.6
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.18892688 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0231s; samplesPerSecond = 10816.4
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.14161328 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0225s; samplesPerSecond = 11100.8
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.15813574 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0226s; samplesPerSecond = 11077.1
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.21082446 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0233s; samplesPerSecond = 10728.2
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.16117041 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0229s; samplesPerSecond = 10928.0
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15665234 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0223s; samplesPerSecond = 11195.2
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13067773 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0226s; samplesPerSecond = 11047.3
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16602710 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0212s; samplesPerSecond = 11796.9
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.14975708 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0215s; samplesPerSecond = 11641.4
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22351709 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0214s; samplesPerSecond = 11708.5
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18010474 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0207s; samplesPerSecond = 12085.5
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15341577 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0207s; samplesPerSecond = 12072.6
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17195337 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0209s; samplesPerSecond = 11976.6
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15546069 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0217s; samplesPerSecond = 11534.6
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16008325 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0214s; samplesPerSecond = 11689.3
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.15944043 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0209s; samplesPerSecond = 11981.2
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15336865 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0207s; samplesPerSecond = 12102.4
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14822266 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0212s; samplesPerSecond = 11766.4
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.14999512 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0211s; samplesPerSecond = 11833.2
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15481982 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0208s; samplesPerSecond = 11992.7
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17656738 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0204s; samplesPerSecond = 12229.1
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22373242 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0213s; samplesPerSecond = 11738.7
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16403760 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0211s; samplesPerSecond = 11856.8
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17322168 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0211s; samplesPerSecond = 11868.0
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13165430 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0205s; samplesPerSecond = 12202.3
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.14016992 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0208s; samplesPerSecond = 11993.9
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18369678 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0214s; samplesPerSecond = 11657.7
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15161035 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0215s; samplesPerSecond = 11612.8
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.18919824 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0215s; samplesPerSecond = 11632.8
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17373975 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0212s; samplesPerSecond = 11818.1
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15033740 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0208s; samplesPerSecond = 12036.6
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12107568 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0207s; samplesPerSecond = 12075.5
|
||||
05/03/2016 13:12:48: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15386328 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0227s; samplesPerSecond = 10997.7
|
||||
05/03/2016 13:12:48: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.17515541 * 10000; EvalErrorPrediction = 0.07440000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.87149s
|
||||
05/03/2016 13:12:48: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503141245.787579\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_cpu/Models/simple.dnn.2'
|
||||
|
||||
05/03/2016 13:12:48: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 13:12:48: Starting minibatch loop.
|
||||
05/03/2016 13:12:48: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10671188 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0217s; samplesPerSecond = 11511.2
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17609265 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0205s; samplesPerSecond = 12183.8
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14152701 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0208s; samplesPerSecond = 12001.9
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16348053 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0213s; samplesPerSecond = 11748.1
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11764551 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0219s; samplesPerSecond = 11435.4
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16246954 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0212s; samplesPerSecond = 11811.4
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16140149 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0207s; samplesPerSecond = 12078.5
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19747632 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0202s; samplesPerSecond = 12391.0
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20041309 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0214s; samplesPerSecond = 11659.9
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13657080 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0208s; samplesPerSecond = 12033.7
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20124377 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0203s; samplesPerSecond = 12293.5
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17898120 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0206s; samplesPerSecond = 12144.2
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16037830 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0232s; samplesPerSecond = 10779.1
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16276050 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0214s; samplesPerSecond = 11704.7
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19882275 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0218s; samplesPerSecond = 11454.2
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10263354 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0208s; samplesPerSecond = 12041.2
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17038770 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0213s; samplesPerSecond = 11725.5
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16624731 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0209s; samplesPerSecond = 11958.3
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12664160 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0213s; samplesPerSecond = 11723.3
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11944995 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0213s; samplesPerSecond = 11733.8
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12949756 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0208s; samplesPerSecond = 11996.2
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18147778 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0222s; samplesPerSecond = 11242.5
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13172412 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0233s; samplesPerSecond = 10719.0
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19600269 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0238s; samplesPerSecond = 10521.0
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15840479 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0226s; samplesPerSecond = 11084.5
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11888281 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0225s; samplesPerSecond = 11129.9
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13710742 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0222s; samplesPerSecond = 11251.1
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20026318 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0233s; samplesPerSecond = 10730.5
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.18824951 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0223s; samplesPerSecond = 11227.9
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16653223 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0225s; samplesPerSecond = 11096.3
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.11935254 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0229s; samplesPerSecond = 10918.5
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16085400 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0225s; samplesPerSecond = 11132.9
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.16112646 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0219s; samplesPerSecond = 11439.6
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12345313 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0229s; samplesPerSecond = 10904.6
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13502686 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0226s; samplesPerSecond = 11075.2
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20874756 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0224s; samplesPerSecond = 11185.2
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16650537 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0227s; samplesPerSecond = 11009.3
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14995752 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0206s; samplesPerSecond = 12134.7
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16497070 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0209s; samplesPerSecond = 11953.7
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16843018 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0210s; samplesPerSecond = 11912.1
|
||||
05/03/2016 13:12:49: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15794755 * 10000; EvalClassificationError = 0.07480000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.871499s
|
||||
05/03/2016 13:12:48: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10671188 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0217s; samplesPerSecond = 11511.2
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17609265 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0205s; samplesPerSecond = 12183.8
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14152701 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0208s; samplesPerSecond = 12001.9
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16348053 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0213s; samplesPerSecond = 11748.1
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11764551 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0219s; samplesPerSecond = 11435.4
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16246954 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0212s; samplesPerSecond = 11811.4
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16140149 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0207s; samplesPerSecond = 12078.5
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19747632 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0202s; samplesPerSecond = 12391.0
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.20041309 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0214s; samplesPerSecond = 11659.9
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13657080 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0208s; samplesPerSecond = 12033.7
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20124377 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0203s; samplesPerSecond = 12293.5
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17898120 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0206s; samplesPerSecond = 12144.2
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16037830 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0232s; samplesPerSecond = 10779.1
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16276050 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0214s; samplesPerSecond = 11704.7
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19882275 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0218s; samplesPerSecond = 11454.2
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10263354 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0208s; samplesPerSecond = 12041.2
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17038770 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0213s; samplesPerSecond = 11725.5
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16624731 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0209s; samplesPerSecond = 11958.3
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12664160 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0213s; samplesPerSecond = 11723.3
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11944995 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0213s; samplesPerSecond = 11733.8
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12949756 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0208s; samplesPerSecond = 11996.2
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18147778 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0222s; samplesPerSecond = 11242.5
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13172412 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0233s; samplesPerSecond = 10719.0
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19600269 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0238s; samplesPerSecond = 10521.0
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15840479 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0226s; samplesPerSecond = 11084.5
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11888281 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0225s; samplesPerSecond = 11129.9
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13710742 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0222s; samplesPerSecond = 11251.1
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20026318 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0233s; samplesPerSecond = 10730.5
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.18824951 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0223s; samplesPerSecond = 11227.9
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16653223 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0225s; samplesPerSecond = 11096.3
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.11935254 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0229s; samplesPerSecond = 10918.5
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16085400 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0225s; samplesPerSecond = 11132.9
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.16112646 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0219s; samplesPerSecond = 11439.6
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12345313 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0229s; samplesPerSecond = 10904.6
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13502686 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0226s; samplesPerSecond = 11075.2
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20874756 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0224s; samplesPerSecond = 11185.2
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16650537 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0227s; samplesPerSecond = 11009.3
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.14995752 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0206s; samplesPerSecond = 12134.7
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16497070 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0209s; samplesPerSecond = 11953.7
|
||||
05/03/2016 13:12:49: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16843018 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0210s; samplesPerSecond = 11912.1
|
||||
05/03/2016 13:12:49: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15794755 * 10000; EvalErrorPrediction = 0.07480000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.871499s
|
||||
05/03/2016 13:12:49: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503141245.787579\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_cpu/Models/simple.dnn'
|
||||
05/03/2016 13:12:49: CNTKCommandTrainEnd: Simple_Demo_Train
|
||||
|
||||
|
@ -606,7 +606,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -635,7 +635,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
|
||||
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -659,7 +659,7 @@ Allocating matrices for forward and/or backward propagation.
|
|||
|
||||
Memory Sharing Structure:
|
||||
|
||||
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
00000070343C5200: {[InvStdOfFeatures Value[2]] }
|
||||
00000070343C5340: {[Prior Value[2]] }
|
||||
00000070343C53E0: {[W0 Value[50 x 2]] }
|
||||
|
@ -671,7 +671,7 @@ Memory Sharing Structure:
|
|||
000000703442D030: {[HLast Value[2 x 1 x *1]] }
|
||||
000000703442D0D0: {[W0*features Value[50 x *1]] }
|
||||
000000703442D170: {[W1*H1+B1 Value[50 x 1 x *1]] }
|
||||
000000703442D2B0: {[EvalClassificationError Value[1]] }
|
||||
000000703442D2B0: {[EvalErrorPrediction Value[1]] }
|
||||
000000703442D530: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
000000703442D5D0: {[W2 Value[2 x 50]] }
|
||||
000000703442D670: {[LogOfPrior Value[2]] }
|
||||
|
@ -684,7 +684,7 @@ Memory Sharing Structure:
|
|||
0000007034432340: {[B0 Value[50 x 1]] }
|
||||
0000007034432480: {[B2 Value[2 x 1]] }
|
||||
|
||||
05/03/2016 13:12:50: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12474995 * 603; perplexity = 1.13286515
|
||||
05/03/2016 13:12:50: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12474995 * 603; perplexity = 1.13286515
|
||||
|
||||
05/03/2016 13:12:50: Action "test" complete.
|
||||
|
||||
|
@ -700,7 +700,7 @@ Post-processing network...
|
|||
|
||||
8 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -730,7 +730,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *2] -> [2 x 1 x *2]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *2], [2 x 1] -> [2 x 1 x *2]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *2] -> [2 x 1 x *2]
|
||||
Validating --> Prior = Mean (labels) : [2 x *2] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -753,7 +753,7 @@ Allocating matrices for forward and/or backward propagation.
|
|||
|
||||
Memory Sharing Structure:
|
||||
|
||||
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalClassificationError Gradient[1]] [EvalClassificationError Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
|
||||
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalErrorPrediction Gradient[1]] [EvalErrorPrediction Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
|
||||
000000702E3275E0: {[H2 Value[50 x 1 x *2]] }
|
||||
000000702E327680: {[W2*H1 Value[2 x 1 x *2]] }
|
||||
000000702E3277C0: {[LogOfPrior Value[2]] }
|
||||
|
|
|
@ -56,7 +56,7 @@ Simple_Demo_Train = [
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -155,7 +155,7 @@ Simple_Demo_Train = [
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -298,7 +298,7 @@ configparameters: Simple.cntk:Simple_Demo_Train=[
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 2:50*2:2
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
initValueScale = 1.0
|
||||
applyMeanVarNorm = true
|
||||
|
@ -354,7 +354,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -383,7 +383,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *] -> [2 x 1 x *]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *], [2 x 1] -> [2 x 1 x *]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *], [2 x 1 x *] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *] -> [2 x 1 x *]
|
||||
Validating --> Prior = Mean (labels) : [2 x *] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -407,14 +407,14 @@ Post-processing network complete.
|
|||
|
||||
05/03/2016 13:01:59: Evaluation criterion node(s):
|
||||
|
||||
05/03/2016 13:01:59: EvalClassificationError = ClassificationError
|
||||
05/03/2016 13:01:59: EvalErrorPrediction = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing Structure:
|
||||
|
||||
0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
0000000000000000: {[EvalErrorPrediction Gradient[1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *]] [PosteriorProb Value[2 x 1 x *]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *]] [features Gradient[2 x *]] [labels Gradient[2 x *]] }
|
||||
000000501A590FF0: {[W2 Value[2 x 50]] }
|
||||
000000501A591090: {[W0 Value[50 x 2]] }
|
||||
000000501A5919F0: {[B1 Value[50 x 1]] }
|
||||
|
@ -427,7 +427,7 @@ Memory Sharing Structure:
|
|||
000000501A5A1180: {[ScaledLogLikelihood Value[2 x 1 x *]] }
|
||||
000000501A5A1220: {[B0 Gradient[50 x 1]] [H1 Gradient[50 x 1 x *]] [W1*H1+B1 Gradient[50 x 1 x *]] [W2*H1 Value[2 x 1 x *]] }
|
||||
000000501A5A17C0: {[W0 Gradient[50 x 2]] [W0*features+B0 Value[50 x 1 x *]] }
|
||||
000000501A5A1900: {[EvalClassificationError Value[1]] }
|
||||
000000501A5A1900: {[EvalErrorPrediction Value[1]] }
|
||||
000000501A5A19A0: {[W0*features Value[50 x *]] }
|
||||
000000501A5A1A40: {[W2*H1 Gradient[2 x 1 x *]] }
|
||||
000000501A5A1F40: {[MVNormalizedFeatures Value[2 x *]] }
|
||||
|
@ -457,139 +457,139 @@ Memory Sharing Structure:
|
|||
05/03/2016 13:01:59: Starting Epoch 1: learning rate per sample = 0.020000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 13:01:59: Starting minibatch loop.
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70650452 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0123s; samplesPerSecond = 20247.8
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69701831 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0112s; samplesPerSecond = 22393.4
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71089587 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0126s; samplesPerSecond = 19907.6
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72980273 * 250; EvalClassificationError = 0.56000000 * 250; time = 0.0113s; samplesPerSecond = 22042.0
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70902783 * 250; EvalClassificationError = 0.52800000 * 250; time = 0.0131s; samplesPerSecond = 19088.3
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72657300 * 250; EvalClassificationError = 0.54400000 * 250; time = 0.0138s; samplesPerSecond = 18059.7
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69319678 * 250; EvalClassificationError = 0.43200000 * 250; time = 0.0148s; samplesPerSecond = 16917.0
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73563477 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0164s; samplesPerSecond = 15236.5
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71463281 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0123s; samplesPerSecond = 20321.9
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75213428 * 250; EvalClassificationError = 0.47200000 * 250; time = 0.0167s; samplesPerSecond = 14944.1
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75931445 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0131s; samplesPerSecond = 19105.8
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73075293 * 250; EvalClassificationError = 0.50800000 * 250; time = 0.0132s; samplesPerSecond = 18886.5
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76701953 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0128s; samplesPerSecond = 19574.1
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70451270 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0128s; samplesPerSecond = 19467.4
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70539941 * 250; EvalClassificationError = 0.50400000 * 250; time = 0.0143s; samplesPerSecond = 17444.7
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72700293 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0123s; samplesPerSecond = 20391.5
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70096191 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0143s; samplesPerSecond = 17465.4
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69437305 * 250; EvalClassificationError = 0.49600000 * 250; time = 0.0117s; samplesPerSecond = 21367.5
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69161621 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0137s; samplesPerSecond = 18200.3
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73388281 * 250; EvalClassificationError = 0.55200000 * 250; time = 0.0115s; samplesPerSecond = 21782.7
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72255664 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0127s; samplesPerSecond = 19745.7
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70414551 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0131s; samplesPerSecond = 19017.2
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69976758 * 250; EvalClassificationError = 0.46000000 * 250; time = 0.0137s; samplesPerSecond = 18191.1
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72419141 * 250; EvalClassificationError = 0.51600000 * 250; time = 0.0143s; samplesPerSecond = 17444.7
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69943945 * 250; EvalClassificationError = 0.51200000 * 250; time = 0.0109s; samplesPerSecond = 22891.7
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69206445 * 250; EvalClassificationError = 0.47600000 * 250; time = 0.0133s; samplesPerSecond = 18739.2
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68771680 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0130s; samplesPerSecond = 19291.6
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69878516 * 250; EvalClassificationError = 0.44000000 * 250; time = 0.0130s; samplesPerSecond = 19230.8
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71889844 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0118s; samplesPerSecond = 21168.5
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.70086523 * 250; EvalClassificationError = 0.52400000 * 250; time = 0.0128s; samplesPerSecond = 19577.1
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70878320 * 250; EvalClassificationError = 0.53200000 * 250; time = 0.0129s; samplesPerSecond = 19432.6
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.70674414 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0126s; samplesPerSecond = 19767.5
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69707422 * 250; EvalClassificationError = 0.50000000 * 250; time = 0.0121s; samplesPerSecond = 20736.6
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68588281 * 250; EvalClassificationError = 0.40800000 * 250; time = 0.0124s; samplesPerSecond = 20109.4
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.67734766 * 250; EvalClassificationError = 0.45600000 * 250; time = 0.0127s; samplesPerSecond = 19727.0
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.67958008 * 250; EvalClassificationError = 0.48000000 * 250; time = 0.0127s; samplesPerSecond = 19615.5
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.66424805 * 250; EvalClassificationError = 0.46800000 * 250; time = 0.0117s; samplesPerSecond = 21292.9
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.62412500 * 250; EvalClassificationError = 0.20400000 * 250; time = 0.0127s; samplesPerSecond = 19624.8
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.58007422 * 250; EvalClassificationError = 0.16000000 * 250; time = 0.0130s; samplesPerSecond = 19157.1
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.52764648 * 250; EvalClassificationError = 0.19200000 * 250; time = 0.0143s; samplesPerSecond = 17521.7
|
||||
05/03/2016 13:02:00: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.69975483 * 10000; EvalClassificationError = 0.46850000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.526194s
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 1- 10]: CrossEntropyWithSoftmax = 0.70650452 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0123s; samplesPerSecond = 20247.8
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 11- 20]: CrossEntropyWithSoftmax = 0.69701831 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0112s; samplesPerSecond = 22393.4
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 21- 30]: CrossEntropyWithSoftmax = 0.71089587 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0126s; samplesPerSecond = 19907.6
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 31- 40]: CrossEntropyWithSoftmax = 0.72980273 * 250; EvalErrorPrediction = 0.56000000 * 250; time = 0.0113s; samplesPerSecond = 22042.0
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 41- 50]: CrossEntropyWithSoftmax = 0.70902783 * 250; EvalErrorPrediction = 0.52800000 * 250; time = 0.0131s; samplesPerSecond = 19088.3
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 51- 60]: CrossEntropyWithSoftmax = 0.72657300 * 250; EvalErrorPrediction = 0.54400000 * 250; time = 0.0138s; samplesPerSecond = 18059.7
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 61- 70]: CrossEntropyWithSoftmax = 0.69319678 * 250; EvalErrorPrediction = 0.43200000 * 250; time = 0.0148s; samplesPerSecond = 16917.0
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 71- 80]: CrossEntropyWithSoftmax = 0.73563477 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0164s; samplesPerSecond = 15236.5
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 81- 90]: CrossEntropyWithSoftmax = 0.71463281 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0123s; samplesPerSecond = 20321.9
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 91- 100]: CrossEntropyWithSoftmax = 0.75213428 * 250; EvalErrorPrediction = 0.47200000 * 250; time = 0.0167s; samplesPerSecond = 14944.1
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 101- 110]: CrossEntropyWithSoftmax = 0.75931445 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0131s; samplesPerSecond = 19105.8
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 111- 120]: CrossEntropyWithSoftmax = 0.73075293 * 250; EvalErrorPrediction = 0.50800000 * 250; time = 0.0132s; samplesPerSecond = 18886.5
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 121- 130]: CrossEntropyWithSoftmax = 0.76701953 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0128s; samplesPerSecond = 19574.1
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 131- 140]: CrossEntropyWithSoftmax = 0.70451270 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0128s; samplesPerSecond = 19467.4
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 141- 150]: CrossEntropyWithSoftmax = 0.70539941 * 250; EvalErrorPrediction = 0.50400000 * 250; time = 0.0143s; samplesPerSecond = 17444.7
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 151- 160]: CrossEntropyWithSoftmax = 0.72700293 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0123s; samplesPerSecond = 20391.5
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 161- 170]: CrossEntropyWithSoftmax = 0.70096191 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0143s; samplesPerSecond = 17465.4
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 171- 180]: CrossEntropyWithSoftmax = 0.69437305 * 250; EvalErrorPrediction = 0.49600000 * 250; time = 0.0117s; samplesPerSecond = 21367.5
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 181- 190]: CrossEntropyWithSoftmax = 0.69161621 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0137s; samplesPerSecond = 18200.3
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 191- 200]: CrossEntropyWithSoftmax = 0.73388281 * 250; EvalErrorPrediction = 0.55200000 * 250; time = 0.0115s; samplesPerSecond = 21782.7
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 201- 210]: CrossEntropyWithSoftmax = 0.72255664 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0127s; samplesPerSecond = 19745.7
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 211- 220]: CrossEntropyWithSoftmax = 0.70414551 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0131s; samplesPerSecond = 19017.2
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 221- 230]: CrossEntropyWithSoftmax = 0.69976758 * 250; EvalErrorPrediction = 0.46000000 * 250; time = 0.0137s; samplesPerSecond = 18191.1
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 231- 240]: CrossEntropyWithSoftmax = 0.72419141 * 250; EvalErrorPrediction = 0.51600000 * 250; time = 0.0143s; samplesPerSecond = 17444.7
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 241- 250]: CrossEntropyWithSoftmax = 0.69943945 * 250; EvalErrorPrediction = 0.51200000 * 250; time = 0.0109s; samplesPerSecond = 22891.7
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 251- 260]: CrossEntropyWithSoftmax = 0.69206445 * 250; EvalErrorPrediction = 0.47600000 * 250; time = 0.0133s; samplesPerSecond = 18739.2
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 261- 270]: CrossEntropyWithSoftmax = 0.68771680 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0130s; samplesPerSecond = 19291.6
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 271- 280]: CrossEntropyWithSoftmax = 0.69878516 * 250; EvalErrorPrediction = 0.44000000 * 250; time = 0.0130s; samplesPerSecond = 19230.8
|
||||
05/03/2016 13:01:59: Epoch[ 1 of 3]-Minibatch[ 281- 290]: CrossEntropyWithSoftmax = 0.71889844 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0118s; samplesPerSecond = 21168.5
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 291- 300]: CrossEntropyWithSoftmax = 0.70086523 * 250; EvalErrorPrediction = 0.52400000 * 250; time = 0.0128s; samplesPerSecond = 19577.1
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 301- 310]: CrossEntropyWithSoftmax = 0.70878320 * 250; EvalErrorPrediction = 0.53200000 * 250; time = 0.0129s; samplesPerSecond = 19432.6
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 311- 320]: CrossEntropyWithSoftmax = 0.70674414 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0126s; samplesPerSecond = 19767.5
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 321- 330]: CrossEntropyWithSoftmax = 0.69707422 * 250; EvalErrorPrediction = 0.50000000 * 250; time = 0.0121s; samplesPerSecond = 20736.6
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 331- 340]: CrossEntropyWithSoftmax = 0.68588281 * 250; EvalErrorPrediction = 0.40800000 * 250; time = 0.0124s; samplesPerSecond = 20109.4
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 341- 350]: CrossEntropyWithSoftmax = 0.67734766 * 250; EvalErrorPrediction = 0.45600000 * 250; time = 0.0127s; samplesPerSecond = 19727.0
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 351- 360]: CrossEntropyWithSoftmax = 0.67958008 * 250; EvalErrorPrediction = 0.48000000 * 250; time = 0.0127s; samplesPerSecond = 19615.5
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 361- 370]: CrossEntropyWithSoftmax = 0.66424805 * 250; EvalErrorPrediction = 0.46800000 * 250; time = 0.0117s; samplesPerSecond = 21292.9
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 371- 380]: CrossEntropyWithSoftmax = 0.62412500 * 250; EvalErrorPrediction = 0.20400000 * 250; time = 0.0127s; samplesPerSecond = 19624.8
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 381- 390]: CrossEntropyWithSoftmax = 0.58007422 * 250; EvalErrorPrediction = 0.16000000 * 250; time = 0.0130s; samplesPerSecond = 19157.1
|
||||
05/03/2016 13:02:00: Epoch[ 1 of 3]-Minibatch[ 391- 400]: CrossEntropyWithSoftmax = 0.52764648 * 250; EvalErrorPrediction = 0.19200000 * 250; time = 0.0143s; samplesPerSecond = 17521.7
|
||||
05/03/2016 13:02:00: Finished Epoch[ 1 of 3]: [Training] CrossEntropyWithSoftmax = 0.69975483 * 10000; EvalErrorPrediction = 0.46850000 * 10000; totalSamplesSeen = 10000; learningRatePerSample = 0.02; epochTime=0.526194s
|
||||
05/03/2016 13:02:00: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503140157.802427\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_gpu/Models/simple.dnn.1'
|
||||
|
||||
05/03/2016 13:02:00: Starting Epoch 2: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 13:02:00: Starting minibatch loop.
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.44832977 * 250; EvalClassificationError = 0.15200000 * 250; time = 0.0124s; samplesPerSecond = 20205.3
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.40085291 * 250; EvalClassificationError = 0.12400000 * 250; time = 0.0142s; samplesPerSecond = 17631.7
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.33487201 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0129s; samplesPerSecond = 19405.4
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.29081885 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0125s; samplesPerSecond = 20016.0
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.26279236 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0118s; samplesPerSecond = 21188.2
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.25220630 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0138s; samplesPerSecond = 18158.0
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.20988293 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0129s; samplesPerSecond = 19447.7
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.21577441 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0148s; samplesPerSecond = 16846.4
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.16622900 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0157s; samplesPerSecond = 15967.3
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.17637866 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0144s; samplesPerSecond = 17315.4
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.22185278 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0123s; samplesPerSecond = 20366.6
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17055811 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0151s; samplesPerSecond = 16564.0
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16481055 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0140s; samplesPerSecond = 17910.9
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13871704 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0156s; samplesPerSecond = 16005.1
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16922363 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0143s; samplesPerSecond = 17454.4
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15403345 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0135s; samplesPerSecond = 18485.7
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22255859 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0108s; samplesPerSecond = 23079.8
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18146851 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0133s; samplesPerSecond = 18843.7
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15611523 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0131s; samplesPerSecond = 19081.1
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17320215 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0137s; samplesPerSecond = 18192.4
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15727930 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0117s; samplesPerSecond = 21404.1
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16195410 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0119s; samplesPerSecond = 21088.1
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.16121338 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0128s; samplesPerSecond = 19546.5
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15427100 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0125s; samplesPerSecond = 20011.2
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14844775 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0141s; samplesPerSecond = 17743.1
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15055713 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0108s; samplesPerSecond = 23067.0
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15467627 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0132s; samplesPerSecond = 18965.3
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17615869 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0140s; samplesPerSecond = 17872.5
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22356104 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0121s; samplesPerSecond = 20650.9
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16514209 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0109s; samplesPerSecond = 22946.3
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17355859 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0129s; samplesPerSecond = 19372.3
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13117578 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0138s; samplesPerSecond = 18151.5
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13956104 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0121s; samplesPerSecond = 20743.4
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18397363 * 250; EvalClassificationError = 0.09600000 * 250; time = 0.0105s; samplesPerSecond = 23741.7
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15222656 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0126s; samplesPerSecond = 19909.2
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.18856396 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0145s; samplesPerSecond = 17207.0
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17513330 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0130s; samplesPerSecond = 19199.8
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15008252 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0108s; samplesPerSecond = 23043.6
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12125342 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0127s; samplesPerSecond = 19668.0
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15408496 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0141s; samplesPerSecond = 17788.5
|
||||
05/03/2016 13:02:00: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.19333879 * 10000; EvalClassificationError = 0.07700000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.525411s
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.44832977 * 250; EvalErrorPrediction = 0.15200000 * 250; time = 0.0124s; samplesPerSecond = 20205.3
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.40085291 * 250; EvalErrorPrediction = 0.12400000 * 250; time = 0.0142s; samplesPerSecond = 17631.7
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.33487201 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0129s; samplesPerSecond = 19405.4
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.29081885 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0125s; samplesPerSecond = 20016.0
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.26279236 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0118s; samplesPerSecond = 21188.2
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.25220630 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0138s; samplesPerSecond = 18158.0
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.20988293 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0129s; samplesPerSecond = 19447.7
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.21577441 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0148s; samplesPerSecond = 16846.4
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.16622900 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0157s; samplesPerSecond = 15967.3
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.17637866 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0144s; samplesPerSecond = 17315.4
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.22185278 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0123s; samplesPerSecond = 20366.6
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17055811 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0151s; samplesPerSecond = 16564.0
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.16481055 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0140s; samplesPerSecond = 17910.9
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.13871704 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0156s; samplesPerSecond = 16005.1
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.16922363 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0143s; samplesPerSecond = 17454.4
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.15403345 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0135s; samplesPerSecond = 18485.7
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.22255859 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0108s; samplesPerSecond = 23079.8
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.18146851 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0133s; samplesPerSecond = 18843.7
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.15611523 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0131s; samplesPerSecond = 19081.1
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.17320215 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0137s; samplesPerSecond = 18192.4
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.15727930 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0117s; samplesPerSecond = 21404.1
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.16195410 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0119s; samplesPerSecond = 21088.1
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.16121338 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0128s; samplesPerSecond = 19546.5
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.15427100 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0125s; samplesPerSecond = 20011.2
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.14844775 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0141s; samplesPerSecond = 17743.1
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.15055713 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0108s; samplesPerSecond = 23067.0
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.15467627 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0132s; samplesPerSecond = 18965.3
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.17615869 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0140s; samplesPerSecond = 17872.5
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.22356104 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0121s; samplesPerSecond = 20650.9
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16514209 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0109s; samplesPerSecond = 22946.3
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.17355859 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0129s; samplesPerSecond = 19372.3
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.13117578 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0138s; samplesPerSecond = 18151.5
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.13956104 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0121s; samplesPerSecond = 20743.4
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.18397363 * 250; EvalErrorPrediction = 0.09600000 * 250; time = 0.0105s; samplesPerSecond = 23741.7
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.15222656 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0126s; samplesPerSecond = 19909.2
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.18856396 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0145s; samplesPerSecond = 17207.0
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.17513330 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0130s; samplesPerSecond = 19199.8
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15008252 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0108s; samplesPerSecond = 23043.6
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.12125342 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0127s; samplesPerSecond = 19668.0
|
||||
05/03/2016 13:02:00: Epoch[ 2 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.15408496 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0141s; samplesPerSecond = 17788.5
|
||||
05/03/2016 13:02:00: Finished Epoch[ 2 of 3]: [Training] CrossEntropyWithSoftmax = 0.19333879 * 10000; EvalErrorPrediction = 0.07700000 * 10000; totalSamplesSeen = 20000; learningRatePerSample = 0.0080000004; epochTime=0.525411s
|
||||
05/03/2016 13:02:00: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503140157.802427\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_gpu/Models/simple.dnn.2'
|
||||
|
||||
05/03/2016 13:02:00: Starting Epoch 3: learning rate per sample = 0.008000 effective momentum = 0.900000 momentum as time constant = 237.3 samples
|
||||
|
||||
05/03/2016 13:02:00: Starting minibatch loop.
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10746781 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0126s; samplesPerSecond = 19806.7
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17648278 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0122s; samplesPerSecond = 20429.8
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14106094 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0126s; samplesPerSecond = 19838.1
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16348077 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0127s; samplesPerSecond = 19745.7
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11767151 * 250; EvalClassificationError = 0.04000000 * 250; time = 0.0110s; samplesPerSecond = 22787.3
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16217944 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0137s; samplesPerSecond = 18292.2
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16171204 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0147s; samplesPerSecond = 16977.9
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19844067 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0130s; samplesPerSecond = 19285.7
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.19984509 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0116s; samplesPerSecond = 21585.2
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13727051 * 250; EvalClassificationError = 0.05200000 * 250; time = 0.0133s; samplesPerSecond = 18839.5
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20126648 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0150s; samplesPerSecond = 16709.0
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17913672 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0138s; samplesPerSecond = 18066.2
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15983582 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0138s; samplesPerSecond = 18131.7
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16260010 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0126s; samplesPerSecond = 19798.8
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19813428 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0122s; samplesPerSecond = 20453.2
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10295117 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0124s; samplesPerSecond = 20091.6
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17117065 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0127s; samplesPerSecond = 19762.8
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16661938 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0127s; samplesPerSecond = 19620.2
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12718042 * 250; EvalClassificationError = 0.05600000 * 250; time = 0.0108s; samplesPerSecond = 23156.7
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11923853 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0139s; samplesPerSecond = 17989.5
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12890332 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0129s; samplesPerSecond = 19340.9
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18205469 * 250; EvalClassificationError = 0.10000000 * 250; time = 0.0124s; samplesPerSecond = 20182.4
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13154199 * 250; EvalClassificationError = 0.06000000 * 250; time = 0.0111s; samplesPerSecond = 22599.9
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19668359 * 250; EvalClassificationError = 0.10400000 * 250; time = 0.0139s; samplesPerSecond = 17922.4
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15817578 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0126s; samplesPerSecond = 19915.6
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11871240 * 250; EvalClassificationError = 0.04400000 * 250; time = 0.0136s; samplesPerSecond = 18378.3
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13730908 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0107s; samplesPerSecond = 23384.2
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20024854 * 250; EvalClassificationError = 0.09200000 * 250; time = 0.0134s; samplesPerSecond = 18719.6
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.18850244 * 250; EvalClassificationError = 0.10800000 * 250; time = 0.0131s; samplesPerSecond = 19151.2
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16640479 * 250; EvalClassificationError = 0.07200000 * 250; time = 0.0108s; samplesPerSecond = 23086.2
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.11872168 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0107s; samplesPerSecond = 23347.0
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16090430 * 250; EvalClassificationError = 0.08800000 * 250; time = 0.0127s; samplesPerSecond = 19730.1
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.16162939 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0137s; samplesPerSecond = 18205.7
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12408594 * 250; EvalClassificationError = 0.04800000 * 250; time = 0.0109s; samplesPerSecond = 22839.4
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13544434 * 250; EvalClassificationError = 0.06800000 * 250; time = 0.0126s; samplesPerSecond = 19893.4
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20890771 * 250; EvalClassificationError = 0.11200000 * 250; time = 0.0129s; samplesPerSecond = 19366.3
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16674365 * 250; EvalClassificationError = 0.08400000 * 250; time = 0.0146s; samplesPerSecond = 17116.3
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15033398 * 250; EvalClassificationError = 0.06400000 * 250; time = 0.0131s; samplesPerSecond = 19152.7
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16547705 * 250; EvalClassificationError = 0.07600000 * 250; time = 0.0120s; samplesPerSecond = 20752.1
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16792480 * 250; EvalClassificationError = 0.08000000 * 250; time = 0.0129s; samplesPerSecond = 19450.7
|
||||
05/03/2016 13:02:01: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15806136 * 10000; EvalClassificationError = 0.07470000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.511151s
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 1- 10, 2.50%]: CrossEntropyWithSoftmax = 0.10746781 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0126s; samplesPerSecond = 19806.7
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 11- 20, 5.00%]: CrossEntropyWithSoftmax = 0.17648278 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0122s; samplesPerSecond = 20429.8
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 21- 30, 7.50%]: CrossEntropyWithSoftmax = 0.14106094 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0126s; samplesPerSecond = 19838.1
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 31- 40, 10.00%]: CrossEntropyWithSoftmax = 0.16348077 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0127s; samplesPerSecond = 19745.7
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 41- 50, 12.50%]: CrossEntropyWithSoftmax = 0.11767151 * 250; EvalErrorPrediction = 0.04000000 * 250; time = 0.0110s; samplesPerSecond = 22787.3
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 51- 60, 15.00%]: CrossEntropyWithSoftmax = 0.16217944 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0137s; samplesPerSecond = 18292.2
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 61- 70, 17.50%]: CrossEntropyWithSoftmax = 0.16171204 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0147s; samplesPerSecond = 16977.9
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 71- 80, 20.00%]: CrossEntropyWithSoftmax = 0.19844067 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0130s; samplesPerSecond = 19285.7
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 81- 90, 22.50%]: CrossEntropyWithSoftmax = 0.19984509 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0116s; samplesPerSecond = 21585.2
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 91- 100, 25.00%]: CrossEntropyWithSoftmax = 0.13727051 * 250; EvalErrorPrediction = 0.05200000 * 250; time = 0.0133s; samplesPerSecond = 18839.5
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 101- 110, 27.50%]: CrossEntropyWithSoftmax = 0.20126648 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0150s; samplesPerSecond = 16709.0
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 111- 120, 30.00%]: CrossEntropyWithSoftmax = 0.17913672 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0138s; samplesPerSecond = 18066.2
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 121- 130, 32.50%]: CrossEntropyWithSoftmax = 0.15983582 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0138s; samplesPerSecond = 18131.7
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 131- 140, 35.00%]: CrossEntropyWithSoftmax = 0.16260010 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0126s; samplesPerSecond = 19798.8
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 141- 150, 37.50%]: CrossEntropyWithSoftmax = 0.19813428 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0122s; samplesPerSecond = 20453.2
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 151- 160, 40.00%]: CrossEntropyWithSoftmax = 0.10295117 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0124s; samplesPerSecond = 20091.6
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 161- 170, 42.50%]: CrossEntropyWithSoftmax = 0.17117065 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0127s; samplesPerSecond = 19762.8
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 171- 180, 45.00%]: CrossEntropyWithSoftmax = 0.16661938 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0127s; samplesPerSecond = 19620.2
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 181- 190, 47.50%]: CrossEntropyWithSoftmax = 0.12718042 * 250; EvalErrorPrediction = 0.05600000 * 250; time = 0.0108s; samplesPerSecond = 23156.7
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 191- 200, 50.00%]: CrossEntropyWithSoftmax = 0.11923853 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0139s; samplesPerSecond = 17989.5
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 201- 210, 52.50%]: CrossEntropyWithSoftmax = 0.12890332 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0129s; samplesPerSecond = 19340.9
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 211- 220, 55.00%]: CrossEntropyWithSoftmax = 0.18205469 * 250; EvalErrorPrediction = 0.10000000 * 250; time = 0.0124s; samplesPerSecond = 20182.4
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 221- 230, 57.50%]: CrossEntropyWithSoftmax = 0.13154199 * 250; EvalErrorPrediction = 0.06000000 * 250; time = 0.0111s; samplesPerSecond = 22599.9
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 231- 240, 60.00%]: CrossEntropyWithSoftmax = 0.19668359 * 250; EvalErrorPrediction = 0.10400000 * 250; time = 0.0139s; samplesPerSecond = 17922.4
|
||||
05/03/2016 13:02:00: Epoch[ 3 of 3]-Minibatch[ 241- 250, 62.50%]: CrossEntropyWithSoftmax = 0.15817578 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0126s; samplesPerSecond = 19915.6
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 251- 260, 65.00%]: CrossEntropyWithSoftmax = 0.11871240 * 250; EvalErrorPrediction = 0.04400000 * 250; time = 0.0136s; samplesPerSecond = 18378.3
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 261- 270, 67.50%]: CrossEntropyWithSoftmax = 0.13730908 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0107s; samplesPerSecond = 23384.2
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 271- 280, 70.00%]: CrossEntropyWithSoftmax = 0.20024854 * 250; EvalErrorPrediction = 0.09200000 * 250; time = 0.0134s; samplesPerSecond = 18719.6
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 281- 290, 72.50%]: CrossEntropyWithSoftmax = 0.18850244 * 250; EvalErrorPrediction = 0.10800000 * 250; time = 0.0131s; samplesPerSecond = 19151.2
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 291- 300, 75.00%]: CrossEntropyWithSoftmax = 0.16640479 * 250; EvalErrorPrediction = 0.07200000 * 250; time = 0.0108s; samplesPerSecond = 23086.2
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 301- 310, 77.50%]: CrossEntropyWithSoftmax = 0.11872168 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0107s; samplesPerSecond = 23347.0
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 311- 320, 80.00%]: CrossEntropyWithSoftmax = 0.16090430 * 250; EvalErrorPrediction = 0.08800000 * 250; time = 0.0127s; samplesPerSecond = 19730.1
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 321- 330, 82.50%]: CrossEntropyWithSoftmax = 0.16162939 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0137s; samplesPerSecond = 18205.7
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 331- 340, 85.00%]: CrossEntropyWithSoftmax = 0.12408594 * 250; EvalErrorPrediction = 0.04800000 * 250; time = 0.0109s; samplesPerSecond = 22839.4
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 341- 350, 87.50%]: CrossEntropyWithSoftmax = 0.13544434 * 250; EvalErrorPrediction = 0.06800000 * 250; time = 0.0126s; samplesPerSecond = 19893.4
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 351- 360, 90.00%]: CrossEntropyWithSoftmax = 0.20890771 * 250; EvalErrorPrediction = 0.11200000 * 250; time = 0.0129s; samplesPerSecond = 19366.3
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 361- 370, 92.50%]: CrossEntropyWithSoftmax = 0.16674365 * 250; EvalErrorPrediction = 0.08400000 * 250; time = 0.0146s; samplesPerSecond = 17116.3
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 371- 380, 95.00%]: CrossEntropyWithSoftmax = 0.15033398 * 250; EvalErrorPrediction = 0.06400000 * 250; time = 0.0131s; samplesPerSecond = 19152.7
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 381- 390, 97.50%]: CrossEntropyWithSoftmax = 0.16547705 * 250; EvalErrorPrediction = 0.07600000 * 250; time = 0.0120s; samplesPerSecond = 20752.1
|
||||
05/03/2016 13:02:01: Epoch[ 3 of 3]-Minibatch[ 391- 400, 100.00%]: CrossEntropyWithSoftmax = 0.16792480 * 250; EvalErrorPrediction = 0.08000000 * 250; time = 0.0129s; samplesPerSecond = 19450.7
|
||||
05/03/2016 13:02:01: Finished Epoch[ 3 of 3]: [Training] CrossEntropyWithSoftmax = 0.15806136 * 10000; EvalErrorPrediction = 0.07470000 * 10000; totalSamplesSeen = 30000; learningRatePerSample = 0.0080000004; epochTime=0.511151s
|
||||
05/03/2016 13:02:01: SGD: Saving checkpoint model 'E:\cygwin64\tmp\cntk-test-20160503140157.802427\CNTKTextFormatReader\Examples\Other\Simple2d_Simple@release_gpu/Models/simple.dnn'
|
||||
05/03/2016 13:02:01: CNTKCommandTrainEnd: Simple_Demo_Train
|
||||
|
||||
|
@ -607,7 +607,7 @@ Post-processing network...
|
|||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -636,7 +636,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *1] -> [2 x 1 x *1]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *1], [2 x 1] -> [2 x 1 x *1]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *1], [2 x 1 x *1] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *1] -> [2 x 1 x *1]
|
||||
Validating --> Prior = Mean (labels) : [2 x *1] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -660,7 +660,7 @@ Allocating matrices for forward and/or backward propagation.
|
|||
|
||||
Memory Sharing Structure:
|
||||
|
||||
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalClassificationError Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [EvalErrorPrediction Gradient[1]] [H1 Gradient[50 x 1 x *1]] [H2 Gradient[50 x 1 x *1]] [HLast Gradient[2 x 1 x *1]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *1]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *1]] [PosteriorProb Value[2 x 1 x *1]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *1]] [ScaledLogLikelihood Value[2 x 1 x *1]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *1]] [W0*features+B0 Gradient[50 x 1 x *1]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *1]] [W1*H1+B1 Gradient[50 x 1 x *1]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *1]] [features Gradient[2 x *1]] [labels Gradient[2 x *1]] }
|
||||
000000501A591090: {[W0*features+B0 Value[50 x 1 x *1]] }
|
||||
000000501A591130: {[W1*H1 Value[50 x 1 x *1]] }
|
||||
000000501A5916D0: {[W1*H1+B1 Value[50 x 1 x *1]] }
|
||||
|
@ -672,7 +672,7 @@ Memory Sharing Structure:
|
|||
000000501A592850: {[LogOfPrior Value[2]] }
|
||||
000000501A5928F0: {[H2 Value[50 x 1 x *1]] }
|
||||
000000501A592B70: {[W2 Value[2 x 50]] }
|
||||
000000501A592D50: {[EvalClassificationError Value[1]] }
|
||||
000000501A592D50: {[EvalErrorPrediction Value[1]] }
|
||||
000000501A592DF0: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
0000005024E60C70: {[W1 Value[50 x 50]] }
|
||||
0000005024E613F0: {[W0 Value[50 x 2]] }
|
||||
|
@ -685,7 +685,7 @@ Memory Sharing Structure:
|
|||
0000005024E62430: {[features Value[2 x *1]] }
|
||||
0000005024E624D0: {[B1 Value[50 x 1]] }
|
||||
|
||||
05/03/2016 13:02:01: Final Results: Minibatch[1-1]: EvalClassificationError = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12740351 * 603; perplexity = 1.13587526
|
||||
05/03/2016 13:02:01: Final Results: Minibatch[1-1]: EvalErrorPrediction = 0.05638474 * 603; CrossEntropyWithSoftmax = 0.12740351 * 603; perplexity = 1.13587526
|
||||
|
||||
05/03/2016 13:02:01: Action "test" complete.
|
||||
|
||||
|
@ -701,7 +701,7 @@ Post-processing network...
|
|||
|
||||
8 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -731,7 +731,7 @@ Validating --> W2*H1 = Times (W2, H2) : [2 x 50], [50 x 1 x *2] -> [2 x 1 x *2]
|
|||
Validating --> B2 = LearnableParameter() : -> [2 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [2 x 1 x *2], [2 x 1] -> [2 x 1 x *2]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [2 x *2], [2 x 1 x *2] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [2 x 1 x *2] -> [2 x 1 x *2]
|
||||
Validating --> Prior = Mean (labels) : [2 x *2] -> [2]
|
||||
Validating --> LogOfPrior = Log (Prior) : [2] -> [2]
|
||||
|
@ -754,7 +754,7 @@ Allocating matrices for forward and/or backward propagation.
|
|||
|
||||
Memory Sharing Structure:
|
||||
|
||||
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalClassificationError Gradient[1]] [EvalClassificationError Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
|
||||
0000000000000000: {[B0 Gradient[50 x 1]] [B1 Gradient[50 x 1]] [B2 Gradient[2 x 1]] [CrossEntropyWithSoftmax Gradient[1]] [CrossEntropyWithSoftmax Value[1]] [EvalErrorPrediction Gradient[1]] [EvalErrorPrediction Value[1]] [H1 Gradient[50 x 1 x *2]] [H2 Gradient[50 x 1 x *2]] [HLast Gradient[2 x 1 x *2]] [InvStdOfFeatures Gradient[2]] [LogOfPrior Gradient[2]] [MVNormalizedFeatures Gradient[2 x *2]] [MeanOfFeatures Gradient[2]] [PosteriorProb Gradient[2 x 1 x *2]] [Prior Gradient[2]] [ScaledLogLikelihood Gradient[2 x 1 x *2]] [ScaledLogLikelihood Value[2 x 1 x *2]] [W0 Gradient[50 x 2]] [W0*features Gradient[50 x *2]] [W0*features+B0 Gradient[50 x 1 x *2]] [W1 Gradient[50 x 50]] [W1*H1 Gradient[50 x 1 x *2]] [W1*H1+B1 Gradient[50 x 1 x *2]] [W2 Gradient[2 x 50]] [W2*H1 Gradient[2 x 1 x *2]] [features Gradient[2 x *2]] [labels Gradient[2 x *2]] }
|
||||
000000501A5914F0: {[InvStdOfFeatures Value[2]] }
|
||||
000000501A591590: {[MeanOfFeatures Value[2]] }
|
||||
000000501A5916D0: {[labels Value[2 x *2]] }
|
||||
|
|
|
@ -0,0 +1,434 @@
|
|||
CPU info:
|
||||
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
|
||||
Hardware threads: 24
|
||||
Total Memory: 264172964 kB
|
||||
-------------------------------------------------------------------
|
||||
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config/FeedForward.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
-------------------------------------------------------------------
|
||||
Build info:
|
||||
|
||||
Built time: Aug 16 2016 09:41:57
|
||||
Last modified date: Mon Aug 15 23:39:17 2016
|
||||
Build type: release
|
||||
Build target: GPU
|
||||
With 1bit-SGD: yes
|
||||
Math lib: mkl
|
||||
CUDA_PATH: /usr/local/cuda-7.5
|
||||
CUB_PATH: /usr/local/cub-1.4.1
|
||||
CUDNN_PATH: /usr/local/cudnn-4.0
|
||||
Build Branch: HEAD
|
||||
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
Built by philly on 643085f7f8c2
|
||||
Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
|
||||
-------------------------------------------------------------------
|
||||
Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
MPIWrapper: initializing MPI
|
||||
ping [requestnodes (before change)]: 1 nodes pinging each other
|
||||
ping [requestnodes (before change)]: all 1 nodes responded
|
||||
requestnodes [MPIWrapper]: using 1 out of 1 MPI nodes (1 requested); we (0) are in (participating)
|
||||
ping [requestnodes (after change)]: 1 nodes pinging each other
|
||||
ping [requestnodes (after change)]: all 1 nodes responded
|
||||
mpihelper: only one MPI process: MPI operation will be boring
|
||||
ping [mpihelper]: 1 nodes pinging each other
|
||||
ping [mpihelper]: all 1 nodes responded
|
||||
08/16/2016 10:01:41: -------------------------------------------------------------------
|
||||
08/16/2016 10:01:41: Build info:
|
||||
|
||||
08/16/2016 10:01:41: Built time: Aug 16 2016 09:41:57
|
||||
08/16/2016 10:01:41: Last modified date: Mon Aug 15 23:39:17 2016
|
||||
08/16/2016 10:01:41: Build type: release
|
||||
08/16/2016 10:01:41: Build target: GPU
|
||||
08/16/2016 10:01:41: With 1bit-SGD: yes
|
||||
08/16/2016 10:01:41: Math lib: mkl
|
||||
08/16/2016 10:01:41: CUDA_PATH: /usr/local/cuda-7.5
|
||||
08/16/2016 10:01:41: CUB_PATH: /usr/local/cub-1.4.1
|
||||
08/16/2016 10:01:41: CUDNN_PATH: /usr/local/cudnn-4.0
|
||||
08/16/2016 10:01:41: Build Branch: HEAD
|
||||
08/16/2016 10:01:41: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
08/16/2016 10:01:41: Built by philly on 643085f7f8c2
|
||||
08/16/2016 10:01:41: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
|
||||
08/16/2016 10:01:41: -------------------------------------------------------------------
|
||||
08/16/2016 10:01:42: -------------------------------------------------------------------
|
||||
08/16/2016 10:01:42: GPU info:
|
||||
|
||||
08/16/2016 10:01:42: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:01:42: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:01:42: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:01:42: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:01:42: -------------------------------------------------------------------
|
||||
|
||||
08/16/2016 10:01:42: Running on localhost at 2016/08/16 10:01:42
|
||||
08/16/2016 10:01:42: Command line:
|
||||
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config/FeedForward.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
|
||||
|
||||
08/16/2016 10:01:42: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 10:01:42: RootDir = ".."
|
||||
ConfigDir = "$RootDir$/Config"
|
||||
DataDir = "$RootDir$/Data"
|
||||
OutputDir = "$RootDir$/Output"
|
||||
ModelDir = "$OutputDir$/Models"
|
||||
deviceId = -1
|
||||
command = speechTrain
|
||||
precision = "float"
|
||||
traceLevel = "1"
|
||||
modelPath = "$ModelDir$/cntkSpeechFF.dnn"
|
||||
parallelTrain = true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
SimpleNetworkBuilder = [
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
applyMeanVarNorm = true
|
||||
needPrior = true
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 20480
|
||||
minibatchSize = 256:1024:2048
|
||||
learningRatesPerMB = 1.0:0.5:0.1
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0.9:0.656119
|
||||
maxEpochs = 3
|
||||
keepCheckPointFiles = true
|
||||
parallelTrain = [
|
||||
parallelizationMethod = "DataParallelSGD"
|
||||
distributedMBReading = true
|
||||
dataParallelSGD = [
|
||||
gradientBits = 1
|
||||
]
|
||||
]
|
||||
autoAdjust=[
|
||||
autoAdjustMinibatch = true
|
||||
minibatchSizeTuningFrequency = 1
|
||||
minibatchSearchCriterionErrorMargin = 2
|
||||
]
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "$DataDir$/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "$DataDir$/glob_0000.mlf"
|
||||
labelMappingFile = "$DataDir$/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
||||
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
|
||||
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
|
||||
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
|
||||
DeviceId=-1
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=2048]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
08/16/2016 10:01:42: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
08/16/2016 10:01:42: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 10:01:42: RootDir = ".."
|
||||
ConfigDir = "../Config"
|
||||
DataDir = "../Data"
|
||||
OutputDir = "../Output"
|
||||
ModelDir = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models"
|
||||
deviceId = -1
|
||||
command = speechTrain
|
||||
precision = "float"
|
||||
traceLevel = "1"
|
||||
modelPath = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn"
|
||||
parallelTrain = true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
SimpleNetworkBuilder = [
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
applyMeanVarNorm = true
|
||||
needPrior = true
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 20480
|
||||
minibatchSize = 256:1024:2048
|
||||
learningRatesPerMB = 1.0:0.5:0.1
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0.9:0.656119
|
||||
maxEpochs = 3
|
||||
keepCheckPointFiles = true
|
||||
parallelTrain = [
|
||||
parallelizationMethod = "DataParallelSGD"
|
||||
distributedMBReading = true
|
||||
dataParallelSGD = [
|
||||
gradientBits = 1
|
||||
]
|
||||
]
|
||||
autoAdjust=[
|
||||
autoAdjustMinibatch = true
|
||||
minibatchSizeTuningFrequency = 1
|
||||
minibatchSearchCriterionErrorMargin = 2
|
||||
]
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
|
||||
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
||||
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
|
||||
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
|
||||
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
|
||||
DeviceId=-1
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=2048]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
08/16/2016 10:01:42: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
08/16/2016 10:01:42: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
configparameters: FeedForward.cntk:command=speechTrain
|
||||
configparameters: FeedForward.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
|
||||
configparameters: FeedForward.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
configparameters: FeedForward.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
configparameters: FeedForward.cntk:deviceId=-1
|
||||
configparameters: FeedForward.cntk:ModelDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models
|
||||
configparameters: FeedForward.cntk:modelPath=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
|
||||
configparameters: FeedForward.cntk:OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
|
||||
configparameters: FeedForward.cntk:parallelTrain=true
|
||||
configparameters: FeedForward.cntk:precision=float
|
||||
configparameters: FeedForward.cntk:RootDir=..
|
||||
configparameters: FeedForward.cntk:RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu
|
||||
configparameters: FeedForward.cntk:speechTrain=[
|
||||
action = "train"
|
||||
SimpleNetworkBuilder = [
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
applyMeanVarNorm = true
|
||||
needPrior = true
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 20480
|
||||
minibatchSize = 256:1024:2048
|
||||
learningRatesPerMB = 1.0:0.5:0.1
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0.9:0.656119
|
||||
maxEpochs = 3
|
||||
keepCheckPointFiles = true
|
||||
parallelTrain = [
|
||||
parallelizationMethod = "DataParallelSGD"
|
||||
distributedMBReading = true
|
||||
dataParallelSGD = [
|
||||
gradientBits = 1
|
||||
]
|
||||
]
|
||||
autoAdjust=[
|
||||
autoAdjustMinibatch = true
|
||||
minibatchSizeTuningFrequency = 1
|
||||
minibatchSearchCriterionErrorMargin = 2
|
||||
]
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
|
||||
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]] [reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
configparameters: FeedForward.cntk:timestamping=true
|
||||
configparameters: FeedForward.cntk:traceLevel=1
|
||||
08/16/2016 10:01:42: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
08/16/2016 10:01:42: Commands: speechTrain
|
||||
08/16/2016 10:01:42: Precision = "float"
|
||||
08/16/2016 10:01:42: CNTKModelPath: /tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
|
||||
08/16/2016 10:01:42: CNTKCommandTrainInfo: speechTrain : 1
|
||||
08/16/2016 10:01:42: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
|
||||
|
||||
08/16/2016 10:01:42: ##############################################################################
|
||||
08/16/2016 10:01:42: # #
|
||||
08/16/2016 10:01:42: # Action "train" #
|
||||
08/16/2016 10:01:42: # #
|
||||
08/16/2016 10:01:42: ##############################################################################
|
||||
|
||||
08/16/2016 10:01:42: CNTKCommandTrainBegin: speechTrain
|
||||
SimpleNetworkBuilder Using CPU
|
||||
reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp ... 948 entries
|
||||
total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list
|
||||
htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf ... total 948 entries
|
||||
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
|
||||
label set 0: 129 classes
|
||||
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
|
||||
|
||||
08/16/2016 10:01:42: Creating virgin network.
|
||||
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000.
|
||||
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000.
|
||||
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000.
|
||||
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
|
||||
Post-processing network...
|
||||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
Prior = Mean()
|
||||
ScaledLogLikelihood = Minus()
|
||||
|
||||
Validating network. 25 nodes to process in pass 1.
|
||||
|
||||
Validating --> labels = InputValue() : -> [132 x *]
|
||||
Validating --> W2 = LearnableParameter() : -> [132 x 512]
|
||||
Validating --> W1 = LearnableParameter() : -> [512 x 512]
|
||||
Validating --> W0 = LearnableParameter() : -> [512 x 363]
|
||||
Validating --> features = InputValue() : -> [363 x *]
|
||||
Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363]
|
||||
Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363]
|
||||
Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *]
|
||||
Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *]
|
||||
Validating --> B0 = LearnableParameter() : -> [512 x 1]
|
||||
Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *]
|
||||
Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> B1 = LearnableParameter() : -> [512 x 1]
|
||||
Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *]
|
||||
Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *]
|
||||
Validating --> B2 = LearnableParameter() : -> [132 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *]
|
||||
Validating --> Prior = Mean (labels) : [132 x *] -> [132]
|
||||
Validating --> LogOfPrior = Log (Prior) : [132] -> [132]
|
||||
Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
|
||||
|
||||
Validating network. 17 nodes to process in pass 2.
|
||||
|
||||
|
||||
Validating network, final pass.
|
||||
|
||||
|
||||
|
||||
12 out of 25 nodes do not share the minibatch layout with the input data.
|
||||
|
||||
Post-processing network complete.
|
||||
|
||||
08/16/2016 10:01:42: Created model with 25 nodes on CPU.
|
||||
|
||||
08/16/2016 10:01:42: Training criterion node(s):
|
||||
08/16/2016 10:01:42: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
|
||||
|
||||
08/16/2016 10:01:42: Evaluation criterion node(s):
|
||||
08/16/2016 10:01:42: EvalErrorPrediction = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared.
|
||||
|
||||
{ W1 : [512 x 512] (gradient)
|
||||
W1*H1+B1 : [512 x 1 x *] }
|
||||
{ H2 : [512 x 1 x *]
|
||||
W1*H1 : [512 x 1 x *] (gradient) }
|
||||
{ B0 : [512 x 1] (gradient)
|
||||
H1 : [512 x 1 x *] (gradient)
|
||||
W1*H1+B1 : [512 x 1 x *] (gradient)
|
||||
W2*H1 : [132 x 1 x *] }
|
||||
{ HLast : [132 x 1 x *]
|
||||
W2 : [132 x 512] (gradient) }
|
||||
{ B1 : [512 x 1] (gradient)
|
||||
H2 : [512 x 1 x *] (gradient)
|
||||
HLast : [132 x 1 x *] (gradient) }
|
||||
{ W0 : [512 x 363] (gradient)
|
||||
W0*features+B0 : [512 x 1 x *] }
|
||||
{ H1 : [512 x 1 x *]
|
||||
W0*features : [512 x *] (gradient) }
|
||||
{ W0*features+B0 : [512 x 1 x *] (gradient)
|
||||
W1*H1 : [512 x 1 x *] }
|
||||
|
||||
|
||||
08/16/2016 10:01:42: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient:
|
||||
|
||||
08/16/2016 10:01:42: Node 'B0' (LearnableParameter operation) : [512 x 1]
|
||||
08/16/2016 10:01:42: Node 'B1' (LearnableParameter operation) : [512 x 1]
|
||||
08/16/2016 10:01:42: Node 'B2' (LearnableParameter operation) : [132 x 1]
|
||||
08/16/2016 10:01:42: Node 'W0' (LearnableParameter operation) : [512 x 363]
|
||||
08/16/2016 10:01:42: Node 'W1' (LearnableParameter operation) : [512 x 512]
|
||||
08/16/2016 10:01:42: Node 'W2' (LearnableParameter operation) : [132 x 512]
|
||||
|
||||
|
||||
08/16/2016 10:01:42: Precomputing --> 3 PreCompute nodes found.
|
||||
|
||||
08/16/2016 10:01:42: MeanOfFeatures = Mean()
|
||||
08/16/2016 10:01:42: InvStdOfFeatures = InvStdDev()
|
||||
08/16/2016 10:01:42: Prior = Mean()
|
||||
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
|
||||
|
||||
08/16/2016 10:01:43: Precomputing --> Completed.
|
||||
|
||||
|
||||
08/16/2016 10:01:43: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
|
||||
minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
|
||||
08/16/2016 10:01:43: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
|
||||
08/16/2016 10:01:44: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.45117986 * 2048; EvalErrorPrediction = 0.92187500 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.209966s
|
||||
08/16/2016 10:01:44: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn'
|
||||
08/16/2016 10:01:44: CNTKCommandTrainEnd: speechTrain
|
||||
|
||||
08/16/2016 10:01:44: Action "train" complete.
|
||||
|
||||
08/16/2016 10:01:44: __COMPLETED__
|
||||
~MPIWrapper
|
|
@ -1 +0,0 @@
|
|||
__COMPLETED__
|
|
@ -1 +0,0 @@
|
|||
__COMPLETED__
|
|
@ -0,0 +1,435 @@
|
|||
CPU info:
|
||||
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
|
||||
Hardware threads: 24
|
||||
Total Memory: 264172964 kB
|
||||
-------------------------------------------------------------------
|
||||
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config/FeedForward.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
-------------------------------------------------------------------
|
||||
Build info:
|
||||
|
||||
Built time: Aug 16 2016 09:41:57
|
||||
Last modified date: Mon Aug 15 23:39:17 2016
|
||||
Build type: release
|
||||
Build target: GPU
|
||||
With 1bit-SGD: yes
|
||||
Math lib: mkl
|
||||
CUDA_PATH: /usr/local/cuda-7.5
|
||||
CUB_PATH: /usr/local/cub-1.4.1
|
||||
CUDNN_PATH: /usr/local/cudnn-4.0
|
||||
Build Branch: HEAD
|
||||
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
Built by philly on 643085f7f8c2
|
||||
Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
|
||||
-------------------------------------------------------------------
|
||||
Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
MPIWrapper: initializing MPI
|
||||
ping [requestnodes (before change)]: 1 nodes pinging each other
|
||||
ping [requestnodes (before change)]: all 1 nodes responded
|
||||
requestnodes [MPIWrapper]: using 1 out of 1 MPI nodes (1 requested); we (0) are in (participating)
|
||||
ping [requestnodes (after change)]: 1 nodes pinging each other
|
||||
ping [requestnodes (after change)]: all 1 nodes responded
|
||||
mpihelper: only one MPI process: MPI operation will be boring
|
||||
ping [mpihelper]: 1 nodes pinging each other
|
||||
ping [mpihelper]: all 1 nodes responded
|
||||
08/16/2016 10:01:45: -------------------------------------------------------------------
|
||||
08/16/2016 10:01:45: Build info:
|
||||
|
||||
08/16/2016 10:01:45: Built time: Aug 16 2016 09:41:57
|
||||
08/16/2016 10:01:45: Last modified date: Mon Aug 15 23:39:17 2016
|
||||
08/16/2016 10:01:45: Build type: release
|
||||
08/16/2016 10:01:45: Build target: GPU
|
||||
08/16/2016 10:01:45: With 1bit-SGD: yes
|
||||
08/16/2016 10:01:45: Math lib: mkl
|
||||
08/16/2016 10:01:45: CUDA_PATH: /usr/local/cuda-7.5
|
||||
08/16/2016 10:01:45: CUB_PATH: /usr/local/cub-1.4.1
|
||||
08/16/2016 10:01:45: CUDNN_PATH: /usr/local/cudnn-4.0
|
||||
08/16/2016 10:01:45: Build Branch: HEAD
|
||||
08/16/2016 10:01:45: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
08/16/2016 10:01:45: Built by philly on 643085f7f8c2
|
||||
08/16/2016 10:01:45: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
|
||||
08/16/2016 10:01:45: -------------------------------------------------------------------
|
||||
08/16/2016 10:01:46: -------------------------------------------------------------------
|
||||
08/16/2016 10:01:46: GPU info:
|
||||
|
||||
08/16/2016 10:01:46: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:01:46: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:01:46: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:01:46: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:01:46: -------------------------------------------------------------------
|
||||
|
||||
08/16/2016 10:01:46: Running on localhost at 2016/08/16 10:01:46
|
||||
08/16/2016 10:01:46: Command line:
|
||||
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config/FeedForward.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
|
||||
|
||||
08/16/2016 10:01:46: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 10:01:46: RootDir = ".."
|
||||
ConfigDir = "$RootDir$/Config"
|
||||
DataDir = "$RootDir$/Data"
|
||||
OutputDir = "$RootDir$/Output"
|
||||
ModelDir = "$OutputDir$/Models"
|
||||
deviceId = -1
|
||||
command = speechTrain
|
||||
precision = "float"
|
||||
traceLevel = "1"
|
||||
modelPath = "$ModelDir$/cntkSpeechFF.dnn"
|
||||
parallelTrain = true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
SimpleNetworkBuilder = [
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
applyMeanVarNorm = true
|
||||
needPrior = true
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 20480
|
||||
minibatchSize = 256:1024:2048
|
||||
learningRatesPerMB = 1.0:0.5:0.1
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0.9:0.656119
|
||||
maxEpochs = 3
|
||||
keepCheckPointFiles = true
|
||||
parallelTrain = [
|
||||
parallelizationMethod = "DataParallelSGD"
|
||||
distributedMBReading = true
|
||||
dataParallelSGD = [
|
||||
gradientBits = 1
|
||||
]
|
||||
]
|
||||
autoAdjust=[
|
||||
autoAdjustMinibatch = true
|
||||
minibatchSizeTuningFrequency = 1
|
||||
minibatchSearchCriterionErrorMargin = 2
|
||||
]
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "$DataDir$/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "$DataDir$/glob_0000.mlf"
|
||||
labelMappingFile = "$DataDir$/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
||||
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
|
||||
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
|
||||
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
|
||||
DeviceId=0
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=2048]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
08/16/2016 10:01:46: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
08/16/2016 10:01:46: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 10:01:46: RootDir = ".."
|
||||
ConfigDir = "../Config"
|
||||
DataDir = "../Data"
|
||||
OutputDir = "../Output"
|
||||
ModelDir = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models"
|
||||
deviceId = -1
|
||||
command = speechTrain
|
||||
precision = "float"
|
||||
traceLevel = "1"
|
||||
modelPath = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn"
|
||||
parallelTrain = true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
SimpleNetworkBuilder = [
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
applyMeanVarNorm = true
|
||||
needPrior = true
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 20480
|
||||
minibatchSize = 256:1024:2048
|
||||
learningRatesPerMB = 1.0:0.5:0.1
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0.9:0.656119
|
||||
maxEpochs = 3
|
||||
keepCheckPointFiles = true
|
||||
parallelTrain = [
|
||||
parallelizationMethod = "DataParallelSGD"
|
||||
distributedMBReading = true
|
||||
dataParallelSGD = [
|
||||
gradientBits = 1
|
||||
]
|
||||
]
|
||||
autoAdjust=[
|
||||
autoAdjustMinibatch = true
|
||||
minibatchSizeTuningFrequency = 1
|
||||
minibatchSearchCriterionErrorMargin = 2
|
||||
]
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
|
||||
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
||||
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
|
||||
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
|
||||
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
|
||||
DeviceId=0
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=2048]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
08/16/2016 10:01:46: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
08/16/2016 10:01:46: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
configparameters: FeedForward.cntk:command=speechTrain
|
||||
configparameters: FeedForward.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/FeedForward/../../../../../../Examples/Speech/AN4/Config
|
||||
configparameters: FeedForward.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
configparameters: FeedForward.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
configparameters: FeedForward.cntk:deviceId=0
|
||||
configparameters: FeedForward.cntk:ModelDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models
|
||||
configparameters: FeedForward.cntk:modelPath=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
|
||||
configparameters: FeedForward.cntk:OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
|
||||
configparameters: FeedForward.cntk:parallelTrain=true
|
||||
configparameters: FeedForward.cntk:precision=float
|
||||
configparameters: FeedForward.cntk:RootDir=..
|
||||
configparameters: FeedForward.cntk:RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu
|
||||
configparameters: FeedForward.cntk:speechTrain=[
|
||||
action = "train"
|
||||
SimpleNetworkBuilder = [
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
applyMeanVarNorm = true
|
||||
needPrior = true
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 20480
|
||||
minibatchSize = 256:1024:2048
|
||||
learningRatesPerMB = 1.0:0.5:0.1
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0.9:0.656119
|
||||
maxEpochs = 3
|
||||
keepCheckPointFiles = true
|
||||
parallelTrain = [
|
||||
parallelizationMethod = "DataParallelSGD"
|
||||
distributedMBReading = true
|
||||
dataParallelSGD = [
|
||||
gradientBits = 1
|
||||
]
|
||||
]
|
||||
autoAdjust=[
|
||||
autoAdjustMinibatch = true
|
||||
minibatchSizeTuningFrequency = 1
|
||||
minibatchSearchCriterionErrorMargin = 2
|
||||
]
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
|
||||
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]] [reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
configparameters: FeedForward.cntk:timestamping=true
|
||||
configparameters: FeedForward.cntk:traceLevel=1
|
||||
08/16/2016 10:01:46: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
08/16/2016 10:01:46: Commands: speechTrain
|
||||
08/16/2016 10:01:46: Precision = "float"
|
||||
08/16/2016 10:01:46: CNTKModelPath: /tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
|
||||
08/16/2016 10:01:46: CNTKCommandTrainInfo: speechTrain : 1
|
||||
08/16/2016 10:01:46: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
|
||||
|
||||
08/16/2016 10:01:46: ##############################################################################
|
||||
08/16/2016 10:01:46: # #
|
||||
08/16/2016 10:01:46: # Action "train" #
|
||||
08/16/2016 10:01:46: # #
|
||||
08/16/2016 10:01:46: ##############################################################################
|
||||
|
||||
08/16/2016 10:01:46: CNTKCommandTrainBegin: speechTrain
|
||||
SimpleNetworkBuilder Using GPU 0
|
||||
reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp ... 948 entries
|
||||
total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list
|
||||
htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf ... total 948 entries
|
||||
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
|
||||
label set 0: 129 classes
|
||||
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
|
||||
|
||||
08/16/2016 10:01:46: Creating virgin network.
|
||||
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000.
|
||||
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
|
||||
SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4
|
||||
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000.
|
||||
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000.
|
||||
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
|
||||
Post-processing network...
|
||||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
Prior = Mean()
|
||||
ScaledLogLikelihood = Minus()
|
||||
|
||||
Validating network. 25 nodes to process in pass 1.
|
||||
|
||||
Validating --> labels = InputValue() : -> [132 x *]
|
||||
Validating --> W2 = LearnableParameter() : -> [132 x 512]
|
||||
Validating --> W1 = LearnableParameter() : -> [512 x 512]
|
||||
Validating --> W0 = LearnableParameter() : -> [512 x 363]
|
||||
Validating --> features = InputValue() : -> [363 x *]
|
||||
Validating --> MeanOfFeatures = Mean (features) : [363 x *] -> [363]
|
||||
Validating --> InvStdOfFeatures = InvStdDev (features) : [363 x *] -> [363]
|
||||
Validating --> MVNormalizedFeatures = PerDimMeanVarNormalization (features, MeanOfFeatures, InvStdOfFeatures) : [363 x *], [363], [363] -> [363 x *]
|
||||
Validating --> W0*features = Times (W0, MVNormalizedFeatures) : [512 x 363], [363 x *] -> [512 x *]
|
||||
Validating --> B0 = LearnableParameter() : -> [512 x 1]
|
||||
Validating --> W0*features+B0 = Plus (W0*features, B0) : [512 x *], [512 x 1] -> [512 x 1 x *]
|
||||
Validating --> H1 = Sigmoid (W0*features+B0) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> W1*H1 = Times (W1, H1) : [512 x 512], [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> B1 = LearnableParameter() : -> [512 x 1]
|
||||
Validating --> W1*H1+B1 = Plus (W1*H1, B1) : [512 x 1 x *], [512 x 1] -> [512 x 1 x *]
|
||||
Validating --> H2 = Sigmoid (W1*H1+B1) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x *]
|
||||
Validating --> B2 = LearnableParameter() : -> [132 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *]
|
||||
Validating --> Prior = Mean (labels) : [132 x *] -> [132]
|
||||
Validating --> LogOfPrior = Log (Prior) : [132] -> [132]
|
||||
Validating --> ScaledLogLikelihood = Minus (HLast, LogOfPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
|
||||
|
||||
Validating network. 17 nodes to process in pass 2.
|
||||
|
||||
|
||||
Validating network, final pass.
|
||||
|
||||
|
||||
|
||||
12 out of 25 nodes do not share the minibatch layout with the input data.
|
||||
|
||||
Post-processing network complete.
|
||||
|
||||
08/16/2016 10:01:46: Created model with 25 nodes on GPU 0.
|
||||
|
||||
08/16/2016 10:01:46: Training criterion node(s):
|
||||
08/16/2016 10:01:46: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
|
||||
|
||||
08/16/2016 10:01:46: Evaluation criterion node(s):
|
||||
08/16/2016 10:01:46: EvalErrorPrediction = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared.
|
||||
|
||||
{ W0 : [512 x 363] (gradient)
|
||||
W0*features+B0 : [512 x 1 x *] }
|
||||
{ H1 : [512 x 1 x *]
|
||||
W0*features : [512 x *] (gradient) }
|
||||
{ W0*features+B0 : [512 x 1 x *] (gradient)
|
||||
W1*H1 : [512 x 1 x *] }
|
||||
{ W1 : [512 x 512] (gradient)
|
||||
W1*H1+B1 : [512 x 1 x *] }
|
||||
{ H2 : [512 x 1 x *]
|
||||
W1*H1 : [512 x 1 x *] (gradient) }
|
||||
{ B0 : [512 x 1] (gradient)
|
||||
H1 : [512 x 1 x *] (gradient)
|
||||
W1*H1+B1 : [512 x 1 x *] (gradient)
|
||||
W2*H1 : [132 x 1 x *] }
|
||||
{ HLast : [132 x 1 x *]
|
||||
W2 : [132 x 512] (gradient) }
|
||||
{ B1 : [512 x 1] (gradient)
|
||||
H2 : [512 x 1 x *] (gradient)
|
||||
HLast : [132 x 1 x *] (gradient) }
|
||||
|
||||
|
||||
08/16/2016 10:01:46: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient:
|
||||
|
||||
08/16/2016 10:01:46: Node 'B0' (LearnableParameter operation) : [512 x 1]
|
||||
08/16/2016 10:01:46: Node 'B1' (LearnableParameter operation) : [512 x 1]
|
||||
08/16/2016 10:01:46: Node 'B2' (LearnableParameter operation) : [132 x 1]
|
||||
08/16/2016 10:01:46: Node 'W0' (LearnableParameter operation) : [512 x 363]
|
||||
08/16/2016 10:01:46: Node 'W1' (LearnableParameter operation) : [512 x 512]
|
||||
08/16/2016 10:01:46: Node 'W2' (LearnableParameter operation) : [132 x 512]
|
||||
|
||||
|
||||
08/16/2016 10:01:46: Precomputing --> 3 PreCompute nodes found.
|
||||
|
||||
08/16/2016 10:01:46: MeanOfFeatures = Mean()
|
||||
08/16/2016 10:01:46: InvStdOfFeatures = InvStdDev()
|
||||
08/16/2016 10:01:46: Prior = Mean()
|
||||
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
|
||||
|
||||
08/16/2016 10:01:46: Precomputing --> Completed.
|
||||
|
||||
|
||||
08/16/2016 10:01:46: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
|
||||
minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
|
||||
08/16/2016 10:01:46: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
|
||||
08/16/2016 10:01:46: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.41144794 * 2048; EvalErrorPrediction = 0.92773438 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.023072s
|
||||
08/16/2016 10:01:46: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn'
|
||||
08/16/2016 10:01:46: CNTKCommandTrainEnd: speechTrain
|
||||
|
||||
08/16/2016 10:01:46: Action "train" complete.
|
||||
|
||||
08/16/2016 10:01:46: __COMPLETED__
|
||||
~MPIWrapper
|
|
@ -1 +0,0 @@
|
|||
__COMPLETED__
|
|
@ -1 +0,0 @@
|
|||
__COMPLETED__
|
|
@ -1,18 +1,24 @@
|
|||
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]]
|
||||
CPU info:
|
||||
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
|
||||
Hardware threads: 24
|
||||
Total Memory: 268381192 kB
|
||||
-------------------------------------------------------------------
|
||||
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
-------------------------------------------------------------------
|
||||
Build info:
|
||||
|
||||
Built time: May 3 2016 13:15:46
|
||||
Last modified date: Tue Apr 26 23:35:31 2016
|
||||
Built time: Aug 16 2016 03:09:16
|
||||
Last modified date: Fri Aug 12 05:28:23 2016
|
||||
Build type: Release
|
||||
Build target: GPU
|
||||
With 1bit-SGD: no
|
||||
With 1bit-SGD: yes
|
||||
Math lib: mkl
|
||||
CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
|
||||
CUB_PATH: c:\src\cub-1.4.1
|
||||
CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
|
||||
Build Branch: HEAD
|
||||
Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12
|
||||
Built by svcphil on cntk-muc01
|
||||
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
Built by svcphil on Philly-Pool1
|
||||
Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
|
||||
-------------------------------------------------------------------
|
||||
Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
|
@ -25,31 +31,39 @@ ping [requestnodes (after change)]: all 1 nodes responded
|
|||
mpihelper: only one MPI process: MPI operation will be boring
|
||||
ping [mpihelper]: 1 nodes pinging each other
|
||||
ping [mpihelper]: all 1 nodes responded
|
||||
05/03/2016 13:22:22: -------------------------------------------------------------------
|
||||
05/03/2016 13:22:22: Build info:
|
||||
08/16/2016 03:20:10: -------------------------------------------------------------------
|
||||
08/16/2016 03:20:10: Build info:
|
||||
|
||||
05/03/2016 13:22:22: Built time: May 3 2016 13:15:46
|
||||
05/03/2016 13:22:22: Last modified date: Tue Apr 26 23:35:31 2016
|
||||
05/03/2016 13:22:22: Build type: Release
|
||||
05/03/2016 13:22:22: Build target: GPU
|
||||
05/03/2016 13:22:22: With 1bit-SGD: no
|
||||
05/03/2016 13:22:22: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
|
||||
05/03/2016 13:22:22: CUB_PATH: c:\src\cub-1.4.1
|
||||
05/03/2016 13:22:22: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
|
||||
05/03/2016 13:22:22: Build Branch: HEAD
|
||||
05/03/2016 13:22:22: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12
|
||||
05/03/2016 13:22:22: Built by svcphil on cntk-muc01
|
||||
05/03/2016 13:22:22: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
|
||||
05/03/2016 13:22:22: -------------------------------------------------------------------
|
||||
08/16/2016 03:20:10: Built time: Aug 16 2016 03:09:16
|
||||
08/16/2016 03:20:10: Last modified date: Fri Aug 12 05:28:23 2016
|
||||
08/16/2016 03:20:10: Build type: Release
|
||||
08/16/2016 03:20:10: Build target: GPU
|
||||
08/16/2016 03:20:10: With 1bit-SGD: yes
|
||||
08/16/2016 03:20:10: Math lib: mkl
|
||||
08/16/2016 03:20:10: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
|
||||
08/16/2016 03:20:10: CUB_PATH: c:\src\cub-1.4.1
|
||||
08/16/2016 03:20:10: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
|
||||
08/16/2016 03:20:10: Build Branch: HEAD
|
||||
08/16/2016 03:20:10: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
08/16/2016 03:20:10: Built by svcphil on Philly-Pool1
|
||||
08/16/2016 03:20:10: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
|
||||
08/16/2016 03:20:10: -------------------------------------------------------------------
|
||||
08/16/2016 03:20:12: -------------------------------------------------------------------
|
||||
08/16/2016 03:20:12: GPU info:
|
||||
|
||||
05/03/2016 13:22:22: Running on DPHAIM-22 at 2016/05/03 13:22:22
|
||||
05/03/2016 13:22:22: Command line:
|
||||
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]]
|
||||
08/16/2016 03:20:12: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
|
||||
08/16/2016 03:20:12: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
|
||||
08/16/2016 03:20:12: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
|
||||
08/16/2016 03:20:12: -------------------------------------------------------------------
|
||||
|
||||
08/16/2016 03:20:12: Running on DPHAIM-25 at 2016/08/16 03:20:12
|
||||
08/16/2016 03:20:12: Command line:
|
||||
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
|
||||
|
||||
05/03/2016 13:22:22: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
|
||||
05/03/2016 13:22:22: RootDir = ".."
|
||||
08/16/2016 03:20:12: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 03:20:12: RootDir = ".."
|
||||
ConfigDir = "$RootDir$/Config"
|
||||
DataDir = "$RootDir$/Data"
|
||||
OutputDir = "$RootDir$/Output"
|
||||
|
@ -65,7 +79,7 @@ speechTrain = [
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
applyMeanVarNorm = true
|
||||
needPrior = true
|
||||
|
@ -111,35 +125,36 @@ speechTrain = [
|
|||
]
|
||||
]
|
||||
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu
|
||||
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
|
||||
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
|
||||
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu
|
||||
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
|
||||
DeviceId=-1
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=2048]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
05/03/2016 13:22:22: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
|
||||
08/16/2016 03:20:12: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
05/03/2016 13:22:22: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
05/03/2016 13:22:22: RootDir = ".."
|
||||
08/16/2016 03:20:12: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 03:20:12: RootDir = ".."
|
||||
ConfigDir = "../Config"
|
||||
DataDir = "../Data"
|
||||
OutputDir = "../Output"
|
||||
ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models"
|
||||
ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models"
|
||||
deviceId = -1
|
||||
command = speechTrain
|
||||
precision = "float"
|
||||
traceLevel = "1"
|
||||
modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn"
|
||||
modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn"
|
||||
parallelTrain = true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
SimpleNetworkBuilder = [
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
applyMeanVarNorm = true
|
||||
needPrior = true
|
||||
|
@ -185,36 +200,37 @@ speechTrain = [
|
|||
]
|
||||
]
|
||||
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu
|
||||
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
|
||||
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
|
||||
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu
|
||||
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
|
||||
DeviceId=-1
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=2048]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
05/03/2016 13:22:22: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
08/16/2016 03:20:12: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
05/03/2016 13:22:22: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 03:20:12: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
configparameters: FeedForward.cntk:command=speechTrain
|
||||
configparameters: FeedForward.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
|
||||
configparameters: FeedForward.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
configparameters: FeedForward.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
configparameters: FeedForward.cntk:deviceId=-1
|
||||
configparameters: FeedForward.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models
|
||||
configparameters: FeedForward.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
|
||||
configparameters: FeedForward.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu
|
||||
configparameters: FeedForward.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models
|
||||
configparameters: FeedForward.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
|
||||
configparameters: FeedForward.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
|
||||
configparameters: FeedForward.cntk:parallelTrain=true
|
||||
configparameters: FeedForward.cntk:precision=float
|
||||
configparameters: FeedForward.cntk:RootDir=..
|
||||
configparameters: FeedForward.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu
|
||||
configparameters: FeedForward.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu
|
||||
configparameters: FeedForward.cntk:speechTrain=[
|
||||
action = "train"
|
||||
SimpleNetworkBuilder = [
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
applyMeanVarNorm = true
|
||||
needPrior = true
|
||||
|
@ -258,24 +274,24 @@ configparameters: FeedForward.cntk:speechTrain=[
|
|||
labelType = "category"
|
||||
]
|
||||
]
|
||||
] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]]
|
||||
] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]] [reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
configparameters: FeedForward.cntk:timestamping=true
|
||||
configparameters: FeedForward.cntk:traceLevel=1
|
||||
05/03/2016 13:22:22: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
05/03/2016 13:22:22: Commands: speechTrain
|
||||
05/03/2016 13:22:22: Precision = "float"
|
||||
05/03/2016 13:22:22: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
|
||||
05/03/2016 13:22:22: CNTKCommandTrainInfo: speechTrain : 1
|
||||
05/03/2016 13:22:22: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
|
||||
08/16/2016 03:20:12: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
08/16/2016 03:20:12: Commands: speechTrain
|
||||
08/16/2016 03:20:12: Precision = "float"
|
||||
08/16/2016 03:20:12: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn
|
||||
08/16/2016 03:20:12: CNTKCommandTrainInfo: speechTrain : 1
|
||||
08/16/2016 03:20:12: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
|
||||
|
||||
05/03/2016 13:22:22: ##############################################################################
|
||||
05/03/2016 13:22:22: # #
|
||||
05/03/2016 13:22:22: # Action "train" #
|
||||
05/03/2016 13:22:22: # #
|
||||
05/03/2016 13:22:22: ##############################################################################
|
||||
08/16/2016 03:20:12: ##############################################################################
|
||||
08/16/2016 03:20:12: # #
|
||||
08/16/2016 03:20:12: # Action "train" #
|
||||
08/16/2016 03:20:12: # #
|
||||
08/16/2016 03:20:12: ##############################################################################
|
||||
|
||||
05/03/2016 13:22:22: CNTKCommandTrainBegin: speechTrain
|
||||
08/16/2016 03:20:12: CNTKCommandTrainBegin: speechTrain
|
||||
SimpleNetworkBuilder Using CPU
|
||||
reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp ... 948 entries
|
||||
total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list
|
||||
|
@ -284,13 +300,25 @@ htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Example
|
|||
label set 0: 129 classes
|
||||
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
|
||||
|
||||
05/03/2016 13:22:23: Creating virgin network.
|
||||
08/16/2016 03:20:12: Creating virgin network.
|
||||
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000.
|
||||
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000.
|
||||
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000.
|
||||
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
|
||||
Post-processing network...
|
||||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -319,7 +347,7 @@ Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x
|
|||
Validating --> B2 = LearnableParameter() : -> [132 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *]
|
||||
Validating --> Prior = Mean (labels) : [132 x *] -> [132]
|
||||
Validating --> LogOfPrior = Log (Prior) : [132] -> [132]
|
||||
|
@ -336,70 +364,70 @@ Validating network, final pass.
|
|||
|
||||
Post-processing network complete.
|
||||
|
||||
05/03/2016 13:22:23: Created model with 25 nodes on CPU.
|
||||
08/16/2016 03:20:12: Created model with 25 nodes on CPU.
|
||||
|
||||
05/03/2016 13:22:23: Training criterion node(s):
|
||||
05/03/2016 13:22:23: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
|
||||
08/16/2016 03:20:12: Training criterion node(s):
|
||||
08/16/2016 03:20:12: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
|
||||
|
||||
05/03/2016 13:22:23: Evaluation criterion node(s):
|
||||
|
||||
05/03/2016 13:22:23: EvalClassificationError = ClassificationError
|
||||
08/16/2016 03:20:12: Evaluation criterion node(s):
|
||||
08/16/2016 03:20:12: EvalErrorPrediction = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing Structure:
|
||||
Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared.
|
||||
|
||||
0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] }
|
||||
000000BDD334C430: {[features Value[363 x *]] }
|
||||
000000BDD334C4D0: {[W0 Value[512 x 363]] }
|
||||
000000BDD334C610: {[MeanOfFeatures Value[363]] }
|
||||
000000BDD334C890: {[B0 Value[512 x 1]] }
|
||||
000000BDD334CCF0: {[W1 Value[512 x 512]] }
|
||||
000000BDD334CE30: {[B1 Value[512 x 1]] }
|
||||
000000BDD334D1F0: {[InvStdOfFeatures Value[363]] }
|
||||
000000BDD5BCA080: {[Prior Value[132]] }
|
||||
000000BDD5BCA120: {[EvalClassificationError Value[1]] }
|
||||
000000BDD5BCA260: {[W2 Value[132 x 512]] }
|
||||
000000BDD5BCA440: {[labels Value[132 x *]] }
|
||||
000000BDD5BCA6C0: {[MVNormalizedFeatures Value[363 x *]] }
|
||||
000000BDD5BCAE40: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] }
|
||||
000000BDD5BCAEE0: {[CrossEntropyWithSoftmax Gradient[1]] }
|
||||
000000BDD5BCAF80: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] }
|
||||
000000BDD5BCB0C0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] }
|
||||
000000BDD5BCB160: {[ScaledLogLikelihood Value[132 x 1 x *]] }
|
||||
000000BDD5BCB340: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] }
|
||||
000000BDD5BCB520: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] }
|
||||
000000BDD5BCB5C0: {[B2 Gradient[132 x 1]] }
|
||||
000000BDD5BCB700: {[W0*features Value[512 x *]] }
|
||||
000000BDD5BCB7A0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] }
|
||||
000000BDD5BCB8E0: {[LogOfPrior Value[132]] }
|
||||
000000BDD5BCB980: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] }
|
||||
000000BDD5BCBAC0: {[B2 Value[132 x 1]] }
|
||||
000000BDD5BCBB60: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
000000BDD5BCBC00: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] }
|
||||
000000BDD5BCBCA0: {[W2*H1 Gradient[132 x 1 x *]] }
|
||||
{ W0*features+B0 : [512 x 1 x *] (gradient)
|
||||
W1*H1 : [512 x 1 x *] }
|
||||
{ W0 : [512 x 363] (gradient)
|
||||
W0*features+B0 : [512 x 1 x *] }
|
||||
{ H1 : [512 x 1 x *]
|
||||
W0*features : [512 x *] (gradient) }
|
||||
{ W1 : [512 x 512] (gradient)
|
||||
W1*H1+B1 : [512 x 1 x *] }
|
||||
{ H2 : [512 x 1 x *]
|
||||
W1*H1 : [512 x 1 x *] (gradient) }
|
||||
{ HLast : [132 x 1 x *]
|
||||
W2 : [132 x 512] (gradient) }
|
||||
{ B0 : [512 x 1] (gradient)
|
||||
H1 : [512 x 1 x *] (gradient)
|
||||
W1*H1+B1 : [512 x 1 x *] (gradient)
|
||||
W2*H1 : [132 x 1 x *] }
|
||||
{ B1 : [512 x 1] (gradient)
|
||||
H2 : [512 x 1 x *] (gradient)
|
||||
HLast : [132 x 1 x *] (gradient) }
|
||||
|
||||
|
||||
05/03/2016 13:22:23: Precomputing --> 3 PreCompute nodes found.
|
||||
08/16/2016 03:20:12: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient:
|
||||
|
||||
05/03/2016 13:22:23: MeanOfFeatures = Mean()
|
||||
05/03/2016 13:22:23: InvStdOfFeatures = InvStdDev()
|
||||
05/03/2016 13:22:23: Prior = Mean()
|
||||
08/16/2016 03:20:12: Node 'B0' (LearnableParameter operation) : [512 x 1]
|
||||
08/16/2016 03:20:12: Node 'B1' (LearnableParameter operation) : [512 x 1]
|
||||
08/16/2016 03:20:12: Node 'B2' (LearnableParameter operation) : [132 x 1]
|
||||
08/16/2016 03:20:12: Node 'W0' (LearnableParameter operation) : [512 x 363]
|
||||
08/16/2016 03:20:12: Node 'W1' (LearnableParameter operation) : [512 x 512]
|
||||
08/16/2016 03:20:12: Node 'W2' (LearnableParameter operation) : [132 x 512]
|
||||
|
||||
|
||||
08/16/2016 03:20:12: Precomputing --> 3 PreCompute nodes found.
|
||||
|
||||
08/16/2016 03:20:12: MeanOfFeatures = Mean()
|
||||
08/16/2016 03:20:12: InvStdOfFeatures = InvStdDev()
|
||||
08/16/2016 03:20:12: Prior = Mean()
|
||||
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
|
||||
|
||||
05/03/2016 13:22:24: Precomputing --> Completed.
|
||||
08/16/2016 03:20:15: Precomputing --> Completed.
|
||||
|
||||
|
||||
05/03/2016 13:22:24: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
|
||||
08/16/2016 03:20:15: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
|
||||
minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
|
||||
05/03/2016 13:22:24: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
|
||||
05/03/2016 13:22:25: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.48531419 * 2048; EvalClassificationError = 0.90722656 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.288909s
|
||||
05/03/2016 13:22:25: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn'
|
||||
05/03/2016 13:22:25: CNTKCommandTrainEnd: speechTrain
|
||||
08/16/2016 03:20:15: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
|
||||
08/16/2016 03:20:15: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.46427900 * 2048; EvalErrorPrediction = 0.91259766 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.28059s
|
||||
08/16/2016 03:20:15: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_cpu/Models/cntkSpeechFF.dnn'
|
||||
08/16/2016 03:20:15: CNTKCommandTrainEnd: speechTrain
|
||||
|
||||
05/03/2016 13:22:25: Action "train" complete.
|
||||
08/16/2016 03:20:15: Action "train" complete.
|
||||
|
||||
05/03/2016 13:22:25: __COMPLETED__
|
||||
08/16/2016 03:20:15: __COMPLETED__
|
||||
~MPIWrapper
|
|
@ -1 +0,0 @@
|
|||
__COMPLETED__
|
|
@ -1 +0,0 @@
|
|||
__COMPLETED__
|
|
@ -1,18 +1,24 @@
|
|||
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]]
|
||||
CPU info:
|
||||
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
|
||||
Hardware threads: 24
|
||||
Total Memory: 268381192 kB
|
||||
-------------------------------------------------------------------
|
||||
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
-------------------------------------------------------------------
|
||||
Build info:
|
||||
|
||||
Built time: May 3 2016 13:15:46
|
||||
Last modified date: Tue Apr 26 23:35:31 2016
|
||||
Built time: Aug 16 2016 03:09:16
|
||||
Last modified date: Fri Aug 12 05:28:23 2016
|
||||
Build type: Release
|
||||
Build target: GPU
|
||||
With 1bit-SGD: no
|
||||
With 1bit-SGD: yes
|
||||
Math lib: mkl
|
||||
CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
|
||||
CUB_PATH: c:\src\cub-1.4.1
|
||||
CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
|
||||
Build Branch: HEAD
|
||||
Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12
|
||||
Built by svcphil on cntk-muc01
|
||||
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
Built by svcphil on Philly-Pool1
|
||||
Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
|
||||
-------------------------------------------------------------------
|
||||
Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
|
@ -25,31 +31,39 @@ ping [requestnodes (after change)]: all 1 nodes responded
|
|||
mpihelper: only one MPI process: MPI operation will be boring
|
||||
ping [mpihelper]: 1 nodes pinging each other
|
||||
ping [mpihelper]: all 1 nodes responded
|
||||
05/03/2016 13:22:25: -------------------------------------------------------------------
|
||||
05/03/2016 13:22:25: Build info:
|
||||
08/16/2016 03:20:17: -------------------------------------------------------------------
|
||||
08/16/2016 03:20:17: Build info:
|
||||
|
||||
05/03/2016 13:22:25: Built time: May 3 2016 13:15:46
|
||||
05/03/2016 13:22:25: Last modified date: Tue Apr 26 23:35:31 2016
|
||||
05/03/2016 13:22:25: Build type: Release
|
||||
05/03/2016 13:22:25: Build target: GPU
|
||||
05/03/2016 13:22:25: With 1bit-SGD: no
|
||||
05/03/2016 13:22:25: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
|
||||
05/03/2016 13:22:25: CUB_PATH: c:\src\cub-1.4.1
|
||||
05/03/2016 13:22:25: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
|
||||
05/03/2016 13:22:25: Build Branch: HEAD
|
||||
05/03/2016 13:22:25: Build SHA1: af96f7cce6c3c78a4f1e9315e061291c79360e12
|
||||
05/03/2016 13:22:25: Built by svcphil on cntk-muc01
|
||||
05/03/2016 13:22:25: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
|
||||
05/03/2016 13:22:25: -------------------------------------------------------------------
|
||||
08/16/2016 03:20:17: Built time: Aug 16 2016 03:09:16
|
||||
08/16/2016 03:20:17: Last modified date: Fri Aug 12 05:28:23 2016
|
||||
08/16/2016 03:20:17: Build type: Release
|
||||
08/16/2016 03:20:17: Build target: GPU
|
||||
08/16/2016 03:20:17: With 1bit-SGD: yes
|
||||
08/16/2016 03:20:17: Math lib: mkl
|
||||
08/16/2016 03:20:17: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
|
||||
08/16/2016 03:20:17: CUB_PATH: c:\src\cub-1.4.1
|
||||
08/16/2016 03:20:17: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
|
||||
08/16/2016 03:20:17: Build Branch: HEAD
|
||||
08/16/2016 03:20:17: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
08/16/2016 03:20:17: Built by svcphil on Philly-Pool1
|
||||
08/16/2016 03:20:17: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
|
||||
08/16/2016 03:20:17: -------------------------------------------------------------------
|
||||
08/16/2016 03:20:19: -------------------------------------------------------------------
|
||||
08/16/2016 03:20:19: GPU info:
|
||||
|
||||
05/03/2016 13:22:25: Running on DPHAIM-22 at 2016/05/03 13:22:25
|
||||
05/03/2016 13:22:25: Command line:
|
||||
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]]
|
||||
08/16/2016 03:20:19: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
|
||||
08/16/2016 03:20:19: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
|
||||
08/16/2016 03:20:19: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
|
||||
08/16/2016 03:20:19: -------------------------------------------------------------------
|
||||
|
||||
08/16/2016 03:20:19: Running on DPHAIM-25 at 2016/08/16 03:20:19
|
||||
08/16/2016 03:20:19: Command line:
|
||||
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/FeedForward.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
|
||||
|
||||
05/03/2016 13:22:25: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
|
||||
05/03/2016 13:22:25: RootDir = ".."
|
||||
08/16/2016 03:20:19: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 03:20:19: RootDir = ".."
|
||||
ConfigDir = "$RootDir$/Config"
|
||||
DataDir = "$RootDir$/Data"
|
||||
OutputDir = "$RootDir$/Output"
|
||||
|
@ -65,7 +79,7 @@ speechTrain = [
|
|||
SimpleNetworkBuilder = [
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
applyMeanVarNorm = true
|
||||
needPrior = true
|
||||
|
@ -111,35 +125,36 @@ speechTrain = [
|
|||
]
|
||||
]
|
||||
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
|
||||
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
|
||||
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
|
||||
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
|
||||
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
|
||||
DeviceId=0
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=2048]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
05/03/2016 13:22:25: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
|
||||
08/16/2016 03:20:19: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
05/03/2016 13:22:25: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
05/03/2016 13:22:25: RootDir = ".."
|
||||
08/16/2016 03:20:19: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 03:20:19: RootDir = ".."
|
||||
ConfigDir = "../Config"
|
||||
DataDir = "../Data"
|
||||
OutputDir = "../Output"
|
||||
ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models"
|
||||
ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models"
|
||||
deviceId = -1
|
||||
command = speechTrain
|
||||
precision = "float"
|
||||
traceLevel = "1"
|
||||
modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn"
|
||||
modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn"
|
||||
parallelTrain = true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
SimpleNetworkBuilder = [
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
applyMeanVarNorm = true
|
||||
needPrior = true
|
||||
|
@ -185,36 +200,37 @@ speechTrain = [
|
|||
]
|
||||
]
|
||||
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
|
||||
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
|
||||
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
|
||||
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
|
||||
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
|
||||
DeviceId=0
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=2048]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
05/03/2016 13:22:25: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
08/16/2016 03:20:19: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
05/03/2016 13:22:25: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 03:20:19: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
configparameters: FeedForward.cntk:command=speechTrain
|
||||
configparameters: FeedForward.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
|
||||
configparameters: FeedForward.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
configparameters: FeedForward.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
configparameters: FeedForward.cntk:deviceId=0
|
||||
configparameters: FeedForward.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models
|
||||
configparameters: FeedForward.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
|
||||
configparameters: FeedForward.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
|
||||
configparameters: FeedForward.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models
|
||||
configparameters: FeedForward.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
|
||||
configparameters: FeedForward.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
|
||||
configparameters: FeedForward.cntk:parallelTrain=true
|
||||
configparameters: FeedForward.cntk:precision=float
|
||||
configparameters: FeedForward.cntk:RootDir=..
|
||||
configparameters: FeedForward.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu
|
||||
configparameters: FeedForward.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu
|
||||
configparameters: FeedForward.cntk:speechTrain=[
|
||||
action = "train"
|
||||
SimpleNetworkBuilder = [
|
||||
layerSizes = 363:512:512:132
|
||||
trainingCriterion = "CrossEntropyWithSoftmax"
|
||||
evalCriterion = "ClassificationError"
|
||||
evalCriterion = "ErrorPrediction"
|
||||
layerTypes = "Sigmoid"
|
||||
applyMeanVarNorm = true
|
||||
needPrior = true
|
||||
|
@ -258,24 +274,24 @@ configparameters: FeedForward.cntk:speechTrain=[
|
|||
labelType = "category"
|
||||
]
|
||||
]
|
||||
] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]]
|
||||
] [SGD=[maxEpochs=1]] [SGD=[epochSize=2048]] [reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
configparameters: FeedForward.cntk:timestamping=true
|
||||
configparameters: FeedForward.cntk:traceLevel=1
|
||||
05/03/2016 13:22:25: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
05/03/2016 13:22:25: Commands: speechTrain
|
||||
05/03/2016 13:22:25: Precision = "float"
|
||||
05/03/2016 13:22:25: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
|
||||
05/03/2016 13:22:25: CNTKCommandTrainInfo: speechTrain : 1
|
||||
05/03/2016 13:22:25: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
|
||||
08/16/2016 03:20:19: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
08/16/2016 03:20:19: Commands: speechTrain
|
||||
08/16/2016 03:20:19: Precision = "float"
|
||||
08/16/2016 03:20:19: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn
|
||||
08/16/2016 03:20:19: CNTKCommandTrainInfo: speechTrain : 1
|
||||
08/16/2016 03:20:19: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
|
||||
|
||||
05/03/2016 13:22:25: ##############################################################################
|
||||
05/03/2016 13:22:25: # #
|
||||
05/03/2016 13:22:25: # Action "train" #
|
||||
05/03/2016 13:22:25: # #
|
||||
05/03/2016 13:22:25: ##############################################################################
|
||||
08/16/2016 03:20:19: ##############################################################################
|
||||
08/16/2016 03:20:19: # #
|
||||
08/16/2016 03:20:19: # Action "train" #
|
||||
08/16/2016 03:20:19: # #
|
||||
08/16/2016 03:20:19: ##############################################################################
|
||||
|
||||
05/03/2016 13:22:25: CNTKCommandTrainBegin: speechTrain
|
||||
08/16/2016 03:20:19: CNTKCommandTrainBegin: speechTrain
|
||||
SimpleNetworkBuilder Using GPU 0
|
||||
reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp ... 948 entries
|
||||
total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list
|
||||
|
@ -284,14 +300,26 @@ htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Example
|
|||
label set 0: 129 classes
|
||||
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
|
||||
|
||||
05/03/2016 13:22:25: Creating virgin network.
|
||||
08/16/2016 03:20:19: Creating virgin network.
|
||||
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- 0.000000.
|
||||
Node 'W0' (LearnableParameter operation): Initializing Parameter[512 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
|
||||
Microsoft::MSR::CNTK::GPUMatrix<ElemType>::SetUniformRandomValue (GPU): creating curand object with seed 1, sizeof(ElemType)==4
|
||||
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'B0' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- 0.000000.
|
||||
Node 'W1' (LearnableParameter operation): Initializing Parameter[512 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'B1' (LearnableParameter operation): Initializing Parameter[512 x 1] <- 0.000000.
|
||||
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- 0.000000.
|
||||
Node 'W2' (LearnableParameter operation): Initializing Parameter[132 x 512] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
Node 'B2' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
|
||||
Post-processing network...
|
||||
|
||||
7 roots:
|
||||
CrossEntropyWithSoftmax = CrossEntropyWithSoftmax()
|
||||
EvalClassificationError = ClassificationError()
|
||||
EvalErrorPrediction = ErrorPrediction()
|
||||
InvStdOfFeatures = InvStdDev()
|
||||
MeanOfFeatures = Mean()
|
||||
PosteriorProb = Softmax()
|
||||
|
@ -320,7 +348,7 @@ Validating --> W2*H1 = Times (W2, H2) : [132 x 512], [512 x 1 x *] -> [132 x 1 x
|
|||
Validating --> B2 = LearnableParameter() : -> [132 x 1]
|
||||
Validating --> HLast = Plus (W2*H1, B2) : [132 x 1 x *], [132 x 1] -> [132 x 1 x *]
|
||||
Validating --> CrossEntropyWithSoftmax = CrossEntropyWithSoftmax (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> EvalClassificationError = ClassificationError (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> EvalErrorPrediction = ErrorPrediction (labels, HLast) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> PosteriorProb = Softmax (HLast) : [132 x 1 x *] -> [132 x 1 x *]
|
||||
Validating --> Prior = Mean (labels) : [132 x *] -> [132]
|
||||
Validating --> LogOfPrior = Log (Prior) : [132] -> [132]
|
||||
|
@ -337,70 +365,70 @@ Validating network, final pass.
|
|||
|
||||
Post-processing network complete.
|
||||
|
||||
05/03/2016 13:22:26: Created model with 25 nodes on GPU 0.
|
||||
08/16/2016 03:20:20: Created model with 25 nodes on GPU 0.
|
||||
|
||||
05/03/2016 13:22:26: Training criterion node(s):
|
||||
05/03/2016 13:22:26: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
|
||||
08/16/2016 03:20:20: Training criterion node(s):
|
||||
08/16/2016 03:20:20: CrossEntropyWithSoftmax = CrossEntropyWithSoftmax
|
||||
|
||||
05/03/2016 13:22:26: Evaluation criterion node(s):
|
||||
|
||||
05/03/2016 13:22:26: EvalClassificationError = ClassificationError
|
||||
08/16/2016 03:20:20: Evaluation criterion node(s):
|
||||
08/16/2016 03:20:20: EvalErrorPrediction = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing Structure:
|
||||
Memory Sharing: Out of 40 matrices, 19 are shared as 8, and 21 are not shared.
|
||||
|
||||
0000000000000000: {[EvalClassificationError Gradient[1]] [InvStdOfFeatures Gradient[363]] [LogOfPrior Gradient[132]] [MVNormalizedFeatures Gradient[363 x *]] [MeanOfFeatures Gradient[363]] [PosteriorProb Gradient[132 x 1 x *]] [PosteriorProb Value[132 x 1 x *]] [Prior Gradient[132]] [ScaledLogLikelihood Gradient[132 x 1 x *]] [features Gradient[363 x *]] [labels Gradient[132 x *]] }
|
||||
00000087D360C610: {[features Value[363 x *]] }
|
||||
00000087EB4FEEF0: {[W0 Value[512 x 363]] }
|
||||
00000087EB4FF530: {[B1 Value[512 x 1]] }
|
||||
00000087EB4FF850: {[W1 Value[512 x 512]] }
|
||||
00000087EB4FFC10: {[W2 Value[132 x 512]] }
|
||||
00000087EB500070: {[B2 Value[132 x 1]] }
|
||||
00000087EB5001B0: {[MeanOfFeatures Value[363]] }
|
||||
00000087EB500250: {[InvStdOfFeatures Value[363]] }
|
||||
00000087EB5004D0: {[B0 Value[512 x 1]] }
|
||||
00000087EDA2B150: {[labels Value[132 x *]] }
|
||||
00000087EDA2B330: {[B1 Gradient[512 x 1]] [H2 Gradient[512 x 1 x *]] [HLast Gradient[132 x 1 x *]] }
|
||||
00000087EDA2B3D0: {[Prior Value[132]] }
|
||||
00000087EDA2B6F0: {[HLast Value[132 x 1 x *]] [W2 Gradient[132 x 512]] }
|
||||
00000087EDA2B8D0: {[W0 Gradient[512 x 363]] [W0*features+B0 Value[512 x 1 x *]] }
|
||||
00000087EDA2BB50: {[CrossEntropyWithSoftmax Value[1]] }
|
||||
00000087EDA2BC90: {[W0*features+B0 Gradient[512 x 1 x *]] [W1*H1 Value[512 x 1 x *]] }
|
||||
00000087EDA2C0F0: {[EvalClassificationError Value[1]] }
|
||||
00000087EDA2C190: {[W0*features Value[512 x *]] }
|
||||
00000087EDA2C2D0: {[H1 Value[512 x 1 x *]] [W0*features Gradient[512 x *]] }
|
||||
00000087EDA2C370: {[W2*H1 Gradient[132 x 1 x *]] }
|
||||
00000087EDA2C410: {[B2 Gradient[132 x 1]] }
|
||||
00000087EDA2C730: {[ScaledLogLikelihood Value[132 x 1 x *]] }
|
||||
00000087EDA2C7D0: {[LogOfPrior Value[132]] }
|
||||
00000087EDA2CAF0: {[MVNormalizedFeatures Value[363 x *]] }
|
||||
00000087EDA2CB90: {[H2 Value[512 x 1 x *]] [W1*H1 Gradient[512 x 1 x *]] }
|
||||
00000087EDA2CCD0: {[B0 Gradient[512 x 1]] [H1 Gradient[512 x 1 x *]] [W1*H1+B1 Gradient[512 x 1 x *]] [W2*H1 Value[132 x 1 x *]] }
|
||||
00000087EDA2CEB0: {[CrossEntropyWithSoftmax Gradient[1]] }
|
||||
00000087EDA2CFF0: {[W1 Gradient[512 x 512]] [W1*H1+B1 Value[512 x 1 x *]] }
|
||||
{ W0*features+B0 : [512 x 1 x *] (gradient)
|
||||
W1*H1 : [512 x 1 x *] }
|
||||
{ H2 : [512 x 1 x *]
|
||||
W1*H1 : [512 x 1 x *] (gradient) }
|
||||
{ HLast : [132 x 1 x *]
|
||||
W2 : [132 x 512] (gradient) }
|
||||
{ W0 : [512 x 363] (gradient)
|
||||
W0*features+B0 : [512 x 1 x *] }
|
||||
{ B0 : [512 x 1] (gradient)
|
||||
H1 : [512 x 1 x *] (gradient)
|
||||
W1*H1+B1 : [512 x 1 x *] (gradient)
|
||||
W2*H1 : [132 x 1 x *] }
|
||||
{ H1 : [512 x 1 x *]
|
||||
W0*features : [512 x *] (gradient) }
|
||||
{ W1 : [512 x 512] (gradient)
|
||||
W1*H1+B1 : [512 x 1 x *] }
|
||||
{ B1 : [512 x 1] (gradient)
|
||||
H2 : [512 x 1 x *] (gradient)
|
||||
HLast : [132 x 1 x *] (gradient) }
|
||||
|
||||
|
||||
05/03/2016 13:22:26: Precomputing --> 3 PreCompute nodes found.
|
||||
08/16/2016 03:20:20: Training 516740 parameters in 6 out of 6 parameter tensors and 15 nodes with gradient:
|
||||
|
||||
05/03/2016 13:22:26: MeanOfFeatures = Mean()
|
||||
05/03/2016 13:22:26: InvStdOfFeatures = InvStdDev()
|
||||
05/03/2016 13:22:26: Prior = Mean()
|
||||
08/16/2016 03:20:20: Node 'B0' (LearnableParameter operation) : [512 x 1]
|
||||
08/16/2016 03:20:20: Node 'B1' (LearnableParameter operation) : [512 x 1]
|
||||
08/16/2016 03:20:20: Node 'B2' (LearnableParameter operation) : [132 x 1]
|
||||
08/16/2016 03:20:20: Node 'W0' (LearnableParameter operation) : [512 x 363]
|
||||
08/16/2016 03:20:20: Node 'W1' (LearnableParameter operation) : [512 x 512]
|
||||
08/16/2016 03:20:20: Node 'W2' (LearnableParameter operation) : [132 x 512]
|
||||
|
||||
|
||||
08/16/2016 03:20:20: Precomputing --> 3 PreCompute nodes found.
|
||||
|
||||
08/16/2016 03:20:20: MeanOfFeatures = Mean()
|
||||
08/16/2016 03:20:20: InvStdOfFeatures = InvStdDev()
|
||||
08/16/2016 03:20:20: Prior = Mean()
|
||||
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
|
||||
|
||||
05/03/2016 13:22:27: Precomputing --> Completed.
|
||||
08/16/2016 03:20:21: Precomputing --> Completed.
|
||||
|
||||
|
||||
05/03/2016 13:22:27: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
|
||||
08/16/2016 03:20:21: Starting Epoch 1: learning rate per sample = 0.003906 effective momentum = 0.900000 momentum as time constant = 2429.8 samples
|
||||
minibatchiterator: epoch 0: frames [0..2048] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
|
||||
05/03/2016 13:22:27: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
|
||||
05/03/2016 13:22:27: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.42832291 * 2048; EvalClassificationError = 0.91357422 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.052947s
|
||||
05/03/2016 13:22:27: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160503132211.330996\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn'
|
||||
05/03/2016 13:22:27: CNTKCommandTrainEnd: speechTrain
|
||||
08/16/2016 03:20:21: Starting minibatch loop, DataParallelSGD training (MyRank = 0, NumNodes = 1, NumGradientBits = 1), distributed reading is ENABLED.
|
||||
08/16/2016 03:20:21: Finished Epoch[ 1 of 1]: [Training] CrossEntropyWithSoftmax = 4.41144794 * 2048; EvalErrorPrediction = 0.92773438 * 2048; totalSamplesSeen = 2048; learningRatePerSample = 0.00390625; epochTime=0.05551s
|
||||
08/16/2016 03:20:21: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_FeedForward@release_gpu/Models/cntkSpeechFF.dnn'
|
||||
08/16/2016 03:20:21: CNTKCommandTrainEnd: speechTrain
|
||||
|
||||
05/03/2016 13:22:27: Action "train" complete.
|
||||
08/16/2016 03:20:21: Action "train" complete.
|
||||
|
||||
05/03/2016 13:22:27: __COMPLETED__
|
||||
08/16/2016 03:20:21: __COMPLETED__
|
||||
~MPIWrapper
|
|
@ -5,5 +5,5 @@
|
|||
ConfigDir=$TEST_DIR/../../../../../../Examples/Speech/AN4/Config
|
||||
|
||||
# cntkrun <CNTK config file name> <additional CNTK args>
|
||||
cntkrun FeedForward.cntk "speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]]" || exit $?
|
||||
cntkrun FeedForward.cntk "speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=2048]] speechTrain=[reader=[useMersenneTwisterRand=true]]" || exit $?
|
||||
|
||||
|
|
|
@ -0,0 +1,682 @@
|
|||
CPU info:
|
||||
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
|
||||
Hardware threads: 24
|
||||
Total Memory: 264172964 kB
|
||||
-------------------------------------------------------------------
|
||||
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/LSTM-NDL.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
|
||||
-------------------------------------------------------------------
|
||||
Build info:
|
||||
|
||||
Built time: Aug 16 2016 09:41:57
|
||||
Last modified date: Mon Aug 15 23:39:17 2016
|
||||
Build type: release
|
||||
Build target: GPU
|
||||
With 1bit-SGD: yes
|
||||
Math lib: mkl
|
||||
CUDA_PATH: /usr/local/cuda-7.5
|
||||
CUB_PATH: /usr/local/cub-1.4.1
|
||||
CUDNN_PATH: /usr/local/cudnn-4.0
|
||||
Build Branch: HEAD
|
||||
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
Built by philly on 643085f7f8c2
|
||||
Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
|
||||
-------------------------------------------------------------------
|
||||
Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
08/16/2016 10:01:47: -------------------------------------------------------------------
|
||||
08/16/2016 10:01:47: Build info:
|
||||
|
||||
08/16/2016 10:01:47: Built time: Aug 16 2016 09:41:57
|
||||
08/16/2016 10:01:47: Last modified date: Mon Aug 15 23:39:17 2016
|
||||
08/16/2016 10:01:47: Build type: release
|
||||
08/16/2016 10:01:47: Build target: GPU
|
||||
08/16/2016 10:01:47: With 1bit-SGD: yes
|
||||
08/16/2016 10:01:47: Math lib: mkl
|
||||
08/16/2016 10:01:47: CUDA_PATH: /usr/local/cuda-7.5
|
||||
08/16/2016 10:01:47: CUB_PATH: /usr/local/cub-1.4.1
|
||||
08/16/2016 10:01:47: CUDNN_PATH: /usr/local/cudnn-4.0
|
||||
08/16/2016 10:01:47: Build Branch: HEAD
|
||||
08/16/2016 10:01:47: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
08/16/2016 10:01:47: Built by philly on 643085f7f8c2
|
||||
08/16/2016 10:01:47: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
|
||||
08/16/2016 10:01:47: -------------------------------------------------------------------
|
||||
08/16/2016 10:01:47: -------------------------------------------------------------------
|
||||
08/16/2016 10:01:47: GPU info:
|
||||
|
||||
08/16/2016 10:01:47: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:01:47: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:01:47: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:01:47: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:01:47: -------------------------------------------------------------------
|
||||
|
||||
08/16/2016 10:01:47: Running on localhost at 2016/08/16 10:01:47
|
||||
08/16/2016 10:01:47: Command line:
|
||||
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/LSTM-NDL.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
|
||||
|
||||
|
||||
|
||||
08/16/2016 10:01:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 10:01:47: RootDir = ".."
|
||||
ConfigDir = "$RootDir$/Config"
|
||||
DataDir = "$RootDir$/Data"
|
||||
OutputDir = "$RootDir$/Output"
|
||||
ModelDir = "$OutputDir$/Models"
|
||||
deviceId = -1
|
||||
command = speechTrain
|
||||
precision = "float"
|
||||
traceLevel = 1
|
||||
modelPath = "$ModelDir$/cntkSpeechLSTM.dnn"
|
||||
parallelTrain = true
|
||||
frameMode = false
|
||||
truncated = true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
nbrUttsIneachRecurrentIter = 16
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "$ConfigDir$/lstmp-3layer-opt.ndl"
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 0
|
||||
minibatchSize = 16
|
||||
learningRatesPerMB = 0.5
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0:0.9
|
||||
maxEpochs = 4
|
||||
keepCheckPointFiles = true
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "$DataDir$/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "$DataDir$/glob_0000.mlf"
|
||||
labelMappingFile = "$DataDir$/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
||||
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
|
||||
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
|
||||
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
|
||||
DeviceId=-1
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=64]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
parallelTrain=false
|
||||
|
||||
08/16/2016 10:01:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
08/16/2016 10:01:47: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 10:01:47: RootDir = ".."
|
||||
ConfigDir = "../Config"
|
||||
DataDir = "../Data"
|
||||
OutputDir = "../Output"
|
||||
ModelDir = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models"
|
||||
deviceId = -1
|
||||
command = speechTrain
|
||||
precision = "float"
|
||||
traceLevel = 1
|
||||
modelPath = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn"
|
||||
parallelTrain = true
|
||||
frameMode = false
|
||||
truncated = true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
nbrUttsIneachRecurrentIter = 16
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/lstmp-3layer-opt.ndl"
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 0
|
||||
minibatchSize = 16
|
||||
learningRatesPerMB = 0.5
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0:0.9
|
||||
maxEpochs = 4
|
||||
keepCheckPointFiles = true
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
|
||||
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
||||
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
|
||||
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
|
||||
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
|
||||
DeviceId=-1
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=64]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
parallelTrain=false
|
||||
|
||||
08/16/2016 10:01:47: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
08/16/2016 10:01:47: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
configparameters: LSTM-NDL.cntk:command=speechTrain
|
||||
configparameters: LSTM-NDL.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
|
||||
configparameters: LSTM-NDL.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
configparameters: LSTM-NDL.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
configparameters: LSTM-NDL.cntk:deviceId=-1
|
||||
configparameters: LSTM-NDL.cntk:frameMode=false
|
||||
configparameters: LSTM-NDL.cntk:ModelDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models
|
||||
configparameters: LSTM-NDL.cntk:modelPath=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn
|
||||
configparameters: LSTM-NDL.cntk:OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
|
||||
configparameters: LSTM-NDL.cntk:parallelTrain=false
|
||||
configparameters: LSTM-NDL.cntk:precision=float
|
||||
configparameters: LSTM-NDL.cntk:RootDir=..
|
||||
configparameters: LSTM-NDL.cntk:RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu
|
||||
configparameters: LSTM-NDL.cntk:speechTrain=[
|
||||
action = "train"
|
||||
nbrUttsIneachRecurrentIter = 16
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/lstmp-3layer-opt.ndl"
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 0
|
||||
minibatchSize = 16
|
||||
learningRatesPerMB = 0.5
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0:0.9
|
||||
maxEpochs = 4
|
||||
keepCheckPointFiles = true
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
|
||||
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
] [SGD=[maxEpochs=1]] [SGD=[epochSize=64]] [reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
configparameters: LSTM-NDL.cntk:timestamping=true
|
||||
configparameters: LSTM-NDL.cntk:traceLevel=1
|
||||
configparameters: LSTM-NDL.cntk:truncated=true
|
||||
08/16/2016 10:01:47: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
08/16/2016 10:01:47: Commands: speechTrain
|
||||
08/16/2016 10:01:47: Precision = "float"
|
||||
08/16/2016 10:01:47: CNTKModelPath: /tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn
|
||||
08/16/2016 10:01:47: CNTKCommandTrainInfo: speechTrain : 1
|
||||
08/16/2016 10:01:47: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
|
||||
|
||||
08/16/2016 10:01:47: ##############################################################################
|
||||
08/16/2016 10:01:47: # #
|
||||
08/16/2016 10:01:47: # Action "train" #
|
||||
08/16/2016 10:01:47: # #
|
||||
08/16/2016 10:01:47: ##############################################################################
|
||||
|
||||
08/16/2016 10:01:47: CNTKCommandTrainBegin: speechTrain
|
||||
NDLBuilder Using CPU
|
||||
reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp ... 948 entries
|
||||
total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list
|
||||
htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf ... total 948 entries
|
||||
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
|
||||
label set 0: 129 classes
|
||||
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
|
||||
useParallelTrain option is not enabled. ParallelTrain config will be ignored.
|
||||
08/16/2016 10:01:48: Creating virgin network.
|
||||
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
|
||||
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=4, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=5, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=6, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=9, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=10, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=11, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=12, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=15, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=16, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=17, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=18, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'W' (LearnableParameter operation): Initializating Parameter[132 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
|
||||
Post-processing network...
|
||||
|
||||
6 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ErrorPrediction()
|
||||
featNorm.xMean = Mean()
|
||||
featNorm.xStdDev = InvStdDev()
|
||||
logPrior.prior = Mean()
|
||||
scaledLogLikelihood = Minus()
|
||||
|
||||
Loop[0] --> Loop_LSTMoutput1.output -> 24 nodes
|
||||
|
||||
LSTMoutput1.dh LSTMoutput1.whh LSTMoutput1.wxxpbpwhh
|
||||
LSTMoutput1.G4 LSTMoutput1.G3 LSTMoutput1.dc
|
||||
LSTMoutput1.Wcfdc LSTMoutput1.unnamed165 LSTMoutput1.ft
|
||||
LSTMoutput1.bft LSTMoutput1.G1 LSTMoutput1.Wcidc
|
||||
LSTMoutput1.unnamed163 LSTMoutput1.it LSTMoutput1.G2
|
||||
LSTMoutput1.unnamed164 LSTMoutput1.bit LSTMoutput1.ct
|
||||
LSTMoutput1.Wcoct LSTMoutput1.unnamed166 LSTMoutput1.ot
|
||||
LSTMoutput1.unnamed167 LSTMoutput1.mt LSTMoutput1.output
|
||||
|
||||
Loop[1] --> Loop_LSTMoutput2.output -> 24 nodes
|
||||
|
||||
LSTMoutput2.dh LSTMoutput2.whh LSTMoutput2.wxxpbpwhh
|
||||
LSTMoutput2.G4 LSTMoutput2.G3 LSTMoutput2.dc
|
||||
LSTMoutput2.Wcfdc LSTMoutput2.unnamed175 LSTMoutput2.ft
|
||||
LSTMoutput2.bft LSTMoutput2.G1 LSTMoutput2.Wcidc
|
||||
LSTMoutput2.unnamed173 LSTMoutput2.it LSTMoutput2.G2
|
||||
LSTMoutput2.unnamed174 LSTMoutput2.bit LSTMoutput2.ct
|
||||
LSTMoutput2.Wcoct LSTMoutput2.unnamed176 LSTMoutput2.ot
|
||||
LSTMoutput2.unnamed177 LSTMoutput2.mt LSTMoutput2.output
|
||||
|
||||
Loop[2] --> Loop_LSTMoutput3.output -> 24 nodes
|
||||
|
||||
LSTMoutput3.dh LSTMoutput3.whh LSTMoutput3.wxxpbpwhh
|
||||
LSTMoutput3.G4 LSTMoutput3.G3 LSTMoutput3.dc
|
||||
LSTMoutput3.Wcfdc LSTMoutput3.unnamed185 LSTMoutput3.ft
|
||||
LSTMoutput3.bft LSTMoutput3.G1 LSTMoutput3.Wcidc
|
||||
LSTMoutput3.unnamed183 LSTMoutput3.it LSTMoutput3.G2
|
||||
LSTMoutput3.unnamed184 LSTMoutput3.bit LSTMoutput3.ct
|
||||
LSTMoutput3.Wcoct LSTMoutput3.unnamed186 LSTMoutput3.ot
|
||||
LSTMoutput3.unnamed187 LSTMoutput3.mt LSTMoutput3.output
|
||||
|
||||
Validating network. 113 nodes to process in pass 1.
|
||||
|
||||
Validating --> labels = InputValue() : -> [132 x *]
|
||||
Validating --> W = LearnableParameter() : -> [132 x 0]
|
||||
Validating --> LSTMoutput3.Wmr = LearnableParameter() : -> [512 x 1024]
|
||||
Validating --> LSTMoutput3.wx = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput2.Wmr = LearnableParameter() : -> [512 x 1024]
|
||||
Validating --> LSTMoutput2.wx = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput1.Wmr = LearnableParameter() : -> [512 x 1024]
|
||||
Validating --> LSTMoutput1.wx = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> features = InputValue() : -> [363 x *]
|
||||
Validating --> featNorm.xMean = Mean (features) : [363 x *] -> [363]
|
||||
Validating --> featNorm.xStdDev = InvStdDev (features) : [363 x *] -> [363]
|
||||
Validating --> featNorm.xNorm = PerDimMeanVarNormalization (features, featNorm.xMean, featNorm.xStdDev) : [363 x *], [363], [363] -> [363 x *]
|
||||
Node 'LSTMoutput1.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 363].
|
||||
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput1.wxx = Times (LSTMoutput1.wx, featNorm.xNorm) : [4096 x 363], [363 x *] -> [4096 x *]
|
||||
Validating --> LSTMoutput1.b = LearnableParameter() : -> [4096 x 1]
|
||||
Validating --> LSTMoutput1.wxxpb = Plus (LSTMoutput1.wxx, LSTMoutput1.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wh = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput1.Wco = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput1.Wcf = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput1.Wci = LearnableParameter() : -> [1024]
|
||||
Node 'LSTMoutput1.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
|
||||
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512] -> [4096]
|
||||
Validating --> LSTMoutput1.wxxpbpwhh = Plus (LSTMoutput1.wxxpb, LSTMoutput1.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput1.G4 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.G3 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput1.unnamed165 = Plus (LSTMoutput1.G3, LSTMoutput1.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.ft = Sigmoid (LSTMoutput1.unnamed165) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.bft = ElementTimes (LSTMoutput1.ft, LSTMoutput1.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.G1 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput1.unnamed163 = Plus (LSTMoutput1.G1, LSTMoutput1.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.it = Sigmoid (LSTMoutput1.unnamed163) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.G2 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.unnamed164 = Tanh (LSTMoutput1.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.bit = ElementTimes (LSTMoutput1.it, LSTMoutput1.unnamed164) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.ct = Plus (LSTMoutput1.bft, LSTMoutput1.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcoct = DiagTimes (LSTMoutput1.Wco, LSTMoutput1.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.unnamed166 = Plus (LSTMoutput1.G4, LSTMoutput1.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.ot = Sigmoid (LSTMoutput1.unnamed166) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.unnamed167 = Tanh (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.mt = ElementTimes (LSTMoutput1.ot, LSTMoutput1.unnamed167) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.output = Times (LSTMoutput1.Wmr, LSTMoutput1.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
|
||||
Node 'LSTMoutput2.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512 x 1].
|
||||
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=7, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput2.wxx = Times (LSTMoutput2.wx, LSTMoutput1.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
|
||||
Validating --> LSTMoutput2.b = LearnableParameter() : -> [4096 x 1]
|
||||
Validating --> LSTMoutput2.wxxpb = Plus (LSTMoutput2.wxx, LSTMoutput2.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wh = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput2.Wco = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput2.Wcf = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput2.Wci = LearnableParameter() : -> [1024]
|
||||
Node 'LSTMoutput2.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
|
||||
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=8, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512] -> [4096]
|
||||
Validating --> LSTMoutput2.wxxpbpwhh = Plus (LSTMoutput2.wxxpb, LSTMoutput2.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput2.G4 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.G3 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput2.unnamed175 = Plus (LSTMoutput2.G3, LSTMoutput2.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.ft = Sigmoid (LSTMoutput2.unnamed175) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.bft = ElementTimes (LSTMoutput2.ft, LSTMoutput2.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.G1 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput2.unnamed173 = Plus (LSTMoutput2.G1, LSTMoutput2.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.it = Sigmoid (LSTMoutput2.unnamed173) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.G2 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.unnamed174 = Tanh (LSTMoutput2.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.bit = ElementTimes (LSTMoutput2.it, LSTMoutput2.unnamed174) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.ct = Plus (LSTMoutput2.bft, LSTMoutput2.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcoct = DiagTimes (LSTMoutput2.Wco, LSTMoutput2.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.unnamed176 = Plus (LSTMoutput2.G4, LSTMoutput2.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.ot = Sigmoid (LSTMoutput2.unnamed176) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.unnamed177 = Tanh (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.mt = ElementTimes (LSTMoutput2.ot, LSTMoutput2.unnamed177) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.output = Times (LSTMoutput2.Wmr, LSTMoutput2.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
|
||||
Node 'LSTMoutput3.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512 x 1].
|
||||
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=13, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput3.wxx = Times (LSTMoutput3.wx, LSTMoutput2.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
|
||||
Validating --> LSTMoutput3.b = LearnableParameter() : -> [4096 x 1]
|
||||
Validating --> LSTMoutput3.wxxpb = Plus (LSTMoutput3.wxx, LSTMoutput3.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wh = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput3.Wco = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput3.Wcf = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput3.Wci = LearnableParameter() : -> [1024]
|
||||
Node 'LSTMoutput3.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
|
||||
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=14, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512] -> [4096]
|
||||
Validating --> LSTMoutput3.wxxpbpwhh = Plus (LSTMoutput3.wxxpb, LSTMoutput3.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput3.G4 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.G3 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput3.unnamed185 = Plus (LSTMoutput3.G3, LSTMoutput3.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.ft = Sigmoid (LSTMoutput3.unnamed185) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.bft = ElementTimes (LSTMoutput3.ft, LSTMoutput3.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.G1 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput3.unnamed183 = Plus (LSTMoutput3.G1, LSTMoutput3.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.it = Sigmoid (LSTMoutput3.unnamed183) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.G2 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.unnamed184 = Tanh (LSTMoutput3.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.bit = ElementTimes (LSTMoutput3.it, LSTMoutput3.unnamed184) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.ct = Plus (LSTMoutput3.bft, LSTMoutput3.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcoct = DiagTimes (LSTMoutput3.Wco, LSTMoutput3.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.unnamed186 = Plus (LSTMoutput3.G4, LSTMoutput3.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.ot = Sigmoid (LSTMoutput3.unnamed186) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.unnamed187 = Tanh (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.mt = ElementTimes (LSTMoutput3.ot, LSTMoutput3.unnamed187) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.output = Times (LSTMoutput3.Wmr, LSTMoutput3.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
|
||||
Node 'W' (LearnableParameter operation) operation: Tensor shape was inferred as [132 x 512 x 1].
|
||||
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 512 x 1] <- uniform(seed=19, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> unnamed193 = Times (W, LSTMoutput3.output) : [132 x 512 x 1], [512 x 1 x *] -> [132 x *]
|
||||
Validating --> b = LearnableParameter() : -> [132 x 1]
|
||||
Validating --> LSTMoutputW = Plus (unnamed193, b) : [132 x *], [132 x 1] -> [132 x 1 x *]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> logPrior.prior = Mean (labels) : [132 x *] -> [132]
|
||||
Validating --> logPrior.logPrior = Log (logPrior.prior) : [132] -> [132]
|
||||
Validating --> scaledLogLikelihood = Minus (LSTMoutputW, logPrior.logPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
|
||||
|
||||
Validating network. 88 nodes to process in pass 2.
|
||||
|
||||
Validating --> LSTMoutput1.dh = PastValue (LSTMoutput1.output) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput1.dc = PastValue (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.dh = PastValue (LSTMoutput2.output) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput2.dc = PastValue (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.dh = PastValue (LSTMoutput3.output) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput3.dc = PastValue (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
|
||||
Validating network. 15 nodes to process in pass 3.
|
||||
|
||||
|
||||
Validating network, final pass.
|
||||
|
||||
|
||||
|
||||
29 out of 113 nodes do not share the minibatch layout with the input data.
|
||||
|
||||
Post-processing network complete.
|
||||
|
||||
08/16/2016 10:01:48: Created model with 113 nodes on CPU.
|
||||
|
||||
08/16/2016 10:01:48: Training criterion node(s):
|
||||
08/16/2016 10:01:48: ce = CrossEntropyWithSoftmax
|
||||
|
||||
08/16/2016 10:01:48: Evaluation criterion node(s):
|
||||
08/16/2016 10:01:48: err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing: Out of 217 matrices, 125 are shared as 56, and 92 are not shared.
|
||||
|
||||
{ LSTMoutput1.dh : [512 x 1 x *]
|
||||
LSTMoutput1.wxx : [4096 x *] (gradient) }
|
||||
{ LSTMoutput2.Wco : [1024] (gradient)
|
||||
LSTMoutput3.dc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.Wmr : [512 x 1024] (gradient)
|
||||
LSTMoutput2.wxx : [4096 x *] }
|
||||
{ LSTMoutput2.wx : [4096 x 512 x 1] (gradient)
|
||||
LSTMoutput2.wxxpb : [4096 x 1 x *] }
|
||||
{ LSTMoutput1.ot : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.whh : [4096 x 1 x *] }
|
||||
{ LSTMoutput1.ct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] }
|
||||
{ LSTMoutput1.G4 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.G4 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.unnamed164 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcfdc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.wxxpbpwhh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput2.unnamed175 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.G1 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.ft : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.Wci : [1024] (gradient)
|
||||
LSTMoutput2.G1 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.G3 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcidc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.Wcf : [1024] (gradient)
|
||||
LSTMoutput2.it : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.whh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput2.G2 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.b : [4096 x 1] (gradient)
|
||||
LSTMoutput1.dh : [512 x 1 x *] (gradient)
|
||||
LSTMoutput2.unnamed174 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.Wmr : [512 x 1024] (gradient)
|
||||
LSTMoutput3.wxx : [4096 x *] }
|
||||
{ LSTMoutput3.wx : [4096 x 512 x 1] (gradient)
|
||||
LSTMoutput3.wxxpb : [4096 x 1 x *] }
|
||||
{ LSTMoutput2.ot : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.whh : [4096 x 1 x *] }
|
||||
{ LSTMoutput2.ct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] }
|
||||
{ LSTMoutput1.Wcoct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.G4 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.G4 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.unnamed174 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcfdc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.unnamed166 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed185 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.dc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.G1 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.ft : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.unnamed165 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.bft : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.Wci : [1024] (gradient)
|
||||
LSTMoutput3.G1 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.G3 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcidc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.it : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed183 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.Wcf : [1024] (gradient)
|
||||
LSTMoutput3.it : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.unnamed167 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.whh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput3.G2 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.b : [4096 x 1] (gradient)
|
||||
LSTMoutput2.dh : [512 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed184 : [1024 x 1 x *] }
|
||||
{ LSTMoutput3.Wmr : [512 x 1024] (gradient)
|
||||
unnamed193 : [132 x *] }
|
||||
{ LSTMoutputW : [132 x 1 x *]
|
||||
W : [132 x 512 x 1] (gradient) }
|
||||
{ LSTMoutput1.mt : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.dh : [512 x 1 x *]
|
||||
LSTMoutput2.wxx : [4096 x *] (gradient) }
|
||||
{ LSTMoutput1.wx : [4096 x 363] (gradient)
|
||||
LSTMoutput1.wxxpb : [4096 x 1 x *] }
|
||||
{ LSTMoutput2.mt : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.dh : [512 x 1 x *]
|
||||
LSTMoutput3.wxx : [4096 x *] (gradient) }
|
||||
{ LSTMoutput3.output : [512 x 1 x *] (gradient)
|
||||
LSTMoutputW : [132 x 1 x *] (gradient) }
|
||||
{ LSTMoutput3.mt : [1024 x 1 x *] (gradient)
|
||||
unnamed193 : [132 x *] (gradient) }
|
||||
{ LSTMoutput2.Wcoct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.G4 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.ft : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.bft : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.output : [512 x 1 x *] (gradient)
|
||||
LSTMoutput2.wxxpb : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput3.it : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.Wh : [4096 x 512] (gradient)
|
||||
LSTMoutput3.G2 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.unnamed176 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.bit : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed183 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.bft : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.dc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.G1 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.G2 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcfdc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcidc : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.unnamed163 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.unnamed175 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.Wcidc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.ft : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.bft : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.dc : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.Wcfdc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcidc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.ft : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.unnamed173 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed185 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.Wh : [4096 x 512] (gradient)
|
||||
LSTMoutput2.G2 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcfdc : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.wxxpb : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput2.it : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.output : [512 x 1 x *] (gradient)
|
||||
LSTMoutput3.wxxpb : [4096 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.unnamed177 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.whh : [4096 x 1 x *] (gradient) }
|
||||
{ LSTMoutput3.b : [4096 x 1] (gradient)
|
||||
LSTMoutput3.dh : [512 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.Wco : [1024] (gradient)
|
||||
LSTMoutput2.dc : [1024 x 1 x *] }
|
||||
|
||||
|
||||
08/16/2016 10:01:48: Training 13634692 parameters in 23 out of 23 parameter tensors and 104 nodes with gradient:
|
||||
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput1.Wcf' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput1.Wci' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput1.Wco' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput1.Wh' (LearnableParameter operation) : [4096 x 512]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput1.Wmr' (LearnableParameter operation) : [512 x 1024]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput1.b' (LearnableParameter operation) : [4096 x 1]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput1.wx' (LearnableParameter operation) : [4096 x 363]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput2.Wcf' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput2.Wci' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput2.Wco' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput2.Wh' (LearnableParameter operation) : [4096 x 512]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput2.Wmr' (LearnableParameter operation) : [512 x 1024]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput2.b' (LearnableParameter operation) : [4096 x 1]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput2.wx' (LearnableParameter operation) : [4096 x 512 x 1]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput3.Wcf' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput3.Wci' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput3.Wco' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput3.Wh' (LearnableParameter operation) : [4096 x 512]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput3.Wmr' (LearnableParameter operation) : [512 x 1024]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput3.b' (LearnableParameter operation) : [4096 x 1]
|
||||
08/16/2016 10:01:48: Node 'LSTMoutput3.wx' (LearnableParameter operation) : [4096 x 512 x 1]
|
||||
08/16/2016 10:01:48: Node 'W' (LearnableParameter operation) : [132 x 512 x 1]
|
||||
08/16/2016 10:01:48: Node 'b' (LearnableParameter operation) : [132 x 1]
|
||||
|
||||
|
||||
08/16/2016 10:01:48: Precomputing --> 3 PreCompute nodes found.
|
||||
|
||||
08/16/2016 10:01:48: featNorm.xMean = Mean()
|
||||
08/16/2016 10:01:48: featNorm.xStdDev = InvStdDev()
|
||||
08/16/2016 10:01:48: logPrior.prior = Mean()
|
||||
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
|
||||
|
||||
08/16/2016 10:01:49: Precomputing --> Completed.
|
||||
|
||||
|
||||
08/16/2016 10:01:50: Starting Epoch 1: learning rate per sample = 0.001953 effective momentum = 0.000000 momentum as time constant = 0.0 samples
|
||||
minibatchiterator: epoch 0: frames [0..64] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
|
||||
08/16/2016 10:01:50: Starting minibatch loop.
|
||||
08/16/2016 10:01:53: Epoch[ 1 of 1]-Minibatch[ 1- 10, 250.00%]: ce = 4.87313957 * 160; err = 0.90625000 * 160; time = 3.3910s; samplesPerSecond = 47.2
|
||||
08/16/2016 10:01:56: Epoch[ 1 of 1]-Minibatch[ 11- 20, 500.00%]: ce = 4.84521751 * 160; err = 0.69375000 * 160; time = 2.9626s; samplesPerSecond = 54.0
|
||||
08/16/2016 10:01:58: Finished Epoch[ 1 of 1]: [Training] ce = 4.85644356 * 418; err = 0.80382775 * 418; totalSamplesSeen = 418; learningRatePerSample = 0.001953125; epochTime=8.39953s
|
||||
08/16/2016 10:01:59: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn'
|
||||
08/16/2016 10:01:59: CNTKCommandTrainEnd: speechTrain
|
||||
|
||||
08/16/2016 10:01:59: Action "train" complete.
|
||||
|
||||
08/16/2016 10:01:59: __COMPLETED__
|
|
@ -1 +0,0 @@
|
|||
__COMPLETED__
|
|
@ -1 +0,0 @@
|
|||
__COMPLETED__
|
|
@ -0,0 +1,683 @@
|
|||
CPU info:
|
||||
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
|
||||
Hardware threads: 24
|
||||
Total Memory: 264172964 kB
|
||||
-------------------------------------------------------------------
|
||||
=== Running /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/LSTM-NDL.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
|
||||
-------------------------------------------------------------------
|
||||
Build info:
|
||||
|
||||
Built time: Aug 16 2016 09:41:57
|
||||
Last modified date: Mon Aug 15 23:39:17 2016
|
||||
Build type: release
|
||||
Build target: GPU
|
||||
With 1bit-SGD: yes
|
||||
Math lib: mkl
|
||||
CUDA_PATH: /usr/local/cuda-7.5
|
||||
CUB_PATH: /usr/local/cub-1.4.1
|
||||
CUDNN_PATH: /usr/local/cudnn-4.0
|
||||
Build Branch: HEAD
|
||||
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
Built by philly on 643085f7f8c2
|
||||
Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
|
||||
-------------------------------------------------------------------
|
||||
Changed current directory to /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
08/16/2016 10:02:00: -------------------------------------------------------------------
|
||||
08/16/2016 10:02:00: Build info:
|
||||
|
||||
08/16/2016 10:02:00: Built time: Aug 16 2016 09:41:57
|
||||
08/16/2016 10:02:00: Last modified date: Mon Aug 15 23:39:17 2016
|
||||
08/16/2016 10:02:00: Build type: release
|
||||
08/16/2016 10:02:00: Build target: GPU
|
||||
08/16/2016 10:02:00: With 1bit-SGD: yes
|
||||
08/16/2016 10:02:00: Math lib: mkl
|
||||
08/16/2016 10:02:00: CUDA_PATH: /usr/local/cuda-7.5
|
||||
08/16/2016 10:02:00: CUB_PATH: /usr/local/cub-1.4.1
|
||||
08/16/2016 10:02:00: CUDNN_PATH: /usr/local/cudnn-4.0
|
||||
08/16/2016 10:02:00: Build Branch: HEAD
|
||||
08/16/2016 10:02:00: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
08/16/2016 10:02:00: Built by philly on 643085f7f8c2
|
||||
08/16/2016 10:02:00: Build Path: /home/philly/jenkins/workspace/CNTK-Build-Linux
|
||||
08/16/2016 10:02:00: -------------------------------------------------------------------
|
||||
08/16/2016 10:02:01: -------------------------------------------------------------------
|
||||
08/16/2016 10:02:01: GPU info:
|
||||
|
||||
08/16/2016 10:02:01: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:02:01: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:02:01: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:02:01: Device[3]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3071 MB
|
||||
08/16/2016 10:02:01: -------------------------------------------------------------------
|
||||
|
||||
08/16/2016 10:02:01: Running on localhost at 2016/08/16 10:02:01
|
||||
08/16/2016 10:02:01: Command line:
|
||||
/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/build/1bitsgd/release/bin/cntk configFile=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/LSTM-NDL.cntk currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
|
||||
|
||||
|
||||
|
||||
08/16/2016 10:02:01: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 10:02:01: RootDir = ".."
|
||||
ConfigDir = "$RootDir$/Config"
|
||||
DataDir = "$RootDir$/Data"
|
||||
OutputDir = "$RootDir$/Output"
|
||||
ModelDir = "$OutputDir$/Models"
|
||||
deviceId = -1
|
||||
command = speechTrain
|
||||
precision = "float"
|
||||
traceLevel = 1
|
||||
modelPath = "$ModelDir$/cntkSpeechLSTM.dnn"
|
||||
parallelTrain = true
|
||||
frameMode = false
|
||||
truncated = true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
nbrUttsIneachRecurrentIter = 16
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "$ConfigDir$/lstmp-3layer-opt.ndl"
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 0
|
||||
minibatchSize = 16
|
||||
learningRatesPerMB = 0.5
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0:0.9
|
||||
maxEpochs = 4
|
||||
keepCheckPointFiles = true
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "$DataDir$/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "$DataDir$/glob_0000.mlf"
|
||||
labelMappingFile = "$DataDir$/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
||||
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
|
||||
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
|
||||
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
|
||||
DeviceId=0
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=64]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
parallelTrain=false
|
||||
|
||||
08/16/2016 10:02:01: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
08/16/2016 10:02:01: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 10:02:01: RootDir = ".."
|
||||
ConfigDir = "../Config"
|
||||
DataDir = "../Data"
|
||||
OutputDir = "../Output"
|
||||
ModelDir = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models"
|
||||
deviceId = -1
|
||||
command = speechTrain
|
||||
precision = "float"
|
||||
traceLevel = 1
|
||||
modelPath = "/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn"
|
||||
parallelTrain = true
|
||||
frameMode = false
|
||||
truncated = true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
nbrUttsIneachRecurrentIter = 16
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/lstmp-3layer-opt.ndl"
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 0
|
||||
minibatchSize = 16
|
||||
learningRatesPerMB = 0.5
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0:0.9
|
||||
maxEpochs = 4
|
||||
keepCheckPointFiles = true
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
|
||||
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
||||
currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
|
||||
DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
|
||||
OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
|
||||
DeviceId=0
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=64]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
parallelTrain=false
|
||||
|
||||
08/16/2016 10:02:01: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
08/16/2016 10:02:01: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
configparameters: LSTM-NDL.cntk:command=speechTrain
|
||||
configparameters: LSTM-NDL.cntk:ConfigDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config
|
||||
configparameters: LSTM-NDL.cntk:currentDirectory=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
configparameters: LSTM-NDL.cntk:DataDir=/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data
|
||||
configparameters: LSTM-NDL.cntk:deviceId=0
|
||||
configparameters: LSTM-NDL.cntk:frameMode=false
|
||||
configparameters: LSTM-NDL.cntk:ModelDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models
|
||||
configparameters: LSTM-NDL.cntk:modelPath=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn
|
||||
configparameters: LSTM-NDL.cntk:OutputDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
|
||||
configparameters: LSTM-NDL.cntk:parallelTrain=false
|
||||
configparameters: LSTM-NDL.cntk:precision=float
|
||||
configparameters: LSTM-NDL.cntk:RootDir=..
|
||||
configparameters: LSTM-NDL.cntk:RunDir=/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu
|
||||
configparameters: LSTM-NDL.cntk:speechTrain=[
|
||||
action = "train"
|
||||
nbrUttsIneachRecurrentIter = 16
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Tests/EndToEndTests/Examples/Speech/AN4/LSTM/../../../../../../Examples/Speech/AN4/Config/lstmp-3layer-opt.ndl"
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 0
|
||||
minibatchSize = 16
|
||||
learningRatesPerMB = 0.5
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0:0.9
|
||||
maxEpochs = 4
|
||||
keepCheckPointFiles = true
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf"
|
||||
labelMappingFile = "/home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
] [SGD=[maxEpochs=1]] [SGD=[epochSize=64]] [reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
configparameters: LSTM-NDL.cntk:timestamping=true
|
||||
configparameters: LSTM-NDL.cntk:traceLevel=1
|
||||
configparameters: LSTM-NDL.cntk:truncated=true
|
||||
08/16/2016 10:02:01: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
08/16/2016 10:02:01: Commands: speechTrain
|
||||
08/16/2016 10:02:01: Precision = "float"
|
||||
08/16/2016 10:02:01: CNTKModelPath: /tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn
|
||||
08/16/2016 10:02:01: CNTKCommandTrainInfo: speechTrain : 1
|
||||
08/16/2016 10:02:01: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
|
||||
|
||||
08/16/2016 10:02:01: ##############################################################################
|
||||
08/16/2016 10:02:01: # #
|
||||
08/16/2016 10:02:01: # Action "train" #
|
||||
08/16/2016 10:02:01: # #
|
||||
08/16/2016 10:02:01: ##############################################################################
|
||||
|
||||
08/16/2016 10:02:01: CNTKCommandTrainBegin: speechTrain
|
||||
NDLBuilder Using GPU 0
|
||||
reading script file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.scp ... 948 entries
|
||||
total 132 state names in state list /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/state.list
|
||||
htkmlfreader: reading MLF file /home/philly/jenkins/workspace/CNTK-Test-Linux-W1/Examples/Speech/AN4/Data/glob_0000.mlf ... total 948 entries
|
||||
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
|
||||
label set 0: 129 classes
|
||||
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
|
||||
useParallelTrain option is not enabled. ParallelTrain config will be ignored.
|
||||
08/16/2016 10:02:01: Creating virgin network.
|
||||
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
|
||||
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
|
||||
SetUniformRandomValue (GPU): creating curand object with seed 3, sizeof(ElemType)==4
|
||||
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=4, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=5, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=6, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=9, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=10, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=11, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=12, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=15, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=16, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=17, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=18, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'W' (LearnableParameter operation): Initializating Parameter[132 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
|
||||
Post-processing network...
|
||||
|
||||
6 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ErrorPrediction()
|
||||
featNorm.xMean = Mean()
|
||||
featNorm.xStdDev = InvStdDev()
|
||||
logPrior.prior = Mean()
|
||||
scaledLogLikelihood = Minus()
|
||||
|
||||
Loop[0] --> Loop_LSTMoutput1.output -> 24 nodes
|
||||
|
||||
LSTMoutput1.dh LSTMoutput1.whh LSTMoutput1.wxxpbpwhh
|
||||
LSTMoutput1.G4 LSTMoutput1.G3 LSTMoutput1.dc
|
||||
LSTMoutput1.Wcfdc LSTMoutput1.unnamed165 LSTMoutput1.ft
|
||||
LSTMoutput1.bft LSTMoutput1.G1 LSTMoutput1.Wcidc
|
||||
LSTMoutput1.unnamed163 LSTMoutput1.it LSTMoutput1.G2
|
||||
LSTMoutput1.unnamed164 LSTMoutput1.bit LSTMoutput1.ct
|
||||
LSTMoutput1.Wcoct LSTMoutput1.unnamed166 LSTMoutput1.ot
|
||||
LSTMoutput1.unnamed167 LSTMoutput1.mt LSTMoutput1.output
|
||||
|
||||
Loop[1] --> Loop_LSTMoutput2.output -> 24 nodes
|
||||
|
||||
LSTMoutput2.dh LSTMoutput2.whh LSTMoutput2.wxxpbpwhh
|
||||
LSTMoutput2.G4 LSTMoutput2.G3 LSTMoutput2.dc
|
||||
LSTMoutput2.Wcfdc LSTMoutput2.unnamed175 LSTMoutput2.ft
|
||||
LSTMoutput2.bft LSTMoutput2.G1 LSTMoutput2.Wcidc
|
||||
LSTMoutput2.unnamed173 LSTMoutput2.it LSTMoutput2.G2
|
||||
LSTMoutput2.unnamed174 LSTMoutput2.bit LSTMoutput2.ct
|
||||
LSTMoutput2.Wcoct LSTMoutput2.unnamed176 LSTMoutput2.ot
|
||||
LSTMoutput2.unnamed177 LSTMoutput2.mt LSTMoutput2.output
|
||||
|
||||
Loop[2] --> Loop_LSTMoutput3.output -> 24 nodes
|
||||
|
||||
LSTMoutput3.dh LSTMoutput3.whh LSTMoutput3.wxxpbpwhh
|
||||
LSTMoutput3.G4 LSTMoutput3.G3 LSTMoutput3.dc
|
||||
LSTMoutput3.Wcfdc LSTMoutput3.unnamed185 LSTMoutput3.ft
|
||||
LSTMoutput3.bft LSTMoutput3.G1 LSTMoutput3.Wcidc
|
||||
LSTMoutput3.unnamed183 LSTMoutput3.it LSTMoutput3.G2
|
||||
LSTMoutput3.unnamed184 LSTMoutput3.bit LSTMoutput3.ct
|
||||
LSTMoutput3.Wcoct LSTMoutput3.unnamed186 LSTMoutput3.ot
|
||||
LSTMoutput3.unnamed187 LSTMoutput3.mt LSTMoutput3.output
|
||||
|
||||
Validating network. 113 nodes to process in pass 1.
|
||||
|
||||
Validating --> labels = InputValue() : -> [132 x *]
|
||||
Validating --> W = LearnableParameter() : -> [132 x 0]
|
||||
Validating --> LSTMoutput3.Wmr = LearnableParameter() : -> [512 x 1024]
|
||||
Validating --> LSTMoutput3.wx = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput2.Wmr = LearnableParameter() : -> [512 x 1024]
|
||||
Validating --> LSTMoutput2.wx = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput1.Wmr = LearnableParameter() : -> [512 x 1024]
|
||||
Validating --> LSTMoutput1.wx = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> features = InputValue() : -> [363 x *]
|
||||
Validating --> featNorm.xMean = Mean (features) : [363 x *] -> [363]
|
||||
Validating --> featNorm.xStdDev = InvStdDev (features) : [363 x *] -> [363]
|
||||
Validating --> featNorm.xNorm = PerDimMeanVarNormalization (features, featNorm.xMean, featNorm.xStdDev) : [363 x *], [363], [363] -> [363 x *]
|
||||
Node 'LSTMoutput1.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 363].
|
||||
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput1.wxx = Times (LSTMoutput1.wx, featNorm.xNorm) : [4096 x 363], [363 x *] -> [4096 x *]
|
||||
Validating --> LSTMoutput1.b = LearnableParameter() : -> [4096 x 1]
|
||||
Validating --> LSTMoutput1.wxxpb = Plus (LSTMoutput1.wxx, LSTMoutput1.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wh = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput1.Wco = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput1.Wcf = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput1.Wci = LearnableParameter() : -> [1024]
|
||||
Node 'LSTMoutput1.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
|
||||
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512] -> [4096]
|
||||
Validating --> LSTMoutput1.wxxpbpwhh = Plus (LSTMoutput1.wxxpb, LSTMoutput1.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput1.G4 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.G3 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput1.unnamed165 = Plus (LSTMoutput1.G3, LSTMoutput1.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.ft = Sigmoid (LSTMoutput1.unnamed165) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.bft = ElementTimes (LSTMoutput1.ft, LSTMoutput1.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.G1 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput1.unnamed163 = Plus (LSTMoutput1.G1, LSTMoutput1.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.it = Sigmoid (LSTMoutput1.unnamed163) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.G2 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.unnamed164 = Tanh (LSTMoutput1.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.bit = ElementTimes (LSTMoutput1.it, LSTMoutput1.unnamed164) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.ct = Plus (LSTMoutput1.bft, LSTMoutput1.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcoct = DiagTimes (LSTMoutput1.Wco, LSTMoutput1.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.unnamed166 = Plus (LSTMoutput1.G4, LSTMoutput1.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.ot = Sigmoid (LSTMoutput1.unnamed166) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.unnamed167 = Tanh (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.mt = ElementTimes (LSTMoutput1.ot, LSTMoutput1.unnamed167) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.output = Times (LSTMoutput1.Wmr, LSTMoutput1.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
|
||||
Node 'LSTMoutput2.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512 x 1].
|
||||
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=7, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput2.wxx = Times (LSTMoutput2.wx, LSTMoutput1.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
|
||||
Validating --> LSTMoutput2.b = LearnableParameter() : -> [4096 x 1]
|
||||
Validating --> LSTMoutput2.wxxpb = Plus (LSTMoutput2.wxx, LSTMoutput2.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wh = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput2.Wco = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput2.Wcf = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput2.Wci = LearnableParameter() : -> [1024]
|
||||
Node 'LSTMoutput2.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
|
||||
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=8, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512] -> [4096]
|
||||
Validating --> LSTMoutput2.wxxpbpwhh = Plus (LSTMoutput2.wxxpb, LSTMoutput2.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput2.G4 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.G3 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput2.unnamed175 = Plus (LSTMoutput2.G3, LSTMoutput2.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.ft = Sigmoid (LSTMoutput2.unnamed175) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.bft = ElementTimes (LSTMoutput2.ft, LSTMoutput2.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.G1 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput2.unnamed173 = Plus (LSTMoutput2.G1, LSTMoutput2.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.it = Sigmoid (LSTMoutput2.unnamed173) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.G2 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.unnamed174 = Tanh (LSTMoutput2.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.bit = ElementTimes (LSTMoutput2.it, LSTMoutput2.unnamed174) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.ct = Plus (LSTMoutput2.bft, LSTMoutput2.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcoct = DiagTimes (LSTMoutput2.Wco, LSTMoutput2.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.unnamed176 = Plus (LSTMoutput2.G4, LSTMoutput2.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.ot = Sigmoid (LSTMoutput2.unnamed176) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.unnamed177 = Tanh (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.mt = ElementTimes (LSTMoutput2.ot, LSTMoutput2.unnamed177) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.output = Times (LSTMoutput2.Wmr, LSTMoutput2.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
|
||||
Node 'LSTMoutput3.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512 x 1].
|
||||
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=13, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput3.wxx = Times (LSTMoutput3.wx, LSTMoutput2.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
|
||||
Validating --> LSTMoutput3.b = LearnableParameter() : -> [4096 x 1]
|
||||
Validating --> LSTMoutput3.wxxpb = Plus (LSTMoutput3.wxx, LSTMoutput3.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wh = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput3.Wco = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput3.Wcf = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput3.Wci = LearnableParameter() : -> [1024]
|
||||
Node 'LSTMoutput3.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
|
||||
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=14, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512] -> [4096]
|
||||
Validating --> LSTMoutput3.wxxpbpwhh = Plus (LSTMoutput3.wxxpb, LSTMoutput3.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput3.G4 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.G3 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput3.unnamed185 = Plus (LSTMoutput3.G3, LSTMoutput3.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.ft = Sigmoid (LSTMoutput3.unnamed185) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.bft = ElementTimes (LSTMoutput3.ft, LSTMoutput3.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.G1 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput3.unnamed183 = Plus (LSTMoutput3.G1, LSTMoutput3.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.it = Sigmoid (LSTMoutput3.unnamed183) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.G2 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.unnamed184 = Tanh (LSTMoutput3.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.bit = ElementTimes (LSTMoutput3.it, LSTMoutput3.unnamed184) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.ct = Plus (LSTMoutput3.bft, LSTMoutput3.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcoct = DiagTimes (LSTMoutput3.Wco, LSTMoutput3.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.unnamed186 = Plus (LSTMoutput3.G4, LSTMoutput3.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.ot = Sigmoid (LSTMoutput3.unnamed186) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.unnamed187 = Tanh (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.mt = ElementTimes (LSTMoutput3.ot, LSTMoutput3.unnamed187) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.output = Times (LSTMoutput3.Wmr, LSTMoutput3.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
|
||||
Node 'W' (LearnableParameter operation) operation: Tensor shape was inferred as [132 x 512 x 1].
|
||||
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 512 x 1] <- uniform(seed=19, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> unnamed193 = Times (W, LSTMoutput3.output) : [132 x 512 x 1], [512 x 1 x *] -> [132 x *]
|
||||
Validating --> b = LearnableParameter() : -> [132 x 1]
|
||||
Validating --> LSTMoutputW = Plus (unnamed193, b) : [132 x *], [132 x 1] -> [132 x 1 x *]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> logPrior.prior = Mean (labels) : [132 x *] -> [132]
|
||||
Validating --> logPrior.logPrior = Log (logPrior.prior) : [132] -> [132]
|
||||
Validating --> scaledLogLikelihood = Minus (LSTMoutputW, logPrior.logPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
|
||||
|
||||
Validating network. 88 nodes to process in pass 2.
|
||||
|
||||
Validating --> LSTMoutput1.dh = PastValue (LSTMoutput1.output) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput1.dc = PastValue (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.dh = PastValue (LSTMoutput2.output) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput2.dc = PastValue (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.dh = PastValue (LSTMoutput3.output) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput3.dc = PastValue (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
|
||||
Validating network. 15 nodes to process in pass 3.
|
||||
|
||||
|
||||
Validating network, final pass.
|
||||
|
||||
|
||||
|
||||
29 out of 113 nodes do not share the minibatch layout with the input data.
|
||||
|
||||
Post-processing network complete.
|
||||
|
||||
08/16/2016 10:02:01: Created model with 113 nodes on GPU 0.
|
||||
|
||||
08/16/2016 10:02:01: Training criterion node(s):
|
||||
08/16/2016 10:02:01: ce = CrossEntropyWithSoftmax
|
||||
|
||||
08/16/2016 10:02:01: Evaluation criterion node(s):
|
||||
08/16/2016 10:02:01: err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing: Out of 217 matrices, 125 are shared as 56, and 92 are not shared.
|
||||
|
||||
{ LSTMoutput2.mt : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.dh : [512 x 1 x *]
|
||||
LSTMoutput3.wxx : [4096 x *] (gradient) }
|
||||
{ LSTMoutput2.Wco : [1024] (gradient)
|
||||
LSTMoutput3.dc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.wx : [4096 x 363] (gradient)
|
||||
LSTMoutput1.wxxpb : [4096 x 1 x *] }
|
||||
{ LSTMoutput1.Wmr : [512 x 1024] (gradient)
|
||||
LSTMoutput2.wxx : [4096 x *] }
|
||||
{ LSTMoutput2.wx : [4096 x 512 x 1] (gradient)
|
||||
LSTMoutput2.wxxpb : [4096 x 1 x *] }
|
||||
{ LSTMoutput1.ot : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.whh : [4096 x 1 x *] }
|
||||
{ LSTMoutput1.ct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] }
|
||||
{ LSTMoutput1.G4 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.G4 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.unnamed164 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcfdc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.wxxpbpwhh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput2.unnamed175 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.G1 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.ft : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.Wci : [1024] (gradient)
|
||||
LSTMoutput2.G1 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.G3 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcidc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.Wcf : [1024] (gradient)
|
||||
LSTMoutput2.it : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.whh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput2.G2 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.b : [4096 x 1] (gradient)
|
||||
LSTMoutput1.dh : [512 x 1 x *] (gradient)
|
||||
LSTMoutput2.unnamed174 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.Wmr : [512 x 1024] (gradient)
|
||||
LSTMoutput3.wxx : [4096 x *] }
|
||||
{ LSTMoutput3.wx : [4096 x 512 x 1] (gradient)
|
||||
LSTMoutput3.wxxpb : [4096 x 1 x *] }
|
||||
{ LSTMoutput2.ot : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.whh : [4096 x 1 x *] }
|
||||
{ LSTMoutput2.ct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] }
|
||||
{ LSTMoutput1.Wcoct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.G4 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.G4 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.unnamed174 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcfdc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.unnamed166 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed185 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.dc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.G1 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.ft : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.unnamed165 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.bft : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.Wci : [1024] (gradient)
|
||||
LSTMoutput3.G1 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.G3 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcidc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.it : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed183 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.Wcf : [1024] (gradient)
|
||||
LSTMoutput3.it : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.unnamed167 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.whh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput3.G2 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.b : [4096 x 1] (gradient)
|
||||
LSTMoutput2.dh : [512 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed184 : [1024 x 1 x *] }
|
||||
{ LSTMoutput3.Wmr : [512 x 1024] (gradient)
|
||||
unnamed193 : [132 x *] }
|
||||
{ LSTMoutputW : [132 x 1 x *]
|
||||
W : [132 x 512 x 1] (gradient) }
|
||||
{ LSTMoutput3.output : [512 x 1 x *] (gradient)
|
||||
LSTMoutputW : [132 x 1 x *] (gradient) }
|
||||
{ LSTMoutput3.mt : [1024 x 1 x *] (gradient)
|
||||
unnamed193 : [132 x *] (gradient) }
|
||||
{ LSTMoutput2.Wcoct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.G4 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.ft : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.bft : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.output : [512 x 1 x *] (gradient)
|
||||
LSTMoutput2.wxxpb : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput3.it : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.Wh : [4096 x 512] (gradient)
|
||||
LSTMoutput3.G2 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.unnamed176 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.bit : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed183 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.bft : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.dc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.G1 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.G2 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcfdc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcidc : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.unnamed163 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.unnamed175 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.Wcidc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.ft : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.bft : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.dc : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.Wcfdc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcidc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.ft : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.unnamed173 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed185 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.Wh : [4096 x 512] (gradient)
|
||||
LSTMoutput2.G2 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcfdc : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.wxxpb : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput2.it : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.output : [512 x 1 x *] (gradient)
|
||||
LSTMoutput3.wxxpb : [4096 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.unnamed177 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.whh : [4096 x 1 x *] (gradient) }
|
||||
{ LSTMoutput3.b : [4096 x 1] (gradient)
|
||||
LSTMoutput3.dh : [512 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.dh : [512 x 1 x *]
|
||||
LSTMoutput1.wxx : [4096 x *] (gradient) }
|
||||
{ LSTMoutput1.mt : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.dh : [512 x 1 x *]
|
||||
LSTMoutput2.wxx : [4096 x *] (gradient) }
|
||||
{ LSTMoutput1.Wco : [1024] (gradient)
|
||||
LSTMoutput2.dc : [1024 x 1 x *] }
|
||||
|
||||
|
||||
08/16/2016 10:02:01: Training 13634692 parameters in 23 out of 23 parameter tensors and 104 nodes with gradient:
|
||||
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput1.Wcf' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput1.Wci' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput1.Wco' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput1.Wh' (LearnableParameter operation) : [4096 x 512]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput1.Wmr' (LearnableParameter operation) : [512 x 1024]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput1.b' (LearnableParameter operation) : [4096 x 1]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput1.wx' (LearnableParameter operation) : [4096 x 363]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput2.Wcf' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput2.Wci' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput2.Wco' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput2.Wh' (LearnableParameter operation) : [4096 x 512]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput2.Wmr' (LearnableParameter operation) : [512 x 1024]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput2.b' (LearnableParameter operation) : [4096 x 1]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput2.wx' (LearnableParameter operation) : [4096 x 512 x 1]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput3.Wcf' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput3.Wci' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput3.Wco' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput3.Wh' (LearnableParameter operation) : [4096 x 512]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput3.Wmr' (LearnableParameter operation) : [512 x 1024]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput3.b' (LearnableParameter operation) : [4096 x 1]
|
||||
08/16/2016 10:02:01: Node 'LSTMoutput3.wx' (LearnableParameter operation) : [4096 x 512 x 1]
|
||||
08/16/2016 10:02:01: Node 'W' (LearnableParameter operation) : [132 x 512 x 1]
|
||||
08/16/2016 10:02:01: Node 'b' (LearnableParameter operation) : [132 x 1]
|
||||
|
||||
|
||||
08/16/2016 10:02:01: Precomputing --> 3 PreCompute nodes found.
|
||||
|
||||
08/16/2016 10:02:01: featNorm.xMean = Mean()
|
||||
08/16/2016 10:02:01: featNorm.xStdDev = InvStdDev()
|
||||
08/16/2016 10:02:01: logPrior.prior = Mean()
|
||||
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
|
||||
|
||||
08/16/2016 10:02:02: Precomputing --> Completed.
|
||||
|
||||
|
||||
08/16/2016 10:02:02: Starting Epoch 1: learning rate per sample = 0.001953 effective momentum = 0.000000 momentum as time constant = 0.0 samples
|
||||
minibatchiterator: epoch 0: frames [0..64] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
|
||||
08/16/2016 10:02:03: Starting minibatch loop.
|
||||
08/16/2016 10:02:03: Epoch[ 1 of 1]-Minibatch[ 1- 10, 250.00%]: ce = 4.87453079 * 160; err = 0.90625000 * 160; time = 0.5069s; samplesPerSecond = 315.6
|
||||
08/16/2016 10:02:03: Epoch[ 1 of 1]-Minibatch[ 11- 20, 500.00%]: ce = 4.84628143 * 160; err = 0.69375000 * 160; time = 0.4852s; samplesPerSecond = 329.8
|
||||
08/16/2016 10:02:04: Finished Epoch[ 1 of 1]: [Training] ce = 4.85708837 * 418; err = 0.80382775 * 418; totalSamplesSeen = 418; learningRatePerSample = 0.001953125; epochTime=1.33633s
|
||||
08/16/2016 10:02:04: SGD: Saving checkpoint model '/tmp/cntk-test-20160816100054.995555/Examples/Speech/AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn'
|
||||
08/16/2016 10:02:05: CNTKCommandTrainEnd: speechTrain
|
||||
|
||||
08/16/2016 10:02:05: Action "train" complete.
|
||||
|
||||
08/16/2016 10:02:05: __COMPLETED__
|
|
@ -1 +0,0 @@
|
|||
__COMPLETED__
|
|
@ -1 +0,0 @@
|
|||
__COMPLETED__
|
|
@ -0,0 +1,681 @@
|
|||
CPU info:
|
||||
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
|
||||
Hardware threads: 24
|
||||
Total Memory: 268381192 kB
|
||||
-------------------------------------------------------------------
|
||||
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/LSTM-NDL.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
|
||||
-------------------------------------------------------------------
|
||||
Build info:
|
||||
|
||||
Built time: Aug 16 2016 03:09:16
|
||||
Last modified date: Fri Aug 12 05:28:23 2016
|
||||
Build type: Release
|
||||
Build target: GPU
|
||||
With 1bit-SGD: yes
|
||||
Math lib: mkl
|
||||
CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
|
||||
CUB_PATH: c:\src\cub-1.4.1
|
||||
CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
|
||||
Build Branch: HEAD
|
||||
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
Built by svcphil on Philly-Pool1
|
||||
Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
|
||||
-------------------------------------------------------------------
|
||||
Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
08/16/2016 03:20:22: -------------------------------------------------------------------
|
||||
08/16/2016 03:20:22: Build info:
|
||||
|
||||
08/16/2016 03:20:22: Built time: Aug 16 2016 03:09:16
|
||||
08/16/2016 03:20:22: Last modified date: Fri Aug 12 05:28:23 2016
|
||||
08/16/2016 03:20:22: Build type: Release
|
||||
08/16/2016 03:20:22: Build target: GPU
|
||||
08/16/2016 03:20:22: With 1bit-SGD: yes
|
||||
08/16/2016 03:20:22: Math lib: mkl
|
||||
08/16/2016 03:20:22: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
|
||||
08/16/2016 03:20:22: CUB_PATH: c:\src\cub-1.4.1
|
||||
08/16/2016 03:20:22: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
|
||||
08/16/2016 03:20:22: Build Branch: HEAD
|
||||
08/16/2016 03:20:22: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
08/16/2016 03:20:22: Built by svcphil on Philly-Pool1
|
||||
08/16/2016 03:20:22: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
|
||||
08/16/2016 03:20:22: -------------------------------------------------------------------
|
||||
08/16/2016 03:20:23: -------------------------------------------------------------------
|
||||
08/16/2016 03:20:23: GPU info:
|
||||
|
||||
08/16/2016 03:20:23: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
|
||||
08/16/2016 03:20:23: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
|
||||
08/16/2016 03:20:23: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
|
||||
08/16/2016 03:20:23: -------------------------------------------------------------------
|
||||
|
||||
08/16/2016 03:20:23: Running on DPHAIM-25 at 2016/08/16 03:20:23
|
||||
08/16/2016 03:20:23: Command line:
|
||||
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/LSTM-NDL.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu DeviceId=-1 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
|
||||
|
||||
|
||||
|
||||
08/16/2016 03:20:23: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 03:20:23: RootDir = ".."
|
||||
ConfigDir = "$RootDir$/Config"
|
||||
DataDir = "$RootDir$/Data"
|
||||
OutputDir = "$RootDir$/Output"
|
||||
ModelDir = "$OutputDir$/Models"
|
||||
deviceId = -1
|
||||
command = speechTrain
|
||||
precision = "float"
|
||||
traceLevel = 1
|
||||
modelPath = "$ModelDir$/cntkSpeechLSTM.dnn"
|
||||
parallelTrain = true
|
||||
frameMode = false
|
||||
truncated = true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
nbrUttsIneachRecurrentIter = 16
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "$ConfigDir$/lstmp-3layer-opt.ndl"
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 0
|
||||
minibatchSize = 16
|
||||
learningRatesPerMB = 0.5
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0:0.9
|
||||
maxEpochs = 4
|
||||
keepCheckPointFiles = true
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "$DataDir$/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "$DataDir$/glob_0000.mlf"
|
||||
labelMappingFile = "$DataDir$/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
||||
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
|
||||
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
|
||||
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
|
||||
DeviceId=-1
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=64]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
parallelTrain=false
|
||||
|
||||
08/16/2016 03:20:23: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
08/16/2016 03:20:23: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 03:20:23: RootDir = ".."
|
||||
ConfigDir = "../Config"
|
||||
DataDir = "../Data"
|
||||
OutputDir = "../Output"
|
||||
ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models"
|
||||
deviceId = -1
|
||||
command = speechTrain
|
||||
precision = "float"
|
||||
traceLevel = 1
|
||||
modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn"
|
||||
parallelTrain = true
|
||||
frameMode = false
|
||||
truncated = true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
nbrUttsIneachRecurrentIter = 16
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/lstmp-3layer-opt.ndl"
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 0
|
||||
minibatchSize = 16
|
||||
learningRatesPerMB = 0.5
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0:0.9
|
||||
maxEpochs = 4
|
||||
keepCheckPointFiles = true
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf"
|
||||
labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
||||
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
|
||||
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
|
||||
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
|
||||
DeviceId=-1
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=64]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
parallelTrain=false
|
||||
|
||||
08/16/2016 03:20:23: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
08/16/2016 03:20:23: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
configparameters: LSTM-NDL.cntk:command=speechTrain
|
||||
configparameters: LSTM-NDL.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
|
||||
configparameters: LSTM-NDL.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
configparameters: LSTM-NDL.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
configparameters: LSTM-NDL.cntk:deviceId=-1
|
||||
configparameters: LSTM-NDL.cntk:frameMode=false
|
||||
configparameters: LSTM-NDL.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models
|
||||
configparameters: LSTM-NDL.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn
|
||||
configparameters: LSTM-NDL.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
|
||||
configparameters: LSTM-NDL.cntk:parallelTrain=false
|
||||
configparameters: LSTM-NDL.cntk:precision=float
|
||||
configparameters: LSTM-NDL.cntk:RootDir=..
|
||||
configparameters: LSTM-NDL.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu
|
||||
configparameters: LSTM-NDL.cntk:speechTrain=[
|
||||
action = "train"
|
||||
nbrUttsIneachRecurrentIter = 16
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/lstmp-3layer-opt.ndl"
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 0
|
||||
minibatchSize = 16
|
||||
learningRatesPerMB = 0.5
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0:0.9
|
||||
maxEpochs = 4
|
||||
keepCheckPointFiles = true
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf"
|
||||
labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
] [SGD=[maxEpochs=1]] [SGD=[epochSize=64]] [reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
configparameters: LSTM-NDL.cntk:timestamping=true
|
||||
configparameters: LSTM-NDL.cntk:traceLevel=1
|
||||
configparameters: LSTM-NDL.cntk:truncated=true
|
||||
08/16/2016 03:20:23: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
08/16/2016 03:20:23: Commands: speechTrain
|
||||
08/16/2016 03:20:23: Precision = "float"
|
||||
08/16/2016 03:20:23: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn
|
||||
08/16/2016 03:20:23: CNTKCommandTrainInfo: speechTrain : 1
|
||||
08/16/2016 03:20:23: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
|
||||
|
||||
08/16/2016 03:20:23: ##############################################################################
|
||||
08/16/2016 03:20:23: # #
|
||||
08/16/2016 03:20:23: # Action "train" #
|
||||
08/16/2016 03:20:23: # #
|
||||
08/16/2016 03:20:23: ##############################################################################
|
||||
|
||||
08/16/2016 03:20:23: CNTKCommandTrainBegin: speechTrain
|
||||
NDLBuilder Using CPU
|
||||
reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp ... 948 entries
|
||||
total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list
|
||||
htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf ... total 948 entries
|
||||
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
|
||||
label set 0: 129 classes
|
||||
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
|
||||
useParallelTrain option is not enabled. ParallelTrain config will be ignored.
|
||||
08/16/2016 03:20:24: Creating virgin network.
|
||||
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
|
||||
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=4, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=5, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=6, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=9, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=10, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=11, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=12, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=15, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=16, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=17, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=18, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'W' (LearnableParameter operation): Initializating Parameter[132 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
|
||||
Post-processing network...
|
||||
|
||||
6 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ErrorPrediction()
|
||||
featNorm.xMean = Mean()
|
||||
featNorm.xStdDev = InvStdDev()
|
||||
logPrior.prior = Mean()
|
||||
scaledLogLikelihood = Minus()
|
||||
|
||||
Loop[0] --> Loop_LSTMoutput1.output -> 24 nodes
|
||||
|
||||
LSTMoutput1.dh LSTMoutput1.whh LSTMoutput1.wxxpbpwhh
|
||||
LSTMoutput1.G4 LSTMoutput1.G3 LSTMoutput1.dc
|
||||
LSTMoutput1.Wcfdc LSTMoutput1.unnamed165 LSTMoutput1.ft
|
||||
LSTMoutput1.bft LSTMoutput1.G1 LSTMoutput1.Wcidc
|
||||
LSTMoutput1.unnamed163 LSTMoutput1.it LSTMoutput1.G2
|
||||
LSTMoutput1.unnamed164 LSTMoutput1.bit LSTMoutput1.ct
|
||||
LSTMoutput1.Wcoct LSTMoutput1.unnamed166 LSTMoutput1.ot
|
||||
LSTMoutput1.unnamed167 LSTMoutput1.mt LSTMoutput1.output
|
||||
|
||||
Loop[1] --> Loop_LSTMoutput2.output -> 24 nodes
|
||||
|
||||
LSTMoutput2.dh LSTMoutput2.whh LSTMoutput2.wxxpbpwhh
|
||||
LSTMoutput2.G4 LSTMoutput2.G3 LSTMoutput2.dc
|
||||
LSTMoutput2.Wcfdc LSTMoutput2.unnamed175 LSTMoutput2.ft
|
||||
LSTMoutput2.bft LSTMoutput2.G1 LSTMoutput2.Wcidc
|
||||
LSTMoutput2.unnamed173 LSTMoutput2.it LSTMoutput2.G2
|
||||
LSTMoutput2.unnamed174 LSTMoutput2.bit LSTMoutput2.ct
|
||||
LSTMoutput2.Wcoct LSTMoutput2.unnamed176 LSTMoutput2.ot
|
||||
LSTMoutput2.unnamed177 LSTMoutput2.mt LSTMoutput2.output
|
||||
|
||||
Loop[2] --> Loop_LSTMoutput3.output -> 24 nodes
|
||||
|
||||
LSTMoutput3.dh LSTMoutput3.whh LSTMoutput3.wxxpbpwhh
|
||||
LSTMoutput3.G4 LSTMoutput3.G3 LSTMoutput3.dc
|
||||
LSTMoutput3.Wcfdc LSTMoutput3.unnamed185 LSTMoutput3.ft
|
||||
LSTMoutput3.bft LSTMoutput3.G1 LSTMoutput3.Wcidc
|
||||
LSTMoutput3.unnamed183 LSTMoutput3.it LSTMoutput3.G2
|
||||
LSTMoutput3.unnamed184 LSTMoutput3.bit LSTMoutput3.ct
|
||||
LSTMoutput3.Wcoct LSTMoutput3.unnamed186 LSTMoutput3.ot
|
||||
LSTMoutput3.unnamed187 LSTMoutput3.mt LSTMoutput3.output
|
||||
|
||||
Validating network. 113 nodes to process in pass 1.
|
||||
|
||||
Validating --> labels = InputValue() : -> [132 x *]
|
||||
Validating --> W = LearnableParameter() : -> [132 x 0]
|
||||
Validating --> LSTMoutput3.Wmr = LearnableParameter() : -> [512 x 1024]
|
||||
Validating --> LSTMoutput3.wx = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput2.Wmr = LearnableParameter() : -> [512 x 1024]
|
||||
Validating --> LSTMoutput2.wx = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput1.Wmr = LearnableParameter() : -> [512 x 1024]
|
||||
Validating --> LSTMoutput1.wx = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> features = InputValue() : -> [363 x *]
|
||||
Validating --> featNorm.xMean = Mean (features) : [363 x *] -> [363]
|
||||
Validating --> featNorm.xStdDev = InvStdDev (features) : [363 x *] -> [363]
|
||||
Validating --> featNorm.xNorm = PerDimMeanVarNormalization (features, featNorm.xMean, featNorm.xStdDev) : [363 x *], [363], [363] -> [363 x *]
|
||||
Node 'LSTMoutput1.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 363].
|
||||
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput1.wxx = Times (LSTMoutput1.wx, featNorm.xNorm) : [4096 x 363], [363 x *] -> [4096 x *]
|
||||
Validating --> LSTMoutput1.b = LearnableParameter() : -> [4096 x 1]
|
||||
Validating --> LSTMoutput1.wxxpb = Plus (LSTMoutput1.wxx, LSTMoutput1.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wh = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput1.Wco = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput1.Wcf = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput1.Wci = LearnableParameter() : -> [1024]
|
||||
Node 'LSTMoutput1.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
|
||||
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512] -> [4096]
|
||||
Validating --> LSTMoutput1.wxxpbpwhh = Plus (LSTMoutput1.wxxpb, LSTMoutput1.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput1.G4 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.G3 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput1.unnamed165 = Plus (LSTMoutput1.G3, LSTMoutput1.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.ft = Sigmoid (LSTMoutput1.unnamed165) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.bft = ElementTimes (LSTMoutput1.ft, LSTMoutput1.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.G1 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput1.unnamed163 = Plus (LSTMoutput1.G1, LSTMoutput1.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.it = Sigmoid (LSTMoutput1.unnamed163) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.G2 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.unnamed164 = Tanh (LSTMoutput1.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.bit = ElementTimes (LSTMoutput1.it, LSTMoutput1.unnamed164) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.ct = Plus (LSTMoutput1.bft, LSTMoutput1.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcoct = DiagTimes (LSTMoutput1.Wco, LSTMoutput1.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.unnamed166 = Plus (LSTMoutput1.G4, LSTMoutput1.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.ot = Sigmoid (LSTMoutput1.unnamed166) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.unnamed167 = Tanh (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.mt = ElementTimes (LSTMoutput1.ot, LSTMoutput1.unnamed167) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.output = Times (LSTMoutput1.Wmr, LSTMoutput1.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
|
||||
Node 'LSTMoutput2.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512 x 1].
|
||||
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=7, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput2.wxx = Times (LSTMoutput2.wx, LSTMoutput1.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
|
||||
Validating --> LSTMoutput2.b = LearnableParameter() : -> [4096 x 1]
|
||||
Validating --> LSTMoutput2.wxxpb = Plus (LSTMoutput2.wxx, LSTMoutput2.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wh = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput2.Wco = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput2.Wcf = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput2.Wci = LearnableParameter() : -> [1024]
|
||||
Node 'LSTMoutput2.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
|
||||
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=8, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512] -> [4096]
|
||||
Validating --> LSTMoutput2.wxxpbpwhh = Plus (LSTMoutput2.wxxpb, LSTMoutput2.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput2.G4 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.G3 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput2.unnamed175 = Plus (LSTMoutput2.G3, LSTMoutput2.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.ft = Sigmoid (LSTMoutput2.unnamed175) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.bft = ElementTimes (LSTMoutput2.ft, LSTMoutput2.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.G1 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput2.unnamed173 = Plus (LSTMoutput2.G1, LSTMoutput2.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.it = Sigmoid (LSTMoutput2.unnamed173) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.G2 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.unnamed174 = Tanh (LSTMoutput2.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.bit = ElementTimes (LSTMoutput2.it, LSTMoutput2.unnamed174) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.ct = Plus (LSTMoutput2.bft, LSTMoutput2.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcoct = DiagTimes (LSTMoutput2.Wco, LSTMoutput2.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.unnamed176 = Plus (LSTMoutput2.G4, LSTMoutput2.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.ot = Sigmoid (LSTMoutput2.unnamed176) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.unnamed177 = Tanh (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.mt = ElementTimes (LSTMoutput2.ot, LSTMoutput2.unnamed177) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.output = Times (LSTMoutput2.Wmr, LSTMoutput2.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
|
||||
Node 'LSTMoutput3.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512 x 1].
|
||||
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=13, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput3.wxx = Times (LSTMoutput3.wx, LSTMoutput2.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
|
||||
Validating --> LSTMoutput3.b = LearnableParameter() : -> [4096 x 1]
|
||||
Validating --> LSTMoutput3.wxxpb = Plus (LSTMoutput3.wxx, LSTMoutput3.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wh = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput3.Wco = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput3.Wcf = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput3.Wci = LearnableParameter() : -> [1024]
|
||||
Node 'LSTMoutput3.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
|
||||
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=14, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512] -> [4096]
|
||||
Validating --> LSTMoutput3.wxxpbpwhh = Plus (LSTMoutput3.wxxpb, LSTMoutput3.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput3.G4 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.G3 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput3.unnamed185 = Plus (LSTMoutput3.G3, LSTMoutput3.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.ft = Sigmoid (LSTMoutput3.unnamed185) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.bft = ElementTimes (LSTMoutput3.ft, LSTMoutput3.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.G1 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput3.unnamed183 = Plus (LSTMoutput3.G1, LSTMoutput3.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.it = Sigmoid (LSTMoutput3.unnamed183) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.G2 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.unnamed184 = Tanh (LSTMoutput3.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.bit = ElementTimes (LSTMoutput3.it, LSTMoutput3.unnamed184) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.ct = Plus (LSTMoutput3.bft, LSTMoutput3.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcoct = DiagTimes (LSTMoutput3.Wco, LSTMoutput3.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.unnamed186 = Plus (LSTMoutput3.G4, LSTMoutput3.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.ot = Sigmoid (LSTMoutput3.unnamed186) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.unnamed187 = Tanh (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.mt = ElementTimes (LSTMoutput3.ot, LSTMoutput3.unnamed187) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.output = Times (LSTMoutput3.Wmr, LSTMoutput3.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
|
||||
Node 'W' (LearnableParameter operation) operation: Tensor shape was inferred as [132 x 512 x 1].
|
||||
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 512 x 1] <- uniform(seed=19, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> unnamed193 = Times (W, LSTMoutput3.output) : [132 x 512 x 1], [512 x 1 x *] -> [132 x *]
|
||||
Validating --> b = LearnableParameter() : -> [132 x 1]
|
||||
Validating --> LSTMoutputW = Plus (unnamed193, b) : [132 x *], [132 x 1] -> [132 x 1 x *]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> logPrior.prior = Mean (labels) : [132 x *] -> [132]
|
||||
Validating --> logPrior.logPrior = Log (logPrior.prior) : [132] -> [132]
|
||||
Validating --> scaledLogLikelihood = Minus (LSTMoutputW, logPrior.logPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
|
||||
|
||||
Validating network. 88 nodes to process in pass 2.
|
||||
|
||||
Validating --> LSTMoutput1.dh = PastValue (LSTMoutput1.output) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput1.dc = PastValue (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.dh = PastValue (LSTMoutput2.output) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput2.dc = PastValue (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.dh = PastValue (LSTMoutput3.output) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput3.dc = PastValue (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
|
||||
Validating network. 15 nodes to process in pass 3.
|
||||
|
||||
|
||||
Validating network, final pass.
|
||||
|
||||
|
||||
|
||||
29 out of 113 nodes do not share the minibatch layout with the input data.
|
||||
|
||||
Post-processing network complete.
|
||||
|
||||
08/16/2016 03:20:24: Created model with 113 nodes on CPU.
|
||||
|
||||
08/16/2016 03:20:24: Training criterion node(s):
|
||||
08/16/2016 03:20:24: ce = CrossEntropyWithSoftmax
|
||||
|
||||
08/16/2016 03:20:24: Evaluation criterion node(s):
|
||||
08/16/2016 03:20:24: err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing: Out of 217 matrices, 125 are shared as 56, and 92 are not shared.
|
||||
|
||||
{ LSTMoutput1.dh : [512 x 1 x *]
|
||||
LSTMoutput1.wxx : [4096 x *] (gradient) }
|
||||
{ LSTMoutput2.mt : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.dh : [512 x 1 x *]
|
||||
LSTMoutput3.wxx : [4096 x *] (gradient) }
|
||||
{ LSTMoutput2.Wco : [1024] (gradient)
|
||||
LSTMoutput3.dc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.mt : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.dh : [512 x 1 x *]
|
||||
LSTMoutput2.wxx : [4096 x *] (gradient) }
|
||||
{ LSTMoutput1.Wco : [1024] (gradient)
|
||||
LSTMoutput2.dc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.G3 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcidc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.unnamed164 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcfdc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.Wci : [1024] (gradient)
|
||||
LSTMoutput2.G1 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.wxxpbpwhh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput2.unnamed175 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.Wcf : [1024] (gradient)
|
||||
LSTMoutput3.it : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.ct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] }
|
||||
{ LSTMoutput3.wx : [4096 x 512 x 1] (gradient)
|
||||
LSTMoutput3.wxxpb : [4096 x 1 x *] }
|
||||
{ LSTMoutput1.Wmr : [512 x 1024] (gradient)
|
||||
LSTMoutput2.wxx : [4096 x *] }
|
||||
{ LSTMoutput1.Wcoct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.G4 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.G4 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.Wcf : [1024] (gradient)
|
||||
LSTMoutput2.it : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.unnamed174 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcfdc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.G1 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.ft : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.dc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.G1 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.ft : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.unnamed165 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.bft : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.G3 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcidc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.ot : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.whh : [4096 x 1 x *] }
|
||||
{ LSTMoutput2.ot : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.whh : [4096 x 1 x *] }
|
||||
{ LSTMoutput2.ct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] }
|
||||
{ LSTMoutput1.whh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput2.G2 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.wx : [4096 x 512 x 1] (gradient)
|
||||
LSTMoutput2.wxxpb : [4096 x 1 x *] }
|
||||
{ LSTMoutput1.b : [4096 x 1] (gradient)
|
||||
LSTMoutput1.dh : [512 x 1 x *] (gradient)
|
||||
LSTMoutput2.unnamed174 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.unnamed166 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed185 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.Wci : [1024] (gradient)
|
||||
LSTMoutput3.G1 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.it : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed183 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.unnamed167 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.whh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput3.G2 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.Wmr : [512 x 1024] (gradient)
|
||||
LSTMoutput3.wxx : [4096 x *] }
|
||||
{ LSTMoutput2.b : [4096 x 1] (gradient)
|
||||
LSTMoutput2.dh : [512 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed184 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.G4 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.G4 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.unnamed176 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.bit : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed183 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.Wh : [4096 x 512] (gradient)
|
||||
LSTMoutput2.G2 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcfdc : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.wxxpb : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput2.it : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.unnamed177 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.whh : [4096 x 1 x *] (gradient) }
|
||||
{ LSTMoutput3.output : [512 x 1 x *] (gradient)
|
||||
LSTMoutputW : [132 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.bft : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.dc : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.Wh : [4096 x 512] (gradient)
|
||||
LSTMoutput3.G2 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.bft : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.dc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.G1 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.unnamed163 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.unnamed175 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.unnamed173 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed185 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.output : [512 x 1 x *] (gradient)
|
||||
LSTMoutput3.wxxpb : [4096 x 1 x *] (gradient) }
|
||||
{ LSTMoutput3.b : [4096 x 1] (gradient)
|
||||
LSTMoutput3.dh : [512 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.Wcoct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.G4 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.Wcidc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.ft : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.G2 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcfdc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcidc : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput3.Wmr : [512 x 1024] (gradient)
|
||||
unnamed193 : [132 x *] }
|
||||
{ LSTMoutput1.output : [512 x 1 x *] (gradient)
|
||||
LSTMoutput2.wxxpb : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput3.it : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput3.mt : [1024 x 1 x *] (gradient)
|
||||
unnamed193 : [132 x *] (gradient) }
|
||||
{ LSTMoutput1.Wcfdc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcidc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.ft : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutputW : [132 x 1 x *]
|
||||
W : [132 x 512 x 1] (gradient) }
|
||||
{ LSTMoutput1.ft : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.bft : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.wx : [4096 x 363] (gradient)
|
||||
LSTMoutput1.wxxpb : [4096 x 1 x *] }
|
||||
|
||||
|
||||
08/16/2016 03:20:24: Training 13634692 parameters in 23 out of 23 parameter tensors and 104 nodes with gradient:
|
||||
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput1.Wcf' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput1.Wci' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput1.Wco' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput1.Wh' (LearnableParameter operation) : [4096 x 512]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput1.Wmr' (LearnableParameter operation) : [512 x 1024]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput1.b' (LearnableParameter operation) : [4096 x 1]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput1.wx' (LearnableParameter operation) : [4096 x 363]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput2.Wcf' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput2.Wci' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput2.Wco' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput2.Wh' (LearnableParameter operation) : [4096 x 512]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput2.Wmr' (LearnableParameter operation) : [512 x 1024]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput2.b' (LearnableParameter operation) : [4096 x 1]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput2.wx' (LearnableParameter operation) : [4096 x 512 x 1]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput3.Wcf' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput3.Wci' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput3.Wco' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput3.Wh' (LearnableParameter operation) : [4096 x 512]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput3.Wmr' (LearnableParameter operation) : [512 x 1024]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput3.b' (LearnableParameter operation) : [4096 x 1]
|
||||
08/16/2016 03:20:24: Node 'LSTMoutput3.wx' (LearnableParameter operation) : [4096 x 512 x 1]
|
||||
08/16/2016 03:20:24: Node 'W' (LearnableParameter operation) : [132 x 512 x 1]
|
||||
08/16/2016 03:20:24: Node 'b' (LearnableParameter operation) : [132 x 1]
|
||||
|
||||
|
||||
08/16/2016 03:20:24: Precomputing --> 3 PreCompute nodes found.
|
||||
|
||||
08/16/2016 03:20:24: featNorm.xMean = Mean()
|
||||
08/16/2016 03:20:24: featNorm.xStdDev = InvStdDev()
|
||||
08/16/2016 03:20:24: logPrior.prior = Mean()
|
||||
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
|
||||
|
||||
08/16/2016 03:20:27: Precomputing --> Completed.
|
||||
|
||||
|
||||
08/16/2016 03:20:28: Starting Epoch 1: learning rate per sample = 0.001953 effective momentum = 0.000000 momentum as time constant = 0.0 samples
|
||||
minibatchiterator: epoch 0: frames [0..64] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
|
||||
08/16/2016 03:20:28: Starting minibatch loop.
|
||||
08/16/2016 03:20:31: Epoch[ 1 of 1]-Minibatch[ 1- 10, 250.00%]: ce = 4.87950134 * 160; err = 0.90625000 * 160; time = 3.6415s; samplesPerSecond = 43.9
|
||||
08/16/2016 03:20:35: Epoch[ 1 of 1]-Minibatch[ 11- 20, 500.00%]: ce = 4.84555817 * 160; err = 0.69375000 * 160; time = 3.6742s; samplesPerSecond = 43.5
|
||||
08/16/2016 03:20:38: Finished Epoch[ 1 of 1]: [Training] ce = 4.85900003 * 418; err = 0.80382775 * 418; totalSamplesSeen = 418; learningRatePerSample = 0.001953125; epochTime=9.76851s
|
||||
08/16/2016 03:20:38: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_cpu/Models/cntkSpeechLSTM.dnn'
|
||||
08/16/2016 03:20:39: CNTKCommandTrainEnd: speechTrain
|
||||
|
||||
08/16/2016 03:20:39: Action "train" complete.
|
||||
|
||||
08/16/2016 03:20:39: __COMPLETED__
|
|
@ -0,0 +1,682 @@
|
|||
CPU info:
|
||||
CPU Model Name: Intel(R) Xeon(R) CPU E5-2630 v2 @ 2.60GHz
|
||||
Hardware threads: 24
|
||||
Total Memory: 268381192 kB
|
||||
-------------------------------------------------------------------
|
||||
=== Running /cygdrive/c/jenkins/workspace/CNTK-Test-Windows-W1/x64/release/cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/LSTM-NDL.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
|
||||
-------------------------------------------------------------------
|
||||
Build info:
|
||||
|
||||
Built time: Aug 16 2016 03:09:16
|
||||
Last modified date: Fri Aug 12 05:28:23 2016
|
||||
Build type: Release
|
||||
Build target: GPU
|
||||
With 1bit-SGD: yes
|
||||
Math lib: mkl
|
||||
CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
|
||||
CUB_PATH: c:\src\cub-1.4.1
|
||||
CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
|
||||
Build Branch: HEAD
|
||||
Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
Built by svcphil on Philly-Pool1
|
||||
Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
|
||||
-------------------------------------------------------------------
|
||||
Changed current directory to C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
08/16/2016 03:20:41: -------------------------------------------------------------------
|
||||
08/16/2016 03:20:41: Build info:
|
||||
|
||||
08/16/2016 03:20:41: Built time: Aug 16 2016 03:09:16
|
||||
08/16/2016 03:20:41: Last modified date: Fri Aug 12 05:28:23 2016
|
||||
08/16/2016 03:20:41: Build type: Release
|
||||
08/16/2016 03:20:41: Build target: GPU
|
||||
08/16/2016 03:20:41: With 1bit-SGD: yes
|
||||
08/16/2016 03:20:41: Math lib: mkl
|
||||
08/16/2016 03:20:41: CUDA_PATH: C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v7.5
|
||||
08/16/2016 03:20:41: CUB_PATH: c:\src\cub-1.4.1
|
||||
08/16/2016 03:20:41: CUDNN_PATH: c:\NVIDIA\cudnn-4.0\cuda
|
||||
08/16/2016 03:20:41: Build Branch: HEAD
|
||||
08/16/2016 03:20:41: Build SHA1: 026b1e772b963461e189f8f00aa7ed6951298f84
|
||||
08/16/2016 03:20:41: Built by svcphil on Philly-Pool1
|
||||
08/16/2016 03:20:41: Build Path: c:\jenkins\workspace\CNTK-Build-Windows\Source\CNTK\
|
||||
08/16/2016 03:20:41: -------------------------------------------------------------------
|
||||
08/16/2016 03:20:43: -------------------------------------------------------------------
|
||||
08/16/2016 03:20:43: GPU info:
|
||||
|
||||
08/16/2016 03:20:43: Device[0]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
|
||||
08/16/2016 03:20:43: Device[1]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
|
||||
08/16/2016 03:20:43: Device[2]: cores = 2880; computeCapability = 3.5; type = "GeForce GTX 780 Ti"; memory = 3072 MB
|
||||
08/16/2016 03:20:43: -------------------------------------------------------------------
|
||||
|
||||
08/16/2016 03:20:43: Running on DPHAIM-25 at 2016/08/16 03:20:43
|
||||
08/16/2016 03:20:43: Command line:
|
||||
C:\jenkins\workspace\CNTK-Test-Windows-W1\x64\release\cntk.exe configFile=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/LSTM-NDL.cntk currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu DeviceId=0 timestamping=true speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false
|
||||
|
||||
|
||||
|
||||
08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> RAW CONFIG (VARIABLES NOT RESOLVED) >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 03:20:43: RootDir = ".."
|
||||
ConfigDir = "$RootDir$/Config"
|
||||
DataDir = "$RootDir$/Data"
|
||||
OutputDir = "$RootDir$/Output"
|
||||
ModelDir = "$OutputDir$/Models"
|
||||
deviceId = -1
|
||||
command = speechTrain
|
||||
precision = "float"
|
||||
traceLevel = 1
|
||||
modelPath = "$ModelDir$/cntkSpeechLSTM.dnn"
|
||||
parallelTrain = true
|
||||
frameMode = false
|
||||
truncated = true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
nbrUttsIneachRecurrentIter = 16
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "$ConfigDir$/lstmp-3layer-opt.ndl"
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 0
|
||||
minibatchSize = 16
|
||||
learningRatesPerMB = 0.5
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0:0.9
|
||||
maxEpochs = 4
|
||||
keepCheckPointFiles = true
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "$DataDir$/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "$DataDir$/glob_0000.mlf"
|
||||
labelMappingFile = "$DataDir$/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
||||
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
|
||||
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
|
||||
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
|
||||
DeviceId=0
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=64]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
parallelTrain=false
|
||||
|
||||
08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< RAW CONFIG (VARIABLES NOT RESOLVED) <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> RAW CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
08/16/2016 03:20:43: RootDir = ".."
|
||||
ConfigDir = "../Config"
|
||||
DataDir = "../Data"
|
||||
OutputDir = "../Output"
|
||||
ModelDir = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models"
|
||||
deviceId = -1
|
||||
command = speechTrain
|
||||
precision = "float"
|
||||
traceLevel = 1
|
||||
modelPath = "C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn"
|
||||
parallelTrain = true
|
||||
frameMode = false
|
||||
truncated = true
|
||||
speechTrain = [
|
||||
action = "train"
|
||||
nbrUttsIneachRecurrentIter = 16
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/lstmp-3layer-opt.ndl"
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 0
|
||||
minibatchSize = 16
|
||||
learningRatesPerMB = 0.5
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0:0.9
|
||||
maxEpochs = 4
|
||||
keepCheckPointFiles = true
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf"
|
||||
labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
]
|
||||
currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
|
||||
DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
|
||||
OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
|
||||
DeviceId=0
|
||||
timestamping=true
|
||||
speechTrain=[SGD=[maxEpochs=1]]
|
||||
speechTrain=[SGD=[epochSize=64]]
|
||||
speechTrain=[reader=[useMersenneTwisterRand=true]]
|
||||
parallelTrain=false
|
||||
|
||||
08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< RAW CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
08/16/2016 03:20:43: >>>>>>>>>>>>>>>>>>>> PROCESSED CONFIG WITH ALL VARIABLES RESOLVED >>>>>>>>>>>>>>>>>>>>
|
||||
configparameters: LSTM-NDL.cntk:command=speechTrain
|
||||
configparameters: LSTM-NDL.cntk:ConfigDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config
|
||||
configparameters: LSTM-NDL.cntk:currentDirectory=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
configparameters: LSTM-NDL.cntk:DataDir=C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data
|
||||
configparameters: LSTM-NDL.cntk:deviceId=0
|
||||
configparameters: LSTM-NDL.cntk:frameMode=false
|
||||
configparameters: LSTM-NDL.cntk:ModelDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models
|
||||
configparameters: LSTM-NDL.cntk:modelPath=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn
|
||||
configparameters: LSTM-NDL.cntk:OutputDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
|
||||
configparameters: LSTM-NDL.cntk:parallelTrain=false
|
||||
configparameters: LSTM-NDL.cntk:precision=float
|
||||
configparameters: LSTM-NDL.cntk:RootDir=..
|
||||
configparameters: LSTM-NDL.cntk:RunDir=C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu
|
||||
configparameters: LSTM-NDL.cntk:speechTrain=[
|
||||
action = "train"
|
||||
nbrUttsIneachRecurrentIter = 16
|
||||
NDLNetworkBuilder = [
|
||||
networkDescription = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Config/lstmp-3layer-opt.ndl"
|
||||
]
|
||||
SGD = [
|
||||
epochSize = 0
|
||||
minibatchSize = 16
|
||||
learningRatesPerMB = 0.5
|
||||
numMBsToShowResult = 10
|
||||
momentumPerMB = 0:0.9
|
||||
maxEpochs = 4
|
||||
keepCheckPointFiles = true
|
||||
]
|
||||
reader = [
|
||||
readerType = "HTKMLFReader"
|
||||
readMethod = "blockRandomize"
|
||||
miniBatchMode = "partial"
|
||||
randomize = "auto"
|
||||
verbosity = 0
|
||||
features = [
|
||||
dim = 363
|
||||
type = "real"
|
||||
scpFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp"
|
||||
]
|
||||
labels = [
|
||||
mlfFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf"
|
||||
labelMappingFile = "C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list"
|
||||
labelDim = 132
|
||||
labelType = "category"
|
||||
]
|
||||
]
|
||||
] [SGD=[maxEpochs=1]] [SGD=[epochSize=64]] [reader=[useMersenneTwisterRand=true]]
|
||||
|
||||
configparameters: LSTM-NDL.cntk:timestamping=true
|
||||
configparameters: LSTM-NDL.cntk:traceLevel=1
|
||||
configparameters: LSTM-NDL.cntk:truncated=true
|
||||
08/16/2016 03:20:43: <<<<<<<<<<<<<<<<<<<< PROCESSED CONFIG WITH ALL VARIABLES RESOLVED <<<<<<<<<<<<<<<<<<<<
|
||||
08/16/2016 03:20:43: Commands: speechTrain
|
||||
08/16/2016 03:20:43: Precision = "float"
|
||||
08/16/2016 03:20:43: CNTKModelPath: C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn
|
||||
08/16/2016 03:20:43: CNTKCommandTrainInfo: speechTrain : 1
|
||||
08/16/2016 03:20:43: CNTKCommandTrainInfo: CNTKNoMoreCommands_Total : 1
|
||||
|
||||
08/16/2016 03:20:43: ##############################################################################
|
||||
08/16/2016 03:20:43: # #
|
||||
08/16/2016 03:20:43: # Action "train" #
|
||||
08/16/2016 03:20:43: # #
|
||||
08/16/2016 03:20:43: ##############################################################################
|
||||
|
||||
08/16/2016 03:20:43: CNTKCommandTrainBegin: speechTrain
|
||||
NDLBuilder Using GPU 0
|
||||
reading script file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.scp ... 948 entries
|
||||
total 132 state names in state list C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/state.list
|
||||
htkmlfreader: reading MLF file C:\jenkins\workspace\CNTK-Test-Windows-W1\Examples\Speech\AN4\Data/glob_0000.mlf ... total 948 entries
|
||||
...............................................................................................feature set 0: 252734 frames in 948 out of 948 utterances
|
||||
label set 0: 129 classes
|
||||
minibatchutterancesource: 948 utterances grouped into 3 chunks, av. chunk size: 316.0 utterances, 84244.7 frames
|
||||
useParallelTrain option is not enabled. ParallelTrain config will be ignored.
|
||||
08/16/2016 03:20:43: Creating virgin network.
|
||||
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- 0.000000.
|
||||
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput1.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput1.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=3, range=0.050000*1.000000, onCPU=false).
|
||||
Microsoft::MSR::CNTK::GPUMatrix<ElemType>::SetUniformRandomValue (GPU): creating curand object with seed 3, sizeof(ElemType)==4
|
||||
Node 'LSTMoutput1.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=4, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput1.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=5, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput1.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=6, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput2.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput2.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=9, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=10, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=11, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput2.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=12, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput3.b' (LearnableParameter operation): Initializing Parameter[4096 x 1] <- 0.000000.
|
||||
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializating Parameter[4096 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'LSTMoutput3.Wci' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=15, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.Wcf' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=16, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.Wco' (LearnableParameter operation): Initializing Parameter[1024] <- uniform(seed=17, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'LSTMoutput3.Wmr' (LearnableParameter operation): Initializing Parameter[512 x 1024] <- uniform(seed=18, range=0.050000*1.000000, onCPU=false).
|
||||
Node 'W' (LearnableParameter operation): Initializating Parameter[132 x 0] as uniform later when dimensions are fully known.
|
||||
Node 'b' (LearnableParameter operation): Initializing Parameter[132 x 1] <- 0.000000.
|
||||
|
||||
Post-processing network...
|
||||
|
||||
6 roots:
|
||||
ce = CrossEntropyWithSoftmax()
|
||||
err = ErrorPrediction()
|
||||
featNorm.xMean = Mean()
|
||||
featNorm.xStdDev = InvStdDev()
|
||||
logPrior.prior = Mean()
|
||||
scaledLogLikelihood = Minus()
|
||||
|
||||
Loop[0] --> Loop_LSTMoutput1.output -> 24 nodes
|
||||
|
||||
LSTMoutput1.dh LSTMoutput1.whh LSTMoutput1.wxxpbpwhh
|
||||
LSTMoutput1.G4 LSTMoutput1.G3 LSTMoutput1.dc
|
||||
LSTMoutput1.Wcfdc LSTMoutput1.unnamed165 LSTMoutput1.ft
|
||||
LSTMoutput1.bft LSTMoutput1.G1 LSTMoutput1.Wcidc
|
||||
LSTMoutput1.unnamed163 LSTMoutput1.it LSTMoutput1.G2
|
||||
LSTMoutput1.unnamed164 LSTMoutput1.bit LSTMoutput1.ct
|
||||
LSTMoutput1.Wcoct LSTMoutput1.unnamed166 LSTMoutput1.ot
|
||||
LSTMoutput1.unnamed167 LSTMoutput1.mt LSTMoutput1.output
|
||||
|
||||
Loop[1] --> Loop_LSTMoutput2.output -> 24 nodes
|
||||
|
||||
LSTMoutput2.dh LSTMoutput2.whh LSTMoutput2.wxxpbpwhh
|
||||
LSTMoutput2.G4 LSTMoutput2.G3 LSTMoutput2.dc
|
||||
LSTMoutput2.Wcfdc LSTMoutput2.unnamed175 LSTMoutput2.ft
|
||||
LSTMoutput2.bft LSTMoutput2.G1 LSTMoutput2.Wcidc
|
||||
LSTMoutput2.unnamed173 LSTMoutput2.it LSTMoutput2.G2
|
||||
LSTMoutput2.unnamed174 LSTMoutput2.bit LSTMoutput2.ct
|
||||
LSTMoutput2.Wcoct LSTMoutput2.unnamed176 LSTMoutput2.ot
|
||||
LSTMoutput2.unnamed177 LSTMoutput2.mt LSTMoutput2.output
|
||||
|
||||
Loop[2] --> Loop_LSTMoutput3.output -> 24 nodes
|
||||
|
||||
LSTMoutput3.dh LSTMoutput3.whh LSTMoutput3.wxxpbpwhh
|
||||
LSTMoutput3.G4 LSTMoutput3.G3 LSTMoutput3.dc
|
||||
LSTMoutput3.Wcfdc LSTMoutput3.unnamed185 LSTMoutput3.ft
|
||||
LSTMoutput3.bft LSTMoutput3.G1 LSTMoutput3.Wcidc
|
||||
LSTMoutput3.unnamed183 LSTMoutput3.it LSTMoutput3.G2
|
||||
LSTMoutput3.unnamed184 LSTMoutput3.bit LSTMoutput3.ct
|
||||
LSTMoutput3.Wcoct LSTMoutput3.unnamed186 LSTMoutput3.ot
|
||||
LSTMoutput3.unnamed187 LSTMoutput3.mt LSTMoutput3.output
|
||||
|
||||
Validating network. 113 nodes to process in pass 1.
|
||||
|
||||
Validating --> labels = InputValue() : -> [132 x *]
|
||||
Validating --> W = LearnableParameter() : -> [132 x 0]
|
||||
Validating --> LSTMoutput3.Wmr = LearnableParameter() : -> [512 x 1024]
|
||||
Validating --> LSTMoutput3.wx = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput2.Wmr = LearnableParameter() : -> [512 x 1024]
|
||||
Validating --> LSTMoutput2.wx = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput1.Wmr = LearnableParameter() : -> [512 x 1024]
|
||||
Validating --> LSTMoutput1.wx = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> features = InputValue() : -> [363 x *]
|
||||
Validating --> featNorm.xMean = Mean (features) : [363 x *] -> [363]
|
||||
Validating --> featNorm.xStdDev = InvStdDev (features) : [363 x *] -> [363]
|
||||
Validating --> featNorm.xNorm = PerDimMeanVarNormalization (features, featNorm.xMean, featNorm.xStdDev) : [363 x *], [363], [363] -> [363 x *]
|
||||
Node 'LSTMoutput1.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 363].
|
||||
Node 'LSTMoutput1.wx' (LearnableParameter operation): Initializing Parameter[4096 x 363] <- uniform(seed=1, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput1.wxx = Times (LSTMoutput1.wx, featNorm.xNorm) : [4096 x 363], [363 x *] -> [4096 x *]
|
||||
Validating --> LSTMoutput1.b = LearnableParameter() : -> [4096 x 1]
|
||||
Validating --> LSTMoutput1.wxxpb = Plus (LSTMoutput1.wxx, LSTMoutput1.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wh = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput1.Wco = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput1.Wcf = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput1.Wci = LearnableParameter() : -> [1024]
|
||||
Node 'LSTMoutput1.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
|
||||
Node 'LSTMoutput1.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=2, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512] -> [4096]
|
||||
Validating --> LSTMoutput1.wxxpbpwhh = Plus (LSTMoutput1.wxxpb, LSTMoutput1.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput1.G4 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.G3 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput1.unnamed165 = Plus (LSTMoutput1.G3, LSTMoutput1.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.ft = Sigmoid (LSTMoutput1.unnamed165) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.bft = ElementTimes (LSTMoutput1.ft, LSTMoutput1.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.G1 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput1.unnamed163 = Plus (LSTMoutput1.G1, LSTMoutput1.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.it = Sigmoid (LSTMoutput1.unnamed163) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.G2 = Slice (LSTMoutput1.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.unnamed164 = Tanh (LSTMoutput1.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.bit = ElementTimes (LSTMoutput1.it, LSTMoutput1.unnamed164) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.ct = Plus (LSTMoutput1.bft, LSTMoutput1.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcoct = DiagTimes (LSTMoutput1.Wco, LSTMoutput1.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.unnamed166 = Plus (LSTMoutput1.G4, LSTMoutput1.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.ot = Sigmoid (LSTMoutput1.unnamed166) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.unnamed167 = Tanh (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.mt = ElementTimes (LSTMoutput1.ot, LSTMoutput1.unnamed167) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.output = Times (LSTMoutput1.Wmr, LSTMoutput1.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
|
||||
Node 'LSTMoutput2.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512 x 1].
|
||||
Node 'LSTMoutput2.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=7, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput2.wxx = Times (LSTMoutput2.wx, LSTMoutput1.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
|
||||
Validating --> LSTMoutput2.b = LearnableParameter() : -> [4096 x 1]
|
||||
Validating --> LSTMoutput2.wxxpb = Plus (LSTMoutput2.wxx, LSTMoutput2.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wh = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput2.Wco = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput2.Wcf = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput2.Wci = LearnableParameter() : -> [1024]
|
||||
Node 'LSTMoutput2.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
|
||||
Node 'LSTMoutput2.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=8, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512] -> [4096]
|
||||
Validating --> LSTMoutput2.wxxpbpwhh = Plus (LSTMoutput2.wxxpb, LSTMoutput2.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput2.G4 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.G3 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput2.unnamed175 = Plus (LSTMoutput2.G3, LSTMoutput2.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.ft = Sigmoid (LSTMoutput2.unnamed175) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.bft = ElementTimes (LSTMoutput2.ft, LSTMoutput2.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.G1 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput2.unnamed173 = Plus (LSTMoutput2.G1, LSTMoutput2.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.it = Sigmoid (LSTMoutput2.unnamed173) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.G2 = Slice (LSTMoutput2.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.unnamed174 = Tanh (LSTMoutput2.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.bit = ElementTimes (LSTMoutput2.it, LSTMoutput2.unnamed174) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.ct = Plus (LSTMoutput2.bft, LSTMoutput2.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcoct = DiagTimes (LSTMoutput2.Wco, LSTMoutput2.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.unnamed176 = Plus (LSTMoutput2.G4, LSTMoutput2.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.ot = Sigmoid (LSTMoutput2.unnamed176) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.unnamed177 = Tanh (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.mt = ElementTimes (LSTMoutput2.ot, LSTMoutput2.unnamed177) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.output = Times (LSTMoutput2.Wmr, LSTMoutput2.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
|
||||
Node 'LSTMoutput3.wx' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512 x 1].
|
||||
Node 'LSTMoutput3.wx' (LearnableParameter operation): Initializing Parameter[4096 x 512 x 1] <- uniform(seed=13, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput3.wxx = Times (LSTMoutput3.wx, LSTMoutput2.output) : [4096 x 512 x 1], [512 x 1 x *] -> [4096 x *]
|
||||
Validating --> LSTMoutput3.b = LearnableParameter() : -> [4096 x 1]
|
||||
Validating --> LSTMoutput3.wxxpb = Plus (LSTMoutput3.wxx, LSTMoutput3.b) : [4096 x *], [4096 x 1] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wh = LearnableParameter() : -> [4096 x 0]
|
||||
Validating --> LSTMoutput3.Wco = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput3.Wcf = LearnableParameter() : -> [1024]
|
||||
Validating --> LSTMoutput3.Wci = LearnableParameter() : -> [1024]
|
||||
Node 'LSTMoutput3.Wh' (LearnableParameter operation) operation: Tensor shape was inferred as [4096 x 512].
|
||||
Node 'LSTMoutput3.Wh' (LearnableParameter operation): Initializing Parameter[4096 x 512] <- uniform(seed=14, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512] -> [4096]
|
||||
Validating --> LSTMoutput3.wxxpbpwhh = Plus (LSTMoutput3.wxxpb, LSTMoutput3.whh) : [4096 x 1 x *], [4096] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput3.G4 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.G3 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput3.unnamed185 = Plus (LSTMoutput3.G3, LSTMoutput3.Wcfdc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.ft = Sigmoid (LSTMoutput3.unnamed185) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.bft = ElementTimes (LSTMoutput3.ft, LSTMoutput3.dc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.G1 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024] -> [1024]
|
||||
Validating --> LSTMoutput3.unnamed183 = Plus (LSTMoutput3.G1, LSTMoutput3.Wcidc) : [1024 x 1 x *], [1024] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.it = Sigmoid (LSTMoutput3.unnamed183) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.G2 = Slice (LSTMoutput3.wxxpbpwhh) : [4096 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.unnamed184 = Tanh (LSTMoutput3.G2) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.bit = ElementTimes (LSTMoutput3.it, LSTMoutput3.unnamed184) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.ct = Plus (LSTMoutput3.bft, LSTMoutput3.bit) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcoct = DiagTimes (LSTMoutput3.Wco, LSTMoutput3.ct) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.unnamed186 = Plus (LSTMoutput3.G4, LSTMoutput3.Wcoct) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.ot = Sigmoid (LSTMoutput3.unnamed186) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.unnamed187 = Tanh (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.mt = ElementTimes (LSTMoutput3.ot, LSTMoutput3.unnamed187) : [1024 x 1 x *], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.output = Times (LSTMoutput3.Wmr, LSTMoutput3.mt) : [512 x 1024], [1024 x 1 x *] -> [512 x 1 x *]
|
||||
Node 'W' (LearnableParameter operation) operation: Tensor shape was inferred as [132 x 512 x 1].
|
||||
Node 'W' (LearnableParameter operation): Initializing Parameter[132 x 512 x 1] <- uniform(seed=19, range=0.050000*1.000000, onCPU=false).
|
||||
Validating --> unnamed193 = Times (W, LSTMoutput3.output) : [132 x 512 x 1], [512 x 1 x *] -> [132 x *]
|
||||
Validating --> b = LearnableParameter() : -> [132 x 1]
|
||||
Validating --> LSTMoutputW = Plus (unnamed193, b) : [132 x *], [132 x 1] -> [132 x 1 x *]
|
||||
Validating --> ce = CrossEntropyWithSoftmax (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> err = ErrorPrediction (labels, LSTMoutputW) : [132 x *], [132 x 1 x *] -> [1]
|
||||
Validating --> logPrior.prior = Mean (labels) : [132 x *] -> [132]
|
||||
Validating --> logPrior.logPrior = Log (logPrior.prior) : [132] -> [132]
|
||||
Validating --> scaledLogLikelihood = Minus (LSTMoutputW, logPrior.logPrior) : [132 x 1 x *], [132] -> [132 x 1 x *]
|
||||
|
||||
Validating network. 88 nodes to process in pass 2.
|
||||
|
||||
Validating --> LSTMoutput1.dh = PastValue (LSTMoutput1.output) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> LSTMoutput1.whh = Times (LSTMoutput1.Wh, LSTMoutput1.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput1.dc = PastValue (LSTMoutput1.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcfdc = DiagTimes (LSTMoutput1.Wcf, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput1.Wcidc = DiagTimes (LSTMoutput1.Wci, LSTMoutput1.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.dh = PastValue (LSTMoutput2.output) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> LSTMoutput2.whh = Times (LSTMoutput2.Wh, LSTMoutput2.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput2.dc = PastValue (LSTMoutput2.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcfdc = DiagTimes (LSTMoutput2.Wcf, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput2.Wcidc = DiagTimes (LSTMoutput2.Wci, LSTMoutput2.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.dh = PastValue (LSTMoutput3.output) : [512 x 1 x *] -> [512 x 1 x *]
|
||||
Validating --> LSTMoutput3.whh = Times (LSTMoutput3.Wh, LSTMoutput3.dh) : [4096 x 512], [512 x 1 x *] -> [4096 x 1 x *]
|
||||
Validating --> LSTMoutput3.dc = PastValue (LSTMoutput3.ct) : [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcfdc = DiagTimes (LSTMoutput3.Wcf, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
Validating --> LSTMoutput3.Wcidc = DiagTimes (LSTMoutput3.Wci, LSTMoutput3.dc) : [1024], [1024 x 1 x *] -> [1024 x 1 x *]
|
||||
|
||||
Validating network. 15 nodes to process in pass 3.
|
||||
|
||||
|
||||
Validating network, final pass.
|
||||
|
||||
|
||||
|
||||
29 out of 113 nodes do not share the minibatch layout with the input data.
|
||||
|
||||
Post-processing network complete.
|
||||
|
||||
08/16/2016 03:20:44: Created model with 113 nodes on GPU 0.
|
||||
|
||||
08/16/2016 03:20:44: Training criterion node(s):
|
||||
08/16/2016 03:20:44: ce = CrossEntropyWithSoftmax
|
||||
|
||||
08/16/2016 03:20:44: Evaluation criterion node(s):
|
||||
08/16/2016 03:20:44: err = ErrorPrediction
|
||||
|
||||
|
||||
Allocating matrices for forward and/or backward propagation.
|
||||
|
||||
Memory Sharing: Out of 217 matrices, 125 are shared as 56, and 92 are not shared.
|
||||
|
||||
{ LSTMoutput2.mt : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.dh : [512 x 1 x *]
|
||||
LSTMoutput3.wxx : [4096 x *] (gradient) }
|
||||
{ LSTMoutput2.Wco : [1024] (gradient)
|
||||
LSTMoutput3.dc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.dh : [512 x 1 x *]
|
||||
LSTMoutput1.wxx : [4096 x *] (gradient) }
|
||||
{ LSTMoutput1.mt : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.dh : [512 x 1 x *]
|
||||
LSTMoutput2.wxx : [4096 x *] (gradient) }
|
||||
{ LSTMoutput1.Wco : [1024] (gradient)
|
||||
LSTMoutput2.dc : [1024 x 1 x *] }
|
||||
{ LSTMoutput3.b : [4096 x 1] (gradient)
|
||||
LSTMoutput3.dh : [512 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.bft : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.dc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.G1 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.G2 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcfdc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcidc : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.unnamed163 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.unnamed175 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.Wh : [4096 x 512] (gradient)
|
||||
LSTMoutput2.G2 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcfdc : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.bft : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.dc : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.unnamed173 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed185 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.unnamed177 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.whh : [4096 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.Wcidc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.ft : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.Wcfdc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcidc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.ft : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.wxxpb : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput2.it : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.output : [512 x 1 x *] (gradient)
|
||||
LSTMoutput3.wxxpb : [4096 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.wx : [4096 x 512 x 1] (gradient)
|
||||
LSTMoutput2.wxxpb : [4096 x 1 x *] }
|
||||
{ LSTMoutput1.ct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] }
|
||||
{ LSTMoutput1.unnamed164 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcfdc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.G1 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.ft : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.Wci : [1024] (gradient)
|
||||
LSTMoutput2.G1 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.Wcf : [1024] (gradient)
|
||||
LSTMoutput2.it : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.ot : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.whh : [4096 x 1 x *] }
|
||||
{ LSTMoutput1.G4 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.G4 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.Wmr : [512 x 1024] (gradient)
|
||||
LSTMoutput2.wxx : [4096 x *] }
|
||||
{ LSTMoutput1.G3 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.Wcidc : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.whh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput2.G2 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.b : [4096 x 1] (gradient)
|
||||
LSTMoutput1.dh : [512 x 1 x *] (gradient)
|
||||
LSTMoutput2.unnamed174 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.Wmr : [512 x 1024] (gradient)
|
||||
LSTMoutput3.wxx : [4096 x *] }
|
||||
{ LSTMoutput1.wxxpbpwhh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput2.unnamed175 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.wx : [4096 x 363] (gradient)
|
||||
LSTMoutput1.wxxpb : [4096 x 1 x *] }
|
||||
{ LSTMoutput2.unnamed174 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcfdc : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.G3 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.Wcidc : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.Wcoct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.G4 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput2.b : [4096 x 1] (gradient)
|
||||
LSTMoutput2.dh : [512 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed184 : [1024 x 1 x *] }
|
||||
{ LSTMoutput3.output : [512 x 1 x *] (gradient)
|
||||
LSTMoutputW : [132 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.ft : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.bft : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.output : [512 x 1 x *] (gradient)
|
||||
LSTMoutput2.wxxpb : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput3.it : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.unnamed167 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.whh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput3.G2 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.unnamed166 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.wxxpbpwhh : [4096 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed185 : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.unnamed176 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] (gradient) }
|
||||
{ LSTMoutput3.wx : [4096 x 512 x 1] (gradient)
|
||||
LSTMoutput3.wxxpb : [4096 x 1 x *] }
|
||||
{ LSTMoutput2.ct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.wxxpbpwhh : [4096 x 1 x *] }
|
||||
{ LSTMoutput2.ot : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.whh : [4096 x 1 x *] }
|
||||
{ LSTMoutput3.mt : [1024 x 1 x *] (gradient)
|
||||
unnamed193 : [132 x *] (gradient) }
|
||||
{ LSTMoutput2.Wh : [4096 x 512] (gradient)
|
||||
LSTMoutput3.G2 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput1.bit : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed183 : [1024 x 1 x *] (gradient) }
|
||||
{ LSTMoutput3.Wmr : [512 x 1024] (gradient)
|
||||
unnamed193 : [132 x *] }
|
||||
{ LSTMoutput1.unnamed165 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.bft : [1024 x 1 x *] }
|
||||
{ LSTMoutputW : [132 x 1 x *]
|
||||
W : [132 x 512 x 1] (gradient) }
|
||||
{ LSTMoutput2.Wci : [1024] (gradient)
|
||||
LSTMoutput3.G1 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.dc : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.G1 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.ft : [1024 x 1 x *] }
|
||||
{ LSTMoutput2.Wcf : [1024] (gradient)
|
||||
LSTMoutput3.it : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.it : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.unnamed183 : [1024 x 1 x *] }
|
||||
{ LSTMoutput1.Wcoct : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput2.G4 : [1024 x 1 x *] (gradient)
|
||||
LSTMoutput3.G4 : [1024 x 1 x *] }
|
||||
|
||||
|
||||
08/16/2016 03:20:44: Training 13634692 parameters in 23 out of 23 parameter tensors and 104 nodes with gradient:
|
||||
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput1.Wcf' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput1.Wci' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput1.Wco' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput1.Wh' (LearnableParameter operation) : [4096 x 512]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput1.Wmr' (LearnableParameter operation) : [512 x 1024]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput1.b' (LearnableParameter operation) : [4096 x 1]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput1.wx' (LearnableParameter operation) : [4096 x 363]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput2.Wcf' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput2.Wci' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput2.Wco' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput2.Wh' (LearnableParameter operation) : [4096 x 512]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput2.Wmr' (LearnableParameter operation) : [512 x 1024]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput2.b' (LearnableParameter operation) : [4096 x 1]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput2.wx' (LearnableParameter operation) : [4096 x 512 x 1]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput3.Wcf' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput3.Wci' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput3.Wco' (LearnableParameter operation) : [1024]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput3.Wh' (LearnableParameter operation) : [4096 x 512]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput3.Wmr' (LearnableParameter operation) : [512 x 1024]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput3.b' (LearnableParameter operation) : [4096 x 1]
|
||||
08/16/2016 03:20:44: Node 'LSTMoutput3.wx' (LearnableParameter operation) : [4096 x 512 x 1]
|
||||
08/16/2016 03:20:44: Node 'W' (LearnableParameter operation) : [132 x 512 x 1]
|
||||
08/16/2016 03:20:44: Node 'b' (LearnableParameter operation) : [132 x 1]
|
||||
|
||||
|
||||
08/16/2016 03:20:44: Precomputing --> 3 PreCompute nodes found.
|
||||
|
||||
08/16/2016 03:20:44: featNorm.xMean = Mean()
|
||||
08/16/2016 03:20:44: featNorm.xStdDev = InvStdDev()
|
||||
08/16/2016 03:20:44: logPrior.prior = Mean()
|
||||
minibatchiterator: epoch 0: frames [0..252734] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
requiredata: determined feature kind as 33-dimensional 'USER' with frame shift 10.0 ms
|
||||
|
||||
08/16/2016 03:20:45: Precomputing --> Completed.
|
||||
|
||||
|
||||
08/16/2016 03:20:46: Starting Epoch 1: learning rate per sample = 0.001953 effective momentum = 0.000000 momentum as time constant = 0.0 samples
|
||||
minibatchiterator: epoch 0: frames [0..64] (first utterance at frame 0), data subset 0 of 1, with 1 datapasses
|
||||
|
||||
08/16/2016 03:20:46: Starting minibatch loop.
|
||||
08/16/2016 03:20:47: Epoch[ 1 of 1]-Minibatch[ 1- 10, 250.00%]: ce = 4.87453079 * 160; err = 0.90625000 * 160; time = 1.1338s; samplesPerSecond = 141.1
|
||||
08/16/2016 03:20:48: Epoch[ 1 of 1]-Minibatch[ 11- 20, 500.00%]: ce = 4.84628143 * 160; err = 0.69375000 * 160; time = 1.0409s; samplesPerSecond = 153.7
|
||||
08/16/2016 03:20:49: Finished Epoch[ 1 of 1]: [Training] ce = 4.85708837 * 418; err = 0.80382775 * 418; totalSamplesSeen = 418; learningRatePerSample = 0.001953125; epochTime=2.90303s
|
||||
08/16/2016 03:20:50: SGD: Saving checkpoint model 'C:\Users\svcphil\AppData\Local\Temp\cntk-test-20160816031849.416502\Examples\Speech\AN4_LSTM@release_gpu/Models/cntkSpeechLSTM.dnn'
|
||||
08/16/2016 03:20:51: CNTKCommandTrainEnd: speechTrain
|
||||
|
||||
08/16/2016 03:20:51: Action "train" complete.
|
||||
|
||||
08/16/2016 03:20:51: __COMPLETED__
|
|
@ -5,5 +5,5 @@
|
|||
ConfigDir=$TEST_DIR/../../../../../../Examples/Speech/AN4/Config
|
||||
|
||||
# cntkrun <CNTK config file name> <additional CNTK args>
|
||||
cntkrun LSTM-NDL.cntk "speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] parallelTrain=false" || exit $?
|
||||
cntkrun LSTM-NDL.cntk "speechTrain=[SGD=[maxEpochs=1]] speechTrain=[SGD=[epochSize=64]] speechTrain=[reader=[useMersenneTwisterRand=true]] parallelTrain=false" || exit $?
|
||||
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -6,4 +6,4 @@
|
|||
. $TEST_DIR/../run-timit-test-common
|
||||
|
||||
# cntkrun <CNTK config file name> <additional CNTK arg>
|
||||
cntkrun TIMIT_AdaptLearnRate.cntk "$CntkArguments" || exit $?
|
||||
cntkrun TIMIT_AdaptLearnRate.cntk "$CntkArguments TIMIT_TrainAdaptLR=[reader=[useMersenneTwisterRand=true]] TIMIT_TrainAdaptLR=[cvReader=[useMersenneTwisterRand=true]]" || exit $?
|
||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -6,7 +6,7 @@
|
|||
. $TEST_DIR/../run-timit-test-common
|
||||
|
||||
# Train:
|
||||
cntkrun TIMIT_TrainSimpleNetwork.cntk "$CntkArguments" || exit $?
|
||||
cntkrun TIMIT_TrainSimpleNetwork.cntk "$CntkArguments TIMIT_TrainSimple=[reader=[useMersenneTwisterRand=true]]" || exit $?
|
||||
|
||||
# Validate:
|
||||
cntkrun TIMIT_CrossValidateSimpleNetwork.cntk "$CntkArguments" || exit $?
|
||||
cntkrun TIMIT_CrossValidateSimpleNetwork.cntk "$CntkArguments" || exit $?
|
||||
|
|
Некоторые файлы не были показаны из-за слишком большого количества измененных файлов Показать больше
Загрузка…
Ссылка в новой задаче